[PATCH] libxl: Fix domain shutdown
Michal Privoznik
mprivozn at redhat.com
Mon Feb 22 12:46:00 UTC 2021
On 2/20/21 1:19 AM, Jim Fehlig wrote:
> Commit fa30ee04a2 caused a regression in normal domain shutown.
> Initiating a shutdown from within the domain or via 'virsh shutdown'
> does cause the guest OS running in the domain to shutdown, but libvirt
> never reaps the domain so it is always shown in a running state until
> calling 'virsh destroy'.
>
> The shutdown thread is also an internal user of the driver shutdown
> machinery and eventually calls libxlDomainDestroyInternal where
> the ignoreDeathEvent inhibitor is set, but running in a thread
> introduces the possibility of racing with the death event from
> libxl. This can be prevented by setting ignoreDeathEvent before
> running the shutdown thread.
>
> An additional improvement is to handle the destroy event synchronously
> instead of spawning a thread. The time consuming aspects of destroying
> a domain have been completed when the destroy event is delivered.
>
> Signed-off-by: Jim Fehlig <jfehlig at suse.com>
> ---
> src/libxl/libxl_domain.c | 115 ++++++++++++++++++---------------------
> 1 file changed, 54 insertions(+), 61 deletions(-)
>
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index d59153fffa..32dc503089 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -685,42 +658,62 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event *event)
> * after calling libxl_domain_suspend() are handled by its callers.
> */
> if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND)
> - goto error;
> + goto cleanup;
>
> - /*
> - * Start a thread to handle shutdown. We don't want to be tying up
> - * libxl's event machinery by doing a potentially lengthy shutdown.
> - */
> - shutdown_info = g_new0(struct libxlShutdownThreadInfo, 1);
> + vm = virDomainObjListFindByID(driver->domains, event->domid);
> + if (!vm) {
> + /* Nothing to do if we can't find the virDomainObj */
> + goto cleanup;
> + }
>
> - shutdown_info->driver = driver;
> - shutdown_info->event = (libxl_event *)event;
> - name = g_strdup_printf("ev-%d", event->domid);
> - if (event->type == LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN)
> - ret = virThreadCreateFull(&thread, false, libxlDomainShutdownThread,
> - name, false, shutdown_info);
> - else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH)
> - ret = virThreadCreateFull(&thread, false, libxlDomainDeathThread,
> - name, false, shutdown_info);
> + if (event->type == LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) {
> + libxlDomainObjPrivatePtr priv = vm->privateData;
> + struct libxlShutdownThreadInfo *shutdown_info = NULL;
> + virThread thread;
> + g_autofree char *name = NULL;
>
> - if (ret < 0) {
> /*
> - * Not much we can do on error here except log it.
> + * Start a thread to handle shutdown. We don't want to be tying up
> + * libxl's event machinery by doing a potentially lengthy shutdown.
> */
> - VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
> - goto error;
> - }
> + shutdown_info = g_new0(struct libxlShutdownThreadInfo, 1);
>
> - /*
> - * libxlShutdownThreadInfo and libxl_event are freed in shutdown thread
> - */
> - return;
> + shutdown_info->driver = driver;
> + shutdown_info->vm = vm;
Aaah, wanted to suggest that you increment @vm's refcounter here, because ..
> + shutdown_info->event = (libxl_event *)event;
> + name = g_strdup_printf("ev-%d", event->domid);
> + /*
> + * Cleanup will be handled by the shutdown thread.
> + * Ignore the forthcoming death event from libxl
> + */
> + priv->ignoreDeathEvent = true;
> + if (virThreadCreateFull(&thread, false, libxlDomainShutdownThread,
> + name, false, shutdown_info) < 0) {
.. what if this thread is scheduled after [1]? But then noticed this
subtle ..
> + /*
> + * Not much we can do on error here except log it.
> + */
> + VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
> + VIR_FREE(shutdown_info);
> + goto cleanup;
> + }
> + /*
> + * virDomainObjEndAPI is called in the shutdown thread, where
> + * libxlShutdownThreadInfo and libxl_event are also freed.
> + */
> + return;
.. return. So the refcount is okay :-)
> + } else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH) {
> + /*
> + * On death the domain is cleaned up from Xen's perspective.
> + * Cleanup on the libvirt side can be done synchronously.
> + */
> + libxlDomainHandleDeath(driver, vm);
> + }
>
> - error:
> + cleanup:
> + virDomainObjEndAPI(&vm);
1: this ^^^
> cfg = libxlDriverConfigGet(driver);
> /* Cast away any const */
> libxl_event_free(cfg->ctx, (libxl_event *)event);
> - VIR_FREE(shutdown_info);
> }
>
> char *
>
Reviewed-by: Michal Privoznik <mprivozn at redhat.com>
Michal
More information about the libvir-list
mailing list