[PATCH] libxl: Fix domain shutdown

Michal Privoznik mprivozn at redhat.com
Mon Feb 22 12:46:00 UTC 2021


On 2/20/21 1:19 AM, Jim Fehlig wrote:
> Commit fa30ee04a2 caused a regression in normal domain shutown.
> Initiating a shutdown from within the domain or via 'virsh shutdown'
> does cause the guest OS running in the domain to shutdown, but libvirt
> never reaps the domain so it is always shown in a running state until
> calling 'virsh destroy'.
> 
> The shutdown thread is also an internal user of the driver shutdown
> machinery and eventually calls libxlDomainDestroyInternal where
> the ignoreDeathEvent inhibitor is set, but running in a thread
> introduces the possibility of racing with the death event from
> libxl. This can be prevented by setting ignoreDeathEvent before
> running the shutdown thread.
> 
> An additional improvement is to handle the destroy event synchronously
> instead of spawning a thread. The time consuming aspects of destroying
> a domain have been completed when the destroy event is delivered.
> 
> Signed-off-by: Jim Fehlig <jfehlig at suse.com>
> ---
>   src/libxl/libxl_domain.c | 115 ++++++++++++++++++---------------------
>   1 file changed, 54 insertions(+), 61 deletions(-)
> 
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index d59153fffa..32dc503089 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c

> @@ -685,42 +658,62 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event *event)
>        * after calling libxl_domain_suspend() are handled by its callers.
>        */
>       if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND)
> -        goto error;
> +        goto cleanup;
>   
> -    /*
> -     * Start a thread to handle shutdown.  We don't want to be tying up
> -     * libxl's event machinery by doing a potentially lengthy shutdown.
> -     */
> -    shutdown_info = g_new0(struct libxlShutdownThreadInfo, 1);
> +    vm = virDomainObjListFindByID(driver->domains, event->domid);
> +    if (!vm) {
> +        /* Nothing to do if we can't find the virDomainObj */
> +        goto cleanup;
> +    }
>   
> -    shutdown_info->driver = driver;
> -    shutdown_info->event = (libxl_event *)event;
> -    name = g_strdup_printf("ev-%d", event->domid);
> -    if (event->type == LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN)
> -        ret = virThreadCreateFull(&thread, false, libxlDomainShutdownThread,
> -                                  name, false, shutdown_info);
> -    else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH)
> -        ret = virThreadCreateFull(&thread, false, libxlDomainDeathThread,
> -                                  name, false, shutdown_info);
> +    if (event->type == LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) {
> +        libxlDomainObjPrivatePtr priv = vm->privateData;
> +        struct libxlShutdownThreadInfo *shutdown_info = NULL;
> +        virThread thread;
> +        g_autofree char *name = NULL;
>   
> -    if (ret < 0) {
>           /*
> -         * Not much we can do on error here except log it.
> +         * Start a thread to handle shutdown.  We don't want to be tying up
> +         * libxl's event machinery by doing a potentially lengthy shutdown.
>            */
> -        VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
> -        goto error;
> -    }
> +        shutdown_info = g_new0(struct libxlShutdownThreadInfo, 1);
>   
> -    /*
> -     * libxlShutdownThreadInfo and libxl_event are freed in shutdown thread
> -     */
> -    return;
> +        shutdown_info->driver = driver;
> +        shutdown_info->vm = vm;

Aaah, wanted to suggest that you increment @vm's refcounter here, because ..

> +        shutdown_info->event = (libxl_event *)event;
> +        name = g_strdup_printf("ev-%d", event->domid);
> +        /*
> +         * Cleanup will be handled by the shutdown thread.
> +         * Ignore the forthcoming death event from libxl
> +         */
> +        priv->ignoreDeathEvent = true;
> +        if (virThreadCreateFull(&thread, false, libxlDomainShutdownThread,
> +                                name, false, shutdown_info) < 0) {

.. what if this thread is scheduled after [1]? But then noticed this 
subtle ..

> +            /*
> +             * Not much we can do on error here except log it.
> +             */
> +            VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
> +            VIR_FREE(shutdown_info);
> +            goto cleanup;
> +        }
> +        /*
> +         * virDomainObjEndAPI is called in the shutdown thread, where
> +         * libxlShutdownThreadInfo and libxl_event are also freed.
> +         */
> +        return;

.. return. So the refcount is okay :-)

> +    } else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH) {
> +        /*
> +         * On death the domain is cleaned up from Xen's perspective.
> +         * Cleanup on the libvirt side can be done synchronously.
> +         */
> +        libxlDomainHandleDeath(driver, vm);
> +    }
>   
> - error:
> + cleanup:
> +    virDomainObjEndAPI(&vm);

1: this ^^^

>       cfg = libxlDriverConfigGet(driver);
>       /* Cast away any const */
>       libxl_event_free(cfg->ctx, (libxl_event *)event);
> -    VIR_FREE(shutdown_info);
>   }
>   
>   char *
> 

Reviewed-by: Michal Privoznik <mprivozn at redhat.com>

Michal




More information about the libvir-list mailing list