[libvirt] [PATCH] qemu: Allow graceful domain destroy

Daniel Veillard veillard at redhat.com
Mon Aug 22 08:48:31 UTC 2011


On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
> This patch introduces support for domain destroying via 'quit' monitor
> command which gives qemu time to flush caches and therefore prevent
> disks corruption. However, qemu can be unresponsive and to prevent
> waiting indefinitely, execute command in a separate thread and wait
> reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit
> timeout, qemu is qemuProcessKill'ed which causes monitor close and
> therefore also thread being terminable.
> 
> The synchronization between qemu driver and monitor-quit thread is done
> through mutex and condition. However, this alone is not enough. If a
> condition is signalized but without anybody listening signal is lost. To
> prevent this a boolean variable is used that is set iff nobody is
> listening but condition would be signalized, or iff driver is waiting on
> given condition.
> ---
>  include/libvirt/libvirt.h.in |   11 ++-
>  src/qemu/qemu_driver.c       |  132 +++++++++++++++++++++++++++++++++++++++--
>  src/qemu/qemu_monitor.c      |   13 ++++
>  src/qemu/qemu_monitor.h      |    2 +
>  src/qemu/qemu_monitor_json.c |   22 +++++++
>  src/qemu/qemu_monitor_json.h |    1 +
>  src/qemu/qemu_monitor_text.c |   20 ++++++
>  src/qemu/qemu_monitor_text.h |    1 +
>  tools/virsh.c                |    8 ++-
>  9 files changed, 198 insertions(+), 12 deletions(-)
> 
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index aa29fb6..fa98b8d 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -919,10 +919,13 @@ virConnectPtr           virDomainGetConnect     (virDomainPtr domain);
>   * Domain creation and destruction
>   */
>  
> -/*
> - * typedef enum {
> - * } virDomainDestroyFlagsValues;
> - */
> +
> +typedef enum {
> +    VIR_DOMAIN_DESTROY_MONITOR = 1 << 0, /* In hypervisors supporting this,
> +                                            try to send 'quit' command prior
> +                                            to killing hypervisor */
> +} virDomainDestroyFlagsValues;

  Rather than describing the flag based on the QEmu command I would
rather describe it's intended effects, i.e.

  VIR_DOMAIN_DESTROY_FLUSH_CACHE = 1 << 0, /* In hypervisors supporting this,
                                              try to get cache flushed prior
                                              to destroying the domain */

>  virDomainPtr            virDomainCreateXML      (virConnectPtr conn,
>                                                   const char *xmlDesc,
>                                                   unsigned int flags);
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 421a98e..6585cb4 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -1564,6 +1564,42 @@ cleanup:
>      return ret;
>  }
>  
> +struct qemuDomainDestroyHelperData {
> +    struct qemud_driver *driver;
> +    virDomainObjPtr vm;
> +    qemuDomainObjPrivatePtr priv;
> +    virMutex lock;
> +    virCond cond;
> +    bool guard;
> +};
> +
> +static void
> +qemuDomainDestroyHelper(void *opaque)
> +{
> +    struct qemuDomainDestroyHelperData *data = opaque;
> +    struct qemud_driver *driver = data->driver;
> +    virDomainObjPtr vm = data->vm;
> +    qemuDomainObjPrivatePtr priv = data->priv;
> +
> +    /* Job was already obtained by caller */
> +    qemuDomainObjEnterMonitorWithDriver(driver, vm);
> +    qemuMonitorQuit(priv->mon);
> +    qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +    /* To avoid loosing signal, we need to remember
> +     * we tried to send one, but nobody was waiting
> +     * for it.
> +     */
> +    virMutexLock(&data->lock);
> +    if (data->guard) {
> +        virCondSignal(&data->cond);
> +    } else {
> +        data->guard = true;
> +    }
> +    virMutexUnlock(&data->lock);
> +}
> +
> +#define QEMU_QUIT_WAIT_SECONDS 5
>  
>  static int
>  qemuDomainDestroyFlags(virDomainPtr dom,
> @@ -1574,8 +1610,13 @@ qemuDomainDestroyFlags(virDomainPtr dom,
>      int ret = -1;
>      virDomainEventPtr event = NULL;
>      qemuDomainObjPrivatePtr priv;
> +    bool use_thread = false;
> +    bool use_kill = false;
> +    virThread destroy_thread;
> +    struct qemuDomainDestroyHelperData data;
> +    unsigned long long then;
>  
> -    virCheckFlags(0, -1);
> +    virCheckFlags(VIR_DOMAIN_DESTROY_MONITOR, -1);
>  
>      qemuDriverLock(driver);
>      vm  = virDomainFindByUUID(&driver->domains, dom->uuid);
> @@ -1590,12 +1631,14 @@ qemuDomainDestroyFlags(virDomainPtr dom,
>      priv = vm->privateData;
>      priv->fakeReboot = false;
>  
> -    /* Although qemuProcessStop does this already, there may
> -     * be an outstanding job active. We want to make sure we
> -     * can kill the process even if a job is active. Killing
> -     * it now means the job will be released
> -     */
> -    qemuProcessKill(vm);
> +    if (!flags) {

  please test the given flag here instead of assuming the bit we know
  about now.

> +        /* Although qemuProcessStop does this already, there may
> +         * be an outstanding job active. We want to make sure we
> +         * can kill the process even if a job is active. Killing
> +         * it now means the job will be released
> +         */
> +        qemuProcessKill(vm);
> +    }
>  
>      if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
>          goto cleanup;
> @@ -1606,6 +1649,70 @@ qemuDomainDestroyFlags(virDomainPtr dom,
>          goto endjob;
>      }
>  
> +    if (flags & VIR_DOMAIN_DESTROY_MONITOR) {
> +        /* Try to shutdown domain gracefully.
> +         * Send 'quit' to qemu. However, qemu can be unresponsive.
> +         * Therefore create a separate thread in which we execute
> +         * that monitor comand. Wait max QEMU_QUIT_WAIT_SECONDS.
> +         */
> +        if (virMutexInit(&data.lock) < 0) {
> +            virReportOOMError();
> +            goto endjob;
> +        }
> +
> +        if (virCondInit(&data.cond) < 0) {
> +            qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                            _("cannot initialize thread condition"));
> +            virMutexDestroy(&data.lock);
> +            goto endjob;
> +        }
> +
> +        data.driver = driver;
> +        data.vm = vm;
> +        data.priv = priv;
> +        data.guard = false;
> +
> +        if (virThreadCreate(&destroy_thread, true,
> +                            qemuDomainDestroyHelper, &data) < 0) {
> +            virReportSystemError(errno, "%s",
> +                                 _("unable to create destroy thread"));
> +            ignore_value(virCondDestroy(&data.cond));
> +            virMutexDestroy(&data.lock);
> +            goto endjob;
> +        }
> +        use_thread = true;
> +
> +        if (virTimeMs(&then) < 0)
> +            goto endjob;
> +
> +        then += QEMU_QUIT_WAIT_SECONDS * 1000;
> +
> +        /* How synchronization with destroy thread works:
> +         * Basically wait on data.cond. But because conditions
> +         * does not 'remember' that they have been signalized
> +         * data.guard is used. Practically, data.guard says
> +         * to destroy thread if we are waiting on condition
> +         * and to us whether we should even try.
> +         */
> +        virMutexLock(&data.lock);
> +        if (!data.guard) {
> +            data.guard = true;
> +            if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) {
> +                if (errno == ETIMEDOUT) {
> +                    use_kill = true;
> +                    data.guard = false;
> +                } else {
> +                    virMutexUnlock(&data.lock);
> +                    goto endjob;
> +                }
> +            }
> +        }
> +        virMutexUnlock(&data.lock);
> +
> +        if (use_kill)
> +            qemuProcessKill(vm);
> +    }
> +
>      qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED);
>      event = virDomainEventNewFromObj(vm,
>                                       VIR_DOMAIN_EVENT_STOPPED,
> @@ -1626,6 +1733,17 @@ endjob:
>          vm = NULL;
>  
>  cleanup:
> +    if (use_thread) {
> +        virMutexLock(&data.lock);
> +        if (!data.guard) {
> +            data.guard = true;
> +            ignore_value(virCondWait(&data.cond, &data.lock));
> +        }
> +        virMutexUnlock(&data.lock);
> +        ignore_value(virCondDestroy(&data.cond));
> +        virMutexDestroy(&data.lock);
> +        virThreadJoin(&destroy_thread);
> +    }
>      if (vm)
>          virDomainObjUnlock(vm);
>      if (event)
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index db6107c..41b9c5c 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
>          ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode);
>      return ret;
>  }
> +
> +int qemuMonitorQuit(qemuMonitorPtr mon)
> +{
> +    int ret;
> +
> +    VIR_DEBUG("mon=%p", mon);
> +
> +    if (mon->json)
> +        ret = qemuMonitorJSONQuit(mon);
> +    else
> +        ret = qemuMonitorTextQuit(mon);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index f241c9e..3fe6bb9 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
>                          virDomainBlockJobInfoPtr info,
>                          int mode);
>  
> +int qemuMonitorQuit(qemuMonitorPtr mon);
> +
>  /**
>   * When running two dd process and using <> redirection, we need a
>   * shell that will not truncate files.  These two strings serve that
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 7adfb26..1f078cd 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>      virJSONValueFree(reply);
>      return ret;
>  }
> +
> +
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon)
> +{
> +    int ret = -1;
> +    virJSONValuePtr cmd = NULL;
> +    virJSONValuePtr reply = NULL;
> +
> +    cmd = qemuMonitorJSONMakeCommand("quit", NULL);
> +
> +    if (!cmd)
> +        return -1;
> +
> +    ret =qemuMonitorJSONCommand(mon, cmd, &reply);
> +
> +    if (ret == 0)
> +        ret = qemuMonitorJSONCheckError(cmd, reply);
> +
> +    virJSONValueFree(cmd);
> +    virJSONValueFree(reply);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
> index 9512793..2a7df76 100644
> --- a/src/qemu/qemu_monitor_json.h
> +++ b/src/qemu/qemu_monitor_json.h
> @@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>                              virDomainBlockJobInfoPtr info,
>                              int mode);
>  
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon);
>  #endif /* QEMU_MONITOR_JSON_H */
> diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c
> index 335e39e..19c2690 100644
> --- a/src/qemu/qemu_monitor_text.c
> +++ b/src/qemu/qemu_monitor_text.c
> @@ -3046,3 +3046,23 @@ cleanup:
>      VIR_FREE(reply);
>      return ret;
>  }
> +
> +
> +int qemuMonitorTextQuit(qemuMonitorPtr mon)
> +{
> +    const char *cmd = "quit";
> +    char *reply = NULL;
> +    int ret = -1;
> +
> +    if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) {
> +        qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                        "%s", _("cannot run monitor command"));
> +        goto cleanup;
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    VIR_FREE(reply);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h
> index b250738..9ade938 100644
> --- a/src/qemu/qemu_monitor_text.h
> +++ b/src/qemu/qemu_monitor_text.h
> @@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon,
>                              virDomainBlockJobInfoPtr info,
>                              int mode);
>  
> +int qemuMonitorTextQuit(qemuMonitorPtr mon);
>  #endif /* QEMU_MONITOR_TEXT_H */
> diff --git a/tools/virsh.c b/tools/virsh.c
> index f1eb4ca..0c18d8b 100644
> --- a/tools/virsh.c
> +++ b/tools/virsh.c
> @@ -2565,6 +2565,7 @@ static const vshCmdInfo info_destroy[] = {
>  
>  static const vshCmdOptDef opts_destroy[] = {
>      {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")},
> +    {"monitor", VSH_OT_BOOL, 0, N_("try graceful destroy via monitor first")},
>      {NULL, 0, 0, NULL}
>  };
>  
> @@ -2574,6 +2575,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
>      virDomainPtr dom;
>      bool ret = true;
>      const char *name;
> +    int flags = 0;
>  
>      if (!vshConnectionUsability(ctl, ctl->conn))
>          return false;
> @@ -2581,7 +2583,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
>      if (!(dom = vshCommandOptDomain(ctl, cmd, &name)))
>          return false;
>  
> -    if (virDomainDestroy(dom) == 0) {
> +    if (vshCommandOptBool(cmd, "monitor"))
> +        flags |= VIR_DOMAIN_DESTROY_MONITOR;
> +
> +    if ((!flags && virDomainDestroy(dom) == 0) ||
> +        virDomainDestroyFlags(dom, flags) == 0)  {
>          vshPrint(ctl, _("Domain %s destroyed\n"), name);
>      } else {
>          vshError(ctl, _("Failed to destroy domain %s"), name);
> -- 
> 1.7.3.4

  This sounds right, but the devil lies in the detail, I would add far
  more debug in the thread and around synchronization as this is
  informations we would need if something goes really wrong.

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel at veillard.com  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/




More information about the libvir-list mailing list