[libvirt] [PATCH v3] qemu: qemuDomainRename and virDomainObjListNumOfDomains ABBA deadlock fix

Nikolay Shirokovskiy nshirokovskiy at virtuozzo.com
Mon Feb 1 09:34:19 UTC 2016


Please, disregard this patch.

On 01.02.2016 12:21, Nikolay Shirokovskiy wrote:
> A pretty nasty deadlock occurs while trying to rename a VM in parallel
> with virDomainObjListNumOfDomains.
> The short description of the problem is as follows:
> 
> Thread #1:
> 
> qemuDomainRename:
>     ------> aquires domain lock by qemuDomObjFromDomain
>        ---------> waits for domain list lock in any of the listed functions:
>           - virDomainObjListFindByName
>           - virDomainObjListRenameAddNew
>           - virDomainObjListRenameRemove
> 
> Thread #2:
> 
> virDomainObjListNumOfDomains:
>     ------> aquires domain list lock
>        ---------> waits for domain lock in virDomainObjListCount
> 
> Introduce generic virDomainObjListRename function for renaming domains.
> It aquires list lock in right order to avoid deadlock. Callback is used
> to make driver specific domain updates.
> ---
>  src/conf/virdomainobjlist.c |  98 +++++++++++++++++++-----------
>  src/conf/virdomainobjlist.h |  16 +++--
>  src/libvirt_private.syms    |   3 +-
>  src/qemu/qemu_driver.c      | 142 ++++++++++++++++++++------------------------
>  4 files changed, 142 insertions(+), 117 deletions(-)
> 
> diff --git a/src/conf/virdomainobjlist.c b/src/conf/virdomainobjlist.c
> index 7568f93..6c0be62 100644
> --- a/src/conf/virdomainobjlist.c
> +++ b/src/conf/virdomainobjlist.c
> @@ -313,40 +313,6 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
>  }
>  
>  
> -int
> -virDomainObjListRenameAddNew(virDomainObjListPtr doms,
> -                             virDomainObjPtr vm,
> -                             const char *name)
> -{
> -    int ret = -1;
> -    virObjectLock(doms);
> -
> -    /* Add new name into the hash table of domain names. */
> -    if (virHashAddEntry(doms->objsName, name, vm) < 0)
> -        goto cleanup;
> -
> -    /* Okay, this is crazy. virHashAddEntry() does not increment
> -     * the refcounter of @vm, but virHashRemoveEntry() does
> -     * decrement it. We need to work around it. */
> -    virObjectRef(vm);
> -
> -    ret = 0;
> - cleanup:
> -    virObjectUnlock(doms);
> -    return ret;
> -}
> -
> -
> -int
> -virDomainObjListRenameRemove(virDomainObjListPtr doms, const char *name)
> -{
> -    virObjectLock(doms);
> -    virHashRemoveEntry(doms->objsName, name);
> -    virObjectUnlock(doms);
> -    return 0;
> -}
> -
> -
>  /*
>   * The caller must hold a lock on the driver owning 'doms',
>   * and must also have locked 'dom', to ensure no one else
> @@ -372,6 +338,70 @@ void virDomainObjListRemove(virDomainObjListPtr doms,
>  }
>  
>  
> +/*
> + * The caller must hold a lock on dom. Callbacks should not sleep/wait othewise
> + * operations on all domains will be blocked as the callback is called with
> + * domains lock hold. Domain lock is dropped/reaquired during this operation
> + * thus domain consistency must not rely on this lock solely.
> + */
> +
> +int
> +virDomainObjListRename(virDomainObjListPtr doms,
> +                       virDomainObjPtr dom,
> +                       const char *new_name,
> +                       unsigned int flags,
> +                       virDomainObjListRenameCallback callback,
> +                       void *opaque)
> +{
> +    int ret = -1;
> +    char *old_name = NULL;
> +    int rc;
> +
> +    /* doms and dom locks must be attained in right order thus relock dom. */
> +    /* dom reference is touched for the benefit of those callers that
> +     * hold a lock on dom but not refcount it. */
> +    virObjectRef(dom);
> +    virObjectUnlock(dom);
> +    virObjectLock(doms);
> +    virObjectLock(dom);
> +    virObjectUnref(dom);
> +
> +    if (STREQ(dom->def->name, new_name)) {
> +        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                       _("Can't rename domain to itself"));
> +        goto cleanup;
> +    }
> +
> +    if (virHashLookup(doms->objsName, new_name) != NULL) {
> +        virReportError(VIR_ERR_OPERATION_INVALID,
> +                       _("domain with name '%s' already exists"),
> +                       new_name);
> +        goto cleanup;
> +    }
> +
> +    if (virHashAddEntry(doms->objsName, new_name, dom) < 0)
> +        goto cleanup;
> +
> +    /* Okay, this is crazy. virHashAddEntry() does not increment
> +     * the refcounter of @dom, but virHashRemoveEntry() does
> +     * decrement it. We need to work around it. */
> +    virObjectRef(dom);
> +
> +    if (VIR_STRDUP(old_name, dom->def->name) < 0)
> +        goto cleanup;
> +
> +    rc = callback(dom, new_name, flags, opaque);
> +    virHashRemoveEntry(doms->objsName, rc < 0 ? new_name : old_name);
> +    if (rc < 0)
> +        goto cleanup;
> +
> +    ret = 0;
> + cleanup:
> +    virObjectUnlock(doms);
> +    VIR_FREE(old_name);
> +    return ret;
> +}
> +
>  /* The caller must hold lock on 'doms' in addition to 'virDomainObjListRemove'
>   * requirements
>   *
> diff --git a/src/conf/virdomainobjlist.h b/src/conf/virdomainobjlist.h
> index f479598..c0f856c 100644
> --- a/src/conf/virdomainobjlist.h
> +++ b/src/conf/virdomainobjlist.h
> @@ -51,11 +51,17 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
>                                      unsigned int flags,
>                                      virDomainDefPtr *oldDef);
>  
> -int virDomainObjListRenameAddNew(virDomainObjListPtr doms,
> -                                 virDomainObjPtr vm,
> -                                 const char *name);
> -int virDomainObjListRenameRemove(virDomainObjListPtr doms,
> -                                 const char *name);
> +typedef int (*virDomainObjListRenameCallback)(virDomainObjPtr dom,
> +                                              const char *new_name,
> +                                              unsigned int flags,
> +                                              void *opaque);
> +int virDomainObjListRename(virDomainObjListPtr doms,
> +                           virDomainObjPtr dom,
> +                           const char *new_name,
> +                           unsigned int flags,
> +                           virDomainObjListRenameCallback callback,
> +                           void *opaque);
> +
>  void virDomainObjListRemove(virDomainObjListPtr doms,
>                              virDomainObjPtr dom);
>  void virDomainObjListRemoveLocked(virDomainObjListPtr doms,
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 3d0ec9d..3ef3468 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -893,8 +893,7 @@ virDomainObjListNew;
>  virDomainObjListNumOfDomains;
>  virDomainObjListRemove;
>  virDomainObjListRemoveLocked;
> -virDomainObjListRenameAddNew;
> -virDomainObjListRenameRemove;
> +virDomainObjListRename;
>  
>  
>  # cpu/cpu.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index abcdbe6..c0e750d 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -19976,14 +19976,14 @@ qemuDomainSetUserPassword(virDomainPtr dom,
>  }
>  
>  
> -static int qemuDomainRename(virDomainPtr dom,
> -                            const char *new_name,
> -                            unsigned int flags)
> +static int
> +qemuDomainRenameCallback(virDomainObjPtr vm,
> +                         const char *new_name,
> +                         unsigned int flags,
> +                         void *opaque)
>  {
> -    virQEMUDriverPtr driver = dom->conn->privateData;
> +    virQEMUDriverPtr driver = opaque;
>      virQEMUDriverConfigPtr cfg = NULL;
> -    virDomainObjPtr vm = NULL;
> -    virDomainObjPtr tmp_dom = NULL;
>      virObjectEventPtr event_new = NULL;
>      virObjectEventPtr event_old = NULL;
>      int ret = -1;
> @@ -19993,73 +19993,16 @@ static int qemuDomainRename(virDomainPtr dom,
>  
>      virCheckFlags(0, ret);
>  
> -    if (!(vm = qemuDomObjFromDomain(dom)))
> -        goto cleanup;
> -
> -    if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
> -        goto cleanup;
> -
>      cfg = virQEMUDriverGetConfig(driver);
>  
> -    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> -        goto cleanup;
> -
> -    if (virDomainObjIsActive(vm)) {
> -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                       _("cannot rename active domain"));
> -        goto endjob;
> -    }
> -
> -    if (!vm->persistent) {
> -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                       _("cannot rename a transient domain"));
> -        goto endjob;
> -    }
> -
> -    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
> -        virReportError(VIR_ERR_OPERATION_INVALID,
> -                       "%s", _("domain has to be shutoff before renaming"));
> -        goto endjob;
> -    }
> -
> -    if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
> -        virReportError(VIR_ERR_OPERATION_INVALID,
> -                       "%s", _("cannot rename domain with snapshots"));
> -        goto endjob;
> -    }
> -
> -    if (STREQ(vm->def->name, new_name)) {
> -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                       _("Can't rename domain to itself"));
> -        goto endjob;
> -    }
> -
> -    /*
> -     * This is a rather racy check, but still better than reporting
> -     * internal error.  And since new_name != name here, there's no
> -     * deadlock imminent.
> -     */
> -    tmp_dom = virDomainObjListFindByName(driver->domains, new_name);
> -    if (tmp_dom) {
> -        virObjectUnlock(tmp_dom);
> -        virObjectUnref(tmp_dom);
> -        virReportError(VIR_ERR_OPERATION_INVALID,
> -                       _("domain with name '%s' already exists"),
> -                       new_name);
> -        goto endjob;
> -    }
> -
>      if (VIR_STRDUP(new_dom_name, new_name) < 0)
> -        goto endjob;
> +        goto cleanup;
>  
>      if (!(old_dom_cfg_file = virDomainConfigFile(cfg->configDir,
>                                                   vm->def->name))) {
> -        goto endjob;
> +        goto cleanup;
>      }
>  
> -    if (virDomainObjListRenameAddNew(driver->domains, vm, new_name) < 0)
> -        goto endjob;
> -
>      event_old = virDomainEventLifecycleNewFromObj(vm,
>                                              VIR_DOMAIN_EVENT_UNDEFINED,
>                                              VIR_DOMAIN_EVENT_UNDEFINED_RENAMED);
> @@ -20080,21 +20023,12 @@ static int qemuDomainRename(virDomainPtr dom,
>          goto rollback;
>      }
>  
> -    /* Remove old domain name from table. */
> -    virDomainObjListRenameRemove(driver->domains, old_dom_name);
> -
>      event_new = virDomainEventLifecycleNewFromObj(vm,
>                                                VIR_DOMAIN_EVENT_DEFINED,
>                                                VIR_DOMAIN_EVENT_DEFINED_RENAMED);
> -
> -    /* Success, domain has been renamed. */
>      ret = 0;
>  
> - endjob:
> -    qemuDomainObjEndJob(driver, vm);
> -
>   cleanup:
> -    virDomainObjEndAPI(&vm);
>      VIR_FREE(old_dom_cfg_file);
>      VIR_FREE(old_dom_name);
>      VIR_FREE(new_dom_name);
> @@ -20109,9 +20043,65 @@ static int qemuDomainRename(virDomainPtr dom,
>          vm->def->name = old_dom_name;
>          old_dom_name = NULL;
>      }
> +    goto cleanup;
> +}
>  
> -    virDomainObjListRenameRemove(driver->domains, new_name);
> -    goto endjob;
> +static int qemuDomainRename(virDomainPtr dom,
> +                            const char *new_name,
> +                            unsigned int flags)
> +{
> +    virQEMUDriverPtr driver = dom->conn->privateData;
> +    virDomainObjPtr vm = NULL;
> +    int ret = -1;
> +
> +    virCheckFlags(0, ret);
> +
> +    if (!(vm = qemuDomObjFromDomain(dom)))
> +        goto cleanup;
> +
> +    if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
> +        goto cleanup;
> +
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
> +    if (virDomainObjIsActive(vm)) {
> +        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                       _("cannot rename active domain"));
> +        goto endjob;
> +    }
> +
> +    if (!vm->persistent) {
> +        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                       _("cannot rename a transient domain"));
> +        goto endjob;
> +    }
> +
> +    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
> +        virReportError(VIR_ERR_OPERATION_INVALID,
> +                       "%s", _("domain has to be shutoff before renaming"));
> +        goto endjob;
> +    }
> +
> +    if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
> +        virReportError(VIR_ERR_OPERATION_INVALID,
> +                       "%s", _("cannot rename domain with snapshots"));
> +        goto endjob;
> +    }
> +
> +    if (virDomainObjListRename(driver->domains, vm, new_name, flags,
> +                               qemuDomainRenameCallback, driver) < 0)
> +        goto endjob;
> +
> +    /* Success, domain has been renamed. */
> +    ret = 0;
> +
> + endjob:
> +    qemuDomainObjEndJob(driver, vm);
> +
> + cleanup:
> +    virDomainObjEndAPI(&vm);
> +    return ret;
>  }
>  
>  static virHypervisorDriver qemuHypervisorDriver = {
> 




More information about the libvir-list mailing list