[libvirt] [PATCH v3] qemu: qemuDomainRename and virDomainObjListNumOfDomains ABBA deadlock fix
Nikolay Shirokovskiy
nshirokovskiy at virtuozzo.com
Mon Feb 1 09:34:19 UTC 2016
Please, disregard this patch.
On 01.02.2016 12:21, Nikolay Shirokovskiy wrote:
> A pretty nasty deadlock occurs while trying to rename a VM in parallel
> with virDomainObjListNumOfDomains.
> The short description of the problem is as follows:
>
> Thread #1:
>
> qemuDomainRename:
> ------> aquires domain lock by qemuDomObjFromDomain
> ---------> waits for domain list lock in any of the listed functions:
> - virDomainObjListFindByName
> - virDomainObjListRenameAddNew
> - virDomainObjListRenameRemove
>
> Thread #2:
>
> virDomainObjListNumOfDomains:
> ------> aquires domain list lock
> ---------> waits for domain lock in virDomainObjListCount
>
> Introduce generic virDomainObjListRename function for renaming domains.
> It aquires list lock in right order to avoid deadlock. Callback is used
> to make driver specific domain updates.
> ---
> src/conf/virdomainobjlist.c | 98 +++++++++++++++++++-----------
> src/conf/virdomainobjlist.h | 16 +++--
> src/libvirt_private.syms | 3 +-
> src/qemu/qemu_driver.c | 142 ++++++++++++++++++++------------------------
> 4 files changed, 142 insertions(+), 117 deletions(-)
>
> diff --git a/src/conf/virdomainobjlist.c b/src/conf/virdomainobjlist.c
> index 7568f93..6c0be62 100644
> --- a/src/conf/virdomainobjlist.c
> +++ b/src/conf/virdomainobjlist.c
> @@ -313,40 +313,6 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
> }
>
>
> -int
> -virDomainObjListRenameAddNew(virDomainObjListPtr doms,
> - virDomainObjPtr vm,
> - const char *name)
> -{
> - int ret = -1;
> - virObjectLock(doms);
> -
> - /* Add new name into the hash table of domain names. */
> - if (virHashAddEntry(doms->objsName, name, vm) < 0)
> - goto cleanup;
> -
> - /* Okay, this is crazy. virHashAddEntry() does not increment
> - * the refcounter of @vm, but virHashRemoveEntry() does
> - * decrement it. We need to work around it. */
> - virObjectRef(vm);
> -
> - ret = 0;
> - cleanup:
> - virObjectUnlock(doms);
> - return ret;
> -}
> -
> -
> -int
> -virDomainObjListRenameRemove(virDomainObjListPtr doms, const char *name)
> -{
> - virObjectLock(doms);
> - virHashRemoveEntry(doms->objsName, name);
> - virObjectUnlock(doms);
> - return 0;
> -}
> -
> -
> /*
> * The caller must hold a lock on the driver owning 'doms',
> * and must also have locked 'dom', to ensure no one else
> @@ -372,6 +338,70 @@ void virDomainObjListRemove(virDomainObjListPtr doms,
> }
>
>
> +/*
> + * The caller must hold a lock on dom. Callbacks should not sleep/wait othewise
> + * operations on all domains will be blocked as the callback is called with
> + * domains lock hold. Domain lock is dropped/reaquired during this operation
> + * thus domain consistency must not rely on this lock solely.
> + */
> +
> +int
> +virDomainObjListRename(virDomainObjListPtr doms,
> + virDomainObjPtr dom,
> + const char *new_name,
> + unsigned int flags,
> + virDomainObjListRenameCallback callback,
> + void *opaque)
> +{
> + int ret = -1;
> + char *old_name = NULL;
> + int rc;
> +
> + /* doms and dom locks must be attained in right order thus relock dom. */
> + /* dom reference is touched for the benefit of those callers that
> + * hold a lock on dom but not refcount it. */
> + virObjectRef(dom);
> + virObjectUnlock(dom);
> + virObjectLock(doms);
> + virObjectLock(dom);
> + virObjectUnref(dom);
> +
> + if (STREQ(dom->def->name, new_name)) {
> + virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> + _("Can't rename domain to itself"));
> + goto cleanup;
> + }
> +
> + if (virHashLookup(doms->objsName, new_name) != NULL) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + _("domain with name '%s' already exists"),
> + new_name);
> + goto cleanup;
> + }
> +
> + if (virHashAddEntry(doms->objsName, new_name, dom) < 0)
> + goto cleanup;
> +
> + /* Okay, this is crazy. virHashAddEntry() does not increment
> + * the refcounter of @dom, but virHashRemoveEntry() does
> + * decrement it. We need to work around it. */
> + virObjectRef(dom);
> +
> + if (VIR_STRDUP(old_name, dom->def->name) < 0)
> + goto cleanup;
> +
> + rc = callback(dom, new_name, flags, opaque);
> + virHashRemoveEntry(doms->objsName, rc < 0 ? new_name : old_name);
> + if (rc < 0)
> + goto cleanup;
> +
> + ret = 0;
> + cleanup:
> + virObjectUnlock(doms);
> + VIR_FREE(old_name);
> + return ret;
> +}
> +
> /* The caller must hold lock on 'doms' in addition to 'virDomainObjListRemove'
> * requirements
> *
> diff --git a/src/conf/virdomainobjlist.h b/src/conf/virdomainobjlist.h
> index f479598..c0f856c 100644
> --- a/src/conf/virdomainobjlist.h
> +++ b/src/conf/virdomainobjlist.h
> @@ -51,11 +51,17 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
> unsigned int flags,
> virDomainDefPtr *oldDef);
>
> -int virDomainObjListRenameAddNew(virDomainObjListPtr doms,
> - virDomainObjPtr vm,
> - const char *name);
> -int virDomainObjListRenameRemove(virDomainObjListPtr doms,
> - const char *name);
> +typedef int (*virDomainObjListRenameCallback)(virDomainObjPtr dom,
> + const char *new_name,
> + unsigned int flags,
> + void *opaque);
> +int virDomainObjListRename(virDomainObjListPtr doms,
> + virDomainObjPtr dom,
> + const char *new_name,
> + unsigned int flags,
> + virDomainObjListRenameCallback callback,
> + void *opaque);
> +
> void virDomainObjListRemove(virDomainObjListPtr doms,
> virDomainObjPtr dom);
> void virDomainObjListRemoveLocked(virDomainObjListPtr doms,
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 3d0ec9d..3ef3468 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -893,8 +893,7 @@ virDomainObjListNew;
> virDomainObjListNumOfDomains;
> virDomainObjListRemove;
> virDomainObjListRemoveLocked;
> -virDomainObjListRenameAddNew;
> -virDomainObjListRenameRemove;
> +virDomainObjListRename;
>
>
> # cpu/cpu.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index abcdbe6..c0e750d 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -19976,14 +19976,14 @@ qemuDomainSetUserPassword(virDomainPtr dom,
> }
>
>
> -static int qemuDomainRename(virDomainPtr dom,
> - const char *new_name,
> - unsigned int flags)
> +static int
> +qemuDomainRenameCallback(virDomainObjPtr vm,
> + const char *new_name,
> + unsigned int flags,
> + void *opaque)
> {
> - virQEMUDriverPtr driver = dom->conn->privateData;
> + virQEMUDriverPtr driver = opaque;
> virQEMUDriverConfigPtr cfg = NULL;
> - virDomainObjPtr vm = NULL;
> - virDomainObjPtr tmp_dom = NULL;
> virObjectEventPtr event_new = NULL;
> virObjectEventPtr event_old = NULL;
> int ret = -1;
> @@ -19993,73 +19993,16 @@ static int qemuDomainRename(virDomainPtr dom,
>
> virCheckFlags(0, ret);
>
> - if (!(vm = qemuDomObjFromDomain(dom)))
> - goto cleanup;
> -
> - if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
> - goto cleanup;
> -
> cfg = virQEMUDriverGetConfig(driver);
>
> - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> - goto cleanup;
> -
> - if (virDomainObjIsActive(vm)) {
> - virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> - _("cannot rename active domain"));
> - goto endjob;
> - }
> -
> - if (!vm->persistent) {
> - virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> - _("cannot rename a transient domain"));
> - goto endjob;
> - }
> -
> - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
> - virReportError(VIR_ERR_OPERATION_INVALID,
> - "%s", _("domain has to be shutoff before renaming"));
> - goto endjob;
> - }
> -
> - if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
> - virReportError(VIR_ERR_OPERATION_INVALID,
> - "%s", _("cannot rename domain with snapshots"));
> - goto endjob;
> - }
> -
> - if (STREQ(vm->def->name, new_name)) {
> - virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> - _("Can't rename domain to itself"));
> - goto endjob;
> - }
> -
> - /*
> - * This is a rather racy check, but still better than reporting
> - * internal error. And since new_name != name here, there's no
> - * deadlock imminent.
> - */
> - tmp_dom = virDomainObjListFindByName(driver->domains, new_name);
> - if (tmp_dom) {
> - virObjectUnlock(tmp_dom);
> - virObjectUnref(tmp_dom);
> - virReportError(VIR_ERR_OPERATION_INVALID,
> - _("domain with name '%s' already exists"),
> - new_name);
> - goto endjob;
> - }
> -
> if (VIR_STRDUP(new_dom_name, new_name) < 0)
> - goto endjob;
> + goto cleanup;
>
> if (!(old_dom_cfg_file = virDomainConfigFile(cfg->configDir,
> vm->def->name))) {
> - goto endjob;
> + goto cleanup;
> }
>
> - if (virDomainObjListRenameAddNew(driver->domains, vm, new_name) < 0)
> - goto endjob;
> -
> event_old = virDomainEventLifecycleNewFromObj(vm,
> VIR_DOMAIN_EVENT_UNDEFINED,
> VIR_DOMAIN_EVENT_UNDEFINED_RENAMED);
> @@ -20080,21 +20023,12 @@ static int qemuDomainRename(virDomainPtr dom,
> goto rollback;
> }
>
> - /* Remove old domain name from table. */
> - virDomainObjListRenameRemove(driver->domains, old_dom_name);
> -
> event_new = virDomainEventLifecycleNewFromObj(vm,
> VIR_DOMAIN_EVENT_DEFINED,
> VIR_DOMAIN_EVENT_DEFINED_RENAMED);
> -
> - /* Success, domain has been renamed. */
> ret = 0;
>
> - endjob:
> - qemuDomainObjEndJob(driver, vm);
> -
> cleanup:
> - virDomainObjEndAPI(&vm);
> VIR_FREE(old_dom_cfg_file);
> VIR_FREE(old_dom_name);
> VIR_FREE(new_dom_name);
> @@ -20109,9 +20043,65 @@ static int qemuDomainRename(virDomainPtr dom,
> vm->def->name = old_dom_name;
> old_dom_name = NULL;
> }
> + goto cleanup;
> +}
>
> - virDomainObjListRenameRemove(driver->domains, new_name);
> - goto endjob;
> +static int qemuDomainRename(virDomainPtr dom,
> + const char *new_name,
> + unsigned int flags)
> +{
> + virQEMUDriverPtr driver = dom->conn->privateData;
> + virDomainObjPtr vm = NULL;
> + int ret = -1;
> +
> + virCheckFlags(0, ret);
> +
> + if (!(vm = qemuDomObjFromDomain(dom)))
> + goto cleanup;
> +
> + if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
> + goto cleanup;
> +
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> + goto cleanup;
> +
> + if (virDomainObjIsActive(vm)) {
> + virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> + _("cannot rename active domain"));
> + goto endjob;
> + }
> +
> + if (!vm->persistent) {
> + virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> + _("cannot rename a transient domain"));
> + goto endjob;
> + }
> +
> + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + "%s", _("domain has to be shutoff before renaming"));
> + goto endjob;
> + }
> +
> + if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + "%s", _("cannot rename domain with snapshots"));
> + goto endjob;
> + }
> +
> + if (virDomainObjListRename(driver->domains, vm, new_name, flags,
> + qemuDomainRenameCallback, driver) < 0)
> + goto endjob;
> +
> + /* Success, domain has been renamed. */
> + ret = 0;
> +
> + endjob:
> + qemuDomainObjEndJob(driver, vm);
> +
> + cleanup:
> + virDomainObjEndAPI(&vm);
> + return ret;
> }
>
> static virHypervisorDriver qemuHypervisorDriver = {
>
More information about the libvir-list
mailing list