[libvirt] [PATCH RFC] qemu: add some synchronizations for snapshot

Peter Krempa pkrempa at redhat.com
Tue Sep 2 14:38:24 UTC 2014


On 08/28/14 13:27, Jincheng Miao wrote:
> Currently it lacks synchronization to modify domain's snapshot object list,
> that race condition causes unsafe access to some freed snapshot objects.
> Therefore, this patch wraps all access of snapshot object list in vm job lock.
> 
> Only the qemuDomainSnapshotCreateXML is not synchronized, it is related to
> QEMU_ASYNC_JOB_SNAPSHOT async job for --disk-only snapshot. I am not sure
> if it's ok to remove it, so need your ideas for qemuDomainSnapshotCreateXML.
> 
> Signed-off-by: Jincheng Miao <jmiao at redhat.com>
> ---
>  src/qemu/qemu_driver.c |  137 +++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 112 insertions(+), 25 deletions(-)
> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 73959da..7329aa9 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -13609,6 +13609,7 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char **names,
>  {
>      virDomainObjPtr vm = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13619,8 +13620,12 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char **names,
>      if (virDomainSnapshotListNamesEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> +        goto cleanup;

This one isn't necessary. The domain obj is locked the whole time. Also
returning a list at a certain point in time is okay.

> +
>      n = virDomainSnapshotObjListGetNames(vm->snapshots, NULL, names, nameslen,
>                                           flags);
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
>  
>   cleanup:
>      if (vm)
> @@ -13633,6 +13638,7 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
>  {
>      virDomainObjPtr vm = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13643,8 +13649,13 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
>      if (virDomainSnapshotNumEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> +        goto cleanup;
> +

Again, no need to enter the job here.

>      n = virDomainSnapshotObjListNum(vm->snapshots, NULL, flags);
>  
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13657,6 +13668,7 @@ qemuDomainListAllSnapshots(virDomainPtr domain, virDomainSnapshotPtr **snaps,
>  {
>      virDomainObjPtr vm = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13667,8 +13679,13 @@ qemuDomainListAllSnapshots(virDomainPtr domain, virDomainSnapshotPtr **snaps,
>      if (virDomainListAllSnapshotsEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> +        goto cleanup;
> +


Same here, listing function doesn't need it.

>      n = virDomainListSnapshots(vm->snapshots, NULL, domain, snaps, flags);
>  
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13684,6 +13701,7 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm = NULL;
>      virDomainSnapshotObjPtr snap = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13694,12 +13712,18 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotListChildrenNamesEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;
>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +

Also here having a job shouldn't be necessary. The snapshot list should
be consistent at all times.

>      n = virDomainSnapshotObjListGetNames(vm->snapshots, snap, names, nameslen,
>                                           flags);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13713,6 +13737,7 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm = NULL;
>      virDomainSnapshotObjPtr snap = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13723,11 +13748,17 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotNumChildrenEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;
>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +

Same here.

>      n = virDomainSnapshotObjListNum(vm->snapshots, snap, flags);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13742,6 +13773,7 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm = NULL;
>      virDomainSnapshotObjPtr snap = NULL;
>      int n = -1;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
>                    VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13752,12 +13784,18 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotListAllChildrenEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;

no locking necessary.

>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +
>      n = virDomainListSnapshots(vm->snapshots, snap, snapshot->domain, snaps,
>                                 flags);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13771,6 +13809,7 @@ static virDomainSnapshotPtr qemuDomainSnapshotLookupByName(virDomainPtr domain,
>      virDomainObjPtr vm;
>      virDomainSnapshotObjPtr snap = NULL;
>      virDomainSnapshotPtr snapshot = NULL;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(0, NULL);
>  
> @@ -13780,11 +13819,17 @@ static virDomainSnapshotPtr qemuDomainSnapshotLookupByName(virDomainPtr domain,
>      if (virDomainSnapshotLookupByNameEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromName(vm, name)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;
>  
> +    if (!(snap = qemuSnapObjFromName(vm, name)))
> +        goto endjob;

no locking necessary.

> +
>      snapshot = virGetDomainSnapshot(domain, snap->def->name);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13796,6 +13841,7 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
>  {
>      virDomainObjPtr vm;
>      int ret = -1;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(0, -1);
>  
> @@ -13805,8 +13851,13 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
>      if (virDomainHasCurrentSnapshotEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> +        goto cleanup;
> +

Same here

>      ret = (vm->current_snapshot != NULL);
>  
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13820,6 +13871,7 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm;
>      virDomainSnapshotObjPtr snap = NULL;
>      virDomainSnapshotPtr parent = NULL;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(0, NULL);
>  
> @@ -13829,18 +13881,24 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotGetParentEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;
>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +

Same here

>      if (!snap->def->parent) {
>          virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT,
>                         _("snapshot '%s' does not have a parent"),
>                         snap->def->name);
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>      parent = virGetDomainSnapshot(snapshot->domain, snap->def->parent);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13852,6 +13910,7 @@ static virDomainSnapshotPtr qemuDomainSnapshotCurrent(virDomainPtr domain,
>  {
>      virDomainObjPtr vm;
>      virDomainSnapshotPtr snapshot = NULL;
> +    virQEMUDriverPtr driver = domain->conn->privateData;
>  
>      virCheckFlags(0, NULL);
>  
> @@ -13861,14 +13920,20 @@ static virDomainSnapshotPtr qemuDomainSnapshotCurrent(virDomainPtr domain,
>      if (virDomainSnapshotCurrentEnsureACL(domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> +        goto cleanup;
> +

Same here.

>      if (!vm->current_snapshot) {
>          virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT, "%s",
>                         _("the domain does not have a current snapshot"));
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>      snapshot = virGetDomainSnapshot(domain, vm->current_snapshot->def->name);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13882,6 +13947,7 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr snapshot,
>      char *xml = NULL;
>      virDomainSnapshotObjPtr snap = NULL;
>      char uuidstr[VIR_UUID_STRING_BUFLEN];
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(VIR_DOMAIN_XML_SECURE, NULL);
>  
> @@ -13891,13 +13957,19 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotGetXMLDescEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;

As long as the internal strucutres are consistent, no locking is required.

>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +
>      virUUIDFormat(snapshot->domain->uuid, uuidstr);
>  
>      xml = virDomainSnapshotDefFormat(uuidstr, snap->def, flags, 0);
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13911,6 +13983,7 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm = NULL;
>      int ret = -1;
>      virDomainSnapshotObjPtr snap = NULL;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(0, -1);
>  
> @@ -13920,12 +13993,18 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotIsCurrentEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;
>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +

Same here

>      ret = (vm->current_snapshot &&
>             STREQ(snapshot->name, vm->current_snapshot->def->name));
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -13940,6 +14019,7 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
>      virDomainObjPtr vm = NULL;
>      int ret = -1;
>      virDomainSnapshotObjPtr snap = NULL;
> +    virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>  
>      virCheckFlags(0, -1);
>  
> @@ -13949,14 +14029,20 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotHasMetadataEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
>          goto cleanup;

Same here.

>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +
>      /* XXX Someday, we should recognize internal snapshots in qcow2
>       * images that are not tied to a libvirt snapshot; if we ever do
>       * that, then we would have a reason to return 0 here.  */
>      ret = 1;
>  
> + endjob:
> +    ignore_value(qemuDomainObjEndJob(driver, vm));
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -14027,9 +14113,12 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>          goto cleanup;
>      }
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
>          goto cleanup;

The problem lies here. This would modify the state without entering the
job. This one is necessary.

>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +
>      if (!vm->persistent &&
>          snap->def->state != VIR_DOMAIN_RUNNING &&
>          snap->def->state != VIR_DOMAIN_PAUSED &&
> @@ -14038,13 +14127,13 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>          virReportError(VIR_ERR_OPERATION_INVALID, "%s",
>                         _("transient domain needs to request run or pause "
>                           "to revert to inactive snapshot"));
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>      if (virDomainSnapshotIsExternal(snap)) {
>          virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                         _("revert to external snapshot not supported yet"));
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>      if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) {
> @@ -14052,7 +14141,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>              virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY,
>                             _("snapshot '%s' lacks domain '%s' rollback info"),
>                             snap->def->name, vm->def->name);
> -            goto cleanup;
> +            goto endjob;
>          }
>          if (virDomainObjIsActive(vm) &&
>              !(snap->def->state == VIR_DOMAIN_RUNNING
> @@ -14061,7 +14150,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>                        VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED))) {
>              virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, "%s",
>                             _("must respawn qemu to start inactive snapshot"));
> -            goto cleanup;
> +            goto endjob;
>          }
>      }
>  
> @@ -14070,7 +14159,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>          vm->current_snapshot->def->current = false;
>          if (qemuDomainSnapshotWriteMetadata(vm, vm->current_snapshot,
>                                              cfg->snapshotDir) < 0)
> -            goto cleanup;
> +            goto endjob;
>          vm->current_snapshot = NULL;
>          /* XXX Should we restore vm->current_snapshot after this point
>           * in the failure cases where we know there was no change?  */
> @@ -14085,12 +14174,9 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
>      if (snap->def->dom) {
>          config = virDomainDefCopy(snap->def->dom, caps, driver->xmlopt, true);
>          if (!config)
> -            goto cleanup;
> +            goto endjob;
>      }
>  
> -    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> -        goto cleanup;
> -
>      switch ((virDomainState) snap->def->state) {
>      case VIR_DOMAIN_RUNNING:
>      case VIR_DOMAIN_PAUSED:
> @@ -14400,9 +14486,13 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
>      if (virDomainSnapshotDeleteEnsureACL(snapshot->domain->conn, vm->def) < 0)
>          goto cleanup;
>  
> -    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +
> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
>          goto cleanup;

Here too. This would change the domain state without interlocking and
enter the monitor meanwhile. Here it's fine.

>  
> +    if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +        goto endjob;
> +
>      if (!metadata_only) {
>          if (!(flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY) &&
>              virDomainSnapshotIsExternal(snap))
> @@ -14415,13 +14505,10 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>                             _("deletion of %d external disk snapshots not "
>                               "supported yet"), external);
> -            goto cleanup;
> +            goto endjob;
>          }
>      }
>  
> -    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> -        goto cleanup;
> -
>      if (flags & (VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN |
>                   VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY)) {
>          rem.driver = driver;
> 

This patch introduces too much locking in unnecessary places. I'll post
a V2 trimmed of the parts that don't need locking.

Peter

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20140902/bfcaf9bf/attachment-0001.sig>


More information about the libvir-list mailing list