[libvirt] [PATCH RFC] qemu: add some synchronizations for snapshot
Peter Krempa
pkrempa at redhat.com
Tue Sep 2 14:38:24 UTC 2014
On 08/28/14 13:27, Jincheng Miao wrote:
> Currently it lacks synchronization to modify domain's snapshot object list,
> that race condition causes unsafe access to some freed snapshot objects.
> Therefore, this patch wraps all access of snapshot object list in vm job lock.
>
> Only the qemuDomainSnapshotCreateXML is not synchronized, it is related to
> QEMU_ASYNC_JOB_SNAPSHOT async job for --disk-only snapshot. I am not sure
> if it's ok to remove it, so need your ideas for qemuDomainSnapshotCreateXML.
>
> Signed-off-by: Jincheng Miao <jmiao at redhat.com>
> ---
> src/qemu/qemu_driver.c | 137 +++++++++++++++++++++++++++++++++++++++---------
> 1 files changed, 112 insertions(+), 25 deletions(-)
>
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 73959da..7329aa9 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -13609,6 +13609,7 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char **names,
> {
> virDomainObjPtr vm = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13619,8 +13620,12 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char **names,
> if (virDomainSnapshotListNamesEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> + goto cleanup;
This one isn't necessary. The domain obj is locked the whole time. Also
returning a list at a certain point in time is okay.
> +
> n = virDomainSnapshotObjListGetNames(vm->snapshots, NULL, names, nameslen,
> flags);
> + ignore_value(qemuDomainObjEndJob(driver, vm));
>
> cleanup:
> if (vm)
> @@ -13633,6 +13638,7 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
> {
> virDomainObjPtr vm = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13643,8 +13649,13 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
> if (virDomainSnapshotNumEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> + goto cleanup;
> +
Again, no need to enter the job here.
> n = virDomainSnapshotObjListNum(vm->snapshots, NULL, flags);
>
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13657,6 +13668,7 @@ qemuDomainListAllSnapshots(virDomainPtr domain, virDomainSnapshotPtr **snaps,
> {
> virDomainObjPtr vm = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13667,8 +13679,13 @@ qemuDomainListAllSnapshots(virDomainPtr domain, virDomainSnapshotPtr **snaps,
> if (virDomainListAllSnapshotsEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> + goto cleanup;
> +
Same here, listing function doesn't need it.
> n = virDomainListSnapshots(vm->snapshots, NULL, domain, snaps, flags);
>
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13684,6 +13701,7 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm = NULL;
> virDomainSnapshotObjPtr snap = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13694,12 +13712,18 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotListChildrenNamesEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
Also here having a job shouldn't be necessary. The snapshot list should
be consistent at all times.
> n = virDomainSnapshotObjListGetNames(vm->snapshots, snap, names, nameslen,
> flags);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13713,6 +13737,7 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm = NULL;
> virDomainSnapshotObjPtr snap = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13723,11 +13748,17 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotNumChildrenEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
Same here.
> n = virDomainSnapshotObjListNum(vm->snapshots, snap, flags);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13742,6 +13773,7 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm = NULL;
> virDomainSnapshotObjPtr snap = NULL;
> int n = -1;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
> VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
> @@ -13752,12 +13784,18 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotListAllChildrenEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
no locking necessary.
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
> n = virDomainListSnapshots(vm->snapshots, snap, snapshot->domain, snaps,
> flags);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13771,6 +13809,7 @@ static virDomainSnapshotPtr qemuDomainSnapshotLookupByName(virDomainPtr domain,
> virDomainObjPtr vm;
> virDomainSnapshotObjPtr snap = NULL;
> virDomainSnapshotPtr snapshot = NULL;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(0, NULL);
>
> @@ -13780,11 +13819,17 @@ static virDomainSnapshotPtr qemuDomainSnapshotLookupByName(virDomainPtr domain,
> if (virDomainSnapshotLookupByNameEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromName(vm, name)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
>
> + if (!(snap = qemuSnapObjFromName(vm, name)))
> + goto endjob;
no locking necessary.
> +
> snapshot = virGetDomainSnapshot(domain, snap->def->name);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13796,6 +13841,7 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
> {
> virDomainObjPtr vm;
> int ret = -1;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(0, -1);
>
> @@ -13805,8 +13851,13 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
> if (virDomainHasCurrentSnapshotEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> + goto cleanup;
> +
Same here
> ret = (vm->current_snapshot != NULL);
>
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13820,6 +13871,7 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm;
> virDomainSnapshotObjPtr snap = NULL;
> virDomainSnapshotPtr parent = NULL;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(0, NULL);
>
> @@ -13829,18 +13881,24 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotGetParentEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
Same here
> if (!snap->def->parent) {
> virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT,
> _("snapshot '%s' does not have a parent"),
> snap->def->name);
> - goto cleanup;
> + goto endjob;
> }
>
> parent = virGetDomainSnapshot(snapshot->domain, snap->def->parent);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13852,6 +13910,7 @@ static virDomainSnapshotPtr qemuDomainSnapshotCurrent(virDomainPtr domain,
> {
> virDomainObjPtr vm;
> virDomainSnapshotPtr snapshot = NULL;
> + virQEMUDriverPtr driver = domain->conn->privateData;
>
> virCheckFlags(0, NULL);
>
> @@ -13861,14 +13920,20 @@ static virDomainSnapshotPtr qemuDomainSnapshotCurrent(virDomainPtr domain,
> if (virDomainSnapshotCurrentEnsureACL(domain->conn, vm->def) < 0)
> goto cleanup;
>
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> + goto cleanup;
> +
Same here.
> if (!vm->current_snapshot) {
> virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT, "%s",
> _("the domain does not have a current snapshot"));
> - goto cleanup;
> + goto endjob;
> }
>
> snapshot = virGetDomainSnapshot(domain, vm->current_snapshot->def->name);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13882,6 +13947,7 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr snapshot,
> char *xml = NULL;
> virDomainSnapshotObjPtr snap = NULL;
> char uuidstr[VIR_UUID_STRING_BUFLEN];
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(VIR_DOMAIN_XML_SECURE, NULL);
>
> @@ -13891,13 +13957,19 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotGetXMLDescEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
As long as the internal strucutres are consistent, no locking is required.
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
> virUUIDFormat(snapshot->domain->uuid, uuidstr);
>
> xml = virDomainSnapshotDefFormat(uuidstr, snap->def, flags, 0);
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13911,6 +13983,7 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm = NULL;
> int ret = -1;
> virDomainSnapshotObjPtr snap = NULL;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(0, -1);
>
> @@ -13920,12 +13993,18 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotIsCurrentEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
Same here
> ret = (vm->current_snapshot &&
> STREQ(snapshot->name, vm->current_snapshot->def->name));
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -13940,6 +14019,7 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
> virDomainObjPtr vm = NULL;
> int ret = -1;
> virDomainSnapshotObjPtr snap = NULL;
> + virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
>
> virCheckFlags(0, -1);
>
> @@ -13949,14 +14029,20 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotHasMetadataEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
> goto cleanup;
Same here.
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
> /* XXX Someday, we should recognize internal snapshots in qcow2
> * images that are not tied to a libvirt snapshot; if we ever do
> * that, then we would have a reason to return 0 here. */
> ret = 1;
>
> + endjob:
> + ignore_value(qemuDomainObjEndJob(driver, vm));
> +
> cleanup:
> if (vm)
> virObjectUnlock(vm);
> @@ -14027,9 +14113,12 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> goto cleanup;
> }
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> goto cleanup;
The problem lies here. This would modify the state without entering the
job. This one is necessary.
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
> if (!vm->persistent &&
> snap->def->state != VIR_DOMAIN_RUNNING &&
> snap->def->state != VIR_DOMAIN_PAUSED &&
> @@ -14038,13 +14127,13 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> _("transient domain needs to request run or pause "
> "to revert to inactive snapshot"));
> - goto cleanup;
> + goto endjob;
> }
>
> if (virDomainSnapshotIsExternal(snap)) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> _("revert to external snapshot not supported yet"));
> - goto cleanup;
> + goto endjob;
> }
>
> if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) {
> @@ -14052,7 +14141,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY,
> _("snapshot '%s' lacks domain '%s' rollback info"),
> snap->def->name, vm->def->name);
> - goto cleanup;
> + goto endjob;
> }
> if (virDomainObjIsActive(vm) &&
> !(snap->def->state == VIR_DOMAIN_RUNNING
> @@ -14061,7 +14150,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED))) {
> virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, "%s",
> _("must respawn qemu to start inactive snapshot"));
> - goto cleanup;
> + goto endjob;
> }
> }
>
> @@ -14070,7 +14159,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> vm->current_snapshot->def->current = false;
> if (qemuDomainSnapshotWriteMetadata(vm, vm->current_snapshot,
> cfg->snapshotDir) < 0)
> - goto cleanup;
> + goto endjob;
> vm->current_snapshot = NULL;
> /* XXX Should we restore vm->current_snapshot after this point
> * in the failure cases where we know there was no change? */
> @@ -14085,12 +14174,9 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
> if (snap->def->dom) {
> config = virDomainDefCopy(snap->def->dom, caps, driver->xmlopt, true);
> if (!config)
> - goto cleanup;
> + goto endjob;
> }
>
> - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> - goto cleanup;
> -
> switch ((virDomainState) snap->def->state) {
> case VIR_DOMAIN_RUNNING:
> case VIR_DOMAIN_PAUSED:
> @@ -14400,9 +14486,13 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
> if (virDomainSnapshotDeleteEnsureACL(snapshot->domain->conn, vm->def) < 0)
> goto cleanup;
>
> - if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> +
> + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> goto cleanup;
Here too. This would change the domain state without interlocking and
enter the monitor meanwhile. Here it's fine.
>
> + if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
> + goto endjob;
> +
> if (!metadata_only) {
> if (!(flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY) &&
> virDomainSnapshotIsExternal(snap))
> @@ -14415,13 +14505,10 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> _("deletion of %d external disk snapshots not "
> "supported yet"), external);
> - goto cleanup;
> + goto endjob;
> }
> }
>
> - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
> - goto cleanup;
> -
> if (flags & (VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN |
> VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY)) {
> rem.driver = driver;
>
This patch introduces too much locking in unnecessary places. I'll post
a V2 trimmed of the parts that don't need locking.
Peter
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20140902/bfcaf9bf/attachment-0001.sig>
More information about the libvir-list
mailing list