[libvirt] [PATCHv3.5 18/43] snapshot: prevent migration from stranding snapshot data

Eric Blake eblake at redhat.com
Tue Aug 30 22:10:47 UTC 2011


Migration is another case of stranding metadata.  And since
snapshot metadata is arbitrarily large, there's no way to
shoehorn it into the migration cookie of migration v3.

This patch consolidates two existing locations for migration
validation into one helper function, then enhances that function
to also do the new checks.  If we could always trust the source
to validate migration, then the destination would not have to
do anything; but since older servers that did not do checking
can migrate to newer destinations, we have to repeat some of
the same checks on the destination; meanwhile, we want to
detect failures as soon as possible.  With migration v2, this
means that validation will reject things at Prepare on the
destination if the XML exposes the problem, otherwise at Perform
on the source; with migration v3, this means that validation
will reject things at Begin on the source, or if the source
is old and the XML exposes the problem, then at Prepare on the
destination.

A future patch will make it possible to manually recreate the
snapshot metadata on the destination.  But even that is limited,
since if we delete the snapshot metadata prior to migration,
then we won't know the name of the current snapshot to pass
along; and if we delete the snapshot metadata after migration
and use the v3 migration cookie to pass along the name of the
current snapshot, then we need a way to bypass the fact that
this patch refuses migration with snapshot metadata present.

So eventually, we may have to introduce migration protocol v4
that allows feature negotiation and an arbitrary number of
handshake exchanges, so as to pass as many rpc calls as needed
to transfer all the snapshot xml hierarchy.

But all of that is thoughts for the future; for now, the best
course of action is to quit early, rather than get into a
funky state of stale metadata.

* src/qemu/qemu_migration.h (qemuMigrationIsAllowed): Make static.
* src/qemu/qemu_migration.c (qemuMigrationIsAllowed): Alter
signature, and allow checks for both outgoing and incoming.
(qemuMigrationBegin, qemuMigrationPrepareAny)
(qemuMigrationPerformJob): Update callers.
---

diff in v3.5: move checks to qemu_migration rather than qemu_driver

I still need to post a v4 of my pending series, which has some more
rebase updates; mainly to resolve comments that Dan has already made
about v3.

 src/qemu/qemu_migration.c |   48 ++++++++++++++++++++++++++++++--------------
 src/qemu/qemu_migration.h |    2 -
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index d239cc8..f849d05 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -701,9 +701,36 @@ error:
     return NULL;
 }

-bool
-qemuMigrationIsAllowed(virDomainDefPtr def)
+/* Validate whether the domain is safe to migrate.  If vm is NULL,
+ * then this is being run in the v2 Prepare stage on the destination
+ * (where we only have the target xml); if vm is provided, then this
+ * is being run in either v2 Perform or v3 Begin (where we also have
+ * access to all of the domain's metadata, such as whether it is
+ * marked autodestroy or has snapshots).  While it would be nice to
+ * assume that checking on source is sufficient to prevent ever
+ * talking to the destination in the first place, we are stuck with
+ * the fact that older servers did not do checks on the source. */
+static bool
+qemuMigrationIsAllowed(struct qemud_driver *driver, virDomainObjPtr vm,
+                       virDomainDefPtr def)
 {
+    int nsnapshots;
+
+    if (vm) {
+        if (qemuProcessAutoDestroyActive(driver, vm)) {
+            qemuReportError(VIR_ERR_OPERATION_INVALID,
+                            "%s", _("domain is marked for auto destroy"));
+            return false;
+        }
+        if ((nsnapshots = virDomainSnapshotObjListNum(&vm->snapshots, 0))) {
+            qemuReportError(VIR_ERR_OPERATION_INVALID,
+                            _("cannot migrate domain with %d snapshots"),
+                            nsnapshots);
+            return false;
+        }
+
+        def = vm->def;
+    }
     if (def->nhostdevs > 0) {
         qemuReportError(VIR_ERR_OPERATION_INVALID,
             "%s", _("Domain with assigned host devices cannot be migrated"));
@@ -915,13 +942,7 @@ char *qemuMigrationBegin(struct qemud_driver *driver,
     if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT)
         qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3);

-    if (qemuProcessAutoDestroyActive(driver, vm)) {
-        qemuReportError(VIR_ERR_OPERATION_INVALID,
-                        "%s", _("domain is marked for auto destroy"));
-        goto cleanup;
-    }
-
-    if (!qemuMigrationIsAllowed(vm->def))
+    if (!qemuMigrationIsAllowed(driver, vm, NULL))
         goto cleanup;

     if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0)))
@@ -990,7 +1011,7 @@ qemuMigrationPrepareAny(struct qemud_driver *driver,
                                         VIR_DOMAIN_XML_INACTIVE)))
         goto cleanup;

-    if (!qemuMigrationIsAllowed(def))
+    if (!qemuMigrationIsAllowed(driver, NULL, def))
         goto cleanup;

     /* Target domain name, maybe renamed. */
@@ -2194,11 +2215,8 @@ qemuMigrationPerformJob(struct qemud_driver *driver,
         goto endjob;
     }

-    if (qemuProcessAutoDestroyActive(driver, vm)) {
-        qemuReportError(VIR_ERR_OPERATION_INVALID,
-                        "%s", _("domain is marked for auto destroy"));
-        goto endjob;
-    }
+    if (!qemuMigrationIsAllowed(driver, vm, NULL))
+        goto cleanup;

     resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;

diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index ace411d..ec70422 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -73,8 +73,6 @@ bool qemuMigrationJobIsActive(virDomainObjPtr vm,
 int qemuMigrationJobFinish(struct qemud_driver *driver, virDomainObjPtr obj)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;

-bool qemuMigrationIsAllowed(virDomainDefPtr def)
-    ATTRIBUTE_NONNULL(1);
 int qemuMigrationSetOffline(struct qemud_driver *driver,
                             virDomainObjPtr vm);

-- 
1.7.4.4




More information about the libvir-list mailing list