[libvirt] [PATCH] qemu: keep pre-migration domain state after failed migration

Martin Kletzander mkletzan at redhat.com
Thu Feb 6 16:33:14 UTC 2014


Couple of codepaths shared the same code which can be moved out to a
function and on one of such places, qemuMigrationConfirmPhase(), the
domain was resumed even if it wasn't running before the migration
started.

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407

Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
---
 src/qemu/qemu_domain.h    |   3 +-
 src/qemu/qemu_migration.c | 117 +++++++++++++++++++++++++---------------------
 2 files changed, 66 insertions(+), 54 deletions(-)

diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 6a92351..84624f9 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -1,7 +1,7 @@
 /*
  * qemu_domain.h: QEMU domain private state
  *
- * Copyright (C) 2006-2013 Red Hat, Inc.
+ * Copyright (C) 2006-2014 Red Hat, Inc.
  * Copyright (C) 2006 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
     char *origname;
     int nbdPort; /* Port used for migration with NBD */
     unsigned short migrationPort;
+    int preMigrationState;

     virChrdevsPtr devs;

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 407fb70..1a29efa 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1075,6 +1075,53 @@ error:
     return NULL;
 }

+static void
+qemuDomainObjStorePreMigrationState(virDomainObjPtr vm)
+{
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    priv->preMigrationState = virDomainObjGetState(vm, NULL);
+
+    VIR_DEBUG("Storing pre-migration state=%d domain=%p",
+              priv->preMigrationState, vm);
+}
+
+/* Returns true if the domain was resumed, false otherwise */
+static bool
+qemuDomainObjRestorePreMigrationState(virConnectPtr conn, virDomainObjPtr vm)
+{
+    virQEMUDriverPtr driver = conn->privateData;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int state = virDomainObjGetState(vm, NULL);
+    bool ret = false;
+
+    VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
+              driver, vm, priv->preMigrationState, state);
+
+    if (state == VIR_DOMAIN_PAUSED &&
+        priv->preMigrationState == VIR_DOMAIN_RUNNING) {
+        /* This is basically the only restore possibility that's safe
+         * and we should attemt to do */
+
+        VIR_DEBUG("Restoring pre-migration state due to migration error");
+
+        /* we got here through some sort of failure; start the domain again */
+        if (qemuProcessStartCPUs(driver, vm, conn,
+                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
+            /* Hm, we already know we are in error here.  We don't want to
+             * overwrite the previous error, though, so we just throw something
+             * to the logs and hope for the best */
+            VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name);
+            goto cleanup;
+        }
+        ret = true;
+    }
+
+ cleanup:
+    priv->preMigrationState = VIR_DOMAIN_NOSTATE;
+    return ret;
+}
+
 /**
  * qemuMigrationStartNBDServer:
  * @driver: qemu driver
@@ -1952,8 +1999,8 @@ cleanup:


 /* The caller is supposed to lock the vm and start a migration job. */
-static char
-*qemuMigrationBeginPhase(virQEMUDriverPtr driver,
+static char *
+qemuMigrationBeginPhase(virQEMUDriverPtr driver,
                          virDomainObjPtr vm,
                          const char *xmlin,
                          const char *dname,
@@ -2075,6 +2122,8 @@ qemuMigrationBegin(virConnectPtr conn,
         asyncJob = QEMU_ASYNC_JOB_NONE;
     }

+    qemuDomainObjStorePreMigrationState(vm);
+
     if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("domain is not running"));
@@ -2744,22 +2793,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
         /* cancel any outstanding NBD jobs */
         qemuMigrationCancelDriveMirror(mig, driver, vm);

-        /* run 'cont' on the destination, which allows migration on qemu
-         * >= 0.10.6 to work properly.  This isn't strictly necessary on
-         * older qemu's, but it also doesn't hurt anything there
-         */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            if (virGetLastError() == NULL)
-                virReportError(VIR_ERR_INTERNAL_ERROR,
-                               "%s", _("resume operation failed"));
-            goto cleanup;
+        if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
+            event = virDomainEventLifecycleNewFromObj(vm,
+                                                      VIR_DOMAIN_EVENT_RESUMED,
+                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         }

-        event = virDomainEventLifecycleNewFromObj(vm,
-                                         VIR_DOMAIN_EVENT_RESUMED,
-                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
             VIR_WARN("Failed to save status on vm %s", vm->def->name);
             goto cleanup;
@@ -4065,7 +4104,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
 {
     virObjectEventPtr event = NULL;
     int ret = -1;
-    int resume = 0;
     virErrorPtr orig_err = NULL;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
     bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
@@ -4085,7 +4123,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
     if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
         goto endjob;

-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
+    qemuDomainObjStorePreMigrationState(vm);

     if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
         ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
@@ -4112,25 +4150,13 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
                                          VIR_DOMAIN_EVENT_STOPPED,
                                          VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
     }
-    resume = 0;

 endjob:
     if (ret < 0)
         orig_err = virSaveLastError();

-    if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
-        /* we got here through some sort of failure; start the domain again */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            /* Hm, we already know we are in error here.  We don't want to
-             * overwrite the previous error, though, so we just throw something
-             * to the logs and hope for the best
-             */
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
-                      vm->def->name);
-        }
-
+    if (!v3proto &&
+        qemuDomainObjRestorePreMigrationState(conn, vm)) {
         event = virDomainEventLifecycleNewFromObj(vm,
                                          VIR_DOMAIN_EVENT_RESUMED,
                                          VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
@@ -4179,7 +4205,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
 {
     virObjectEventPtr event = NULL;
     int ret = -1;
-    bool resume;
     bool hasrefs;

     /* If we didn't start the job in the begin phase, start it now. */
@@ -4194,32 +4219,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
     virCloseCallbacksUnset(driver->closeCallbacks, vm,
                            qemuMigrationCleanup);

-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
     ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
                           cookieout, cookieoutlen,
                           flags, resource, NULL, graphicsuri);

-    if (ret < 0 && resume &&
-        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
-        /* we got here through some sort of failure; start the domain again */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            /* Hm, we already know we are in error here.  We don't want to
-             * overwrite the previous error, though, so we just throw something
-             * to the logs and hope for the best
-             */
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
-                      vm->def->name);
+    if (ret < 0) {
+        if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
+            event = virDomainEventLifecycleNewFromObj(vm,
+                                                      VIR_DOMAIN_EVENT_RESUMED,
+                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         }
-
-        event = virDomainEventLifecycleNewFromObj(vm,
-                                         VIR_DOMAIN_EVENT_RESUMED,
-                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
-    }
-
-    if (ret < 0)
         goto endjob;
+    }

     qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);

-- 
1.8.5.3




More information about the libvir-list mailing list