[libvirt] [PATCH 12/19] qemu: Recover from interrupted migration

Jiri Denemark jdenemar at redhat.com
Thu Jul 7 23:34:17 UTC 2011


---
 src/qemu/qemu_process.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 109 insertions(+), 1 deletions(-)

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 49625b5..ddb29c1 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -38,6 +38,7 @@
 #include "qemu_hostdev.h"
 #include "qemu_hotplug.h"
 #include "qemu_bridge_filter.h"
+#include "qemu_migration.h"
 
 #if HAVE_NUMACTL
 # define NUMA_VERSION1_COMPATIBILITY 1
@@ -2224,6 +2225,111 @@ qemuProcessUpdateState(struct qemud_driver *driver, virDomainObjPtr vm)
 }
 
 static int
+qemuProcessRecoverMigration(struct qemud_driver *driver,
+                            virDomainObjPtr vm,
+                            virConnectPtr conn,
+                            enum qemuDomainAsyncJob job,
+                            enum qemuMigrationJobPhase phase,
+                            virDomainState state,
+                            int reason)
+{
+    if (job == QEMU_ASYNC_JOB_MIGRATION_IN) {
+        switch (phase) {
+        case QEMU_MIGRATION_PHASE_NONE:
+        case QEMU_MIGRATION_PHASE_PERFORM2:
+        case QEMU_MIGRATION_PHASE_BEGIN3:
+        case QEMU_MIGRATION_PHASE_PERFORM3:
+        case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
+        case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
+        case QEMU_MIGRATION_PHASE_CONFIRM3:
+        case QEMU_MIGRATION_PHASE_LAST:
+            break;
+
+        case QEMU_MIGRATION_PHASE_PREPARE:
+            VIR_DEBUG("Killing unfinished incoming migration for domain %s",
+                      vm->def->name);
+            return -1;
+
+        case QEMU_MIGRATION_PHASE_FINISH2:
+            /* source domain is already killed so let's just resume the domain
+             * and hope we are all set */
+            VIR_DEBUG("Incoming migration finished, resuming domain %s",
+                      vm->def->name);
+            if (qemuProcessStartCPUs(driver, vm, conn,
+                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                VIR_WARN("Could not resume domain %s", vm->def->name);
+            }
+            break;
+
+        case QEMU_MIGRATION_PHASE_FINISH3:
+            /* migration finished, we started resuming the domain but didn't
+             * confirm success or failure yet; killing it seems safest */
+            VIR_DEBUG("Killing migrated domain %s", vm->def->name);
+            return -1;
+        }
+    } else if (job == QEMU_ASYNC_JOB_MIGRATION_OUT) {
+        switch (phase) {
+        case QEMU_MIGRATION_PHASE_NONE:
+        case QEMU_MIGRATION_PHASE_PREPARE:
+        case QEMU_MIGRATION_PHASE_FINISH2:
+        case QEMU_MIGRATION_PHASE_FINISH3:
+        case QEMU_MIGRATION_PHASE_LAST:
+            break;
+
+        case QEMU_MIGRATION_PHASE_BEGIN3:
+            /* nothing happen so far, just forget we were about to migrate the
+             * domain */
+            break;
+
+        case QEMU_MIGRATION_PHASE_PERFORM2:
+        case QEMU_MIGRATION_PHASE_PERFORM3:
+            /* migration is still in progress, let's cancel it and resume the
+             * domain */
+            VIR_DEBUG("Canceling unfinished outgoing migration of domain %s",
+                      vm->def->name);
+            /* TODO cancel possibly running migrate operation */
+            /* resume the domain but only if it was paused as a result of
+             * migration */
+            if (state == VIR_DOMAIN_PAUSED &&
+                (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
+                 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
+                if (qemuProcessStartCPUs(driver, vm, conn,
+                                         VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                    VIR_WARN("Could not resume domain %s", vm->def->name);
+                }
+            }
+            break;
+
+        case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
+            /* migration finished but we didn't have a chance to get the result
+             * of Finish3 step; third party needs to check what to do next
+             */
+            break;
+
+        case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
+            /* Finish3 failed, we need to resume the domain */
+            VIR_DEBUG("Resuming domain %s after failed migration",
+                      vm->def->name);
+            if (state == VIR_DOMAIN_PAUSED &&
+                (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
+                 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
+                if (qemuProcessStartCPUs(driver, vm, conn,
+                                         VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                    VIR_WARN("Could not resume domain %s", vm->def->name);
+                }
+            }
+            break;
+
+        case QEMU_MIGRATION_PHASE_CONFIRM3:
+            /* migration completed, we need to kill the domain here */
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int
 qemuProcessRecoverJob(struct qemud_driver *driver,
                       virDomainObjPtr vm,
                       virConnectPtr conn,
@@ -2237,7 +2343,9 @@ qemuProcessRecoverJob(struct qemud_driver *driver,
     switch (job->asyncJob) {
     case QEMU_ASYNC_JOB_MIGRATION_OUT:
     case QEMU_ASYNC_JOB_MIGRATION_IN:
-        /* we don't know what to do yet */
+        if (qemuProcessRecoverMigration(driver, vm, conn, job->asyncJob,
+                                        job->phase, state, reason) < 0)
+            return -1;
         break;
 
     case QEMU_ASYNC_JOB_SAVE:
-- 
1.7.6




More information about the libvir-list mailing list