[libvirt PATCH v2 43/81] qemu: Add new migration phases for post-copy recovery

Jiri Denemark jdenemar at redhat.com
Wed Jun 1 12:49:43 UTC 2022


When recovering from a failed post-copy migration, we need to go through
all migration phases again, but we don't need to repeat all the steps in
each phase. Let's create a new set of migration phases dedicated to
post-copy recovery so that we can easily distinguish between normal and
recovery code paths.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
Reviewed-by: Peter Krempa <pkrempa at redhat.com>
Reviewed-by: Pavel Hrdina <phrdina at redhat.com>
---

Notes:
    Version 2:
    - additional comments

 src/qemu/qemu_migration.c | 20 +++++++++++++++++++-
 src/qemu/qemu_migration.h |  6 ++++++
 src/qemu/qemu_process.c   | 29 +++++++++++++++++++++++++++--
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 02827bd975..710aae3eb7 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -79,6 +79,12 @@ VIR_ENUM_IMPL(qemuMigrationJobPhase,
               "prepare",
               "finish2",
               "finish3",
+              "postcopy_failed",
+              "begin_resume",
+              "perform_resume",
+              "confirm_resume",
+              "prepare_resume",
+              "finish_resume",
 );
 
 
@@ -139,7 +145,8 @@ qemuMigrationJobSetPhase(virDomainObj *vm,
 {
     qemuDomainObjPrivate *priv = vm->privateData;
 
-    if (phase < priv->job.phase) {
+    if (phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
+        phase < priv->job.phase) {
         VIR_ERROR(_("migration protocol going backwards %s => %s"),
                   qemuMigrationJobPhaseTypeToString(priv->job.phase),
                   qemuMigrationJobPhaseTypeToString(phase));
@@ -2328,18 +2335,29 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
         }
         break;
 
+    case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+    case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+        qemuMigrationSrcPostcopyFailed(vm);
+        qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
+        qemuMigrationJobContinue(vm);
+        break;
+
     case QEMU_MIGRATION_PHASE_PERFORM3:
         /* cannot be seen without an active migration API; unreachable */
     case QEMU_MIGRATION_PHASE_CONFIRM3:
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
+    case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
         /* all done; unreachable */
     case QEMU_MIGRATION_PHASE_PREPARE:
     case QEMU_MIGRATION_PHASE_FINISH2:
     case QEMU_MIGRATION_PHASE_FINISH3:
+    case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+    case QEMU_MIGRATION_PHASE_FINISH_RESUME:
         /* incoming migration; unreachable */
     case QEMU_MIGRATION_PHASE_PERFORM2:
         /* single phase outgoing migration; unreachable */
     case QEMU_MIGRATION_PHASE_NONE:
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_LAST:
         /* unreachable */
         ;
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index 9351d6ac51..7eb0d4fe02 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -100,6 +100,12 @@ typedef enum {
     QEMU_MIGRATION_PHASE_PREPARE,
     QEMU_MIGRATION_PHASE_FINISH2,
     QEMU_MIGRATION_PHASE_FINISH3,
+    QEMU_MIGRATION_PHASE_POSTCOPY_FAILED, /* marker for resume phases */
+    QEMU_MIGRATION_PHASE_BEGIN_RESUME,
+    QEMU_MIGRATION_PHASE_PERFORM_RESUME,
+    QEMU_MIGRATION_PHASE_CONFIRM_RESUME,
+    QEMU_MIGRATION_PHASE_PREPARE_RESUME,
+    QEMU_MIGRATION_PHASE_FINISH_RESUME,
 
     QEMU_MIGRATION_PHASE_LAST
 } qemuMigrationJobPhase;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 6dd643a38b..f752668b2f 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3507,6 +3507,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
     case QEMU_MIGRATION_PHASE_CONFIRM3:
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
+    case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+    case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+    case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
     case QEMU_MIGRATION_PHASE_LAST:
         /* N/A for incoming migration */
         break;
@@ -3540,6 +3544,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
             return -1;
         }
         break;
+
+    case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+    case QEMU_MIGRATION_PHASE_FINISH_RESUME:
+        return 1;
     }
 
     return 0;
@@ -3548,7 +3556,8 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
 
 /*
  * Returns
- *     -1 on error, the domain will be killed,
+ *     -1 the domain should be killed (either after a successful migration or
+ *        on error),
  *      0 the domain should remain running with the migration job discarded,
  *      1 the daemon was restarted during post-copy phase
  */
@@ -3556,6 +3565,7 @@ static int
 qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
                                virDomainObj *vm,
                                qemuDomainJobObj *job,
+                               virDomainJobStatus migStatus,
                                virDomainState state,
                                int reason,
                                unsigned int *stopFlags)
@@ -3571,6 +3581,9 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PREPARE:
     case QEMU_MIGRATION_PHASE_FINISH2:
     case QEMU_MIGRATION_PHASE_FINISH3:
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
+    case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+    case QEMU_MIGRATION_PHASE_FINISH_RESUME:
     case QEMU_MIGRATION_PHASE_LAST:
         /* N/A for outgoing migration */
         break;
@@ -3621,6 +3634,18 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
         /* migration completed, we need to kill the domain here */
         *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
         return -1;
+
+    case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
+        if (migStatus == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) {
+            /* migration completed, we need to kill the domain here */
+            *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
+            return -1;
+        }
+        return 1;
+
+    case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+    case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+        return 1;
     }
 
     if (resume) {
@@ -3659,7 +3684,7 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
     qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_NONE, &migStatus);
 
     if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
-        rc = qemuProcessRecoverMigrationOut(driver, vm, job,
+        rc = qemuProcessRecoverMigrationOut(driver, vm, job, migStatus,
                                             state, reason, stopFlags);
     } else {
         rc = qemuProcessRecoverMigrationIn(driver, vm, job, state);
-- 
2.35.1



More information about the libvir-list mailing list