[libvirt PATCH 50/80] qemu: Use QEMU_MIGRATION_PHASE_POSTCOPY_FAILED

Jiri Denemark jdenemar at redhat.com
Tue May 10 15:21:11 UTC 2022


This phase marks the migration protocol as broken in the post-copy
phase. Libvirt no longer actively watches the migration in this phase
because the migration API that started it has failed.

This may happen either when post-copy migration really fails (QEMU
enters the postcopy-paused migration state) or when the migration is
still progressing between the two QEMU processes but libvirt has lost
control of it because the connection between the libvirt daemons (in
p2p migration) or between a daemon and a client (non-p2p migration)
was closed — for example, when one of the daemons was restarted.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---
 src/qemu/qemu_migration.c | 15 +++++++++++----
 src/qemu/qemu_process.c   | 16 +++++++++++++---
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 3f6921b4b2..c111dd8686 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2369,6 +2369,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
                  vm->def->name);
 
         if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) {
+            ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
             qemuMigrationSrcPostcopyFailed(vm);
             qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
             qemuMigrationJobContinue(vm);
@@ -2380,8 +2381,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
         }
         break;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuMigrationSrcPostcopyFailed(vm);
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
@@ -2402,7 +2405,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
     case QEMU_MIGRATION_PHASE_PERFORM2:
         /* single phase outgoing migration; unreachable */
     case QEMU_MIGRATION_PHASE_NONE:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_LAST:
         /* unreachable */
         ;
@@ -3774,6 +3776,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver,
                                        flags, cancelled);
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     } else {
@@ -5607,6 +5610,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver,
         virErrorPreserveLast(&orig_err);
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     } else {
@@ -5699,6 +5703,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver,
                                  jobPriv->migParams, priv->job.apiFlags);
         qemuMigrationJobFinish(vm);
     } else {
+        if (ret < 0)
+            ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     }
@@ -5938,7 +5944,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
     /* Guest is successfully running, so cancel previous auto destroy. There's
      * nothing to remove when we are resuming post-copy migration.
      */
-    if (!virDomainObjIsFailedPostcopy(vm))
+    if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)
         qemuProcessAutoDestroyRemove(driver, vm);
 
     /* Remove completed stats for post-copy, everything but timing fields
@@ -6205,6 +6211,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver,
     }
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuProcessAutoDestroyRemove(driver, vm);
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         *finishJob = false;
@@ -6327,9 +6334,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver,
               vm->def->name);
 
     if (job == VIR_ASYNC_JOB_MIGRATION_IN)
-        phase = QEMU_MIGRATION_PHASE_FINISH3;
+        phase = QEMU_MIGRATION_PHASE_FINISH_RESUME;
     else
-        phase = QEMU_MIGRATION_PHASE_CONFIRM3;
+        phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME;
 
     if (qemuMigrationJobStartPhase(vm, phase) < 0)
         return;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index c7ed0a5c56..f42c9a3018 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1624,7 +1624,8 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
      * watching it in any thread. Let's make sure the migration is properly
      * finished in case we get a "completed" event.
      */
-    if (virDomainObjIsFailedPostcopy(vm) &&
+    if (virDomainObjIsPostcopy(vm, priv->job.current->operation) &&
+        priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
         priv->job.asyncOwner == 0 &&
         status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) {
         struct qemuProcessEvent *proc = g_new0(struct qemuProcessEvent, 1);
@@ -3566,7 +3567,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
     case QEMU_MIGRATION_PHASE_CONFIRM3:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
     case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
@@ -3604,6 +3604,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
         }
         break;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
     case QEMU_MIGRATION_PHASE_FINISH_RESUME:
         return 1;
@@ -3639,7 +3640,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PREPARE:
     case QEMU_MIGRATION_PHASE_FINISH2:
     case QEMU_MIGRATION_PHASE_FINISH3:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
     case QEMU_MIGRATION_PHASE_FINISH_RESUME:
     case QEMU_MIGRATION_PHASE_LAST:
@@ -3700,6 +3700,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
         }
         return 1;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
         return 1;
@@ -3751,9 +3752,18 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
         return -1;
 
     if (rc > 0) {
+        job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
+
         if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
             VIR_DEBUG("Post-copy migration of domain %s still running, it "
                       "will be handled as unattended", vm->def->name);
+
+            if (state == VIR_DOMAIN_RUNNING)
+                reason = VIR_DOMAIN_RUNNING_POSTCOPY;
+            else
+                reason = VIR_DOMAIN_PAUSED_POSTCOPY;
+
+            virDomainObjSetState(vm, state, reason);
             qemuProcessRestoreMigrationJob(vm, job);
             return 0;
         }
-- 
2.35.1



More information about the libvir-list mailing list