[libvirt PATCH v2 48/81] qemu: Use QEMU_MIGRATION_PHASE_POSTCOPY_FAILED

Jiri Denemark jdenemar at redhat.com
Wed Jun 1 12:49:48 UTC 2022


This phase marks a migration protocol as broken in a post-copy phase.
Libvirt is no longer actively watching the migration in this phase
because the migration API that started the migration failed.

This may happen either when post-copy migration really fails (QEMU
enters postcopy-paused migration state) or when the migration still
progresses between both QEMU processes, but libvirt lost control of it
because the connection between libvirt daemons (in p2p migration) or
between a daemon and a client (non-p2p migration) was closed, for
example, when one of the daemons was restarted.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
Reviewed-by: Peter Krempa <pkrempa at redhat.com>
Reviewed-by: Pavel Hrdina <phrdina at redhat.com>
---

Notes:
    Version 2:
    - moved most of the last hunk to a separate patch

 src/qemu/qemu_migration.c | 15 +++++++++++----
 src/qemu/qemu_process.c   | 11 ++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 88702c94e4..302589b63c 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2341,6 +2341,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
                  vm->def->name);
 
         if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) {
+            ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
             qemuMigrationSrcPostcopyFailed(vm);
             qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
             qemuMigrationJobContinue(vm);
@@ -2352,8 +2353,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
         }
         break;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuMigrationSrcPostcopyFailed(vm);
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
@@ -2374,7 +2377,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
     case QEMU_MIGRATION_PHASE_PERFORM2:
         /* single phase outgoing migration; unreachable */
     case QEMU_MIGRATION_PHASE_NONE:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_LAST:
         /* unreachable */
         ;
@@ -3744,6 +3746,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver,
                                        flags, cancelled);
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     } else {
@@ -5572,6 +5575,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver,
         virErrorPreserveLast(&orig_err);
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     } else {
@@ -5664,6 +5668,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver,
                                  jobPriv->migParams, priv->job.apiFlags);
         qemuMigrationJobFinish(vm);
     } else {
+        if (ret < 0)
+            ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         qemuMigrationJobContinue(vm);
     }
@@ -5903,7 +5909,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
     /* Guest is successfully running, so cancel previous auto destroy. There's
      * nothing to remove when we are resuming post-copy migration.
      */
-    if (!virDomainObjIsFailedPostcopy(vm))
+    if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)
         qemuProcessAutoDestroyRemove(driver, vm);
 
     /* Remove completed stats for post-copy, everything but timing fields
@@ -6170,6 +6176,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver,
     }
 
     if (virDomainObjIsFailedPostcopy(vm)) {
+        ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
         qemuProcessAutoDestroyRemove(driver, vm);
         qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
         *finishJob = false;
@@ -6290,9 +6297,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver,
               vm->def->name);
 
     if (job == VIR_ASYNC_JOB_MIGRATION_IN)
-        phase = QEMU_MIGRATION_PHASE_FINISH3;
+        phase = QEMU_MIGRATION_PHASE_FINISH_RESUME;
     else
-        phase = QEMU_MIGRATION_PHASE_CONFIRM3;
+        phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME;
 
     if (qemuMigrationJobStartPhase(vm, phase) < 0)
         return;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index f752668b2f..8a98c03395 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1555,9 +1555,12 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
          * watching it in any thread. Let's make sure the migration is properly
          * finished in case we get a "completed" event.
          */
-        if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0)
+        if (virDomainObjIsPostcopy(vm, priv->job.current->operation) &&
+            priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
+            priv->job.asyncOwner == 0) {
             qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
                                    priv->job.asyncJob, status, NULL);
+        }
         break;
 
     case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
@@ -3507,7 +3510,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
     case QEMU_MIGRATION_PHASE_CONFIRM3:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
     case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
@@ -3545,6 +3547,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
         }
         break;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
     case QEMU_MIGRATION_PHASE_FINISH_RESUME:
         return 1;
@@ -3581,7 +3584,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
     case QEMU_MIGRATION_PHASE_PREPARE:
     case QEMU_MIGRATION_PHASE_FINISH2:
     case QEMU_MIGRATION_PHASE_FINISH3:
-    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
     case QEMU_MIGRATION_PHASE_FINISH_RESUME:
     case QEMU_MIGRATION_PHASE_LAST:
@@ -3643,6 +3645,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
         }
         return 1;
 
+    case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
     case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
     case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
         return 1;
@@ -3694,6 +3697,8 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
         return -1;
 
     if (rc > 0) {
+        job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
+
         if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
             VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
                       vm->def->name);
-- 
2.35.1



More information about the libvir-list mailing list