[libvirt PATCH v2 04/81] qemu: Keep domain running on dst on failed post-copy migration

Jiri Denemark jdenemar at redhat.com
Wed Jun 1 12:49:04 UTC 2022


There's no need to artificially pause a domain when post-copy migration
fails from libvirt's point of view unless the QEMU connection is broken
too, as the migration itself may still be progressing well.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---

Notes:
    Version 2:
    - commit message and warning text updated
    - dropped dead code from qemuMigrationSrcPostcopyFailed
        - the source domain is always paused once it enters post-copy;
          handling the RUNNING state there was a leftover from before this patch

 src/qemu/qemu_migration.c | 51 ++++++++++++++++++++++++++-------------
 src/qemu/qemu_migration.h |  6 +++--
 src/qemu/qemu_process.c   |  8 +++---
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 6cc68a567a..326e17ddd7 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1577,34 +1577,51 @@ qemuMigrationSrcIsSafe(virDomainDef *def,
 
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                               virDomainObj *vm)
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm)
 {
     virDomainState state;
     int reason;
 
     state = virDomainObjGetState(vm, &reason);
 
-    if (state != VIR_DOMAIN_PAUSED &&
-        state != VIR_DOMAIN_RUNNING)
-        return;
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
 
-    if (state == VIR_DOMAIN_PAUSED &&
+    if (state != VIR_DOMAIN_PAUSED ||
         reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
         return;
 
     VIR_WARN("Migration of domain %s failed during post-copy; "
              "leaving the domain paused", vm->def->name);
 
-    if (state == VIR_DOMAIN_RUNNING) {
-        if (qemuProcessStopCPUs(driver, vm,
-                                VIR_DOMAIN_PAUSED_POSTCOPY_FAILED,
-                                VIR_ASYNC_JOB_MIGRATION_IN) < 0)
-            VIR_WARN("Unable to pause guest CPUs for %s", vm->def->name);
-    } else {
-        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
-                             VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
-    }
+    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
+                         VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
+}
+
+
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm)
+{
+    virDomainState state;
+    int reason;
+
+    state = virDomainObjGetState(vm, &reason);
+
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
+
+    if (state != VIR_DOMAIN_RUNNING ||
+        reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED)
+        return;
+
+    VIR_WARN("Migration protocol failed during incoming migration of domain "
+             "%s, but QEMU keeps migrating; leaving the domain running, the "
+             "migration will be handled as unattended", vm->def->name);
+
+    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
+                         VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
 }
 
 
@@ -3453,7 +3470,7 @@ qemuMigrationSrcConfirmPhase(virQEMUDriver *driver,
 
         if (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
             reason == VIR_DOMAIN_PAUSED_POSTCOPY)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         else
             qemuMigrationSrcRestoreDomainState(driver, vm);
 
@@ -5826,7 +5843,7 @@ qemuMigrationDstFinish(virQEMUDriver *driver,
                                 VIR_DOMAIN_EVENT_STOPPED_FAILED);
             virObjectEventStateQueue(driver->domainEventState, event);
         } else {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         }
     }
 
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index a8afa66119..c4e4228282 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -251,8 +251,10 @@ qemuMigrationDstRun(virQEMUDriver *driver,
                     virDomainAsyncJob asyncJob);
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                            virDomainObj *vm);
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm);
+
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm);
 
 int
 qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index e8936cd623..0d39c67dfc 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3411,7 +3411,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
          * confirm success or failure yet; killing it seems safest unless
          * we already started guest CPUs or we were in post-copy mode */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         } else if (state != VIR_DOMAIN_RUNNING) {
             VIR_DEBUG("Killing migrated domain %s", vm->def->name);
             return -1;
@@ -3462,7 +3462,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Cancelling unfinished migration of domain %s",
                       vm->def->name);
@@ -3480,7 +3480,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
          */
         if (postcopy)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         break;
 
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
@@ -3489,7 +3489,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * as broken in that case
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Resuming domain %s after failed migration",
                       vm->def->name);
-- 
2.35.1



More information about the libvir-list mailing list