[libvirt PATCH 06/80] qemu: Keep domain running on dst on failed post-copy migration

Jiri Denemark jdenemar at redhat.com
Tue May 10 15:20:27 UTC 2022


There's no need to artificially pause a domain when post-copy fails. The
virtual CPUs may continue running; only the guest tasks that try to
read a page which has not been migrated yet will get blocked.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---
 src/qemu/qemu_migration.c | 37 +++++++++++++++++++++++++++++++++----
 src/qemu/qemu_migration.h |  6 ++++--
 src/qemu/qemu_process.c   |  8 ++++----
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index b735bdb391..a5c7a27124 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1577,14 +1577,19 @@ qemuMigrationSrcIsSafe(virDomainDef *def,
 
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                               virDomainObj *vm)
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm)
 {
+    qemuDomainObjPrivate *priv = vm->privateData;
+    virQEMUDriver *driver = priv->driver;
     virDomainState state;
     int reason;
 
     state = virDomainObjGetState(vm, &reason);
 
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
+
     if (state != VIR_DOMAIN_PAUSED &&
         state != VIR_DOMAIN_RUNNING)
         return;
@@ -1608,6 +1613,30 @@ qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
 }
 
 
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm)
+{
+    virDomainState state;
+    int reason;
+
+    state = virDomainObjGetState(vm, &reason);
+
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
+
+    if (state != VIR_DOMAIN_RUNNING ||
+        reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED)
+        return;
+
+    VIR_WARN("Incoming migration of domain %s failed during post-copy; "
+             "leaving the domain running in a degraded mode", vm->def->name);
+
+    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
+                         VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
+}
+
+
 static int
 qemuMigrationSrcWaitForSpice(virDomainObj *vm)
 {
@@ -3470,7 +3499,7 @@ qemuMigrationSrcConfirmPhase(virQEMUDriver *driver,
 
         if (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
             reason == VIR_DOMAIN_PAUSED_POSTCOPY)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         else
             qemuMigrationSrcRestoreDomainState(driver, vm);
 
@@ -5847,7 +5876,7 @@ qemuMigrationDstFinish(virQEMUDriver *driver,
                                 VIR_DOMAIN_EVENT_STOPPED_FAILED);
             virObjectEventStateQueue(driver->domainEventState, event);
         } else {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         }
     }
 
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index a8afa66119..c4e4228282 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -251,8 +251,10 @@ qemuMigrationDstRun(virQEMUDriver *driver,
                     virDomainAsyncJob asyncJob);
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                            virDomainObj *vm);
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm);
+
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm);
 
 int
 qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 1925559fad..a3192a7196 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3482,7 +3482,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
          * confirm success or failure yet; killing it seems safest unless
          * we already started guest CPUs or we were in post-copy mode */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         } else if (state != VIR_DOMAIN_RUNNING) {
             VIR_DEBUG("Killing migrated domain %s", vm->def->name);
             return -1;
@@ -3533,7 +3533,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Cancelling unfinished migration of domain %s",
                       vm->def->name);
@@ -3551,7 +3551,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
          */
         if (postcopy)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         break;
 
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
@@ -3560,7 +3560,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * as broken in that case
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Resuming domain %s after failed migration",
                       vm->def->name);
-- 
2.35.1



More information about the libvir-list mailing list