[libvirt PATCH v2 40/81] qemu: Improve post-copy migration handling on reconnect

Jiri Denemark jdenemar at redhat.com
Wed Jun 1 12:49:40 UTC 2022


When the libvirt daemon is restarted during an active post-copy
migration, we no longer unconditionally mark the migration as broken.
In this phase libvirt is not really needed for the migration to finish
successfully. In fact, the migration could have even finished while
libvirt was not running, or it may still be happily running.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
Reviewed-by: Peter Krempa <pkrempa at redhat.com>
Reviewed-by: Pavel Hrdina <phrdina at redhat.com>
---

Notes:
    Version 2:
    - dropped line breaks from debug messages

 src/qemu/qemu_migration.c | 26 ++++++++++++++++++++++++++
 src/qemu/qemu_migration.h |  6 ++++++
 src/qemu/qemu_process.c   | 39 +++++++++++++++++++++++++++++----------
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index d427840d14..5765647ad7 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2432,6 +2432,32 @@ qemuMigrationSrcBeginPhaseBlockDirtyBitmaps(qemuMigrationCookie *mig,
 }
 
 
+int /* 0 with *status set on success; -1 if fetching the stats fails */
+qemuMigrationAnyRefreshStatus(virQEMUDriver *driver,
+                              virDomainObj *vm,
+                              virDomainAsyncJob asyncJob,
+                              virDomainJobStatus *status)
+{
+    g_autoptr(virDomainJobData) jobData = NULL; /* scratch job data, released automatically via g_autoptr */
+    qemuDomainJobDataPrivate *priv;
+
+    jobData = virDomainJobDataInit(&qemuJobDataPrivateDataCallbacks);
+    priv = jobData->privateData; /* only read below, for the debug message */
+
+    if (qemuMigrationAnyFetchStats(driver, vm, asyncJob, jobData, NULL) < 0)
+        return -1; /* *status is left untouched on this path */
+
+    qemuMigrationUpdateJobType(jobData); /* NOTE(review): presumably derives jobData->status from the fetched stats -- confirm */
+    VIR_DEBUG("QEMU reports domain '%s' is in '%s' migration state, translated as %d",
+              vm->def->name,
+              qemuMonitorMigrationStatusTypeToString(priv->stats.mig.status),
+              jobData->status);
+
+    *status = jobData->status; /* copy out before jobData goes out of scope */
+    return 0;
+}
+
+
 /* The caller is supposed to lock the vm and start a migration job. */
 static char *
 qemuMigrationSrcBeginPhase(virQEMUDriver *driver,
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index eeb69a52bf..9351d6ac51 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -279,3 +279,9 @@ qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver,
                                  virDomainObj *vm,
                                  virDomainAsyncJob asyncJob,
                                  virDomainJobData *jobData);
+
+int
+qemuMigrationAnyRefreshStatus(virQEMUDriver *driver,
+                              virDomainObj *vm,
+                              virDomainAsyncJob asyncJob,
+                              virDomainJobStatus *status);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 97d84893be..6dd643a38b 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3532,10 +3532,8 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
         /* migration finished, we started resuming the domain but didn't
          * confirm success or failure yet; killing it seems safest unless
          * we already started guest CPUs or we were in post-copy mode */
-        if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_IN)) {
-            qemuMigrationDstPostcopyFailed(vm);
+        if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_IN))
             return 1;
-        }
 
         if (state != VIR_DOMAIN_RUNNING) {
             VIR_DEBUG("Killing migrated domain %s", vm->def->name);
@@ -3602,10 +3600,8 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * of Finish3 step; third party needs to check what to do next; in
          * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
          */
-        if (postcopy) {
-            qemuMigrationSrcPostcopyFailed(vm);
+        if (postcopy)
             return 1;
-        }
         break;
 
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
@@ -3613,10 +3609,8 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode there's no way back, so let's just mark the domain
          * as broken in that case
          */
-        if (postcopy) {
-            qemuMigrationSrcPostcopyFailed(vm);
+        if (postcopy)
             return 1;
-        }
 
         VIR_DEBUG("Resuming domain %s after failed migration",
                   vm->def->name);
@@ -3654,6 +3648,7 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
                             qemuDomainJobObj *job,
                             unsigned int *stopFlags)
 {
+    virDomainJobStatus migStatus = VIR_DOMAIN_JOB_STATUS_NONE;
     qemuDomainJobPrivate *jobPriv = job->privateData;
     virDomainState state;
     int reason;
@@ -3661,6 +3656,8 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
 
     state = virDomainObjGetState(vm, &reason);
 
+    qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_NONE, &migStatus);
+
     if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
         rc = qemuProcessRecoverMigrationOut(driver, vm, job,
                                             state, reason, stopFlags);
@@ -3672,7 +3669,29 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
         return -1;
 
     if (rc > 0) {
-        qemuProcessRestoreMigrationJob(vm, job);
+        if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
+            VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
+                      vm->def->name);
+            qemuProcessRestoreMigrationJob(vm, job);
+            return 0;
+        }
+
+        if (migStatus != VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) {
+            if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT)
+                qemuMigrationSrcPostcopyFailed(vm);
+            else
+                qemuMigrationDstPostcopyFailed(vm);
+
+            qemuProcessRestoreMigrationJob(vm, job);
+            return 0;
+        }
+
+        VIR_DEBUG("Post-copy migration of domain %s already finished",
+                  vm->def->name);
+        if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT)
+            qemuMigrationSrcComplete(driver, vm, VIR_ASYNC_JOB_NONE);
+        else
+            qemuMigrationDstComplete(driver, vm, true, VIR_ASYNC_JOB_NONE, job);
         return 0;
     }
 
-- 
2.35.1



More information about the libvir-list mailing list