[libvirt] [PATCHv4] qemu: allow blkstat/blkinfo calls during migration

Eric Blake eblake at redhat.com
Mon May 16 17:30:15 UTC 2011


From: Federico Simoncelli <fsimonce at redhat.com>

Originally, most libvirt domain-specific calls were blocked for the
duration of a migration.
This patch adds a mechanism that allows specific calls (blkstat/blkinfo)
to be executed while a migration is in progress.
In the long term it would be desirable to have a more general solution
for marking further APIs as migration-safe, without needing
special-case code.
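
In essence, the handshake between an API caller and the migration loop
works as in the minimal sketch below. It is only an illustration of the
pattern: plain pthread primitives and made-up names (vmLock,
blkstatRequested, requestBlkstat, processJobSignals) stand in for
libvirt's virMutex/virCond, the QEMU_JOB_SIGNAL_BLKSTAT bit and the real
functions, and the monitor call, error handling, second signal flag and
domain reference counting are all omitted.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t vmLock = PTHREAD_MUTEX_INITIALIZER;     /* stands in for vm->lock */
static pthread_cond_t  signalCond = PTHREAD_COND_INITIALIZER;  /* stands in for priv->signalCond */

static bool blkstatRequested;   /* stands in for the QEMU_JOB_SIGNAL_BLKSTAT bit */
static long long rdBytes;       /* stands in for jobSignalsData.blockStat->rd_bytes */
static int statRetCode = -1;    /* stands in for jobSignalsData.statRetCode */

/* API-caller side, i.e. what virDomainBlockStats() does while a
 * migration job owns the domain.  The real code first waits for any
 * earlier pending request to drain; with a single caller that step is
 * skipped here. */
static int requestBlkstat(void)
{
    int ret;

    pthread_mutex_lock(&vmLock);

    /* publish the request data first, only then raise the flag */
    statRetCode = -1;
    blkstatRequested = true;

    /* sleep until the migration loop has serviced the request */
    while (blkstatRequested)
        pthread_cond_wait(&signalCond, &vmLock);

    ret = statRetCode;
    pthread_mutex_unlock(&vmLock);
    return ret;
}

/* Migration-loop side, i.e. what qemuMigrationProcessJobSignals()
 * does between job-status polls. */
static void processJobSignals(void)
{
    pthread_mutex_lock(&vmLock);
    if (blkstatRequested) {
        rdBytes = 4096;         /* stands in for the real monitor query */
        statRetCode = 0;
        blkstatRequested = false;
        pthread_cond_broadcast(&signalCond);
    }
    pthread_mutex_unlock(&vmLock);
}

static void *caller(void *opaque)
{
    (void)opaque;
    printf("blkstat request returned %d (rd_bytes=%lld)\n",
           requestBlkstat(), rdBytes);
    return NULL;
}

int main(void)
{
    pthread_t thr;

    if (pthread_create(&thr, NULL, caller, NULL) != 0)
        return 1;

    /* stand-in for qemuMigrationWaitForCompletion(): poll every 50ms,
     * draining any queued request on each pass */
    for (;;) {
        bool done;
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000 };

        processJobSignals();
        pthread_mutex_lock(&vmLock);
        done = (statRetCode == 0);
        pthread_mutex_unlock(&vmLock);
        if (done)
            break;
        nanosleep(&ts, NULL);
    }

    pthread_join(thr, NULL);
    return 0;
}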

 * src/qemu/qemu_migration.c: add additional job signal flags for
   requesting blkstat/blkinfo during a migration
 * src/qemu/qemu_domain.c: add a condition variable that can be used
   to efficiently wait for the migration code to clear the signal flag
 * src/qemu/qemu_driver.c: execute blkstat/blkinfo via the job signal
   flags while a migration is in progress
---

My changes in v4:
Fix the .mailmap entry (elided from this mail) to use Federico's
preferred address.
Add cleanup for jobCond.
Defer changes to jobSignals until after jobSignalsData is stable.
Alter qemuMigrationProcessJobSignals to add a mode of operation that
guarantees a jobSignals bit is cleared, regardless of other errors.
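
For reference, the lines the last two notes refer to, condensed from
the diff below:

/* qemuDomainMigrateSetMaxDowntime(): write the payload before the flag
 * that advertises it, so the migration loop never sees a half-filled
 * jobSignalsData */
priv->jobSignalsData.migrateDowntime = downtime;
priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME;

/* qemuMigrationWaitForCompletion(), cleanup label: the new cleanup mode
 * (last argument true) guarantees every pending bit is cleared and its
 * return code forced to -1, so a blocked blkstat/blkinfo caller is
 * always woken */
while (priv->jobSignals) {
    qemuMigrationProcessJobSignals(driver, vm, job, true);
}
virCondBroadcast(&priv->signalCond);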

 .mailmap                  |    1 +
 AUTHORS                   |    2 +-
 src/qemu/qemu_domain.c    |   14 ++++++
 src/qemu/qemu_domain.h    |    9 ++++
 src/qemu/qemu_driver.c    |  107 +++++++++++++++++++++++++++++++-------------
 src/qemu/qemu_migration.c |   76 +++++++++++++++++++++++++-------
 6 files changed, 159 insertions(+), 50 deletions(-)

diff --git a/.mailmap b/.mailmap
index f73e26b..2b98322 100644
--- a/.mailmap
+++ b/.mailmap
diff --git a/AUTHORS b/AUTHORS
index 1bb1f0f..47860a9 100644
--- a/AUTHORS
+++ b/AUTHORS
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index bcacb18..8f4915c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -89,7 +89,19 @@ static void *qemuDomainObjPrivateAlloc(void)
     if (VIR_ALLOC(priv) < 0)
         return NULL;

+    if (virCondInit(&priv->jobCond) < 0)
+        goto initfail;
+
+    if (virCondInit(&priv->signalCond) < 0) {
+        ignore_value(virCondDestroy(&priv->jobCond));
+        goto initfail;
+    }
+
     return priv;
+
+initfail:
+    VIR_FREE(priv);
+    return NULL;
 }

 static void qemuDomainObjPrivateFree(void *data)
@@ -101,6 +113,8 @@ static void qemuDomainObjPrivateFree(void *data)
     qemuDomainPCIAddressSetFree(priv->pciaddrs);
     virDomainChrSourceDefFree(priv->monConfig);
     VIR_FREE(priv->vcpupids);
+    ignore_value(virCondDestroy(&priv->jobCond));
+    ignore_value(virCondDestroy(&priv->signalCond));

     /* This should never be non-NULL if we get here, but just in case... */
     if (priv->mon) {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 6d24f53..af513e7 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -47,11 +47,19 @@ enum qemuDomainJobSignals {
     QEMU_JOB_SIGNAL_SUSPEND = 1 << 1, /* Request VM suspend to finish live migration offline */
     QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME = 1 << 2, /* Request migration downtime change */
     QEMU_JOB_SIGNAL_MIGRATE_SPEED = 1 << 3, /* Request migration speed change */
+    QEMU_JOB_SIGNAL_BLKSTAT = 1 << 4, /* Request blkstat during migration */
+    QEMU_JOB_SIGNAL_BLKINFO = 1 << 5, /* Request blkinfo during migration */
 };

 struct qemuDomainJobSignalsData {
     unsigned long long migrateDowntime; /* Data for QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME */
     unsigned long migrateBandwidth; /* Data for QEMU_JOB_SIGNAL_MIGRATE_SPEED */
+    char *statDevName; /* Device name used by blkstat calls */
+    virDomainBlockStatsPtr blockStat; /* Block statistics for QEMU_JOB_SIGNAL_BLKSTAT */
+    int statRetCode; /* Return code for the blkstat calls */
+    char *infoDevName; /* Device name used by blkinfo calls */
+    virDomainBlockInfoPtr blockInfo; /* Block information for QEMU_JOB_SIGNAL_BLKINFO */
+    int infoRetCode; /* Return code for the blkinfo calls */
 };

 typedef struct _qemuDomainPCIAddressSet qemuDomainPCIAddressSet;
@@ -61,6 +69,7 @@ typedef struct _qemuDomainObjPrivate qemuDomainObjPrivate;
 typedef qemuDomainObjPrivate *qemuDomainObjPrivatePtr;
 struct _qemuDomainObjPrivate {
     virCond jobCond; /* Use in conjunction with main virDomainObjPtr lock */
+    virCond signalCond; /* Use to coordinate the safe queries during migration */
     enum qemuDomainJob jobActive;   /* Currently running job */
     unsigned int jobSignals;        /* Signals for running job */
     struct qemuDomainJobSignalsData jobSignalsData; /* Signal specific data */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ccaae66..2bd4d0b 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -5205,15 +5205,6 @@ qemudDomainBlockStats (virDomainPtr dom,
         goto cleanup;
     }

-    if (qemuDomainObjBeginJob(vm) < 0)
-        goto cleanup;
-
-    if (!virDomainObjIsActive(vm)) {
-        qemuReportError(VIR_ERR_OPERATION_INVALID,
-                        "%s", _("domain is not running"));
-        goto endjob;
-    }
-
     for (i = 0 ; i < vm->def->ndisks ; i++) {
         if (STREQ(path, vm->def->disks[i]->dst)) {
             disk = vm->def->disks[i];
@@ -5224,29 +5215,57 @@ qemudDomainBlockStats (virDomainPtr dom,
     if (!disk) {
         qemuReportError(VIR_ERR_INVALID_ARG,
                         _("invalid path: %s"), path);
-        goto endjob;
+        goto cleanup;
     }

     if (!disk->info.alias) {
         qemuReportError(VIR_ERR_INTERNAL_ERROR,
                         _("missing disk device alias name for %s"), disk->dst);
-        goto endjob;
+        goto cleanup;
     }

     priv = vm->privateData;
-    qemuDomainObjEnterMonitor(vm);
-    ret = qemuMonitorGetBlockStatsInfo(priv->mon,
-                                       disk->info.alias,
-                                       &stats->rd_req,
-                                       &stats->rd_bytes,
-                                       &stats->wr_req,
-                                       &stats->wr_bytes,
-                                       &stats->errs);
-    qemuDomainObjExitMonitor(vm);
+    if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT)
+        || (priv->jobActive == QEMU_JOB_SAVE)) {
+        virDomainObjRef(vm);
+        while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT)
+            ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+        priv->jobSignalsData.statDevName = disk->info.alias;
+        priv->jobSignalsData.blockStat = stats;
+        priv->jobSignalsData.statRetCode = -1;
+        priv->jobSignals |= QEMU_JOB_SIGNAL_BLKSTAT;
+
+        while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT)
+            ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+        ret = priv->jobSignalsData.statRetCode;
+        if (virDomainObjUnref(vm) == 0)
+            vm = NULL;
+    } else {
+        if (qemuDomainObjBeginJob(vm) < 0)
+            goto cleanup;
+
+        if (!virDomainObjIsActive(vm)) {
+            qemuReportError(VIR_ERR_OPERATION_INVALID,
+                            "%s", _("domain is not running"));
+            goto endjob;
+        }
+
+        qemuDomainObjEnterMonitor(vm);
+        ret = qemuMonitorGetBlockStatsInfo(priv->mon,
+                                           disk->info.alias,
+                                           &stats->rd_req,
+                                           &stats->rd_bytes,
+                                           &stats->wr_req,
+                                           &stats->wr_bytes,
+                                           &stats->errs);
+        qemuDomainObjExitMonitor(vm);

 endjob:
-    if (qemuDomainObjEndJob(vm) == 0)
-        vm = NULL;
+        if (qemuDomainObjEndJob(vm) == 0)
+            vm = NULL;
+    }

 cleanup:
     if (vm)
@@ -5650,20 +5669,44 @@ static int qemuDomainGetBlockInfo(virDomainPtr dom,
         format != VIR_STORAGE_FILE_RAW &&
         S_ISBLK(sb.st_mode)) {
         qemuDomainObjPrivatePtr priv = vm->privateData;
-        if (qemuDomainObjBeginJob(vm) < 0)
-            goto cleanup;
-        if (!virDomainObjIsActive(vm))
-            ret = 0;
-        else {
+
+        if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT)
+            || (priv->jobActive == QEMU_JOB_SAVE)) {
+            virDomainObjRef(vm);
+            while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO)
+                ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+            priv->jobSignalsData.infoDevName = disk->info.alias;
+            priv->jobSignalsData.blockInfo = info;
+            priv->jobSignalsData.infoRetCode = -1;
+            priv->jobSignals |= QEMU_JOB_SIGNAL_BLKINFO;
+
+            while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO)
+                ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+            ret = priv->jobSignalsData.infoRetCode;
+            if (virDomainObjUnref(vm) == 0)
+                vm = NULL;
+        } else {
+            if (qemuDomainObjBeginJob(vm) < 0)
+                goto cleanup;
+
+            if (!virDomainObjIsActive(vm)) {
+                qemuReportError(VIR_ERR_OPERATION_INVALID,
+                                "%s", _("domain is not running"));
+                goto endjob;
+            }
+
             qemuDomainObjEnterMonitor(vm);
             ret = qemuMonitorGetBlockExtent(priv->mon,
                                             disk->info.alias,
                                             &info->allocation);
             qemuDomainObjExitMonitor(vm);
-        }

-        if (qemuDomainObjEndJob(vm) == 0)
-            vm = NULL;
+endjob:
+            if (qemuDomainObjEndJob(vm) == 0)
+                vm = NULL;
+        }
     } else {
         ret = 0;
     }
@@ -6543,8 +6586,8 @@ qemuDomainMigrateSetMaxDowntime(virDomainPtr dom,
     }

     VIR_DEBUG("Requesting migration downtime change to %llums", downtime);
-    priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME;
     priv->jobSignalsData.migrateDowntime = downtime;
+    priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME;
     ret = 0;

 cleanup:
@@ -6592,8 +6635,8 @@ qemuDomainMigrateSetMaxSpeed(virDomainPtr dom,
     }

     VIR_DEBUG("Requesting migration speed change to %luMbs", bandwidth);
-    priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_SPEED;
     priv->jobSignalsData.migrateBandwidth = bandwidth;
+    priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_SPEED;
     ret = 0;

 cleanup:
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index cb408ac..a091e2a 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -586,7 +586,8 @@ qemuMigrationSetOffline(struct qemud_driver *driver,
 static int
 qemuMigrationProcessJobSignals(struct qemud_driver *driver,
                                virDomainObjPtr vm,
-                               const char *job)
+                               const char *job,
+                               bool cleanup)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     int ret = -1;
@@ -594,6 +595,11 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver,
     if (!virDomainObjIsActive(vm)) {
         qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"),
                         job, _("guest unexpectedly quit"));
+        if (cleanup) {
+            priv->jobSignals = 0;
+            priv->jobSignalsData.statRetCode = -1;
+            priv->jobSignalsData.infoRetCode = -1;
+        }
         return -1;
     }

@@ -633,6 +639,34 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver,
         qemuDomainObjExitMonitorWithDriver(driver, vm);
         if (ret < 0)
             VIR_WARN("Unable to set migration speed");
+    } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT) {
+        qemuDomainObjEnterMonitorWithDriver(driver, vm);
+        ret = qemuMonitorGetBlockStatsInfo(priv->mon,
+                              priv->jobSignalsData.statDevName,
+                              &priv->jobSignalsData.blockStat->rd_req,
+                              &priv->jobSignalsData.blockStat->rd_bytes,
+                              &priv->jobSignalsData.blockStat->wr_req,
+                              &priv->jobSignalsData.blockStat->wr_bytes,
+                              &priv->jobSignalsData.blockStat->errs);
+        qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+        priv->jobSignalsData.statRetCode = ret;
+        priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKSTAT;
+
+        if (ret < 0)
+            VIR_WARN("Unable to get block statistics");
+    } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO) {
+        qemuDomainObjEnterMonitorWithDriver(driver, vm);
+        ret = qemuMonitorGetBlockExtent(priv->mon,
+                           priv->jobSignalsData.infoDevName,
+                           &priv->jobSignalsData.blockInfo->allocation);
+        qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+        priv->jobSignalsData.infoRetCode = ret;
+        priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKINFO;
+
+        if (ret < 0)
+            VIR_WARN("Unable to get block information");
     } else {
         ret = 0;
     }
@@ -726,30 +760,33 @@ int
 qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
+    const char *job;
+
+    switch (priv->jobActive) {
+    case QEMU_JOB_MIGRATION_OUT:
+        job = _("migration job");
+        break;
+    case QEMU_JOB_SAVE:
+        job = _("domain save job");
+        break;
+    case QEMU_JOB_DUMP:
+        job = _("domain core dump job");
+        break;
+    default:
+        job = _("job");
+    }

     priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED;

     while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) {
         /* Poll every 50ms for progress & to allow cancellation */
         struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
-        const char *job;
-
-        switch (priv->jobActive) {
-            case QEMU_JOB_MIGRATION_OUT:
-                job = _("migration job");
-                break;
-            case QEMU_JOB_SAVE:
-                job = _("domain save job");
-                break;
-            case QEMU_JOB_DUMP:
-                job = _("domain core dump job");
-                break;
-            default:
-                job = _("job");
+        while (priv->jobSignals) {
+            if (qemuMigrationProcessJobSignals(driver, vm, job, false) < 0)
+                goto cleanup;
         }

-        if (qemuMigrationProcessJobSignals(driver, vm, job) < 0)
-            goto cleanup;
+        virCondSignal(&priv->signalCond);

         if (qemuMigrationUpdateJobStatus(driver, vm, job) < 0)
             goto cleanup;
@@ -765,6 +802,11 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm)
     }

 cleanup:
+    while (priv->jobSignals) {
+        qemuMigrationProcessJobSignals(driver, vm, job, true);
+    }
+    virCondBroadcast(&priv->signalCond);
+
     if (priv->jobInfo.type == VIR_DOMAIN_JOB_COMPLETED)
         return 0;
     else
-- 
1.7.4.4
