[libvirt] [PATCH] qemu: fix crash when mixing sync and async monitor jobs

Eric Blake eblake at redhat.com
Thu Jul 28 23:47:09 UTC 2011


Currently, we attempt to run a sync job and an async job at the same time,
which means the monitor commands for the two jobs can be interleaved in any order.

In the function qemuDomainObjEnterMonitorInternal():
    if (priv->job.active == QEMU_JOB_NONE && priv->job.asyncJob) {
        if (qemuDomainObjBeginNestedJob(driver, obj) < 0)
We check whether the caller is an async job via priv->job.active and
priv->job.asyncJob. But when a sync job is running at the same time as
an async job at the moment of this check, priv->job.active is not
QEMU_JOB_NONE, so the test misidentifies the caller. Therefore
qemuDomainObjEnterMonitorInternal() cannot reliably detect an async
caller on its own; instead, the burden is on the caller to state
explicitly when an async command wants to do a nested job.
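
For illustration, a minimal sketch of the resulting caller convention for
code running inside an async job (here the save job; the monitor command in
the middle is only a placeholder, and driver/vm/endjob are as in the existing
callers):

    /* started earlier with qemuDomainObjBeginAsyncJob(...,
     * QEMU_ASYNC_JOB_SAVE); now entering the monitor as a nested job */
    if (qemuDomainObjEnterMonitorAsync(driver, vm, QEMU_ASYNC_JOB_SAVE) < 0)
        goto endjob;        /* domain died in the meantime */
    /* ...issue the monitor commands belonging to the async job... */
    qemuDomainObjExitMonitorWithDriver(driver, vm);

Plain sync jobs keep calling qemuDomainObjEnterMonitor[WithDriver](), which
now always passes QEMU_ASYNC_JOB_NONE internally.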

* src/qemu/THREADS.txt: Reflect new rules.
* src/qemu/qemu_domain.h (qemuDomainObjEnterMonitorAsync): New
prototype.
* src/qemu/qemu_process.h (qemuProcessStartCPUs)
(qemuProcessStopCPUs): Add parameter.
* src/qemu/qemu_migration.h (qemuMigrationToFile): Likewise.
(qemuMigrationWaitForCompletion): Make static.
* src/qemu/qemu_domain.c (qemuDomainObjEnterMonitorInternal): Add
parameter.
(qemuDomainObjEnterMonitorAsync): New function.
(qemuDomainObjEnterMonitor, qemuDomainObjEnterMonitorWithDriver):
Update callers.
* src/qemu/qemu_driver.c (qemuDomainSaveInternal)
(qemudDomainCoreDump, doCoreDump, processWatchdogEvent)
(qemudDomainSuspend, qemudDomainResume, qemuDomainSaveImageStartVM)
(qemuDomainSnapshotCreateActive, qemuDomainRevertToSnapshot):
Likewise.
* src/qemu/qemu_process.c (qemuProcessStopCPUs)
(qemuProcessFakeReboot, qemuProcessRecoverMigration)
(qemuProcessRecoverJob, qemuProcessStart): Likewise.
* src/qemu/qemu_migration.c (qemuMigrationToFile)
(qemuMigrationWaitForCompletion, qemuMigrationUpdateJobStatus)
(qemuMigrationJobStart, qemuDomainMigrateGraphicsRelocate)
(doNativeMigrate, doTunnelMigrate, qemuMigrationPerformJob)
(qemuMigrationPerformPhase, qemuMigrationFinish)
(qemuMigrationConfirm): Likewise.
---

My initial smoke testing shows that this fixes 'virsh managedsave', but I
still have more testing to do before I'm convinced I got everything (for
example, I still need to test migration).

 src/qemu/THREADS.txt      |    8 ++++--
 src/qemu/qemu_domain.c    |   43 +++++++++++++++++++++++++++-------
 src/qemu/qemu_domain.h    |    4 +++
 src/qemu/qemu_driver.c    |   39 +++++++++++++++++++++----------
 src/qemu/qemu_migration.c |   55 ++++++++++++++++++++++++++++----------------
 src/qemu/qemu_migration.h |    5 +--
 src/qemu/qemu_process.c   |   32 ++++++++++++++++++--------
 src/qemu/qemu_process.h   |    7 ++++-
 8 files changed, 133 insertions(+), 60 deletions(-)

diff --git a/src/qemu/THREADS.txt b/src/qemu/THREADS.txt
index e73076c..125bd5d 100644
--- a/src/qemu/THREADS.txt
+++ b/src/qemu/THREADS.txt
@@ -374,7 +374,7 @@ Design patterns
      qemuDriverUnlock(driver);


- * Running asynchronous job
+ * Running asynchronous job with driver lock held

      virDomainObjPtr obj;
      qemuDomainObjPrivatePtr priv;
@@ -387,7 +387,8 @@ Design patterns

      ...do prep work...

-     if (qemuDomainObjEnterMonitorWithDriver(driver, obj) < 0) {
+     if (qemuDomainObjEnterMonitorAsync(driver, obj,
+                                        QEMU_ASYNC_JOB_TYPE) < 0) {
          /* domain died in the meantime */
          goto error;
      }
@@ -395,7 +396,8 @@ Design patterns
      qemuDomainObjExitMonitorWithDriver(driver, obj);

      while (!finished) {
-         if (qemuDomainObjEnterMonitorWithDriver(driver, obj) < 0) {
+         if (qemuDomainObjEnterMonitorAsync(driver, obj,
+                                            QEMU_ASYNC_JOB_TYPE) < 0) {
              /* domain died in the meantime */
              goto error;
          }
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2eaaf3a..4cf6888 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -863,14 +863,20 @@ qemuDomainObjEndAsyncJob(struct qemud_driver *driver, virDomainObjPtr obj)
     return virDomainObjUnref(obj);
 }

-static int ATTRIBUTE_NONNULL(1)
+static int
 qemuDomainObjEnterMonitorInternal(struct qemud_driver *driver,
                                   bool driver_locked,
-                                  virDomainObjPtr obj)
+                                  virDomainObjPtr obj,
+                                  enum qemuDomainAsyncJob asyncJob)
 {
     qemuDomainObjPrivatePtr priv = obj->privateData;

-    if (priv->job.active == QEMU_JOB_NONE && priv->job.asyncJob) {
+    if (asyncJob != QEMU_ASYNC_JOB_NONE) {
+        if (asyncJob != priv->job.asyncJob) {
+            qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                            _("unepxected async job %d"), asyncJob);
+            return -1;
+        }
         if (qemuDomainObjBeginJobInternal(driver, driver_locked, obj,
                                           QEMU_JOB_ASYNC_NESTED,
                                           QEMU_ASYNC_JOB_NONE) < 0)
@@ -930,15 +936,15 @@ qemuDomainObjExitMonitorInternal(struct qemud_driver *driver,
  *
  * To be called immediately before any QEMU monitor API call
  * Must have already either called qemuDomainObjBeginJob() and checked
- * that the VM is still active or called qemuDomainObjBeginAsyncJob, in which
- * case this will start a nested job.
+ * that the VM is still active; may not be used for nested async jobs.
  *
  * To be followed with qemuDomainObjExitMonitor() once complete
  */
 int qemuDomainObjEnterMonitor(struct qemud_driver *driver,
                               virDomainObjPtr obj)
 {
-    return qemuDomainObjEnterMonitorInternal(driver, false, obj);
+    return qemuDomainObjEnterMonitorInternal(driver, false, obj,
+                                             QEMU_ASYNC_JOB_NONE);
 }

 /* obj must NOT be locked before calling, qemud_driver must be unlocked
@@ -956,15 +962,34 @@ void qemuDomainObjExitMonitor(struct qemud_driver *driver,
  *
  * To be called immediately before any QEMU monitor API call
  * Must have already either called qemuDomainObjBeginJobWithDriver() and
- * checked that the VM is still active or called qemuDomainObjBeginAsyncJob,
- * in which case this will start a nested job.
+ * checked that the VM is still active; may not be used for nested async jobs.
  *
  * To be followed with qemuDomainObjExitMonitorWithDriver() once complete
  */
 int qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver,
                                         virDomainObjPtr obj)
 {
-    return qemuDomainObjEnterMonitorInternal(driver, true, obj);
+    return qemuDomainObjEnterMonitorInternal(driver, true, obj,
+                                             QEMU_ASYNC_JOB_NONE);
+}
+
+/*
+ * obj and qemud_driver must be locked before calling
+ *
+ * To be called immediately before any QEMU monitor API call.
+ * Must have already either called qemuDomainObjBeginJob[WithDriver]()
+ * and checked that the VM is still active, with asyncJob of
+ * QEMU_ASYNC_JOB_NONE; or already called qemuDomainObjBeginAsyncJob,
+ * with the same asyncJob.
+ *
+ * To be followed with qemuDomainObjExitMonitorWithDriver() once complete
+ */
+int
+qemuDomainObjEnterMonitorAsync(struct qemud_driver *driver,
+                               virDomainObjPtr obj,
+                               enum qemuDomainAsyncJob asyncJob)
+{
+    return qemuDomainObjEnterMonitorInternal(driver, true, obj, asyncJob);
 }

 /* obj must NOT be locked before calling, qemud_driver must be unlocked,
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 8bff8b0..41e1c72 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -176,6 +176,10 @@ void qemuDomainObjExitMonitor(struct qemud_driver *driver,
 int qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver,
                                         virDomainObjPtr obj)
     ATTRIBUTE_RETURN_CHECK;
+int qemuDomainObjEnterMonitorAsync(struct qemud_driver *driver,
+                                   virDomainObjPtr obj,
+                                   enum qemuDomainAsyncJob asyncJob)
+    ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
 void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver,
                                         virDomainObjPtr obj);
 void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver,
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index b673fd5..dcbedb9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1375,7 +1375,7 @@ static int qemudDomainSuspend(virDomainPtr dom) {
         goto endjob;
     }
     if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) {
-        if (qemuProcessStopCPUs(driver, vm, reason) < 0) {
+        if (qemuProcessStopCPUs(driver, vm, reason, QEMU_ASYNC_JOB_NONE) < 0) {
             goto endjob;
         }
         event = virDomainEventNewFromObj(vm,
@@ -1428,7 +1428,8 @@ static int qemudDomainResume(virDomainPtr dom) {
     }
     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
         if (qemuProcessStartCPUs(driver, vm, dom->conn,
-                                 VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                 VIR_DOMAIN_RUNNING_UNPAUSED,
+                                 QEMU_ASYNC_JOB_NONE) < 0) {
             if (virGetLastError() == NULL)
                 qemuReportError(VIR_ERR_OPERATION_FAILED,
                                 "%s", _("resume operation failed"));
@@ -2232,7 +2233,8 @@ qemuDomainSaveInternal(struct qemud_driver *driver, virDomainPtr dom,
     /* Pause */
     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
         header.was_running = 1;
-        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE) < 0)
+        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE,
+                                QEMU_ASYNC_JOB_SAVE) < 0)
             goto endjob;

         if (!virDomainObjIsActive(vm)) {
@@ -2404,7 +2406,8 @@ qemuDomainSaveInternal(struct qemud_driver *driver, virDomainPtr dom,
     /* Perform the migration */
     if (qemuMigrationToFile(driver, vm, fd, offset, path,
                             qemuCompressProgramName(compressed),
-                            is_reg, bypassSecurityDriver) < 0)
+                            is_reg, bypassSecurityDriver,
+                            QEMU_ASYNC_JOB_SAVE) < 0)
         goto endjob;
     if (VIR_CLOSE(fd) < 0) {
         virReportSystemError(errno, _("unable to close %s"), path);
@@ -2433,7 +2436,8 @@ endjob:
         if (ret != 0) {
             if (header.was_running && virDomainObjIsActive(vm)) {
                 rc = qemuProcessStartCPUs(driver, vm, dom->conn,
-                                          VIR_DOMAIN_RUNNING_SAVE_CANCELED);
+                                          VIR_DOMAIN_RUNNING_SAVE_CANCELED,
+                                          QEMU_ASYNC_JOB_SAVE);
                 if (rc < 0)
                     VIR_WARN("Unable to resume guest CPUs after save failure");
             }
@@ -2696,7 +2700,8 @@ doCoreDump(struct qemud_driver *driver,
         goto cleanup;

     if (qemuMigrationToFile(driver, vm, fd, 0, path,
-                            qemuCompressProgramName(compress), true, false) < 0)
+                            qemuCompressProgramName(compress), true, false,
+                            QEMU_ASYNC_JOB_DUMP) < 0)
         goto cleanup;

     if (VIR_CLOSE(fd) < 0) {
@@ -2787,7 +2792,8 @@ static int qemudDomainCoreDump(virDomainPtr dom,
     /* Pause domain for non-live dump */
     if (!(flags & VIR_DUMP_LIVE) &&
         virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
-        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_DUMP) < 0)
+        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_DUMP,
+                                QEMU_ASYNC_JOB_DUMP) < 0)
             goto endjob;
         paused = 1;

@@ -2819,7 +2825,8 @@ endjob:
        the migration is complete.  */
     else if (resume && paused && virDomainObjIsActive(vm)) {
         if (qemuProcessStartCPUs(driver, vm, dom->conn,
-                                 VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                 VIR_DOMAIN_RUNNING_UNPAUSED,
+                                 QEMU_ASYNC_JOB_DUMP) < 0) {
             if (virGetLastError() == NULL)
                 qemuReportError(VIR_ERR_OPERATION_FAILED,
                                 "%s", _("resuming after dump failed"));
@@ -2978,7 +2985,8 @@ static void processWatchdogEvent(void *data, void *opaque)
                                 "%s", _("Dump failed"));

             ret = qemuProcessStartCPUs(driver, wdEvent->vm, NULL,
-                                       VIR_DOMAIN_RUNNING_UNPAUSED);
+                                       VIR_DOMAIN_RUNNING_UNPAUSED,
+                                       QEMU_ASYNC_JOB_DUMP);

             if (ret < 0)
                 qemuReportError(VIR_ERR_OPERATION_FAILED,
@@ -3934,7 +3942,8 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
     /* If it was running before, resume it now. */
     if (header->was_running) {
         if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_RESTORED) < 0) {
+                                 VIR_DOMAIN_RUNNING_RESTORED,
+                                 QEMU_ASYNC_JOB_NONE) < 0) {
             if (virGetLastError() == NULL)
                 qemuReportError(VIR_ERR_OPERATION_FAILED,
                                 "%s", _("failed to resume domain"));
@@ -8395,7 +8404,8 @@ qemuDomainSnapshotCreateActive(virConnectPtr conn,
          * confuses libvirt since it's not notified when qemu resumes the
          * domain. Thus we stop and start CPUs ourselves.
          */
-        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE) < 0)
+        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE,
+                                QEMU_ASYNC_JOB_NONE) < 0)
             goto cleanup;

         resume = true;
@@ -8413,7 +8423,8 @@ qemuDomainSnapshotCreateActive(virConnectPtr conn,
 cleanup:
     if (resume && virDomainObjIsActive(vm) &&
         qemuProcessStartCPUs(driver, vm, conn,
-                             VIR_DOMAIN_RUNNING_UNPAUSED) < 0 &&
+                             VIR_DOMAIN_RUNNING_UNPAUSED,
+                             QEMU_ASYNC_JOB_NONE) < 0 &&
         virGetLastError() == NULL) {
         qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                         _("resuming after snapshot failed"));
@@ -8762,9 +8773,11 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
         if (snap->def->state == VIR_DOMAIN_PAUSED) {
             /* qemu unconditionally starts the domain running again after
              * loadvm, so let's pause it to keep consistency
+             * XXX we should have used qemuProcessStart's start_paused instead
              */
             rc = qemuProcessStopCPUs(driver, vm,
-                                     VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
+                                     VIR_DOMAIN_PAUSED_FROM_SNAPSHOT,
+                                     QEMU_ASYNC_JOB_NONE);
             if (rc < 0)
                 goto endjob;
         } else {
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 2843189..8354360 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -727,7 +727,8 @@ qemuMigrationSetOffline(struct qemud_driver *driver,
 {
     int ret;
     VIR_DEBUG("driver=%p vm=%p", driver, vm);
-    ret = qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_MIGRATION);
+    ret = qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_MIGRATION,
+                              QEMU_ASYNC_JOB_MIGRATION_OUT);
     if (ret == 0) {
         virDomainEventPtr event;

@@ -745,7 +746,8 @@ qemuMigrationSetOffline(struct qemud_driver *driver,
 static int
 qemuMigrationUpdateJobStatus(struct qemud_driver *driver,
                              virDomainObjPtr vm,
-                             const char *job)
+                             const char *job,
+                             enum qemuDomainAsyncJob asyncJob)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     int ret = -1;
@@ -760,7 +762,7 @@ qemuMigrationUpdateJobStatus(struct qemud_driver *driver,
         return -1;
     }

-    ret = qemuDomainObjEnterMonitorWithDriver(driver, vm);
+    ret = qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob);
     if (ret == 0) {
         ret = qemuMonitorGetMigrationStatus(priv->mon,
                                             &status,
@@ -817,8 +819,9 @@ qemuMigrationUpdateJobStatus(struct qemud_driver *driver,
 }


-int
-qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm)
+static int
+qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm,
+                               enum qemuDomainAsyncJob asyncJob)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     const char *job;
@@ -843,7 +846,7 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm)
         /* Poll every 50ms for progress & to allow cancellation */
         struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };

-        if (qemuMigrationUpdateJobStatus(driver, vm, job) < 0)
+        if (qemuMigrationUpdateJobStatus(driver, vm, job, asyncJob) < 0)
             goto cleanup;

         virDomainObjUnlock(vm);
@@ -883,7 +886,8 @@ qemuDomainMigrateGraphicsRelocate(struct qemud_driver *driver,
     if (cookie->graphics->type != VIR_DOMAIN_GRAPHICS_TYPE_SPICE)
         return 0;

-    ret = qemuDomainObjEnterMonitorWithDriver(driver, vm);
+    ret = qemuDomainObjEnterMonitorAsync(driver, vm,
+                                         QEMU_ASYNC_JOB_MIGRATION_OUT);
     if (ret == 0) {
         ret = qemuMonitorGraphicsRelocate(priv->mon,
                                           cookie->graphics->type,
@@ -1330,7 +1334,8 @@ static int doNativeMigrate(struct qemud_driver *driver,
             goto cleanup;
     }

-    if (qemuDomainObjEnterMonitorWithDriver(driver, vm) < 0)
+    if (qemuDomainObjEnterMonitorAsync(driver, vm,
+                                       QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;

     if (resource > 0 &&
@@ -1352,7 +1357,8 @@ static int doNativeMigrate(struct qemud_driver *driver,
     }
     qemuDomainObjExitMonitorWithDriver(driver, vm);

-    if (qemuMigrationWaitForCompletion(driver, vm) < 0)
+    if (qemuMigrationWaitForCompletion(driver, vm,
+                                       QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;

     /* When migration completed, QEMU will have paused the
@@ -1591,7 +1597,8 @@ static int doTunnelMigrate(struct qemud_driver *driver,
             goto cleanup;
     }

-    if (qemuDomainObjEnterMonitorWithDriver(driver, vm) < 0)
+    if (qemuDomainObjEnterMonitorAsync(driver, vm,
+                                       QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;

     if (resource > 0 &&
@@ -1634,7 +1641,8 @@ static int doTunnelMigrate(struct qemud_driver *driver,
     /* it is also possible that the migrate didn't fail initially, but
      * rather failed later on.  Check the output of "info migrate"
      */
-    if (qemuDomainObjEnterMonitorWithDriver(driver, vm) < 0)
+    if (qemuDomainObjEnterMonitorAsync(driver, vm,
+                                       QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cancel;
     if (qemuMonitorGetMigrationStatus(priv->mon,
                                       &status,
@@ -1664,7 +1672,8 @@ static int doTunnelMigrate(struct qemud_driver *driver,
     if (!(iothread = qemuMigrationStartTunnel(st, client_sock)))
         goto cancel;

-    ret = qemuMigrationWaitForCompletion(driver, vm);
+    ret = qemuMigrationWaitForCompletion(driver, vm,
+                                         QEMU_ASYNC_JOB_MIGRATION_OUT);

     /* When migration completed, QEMU will have paused the
      * CPUs for us, but unless we're using the JSON monitor
@@ -1693,7 +1702,8 @@ cancel:
     if (ret != 0 && virDomainObjIsActive(vm)) {
         VIR_FORCE_CLOSE(client_sock);
         VIR_FORCE_CLOSE(qemu_sock);
-        if (qemuDomainObjEnterMonitorWithDriver(driver, vm) == 0) {
+        if (qemuDomainObjEnterMonitorAsync(driver, vm,
+                                           QEMU_ASYNC_JOB_MIGRATION_OUT) == 0) {
             qemuMonitorMigrateCancel(priv->mon);
             qemuDomainObjExitMonitorWithDriver(driver, vm);
         }
@@ -2201,7 +2211,8 @@ endjob:
     if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
         /* we got here through some sort of failure; start the domain again */
         if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED) < 0) {
+                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
             /* Hm, we already know we are in error here.  We don't want to
              * overwrite the previous error, though, so we just throw something
              * to the logs and hope for the best
@@ -2274,7 +2285,8 @@ qemuMigrationPerformPhase(struct qemud_driver *driver,
         virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
         /* we got here through some sort of failure; start the domain again */
         if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED) < 0) {
+                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
             /* Hm, we already know we are in error here.  We don't want to
              * overwrite the previous error, though, so we just throw something
              * to the logs and hope for the best
@@ -2500,7 +2512,8 @@ qemuMigrationFinish(struct qemud_driver *driver,
              * older qemu's, but it also doesn't hurt anything there
              */
             if (qemuProcessStartCPUs(driver, vm, dconn,
-                                     VIR_DOMAIN_RUNNING_MIGRATED) < 0) {
+                                     VIR_DOMAIN_RUNNING_MIGRATED,
+                                     QEMU_ASYNC_JOB_MIGRATION_IN) < 0) {
                 if (virGetLastError() == NULL)
                     qemuReportError(VIR_ERR_INTERNAL_ERROR,
                                     "%s", _("resume operation failed"));
@@ -2626,7 +2639,8 @@ int qemuMigrationConfirm(struct qemud_driver *driver,
          * older qemu's, but it also doesn't hurt anything there
          */
         if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATED) < 0) {
+                                 VIR_DOMAIN_RUNNING_MIGRATED,
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
             if (virGetLastError() == NULL)
                 qemuReportError(VIR_ERR_INTERNAL_ERROR,
                                 "%s", _("resume operation failed"));
@@ -2657,7 +2671,8 @@ int
 qemuMigrationToFile(struct qemud_driver *driver, virDomainObjPtr vm,
                     int fd, off_t offset, const char *path,
                     const char *compressor,
-                    bool is_reg, bool bypassSecurityDriver)
+                    bool is_reg, bool bypassSecurityDriver,
+                    enum qemuDomainAsyncJob asyncJob)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     virCgroupPtr cgroup = NULL;
@@ -2709,7 +2724,7 @@ qemuMigrationToFile(struct qemud_driver *driver, virDomainObjPtr vm,
         restoreLabel = true;
     }

-    if (qemuDomainObjEnterMonitorWithDriver(driver, vm) < 0)
+    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
         goto cleanup;

     if (!compressor) {
@@ -2763,7 +2778,7 @@ qemuMigrationToFile(struct qemud_driver *driver, virDomainObjPtr vm,
     if (rc < 0)
         goto cleanup;

-    rc = qemuMigrationWaitForCompletion(driver, vm);
+    rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob);

     if (rc < 0)
         goto cleanup;
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index 9e88271..5c6921d 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -78,8 +78,6 @@ bool qemuMigrationIsAllowed(virDomainDefPtr def)
 int qemuMigrationSetOffline(struct qemud_driver *driver,
                             virDomainObjPtr vm);

-int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm);
-
 char *qemuMigrationBegin(struct qemud_driver *driver,
                          virDomainObjPtr vm,
                          const char *xmlin,
@@ -145,7 +143,8 @@ int qemuMigrationConfirm(struct qemud_driver *driver,
 int qemuMigrationToFile(struct qemud_driver *driver, virDomainObjPtr vm,
                         int fd, off_t offset, const char *path,
                         const char *compressor,
-                        bool is_reg, bool bypassSecurityDriver)
+                        bool is_reg, bool bypassSecurityDriver,
+                        enum qemuDomainAsyncJob asyncJob)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(5)
     ATTRIBUTE_RETURN_CHECK;

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index d160642..7613749 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -402,7 +402,8 @@ qemuProcessFakeReboot(void *opaque)
     }

     if (qemuProcessStartCPUs(driver, vm, NULL,
-                             VIR_DOMAIN_RUNNING_BOOTED) < 0) {
+                             VIR_DOMAIN_RUNNING_BOOTED,
+                             QEMU_ASYNC_JOB_NONE) < 0) {
         if (virGetLastError() == NULL)
             qemuReportError(VIR_ERR_INTERNAL_ERROR,
                             "%s", _("resume operation failed"));
@@ -2148,7 +2149,8 @@ qemuProcessPrepareMonitorChr(struct qemud_driver *driver,
  */
 int
 qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm,
-                     virConnectPtr conn, virDomainRunningReason reason)
+                     virConnectPtr conn, virDomainRunningReason reason,
+                     enum qemuDomainAsyncJob asyncJob)
 {
     int ret;
     qemuDomainObjPrivatePtr priv = vm->privateData;
@@ -2163,7 +2165,7 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm,
     }
     VIR_FREE(priv->lockState);

-    ignore_value(qemuDomainObjEnterMonitorWithDriver(driver, vm));
+    ignore_value(qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob));
     ret = qemuMonitorStartCPUs(priv->mon, conn);
     qemuDomainObjExitMonitorWithDriver(driver, vm);

@@ -2180,7 +2182,8 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm,


 int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm,
-                        virDomainPausedReason reason)
+                        virDomainPausedReason reason,
+                        enum qemuDomainAsyncJob asyncJob)
 {
     int ret;
     int oldState;
@@ -2191,7 +2194,7 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm,
     oldState = virDomainObjGetState(vm, &oldReason);
     virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);

-    ret = qemuDomainObjEnterMonitorWithDriver(driver, vm);
+    ret = qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob);
     if (ret == 0) {
         ret = qemuMonitorStopCPUs(priv->mon);
         qemuDomainObjExitMonitorWithDriver(driver, vm);
@@ -2315,7 +2318,8 @@ qemuProcessRecoverMigration(struct qemud_driver *driver,
             VIR_DEBUG("Incoming migration finished, resuming domain %s",
                       vm->def->name);
             if (qemuProcessStartCPUs(driver, vm, conn,
-                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                     VIR_DOMAIN_RUNNING_UNPAUSED,
+                                     QEMU_ASYNC_JOB_NONE) < 0) {
                 VIR_WARN("Could not resume domain %s", vm->def->name);
             }
             break;
@@ -2355,7 +2359,8 @@ qemuProcessRecoverMigration(struct qemud_driver *driver,
                 (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
                  reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
                 if (qemuProcessStartCPUs(driver, vm, conn,
-                                         VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                         VIR_DOMAIN_RUNNING_UNPAUSED,
+                                         QEMU_ASYNC_JOB_NONE) < 0) {
                     VIR_WARN("Could not resume domain %s", vm->def->name);
                 }
             }
@@ -2375,7 +2380,8 @@ qemuProcessRecoverMigration(struct qemud_driver *driver,
                 (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
                  reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
                 if (qemuProcessStartCPUs(driver, vm, conn,
-                                         VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                         VIR_DOMAIN_RUNNING_UNPAUSED,
+                                         QEMU_ASYNC_JOB_NONE) < 0) {
                     VIR_WARN("Could not resume domain %s", vm->def->name);
                 }
             }
@@ -2424,7 +2430,8 @@ qemuProcessRecoverJob(struct qemud_driver *driver,
               reason == VIR_DOMAIN_PAUSED_SAVE) ||
              reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
             if (qemuProcessStartCPUs(driver, vm, conn,
-                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                                     VIR_DOMAIN_RUNNING_UNPAUSED,
+                                     job->asyncJob) < 0) {
                 VIR_WARN("Could not resume domain %s after", vm->def->name);
             }
         }
@@ -2974,6 +2981,10 @@ int qemuProcessStart(virConnectPtr conn,
             goto cleanup;
     }

+    /* Technically, qemuProcessStart can be called from inside
+     * QEMU_ASYNC_JOB_MIGRATION_IN, but we are okay treating this like
+     * a sync job since no other job can call into the domain until
+     * migration completes.  */
     VIR_DEBUG("Setting initial memory amount");
     cur_balloon = vm->def->mem.cur_balloon;
     ignore_value(qemuDomainObjEnterMonitorWithDriver(driver, vm));
@@ -2987,7 +2998,8 @@ int qemuProcessStart(virConnectPtr conn,
         VIR_DEBUG("Starting domain CPUs");
         /* Allow the CPUS to start executing */
         if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_BOOTED) < 0) {
+                                 VIR_DOMAIN_RUNNING_BOOTED,
+                                 QEMU_ASYNC_JOB_NONE) < 0) {
             if (virGetLastError() == NULL)
                 qemuReportError(VIR_ERR_INTERNAL_ERROR,
                                 "%s", _("resume operation failed"));
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index 449d7f1..e9b910d 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -23,6 +23,7 @@
 # define __QEMU_PROCESS_H__

 # include "qemu_conf.h"
+# include "qemu_domain.h"

 int qemuProcessPrepareMonitorChr(struct qemud_driver *driver,
                                  virDomainChrSourceDefPtr monConfig,
@@ -31,10 +32,12 @@ int qemuProcessPrepareMonitorChr(struct qemud_driver *driver,
 int qemuProcessStartCPUs(struct qemud_driver *driver,
                          virDomainObjPtr vm,
                          virConnectPtr conn,
-                         virDomainRunningReason reason);
+                         virDomainRunningReason reason,
+                         enum qemuDomainAsyncJob asyncJob);
 int qemuProcessStopCPUs(struct qemud_driver *driver,
                         virDomainObjPtr vm,
-                        virDomainPausedReason reason);
+                        virDomainPausedReason reason,
+                        enum qemuDomainAsyncJob asyncJob);

 void qemuProcessAutostartAll(struct qemud_driver *driver);
 void qemuProcessReconnectAll(virConnectPtr conn, struct qemud_driver *driver);
-- 
1.7.4.4