[libvirt] [PATCH 8/9] qemu: Avoid calling qemuProcessStop without a job

Jiri Denemark jdenemar at redhat.com
Tue Feb 16 14:37:00 UTC 2016


Calling qemuProcessStop without a job opens a way to race conditions
with qemuDomainObjExitMonitor called in another thread. A real world
example of such a race condition:

  - migration thread (A) calls qemuMigrationWaitForSpice
  - another thread (B) starts processing qemuDomainAbortJob API
  - thread B signals thread A via qemuDomainObjAbortAsyncJob
  - thread B enters monitor (qemuDomainObjEnterMonitor)
  - thread B calls qemuMonitorSend
  - thread A awakens and calls qemuProcessStop
  - thread A calls qemuMonitorClose and sets priv->mon to NULL
  - thread B calls qemuDomainObjExitMonitor with priv->mon == NULL
  => monitor stays ref'ed and locked

Depending on how lucky we are, the race may result in a memory leak or
it can even deadlock libvirtd's event loop if it tries to lock the
monitor to process an event received before qemuMonitorClose was called.

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---
 src/qemu/qemu_driver.c    | 28 +++++++++++-------
 src/qemu/qemu_migration.c |  6 +++-
 src/qemu/qemu_process.c   | 72 +++++++++++++++++++++++++++++++----------------
 src/qemu/qemu_process.h   |  1 +
 4 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index dc448fb..3bccb55 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2287,7 +2287,8 @@ qemuDomainDestroyFlags(virDomainPtr dom,
     if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
 
-    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED, stopFlags);
+    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED,
+                    QEMU_ASYNC_JOB_NONE, stopFlags);
     event = virDomainEventLifecycleNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -3297,7 +3298,8 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
         goto endjob;
 
     /* Shut it down */
-    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED, 0);
+    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED,
+                    QEMU_ASYNC_JOB_SAVE, 0);
     virDomainAuditStop(vm, "saved");
     event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
                                               VIR_DOMAIN_EVENT_STOPPED_SAVED);
@@ -3790,7 +3792,8 @@ qemuDomainCoreDumpWithFormat(virDomainPtr dom,
 
  endjob:
     if ((ret == 0) && (flags & VIR_DUMP_CRASH)) {
-        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0);
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED,
+                        QEMU_ASYNC_JOB_DUMP, 0);
         virDomainAuditStop(vm, "crashed");
         event = virDomainEventLifecycleNewFromObj(vm,
                                          VIR_DOMAIN_EVENT_STOPPED,
@@ -4070,7 +4073,8 @@ processGuestPanicEvent(virQEMUDriverPtr driver,
         /* fall through */
 
     case VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY:
-        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0);
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED,
+                        QEMU_ASYNC_JOB_DUMP, 0);
         event = virDomainEventLifecycleNewFromObj(vm,
                                          VIR_DOMAIN_EVENT_STOPPED,
                                          VIR_DOMAIN_EVENT_STOPPED_CRASHED);
@@ -4599,7 +4603,7 @@ processMonitorEOFEvent(virQEMUDriverPtr driver,
 
     event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
                                               eventReason);
-    qemuProcessStop(driver, vm, stopReason, stopFlags);
+    qemuProcessStop(driver, vm, stopReason, QEMU_ASYNC_JOB_NONE, stopFlags);
     virDomainAuditStop(vm, auditReason);
     qemuDomainEventQueue(driver, event);
 
@@ -6609,7 +6613,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
         }
 
         if (virCommandWait(cmd, NULL) < 0) {
-            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, 0);
+            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
             restored = false;
         }
         VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
@@ -13626,7 +13630,8 @@ qemuDomainSnapshotCreateActiveInternal(virConnectPtr conn,
     if (flags & VIR_DOMAIN_SNAPSHOT_CREATE_HALT) {
         event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
                                          VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT);
-        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+                        QEMU_ASYNC_JOB_SNAPSHOT, 0);
         virDomainAuditStop(vm, "from-snapshot");
         resume = false;
     }
@@ -14468,7 +14473,8 @@ qemuDomainSnapshotCreateActiveExternal(virConnectPtr conn,
     if (flags & VIR_DOMAIN_SNAPSHOT_CREATE_HALT) {
         event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
                                          VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT);
-        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+                        QEMU_ASYNC_JOB_SNAPSHOT, 0);
         virDomainAuditStop(vm, "from-snapshot");
         resume = false;
         thaw = 0;
@@ -15328,7 +15334,8 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
                 }
                 virResetError(err);
                 qemuProcessStop(driver, vm,
-                                VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+                                VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+                                QEMU_ASYNC_JOB_START, 0);
                 virDomainAuditStop(vm, "from-snapshot");
                 detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT;
                 event = virDomainEventLifecycleNewFromObj(vm,
@@ -15448,7 +15455,8 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
 
         if (virDomainObjIsActive(vm)) {
             /* Transitions 4, 7 */
-            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+            qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+                            QEMU_ASYNC_JOB_START, 0);
             virDomainAuditStop(vm, "from-snapshot");
             detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT;
             event = virDomainEventLifecycleNewFromObj(vm,
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 72d7144..21804f6 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3594,7 +3594,8 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
         if (!relabel)
             stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
         virDomainAuditStart(vm, "migrated", false);
-        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
+                        QEMU_ASYNC_JOB_MIGRATION_IN, stopFlags);
     }
 
     qemuMigrationJobFinish(driver, vm);
@@ -3903,6 +3904,7 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
         qemuMigrationWaitForSpice(vm);
 
         qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_MIGRATED,
+                        QEMU_ASYNC_JOB_MIGRATION_OUT,
                         VIR_QEMU_PROCESS_STOP_MIGRATED);
         virDomainAuditStop(vm, "migrated");
 
@@ -5414,6 +5416,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
      */
     if (!v3proto) {
         qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_MIGRATED,
+                        QEMU_ASYNC_JOB_MIGRATION_OUT,
                         VIR_QEMU_PROCESS_STOP_MIGRATED);
         virDomainAuditStop(vm, "migrated");
         event = virDomainEventLifecycleNewFromObj(vm,
@@ -5896,6 +5899,7 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
         !(flags & VIR_MIGRATE_OFFLINE) &&
         virDomainObjIsActive(vm)) {
         qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
+                        QEMU_ASYNC_JOB_MIGRATION_IN,
                         VIR_QEMU_PROCESS_STOP_MIGRATED);
         virDomainAuditStop(vm, "failed");
         event = virDomainEventLifecycleNewFromObj(vm,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 19ecf59..9d6afc4 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3526,7 +3526,11 @@ qemuProcessReconnect(void *opaque)
              * really is and FAILED means "failed to start" */
             state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
         }
-        qemuProcessStop(driver, obj, state, stopFlags);
+        /* If BeginJob failed, we jumped here without a job, let's hope another
+         * thread didn't have a chance to start playing with the domain yet
+         * (it's all we can do anyway).
+         */
+        qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
     }
     goto cleanup;
 }
@@ -3564,8 +3568,13 @@ qemuProcessReconnectHelper(virDomainObjPtr obj,
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                        _("Could not create thread. QEMU initialization "
                          "might be incomplete"));
-       /* We can't spawn a thread and thus connect to monitor. Kill qemu. */
-        qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED, 0);
+        /* We can't spawn a thread and thus connect to monitor. Kill qemu.
+         * It's safe to call qemuProcessStop without a job here since there
+         * is no thread that could be doing anything else with the same domain
+         * object.
+         */
+        qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
+                        QEMU_ASYNC_JOB_NONE, 0);
         qemuDomainRemoveInactive(src->driver, obj);
 
         virDomainObjEndAPI(&obj);
@@ -4308,7 +4317,7 @@ qemuProcessStartValidate(virDomainDefPtr def,
 int
 qemuProcessInit(virQEMUDriverPtr driver,
                 virDomainObjPtr vm,
-                qemuDomainAsyncJob asyncJob ATTRIBUTE_UNUSED,
+                qemuDomainAsyncJob asyncJob,
                 bool migration,
                 bool snap)
 {
@@ -4381,7 +4390,7 @@ qemuProcessInit(virQEMUDriverPtr driver,
     stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
     if (migration)
         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
-    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
     goto cleanup;
 }
 
@@ -5298,7 +5307,7 @@ qemuProcessStart(virConnectPtr conn,
         stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
     if (priv->mon)
         qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
-    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
     goto cleanup;
 }
 
@@ -5375,6 +5384,7 @@ qemuProcessBeginStopJob(virQEMUDriverPtr driver,
 void qemuProcessStop(virQEMUDriverPtr driver,
                      virDomainObjPtr vm,
                      virDomainShutoffReason reason,
+                     qemuDomainAsyncJob asyncJob,
                      unsigned int flags)
 {
     int ret;
@@ -5389,27 +5399,34 @@ void qemuProcessStop(virQEMUDriverPtr driver,
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
     qemuDomainLogContextPtr logCtxt = NULL;
 
-    VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu flags=%x",
+    VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu, "
+              "reason=%s, asyncJob=%s, flags=%x",
               vm, vm->def->name, vm->def->id,
-              (unsigned long long)vm->pid, flags);
-
-    if (!virDomainObjIsActive(vm)) {
-        VIR_DEBUG("VM '%s' not active", vm->def->name);
-        virObjectUnref(cfg);
-        return;
-    }
+              (unsigned long long)vm->pid,
+              virDomainShutoffReasonTypeToString(reason),
+              qemuDomainAsyncJobTypeToString(asyncJob),
+              flags);
 
     /* This method is routinely used in clean up paths. Disable error
      * reporting so we don't squash a legit error. */
     orig_err = virSaveLastError();
 
-    /*
-     * We may unlock the vm in qemuProcessKill(), and another thread
-     * can lock the vm, and then call qemuProcessStop(). So we should
-     * set vm->def->id to -1 here to avoid qemuProcessStop() to be called twice.
-     */
+    if (asyncJob != QEMU_ASYNC_JOB_NONE) {
+        if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
+            goto cleanup;
+    } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
+               priv->job.asyncOwner == virThreadSelfID() &&
+               priv->job.active != QEMU_JOB_ASYNC_NESTED) {
+        VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
+                 qemuDomainAsyncJobTypeToString(asyncJob));
+    }
+
+    if (!virDomainObjIsActive(vm)) {
+        VIR_DEBUG("VM '%s' not active", vm->def->name);
+        goto endjob;
+    }
+
     vm->def->id = -1;
-
     if (virAtomicIntDecAndTest(&driver->nactive) && driver->inhibitCallback)
         driver->inhibitCallback(false, driver->inhibitOpaque);
 
@@ -5664,6 +5681,11 @@ void qemuProcessStop(virQEMUDriverPtr driver,
         vm->newDef = NULL;
     }
 
+ endjob:
+    if (asyncJob != QEMU_ASYNC_JOB_NONE)
+        qemuDomainObjEndJob(driver, vm);
+
+ cleanup:
     if (orig_err) {
         virSetError(orig_err);
         virFreeError(orig_err);
@@ -5946,13 +5968,13 @@ qemuProcessAutoDestroy(virDomainObjPtr dom,
         qemuDomainObjDiscardAsyncJob(driver, dom);
     }
 
-    if (qemuDomainObjBeginJob(driver, dom,
-                              QEMU_JOB_DESTROY) < 0)
-        goto cleanup;
-
     VIR_DEBUG("Killing domain");
 
-    qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED, stopFlags);
+    if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
+        goto cleanup;
+
+    qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
+                    QEMU_ASYNC_JOB_NONE, stopFlags);
 
     virDomainAuditStop(dom, "destroyed");
     event = virDomainEventLifecycleNewFromObj(dom,
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index 69be99e..eb37863 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -121,6 +121,7 @@ int qemuProcessBeginStopJob(virQEMUDriverPtr driver,
 void qemuProcessStop(virQEMUDriverPtr driver,
                      virDomainObjPtr vm,
                      virDomainShutoffReason reason,
+                     qemuDomainAsyncJob asyncJob,
                      unsigned int flags);
 
 int qemuProcessAttach(virConnectPtr conn,
-- 
2.7.1




More information about the libvir-list mailing list