[libvirt] [PATCH RFC v2] qemu: fix deadlock when waiting in non async jobs

Nikolay Shirokovskiy nshirokovskiy at virtuozzo.com
Mon Oct 8 08:10:05 UTC 2018


Block job abort operation can not handle properly qemu crashes when waiting for
abort/pivot completion. Deadlock scenario is next:

- qemuDomainBlockJobAbort waits for pivot/abort completion
- qemu crashes, then qemuProcessBeginStopJob broadcasts for VM condition and
  then waits for job condition (taken by qemuDomainBlockJobAbort)
- qemuDomainBlockJobAbort awakes but nothing really changed, VM is still
  active (vm->def->id != -1) so thread starts waiting for completion again.
  Now two threads are in deadlock.

First let's remove broadcast in qemuProcessBeginStopJob. It is simply wrong
because it is not set any condition before broadcast so that awaked threads can
not detect any changes. Crashing domain during async job will continue to be
handled properly because destroy job can run concurrently with async job and
destroy job calls qemuProcessStop which sets vm->def->id to -1 and broadcasts.

Second let's introduce flag that EOF is received and broadcast after that.
Now non async jobs can check this flag in wait loop.

Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy at virtuozzo.com>

---

Diff from v1:

- patches 1 and 2 are already merged
- don't bother with reporting monitor EOF reason to user as most of
  time it is simply "unexpected eof" (this implies dropping patch 3)
- drop patch 5 as we now always report "domain is being stopped"
  in qemuDomainObjWait
- don't signal on monitor error for simplicity (otherwise we need to report
  something more elaborate that "domain is being stopped" as we don't
  kill domain on monitor errors. On the other hand I guess monitor
  error is rare case to handle it right now)
- keep virDomainObjWait for async jobs

It's a bit uneven that for async jobs domain is destroyed concurrently and for
non async jobs it will be actually destroyed after job get completed.  Also if
non async job needs issuing commands to qemu on cleanup then we will send these
commands in vain polluting logs etc because qemu process in not running at this
moment but typical check (virDomainObjIsActive) will think it is still running.

Domain is destroyed (qemuProcessStop) in a job due to patches [1] and [2].
However AFAIU it is not neccessary. If qemuProcessStop does not drop VM lock
then we don't need extra job to make qemuProcessStop and main job not
interleave. And we can drop the lock now only in qemuDomainObjBeginNestedJob in
qemuProcessStop which is introduced in [2]. AFAIU we can fix issues mentioned in
[2] the other way for example like it is done for qemu agent - we save agent
monitor reference on stack for entering/exiting agent monitor.

So I wonder can we instead of this fix remove job for qemuProcessStop and run
destroying domain cuncurrently for non async jobs too.

[1]
commit 8c9ff9960b29d4703a99efdd1cadcf6f48799cc0
Author: Jiri Denemark <jdenemar at redhat.com>
Date:   Thu Feb 11 15:32:48 2016 +0100

    qemu: Process monitor EOF in a job

[2]
commit 81f50cb92d16643bcd749e3ab5b404b8b7cec643
Author: Jiri Denemark <jdenemar at redhat.com>
Date:   Thu Feb 11 11:20:28 2016 +0100

    qemu: Avoid calling qemuProcessStop without a job

 src/qemu/qemu_domain.c  | 39 +++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_domain.h  |  4 ++++
 src/qemu/qemu_driver.c  |  2 +-
 src/qemu/qemu_hotplug.c |  4 ++--
 src/qemu/qemu_process.c |  9 +++++----
 5 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 939b2a3..aead72b 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -13534,3 +13534,42 @@ qemuDomainRunningReasonToResumeEvent(virDomainRunningReason reason)
 
     return VIR_DOMAIN_EVENT_RESUMED_UNPAUSED;
 }
+
+
+/**
+ * Waits for domain condition to be triggered for a specific period of time.
+ * if @until is 0 then waits indefinetely.
+ *
+ * Returns:
+ *  -1 on error
+ *   0 on success
+ *   1 on timeout
+ */
+int
+qemuDomainObjWait(virDomainObjPtr vm, unsigned long long until)
+{
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int rc;
+
+    if (until)
+        rc = virCondWaitUntil(&vm->cond, &vm->parent.lock, until);
+    else
+        rc = virCondWait(&vm->cond, &vm->parent.lock);
+
+    if (rc < 0) {
+        if (until && errno == ETIMEDOUT)
+            return 1;
+
+        virReportSystemError(errno, "%s",
+                             _("failed to wait for domain condition"));
+        return -1;
+    }
+
+    if (priv->monEOF) {
+        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                             _("domain is being stopped"));
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 2f8a1bf..36ab294 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -281,6 +281,7 @@ struct _qemuDomainObjPrivate {
     virDomainChrSourceDefPtr monConfig;
     bool monJSON;
     bool monError;
+    bool monEOF;
     unsigned long long monStart;
 
     qemuAgentPtr agent;
@@ -1085,4 +1086,7 @@ void qemuDomainStorageIdReset(qemuDomainObjPrivatePtr priv);
 virDomainEventResumedDetailType
 qemuDomainRunningReasonToResumeEvent(virDomainRunningReason reason);
 
+int
+qemuDomainObjWait(virDomainObjPtr vm, unsigned long long until);
+
 #endif /* __QEMU_DOMAIN_H__ */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index b238309..f4250da 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -17142,7 +17142,7 @@ qemuDomainBlockJobAbort(virDomainPtr dom,
         qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
         qemuBlockJobUpdate(vm, QEMU_ASYNC_JOB_NONE, disk, NULL);
         while (diskPriv->blockjob) {
-            if (virDomainObjWait(vm) < 0) {
+            if (qemuDomainObjWait(vm, 0) < 0) {
                 ret = -1;
                 goto endjob;
             }
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 4558a3c..8189629 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -165,7 +165,7 @@ qemuHotplugWaitForTrayEject(virDomainObjPtr vm,
         return -1;
 
     while (disk->tray_status != VIR_DOMAIN_DISK_TRAY_OPEN) {
-        if ((rc = virDomainObjWaitUntil(vm, now + CHANGE_MEDIA_TIMEOUT)) < 0)
+        if ((rc = qemuDomainObjWait(vm, now + CHANGE_MEDIA_TIMEOUT)) < 0)
             return -1;
 
         if (rc > 0) {
@@ -5002,7 +5002,7 @@ qemuDomainWaitForDeviceRemoval(virDomainObjPtr vm)
     until += qemuDomainRemoveDeviceWaitTime;
 
     while (priv->unplug.alias) {
-        if ((rc = virDomainObjWaitUntil(vm, until)) == 1)
+        if ((rc = qemuDomainObjWait(vm, until)) == 1)
             return 0;
 
         if (rc < 0) {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 29b0ba1..dd03269 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -290,9 +290,12 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon,
 
     virObjectLock(vm);
 
+    priv = vm->privateData;
+    priv->monEOF = true;
+    virDomainObjBroadcast(vm);
+
     VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
 
-    priv = vm->privateData;
     if (priv->beingDestroyed) {
         VIR_DEBUG("Domain is being destroyed, EOF is expected");
         goto cleanup;
@@ -5996,6 +5999,7 @@ qemuProcessPrepareDomain(virQEMUDriverPtr driver,
 
     priv->monJSON = true;
     priv->monError = false;
+    priv->monEOF = false;
     priv->monStart = 0;
     priv->gotShutdown = false;
     priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
@@ -6965,9 +6969,6 @@ qemuProcessBeginStopJob(virQEMUDriverPtr driver,
     if (qemuProcessKill(vm, killFlags) < 0)
         goto cleanup;
 
-    /* Wake up anything waiting on domain condition */
-    virDomainObjBroadcast(vm);
-
     if (qemuDomainObjBeginJob(driver, vm, job) < 0)
         goto cleanup;
 
-- 
1.8.3.1




More information about the libvir-list mailing list