[libvirt] [PATCH 7/8] qemu: Track the API which started the current job

Jiri Denemark jdenemar at redhat.com
Mon Mar 23 13:26:06 UTC 2015


This is very helpful when we want to log and report why we could not
acquire a state change lock. Reporting what job keeps it locked helps
with understanding the issue. Moreover, after calling
virDomainGetControlInfo, it's possible to tell whether libvirt is just
stuck somewhere within the API (or it just forgot to cleanup the job) or
whether libvirt is waiting for QEMU to reply.

The error message will look like the following:

    # virsh resume cd
    error: Failed to resume domain cd
    error: Timed out during operation: cannot acquire state change lock
    (held by remoteDispatchDomainSuspend)

https://bugzilla.redhat.com/show_bug.cgi?id=853839

Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---
 src/qemu/qemu_domain.c | 43 +++++++++++++++++++++++++++++++++----------
 src/qemu/qemu_domain.h |  2 ++
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 559400c..dc55016 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -153,6 +153,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
 
     job->active = QEMU_JOB_NONE;
     job->owner = 0;
+    job->ownerAPI = NULL;
 }
 
 static void
@@ -162,6 +163,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
 
     job->asyncJob = QEMU_ASYNC_JOB_NONE;
     job->asyncOwner = 0;
+    job->asyncOwnerAPI = NULL;
     job->phase = 0;
     job->mask = QEMU_JOB_DEFAULT_MASK;
     job->dump_memory_only = false;
@@ -1322,6 +1324,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
     unsigned long long then;
     bool nested = job == QEMU_JOB_ASYNC_NESTED;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
+    const char *blocker = NULL;
     int ret = -1;
 
     VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)",
@@ -1370,6 +1373,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
                   obj, obj->def->name);
         priv->job.active = job;
         priv->job.owner = virThreadSelfID();
+        priv->job.ownerAPI = virThreadJobGet();
     } else {
         VIR_DEBUG("Started async job: %s (vm=%p name=%s)",
                   qemuDomainAsyncJobTypeToString(asyncJob),
@@ -1379,6 +1383,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
             goto cleanup;
         priv->job.asyncJob = asyncJob;
         priv->job.asyncOwner = virThreadSelfID();
+        priv->job.asyncOwnerAPI = virThreadJobGet();
         priv->job.current->started = now;
     }
 
@@ -1389,29 +1394,47 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
     return 0;
 
  error:
-    VIR_WARN("Cannot start job (%s, %s) for domain %s;"
-             " current job is (%s, %s) owned by (%llu, %llu)",
+    VIR_WARN("Cannot start job (%s, %s) for domain %s; "
+             "current job is (%s, %s) owned by (%llu %s, %llu %s)",
              qemuDomainJobTypeToString(job),
              qemuDomainAsyncJobTypeToString(asyncJob),
              obj->def->name,
              qemuDomainJobTypeToString(priv->job.active),
              qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
-             priv->job.owner, priv->job.asyncOwner);
+             priv->job.owner, NULLSTR(priv->job.ownerAPI),
+             priv->job.asyncOwner, NULLSTR(priv->job.asyncOwnerAPI));
+
+    if (nested || qemuDomainNestedJobAllowed(priv, job))
+        blocker = priv->job.ownerAPI;
+    else
+        blocker = priv->job.asyncOwnerAPI;
 
     ret = -1;
     if (errno == ETIMEDOUT) {
-        virReportError(VIR_ERR_OPERATION_TIMEOUT,
-                       "%s", _("cannot acquire state change lock"));
+        if (blocker) {
+            virReportError(VIR_ERR_OPERATION_TIMEOUT,
+                           _("cannot acquire state change lock (held by %s)"),
+                           blocker);
+        } else {
+            virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s",
+                           _("cannot acquire state change lock"));
+        }
         ret = -2;
     } else if (cfg->maxQueuedJobs &&
                priv->jobs_queued > cfg->maxQueuedJobs) {
-        virReportError(VIR_ERR_OPERATION_FAILED,
-                       "%s", _("cannot acquire state change lock "
-                               "due to max_queued limit"));
+        if (blocker) {
+            virReportError(VIR_ERR_OPERATION_FAILED,
+                           _("cannot acquire state change lock (held by %s) "
+                             "due to max_queued limit"),
+                           blocker);
+        } else {
+            virReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                           _("cannot acquire state change lock "
+                             "due to max_queued limit"));
+        }
         ret = -2;
     } else {
-        virReportSystemError(errno,
-                             "%s", _("cannot acquire job mutex"));
+        virReportSystemError(errno, "%s", _("cannot acquire job mutex"));
     }
 
  cleanup:
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index ba8d398..a468982 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -117,10 +117,12 @@ struct qemuDomainJobObj {
     virCond cond;                       /* Use to coordinate jobs */
     qemuDomainJob active;               /* Currently running job */
     unsigned long long owner;           /* Thread id which set current job */
+    const char *ownerAPI;               /* The API which owns the job */
 
     virCond asyncCond;                  /* Use to coordinate with async jobs */
     qemuDomainAsyncJob asyncJob;        /* Currently active async job */
     unsigned long long asyncOwner;      /* Thread which set current async job */
+    const char *asyncOwnerAPI;          /* The API which owns the async job */
     int phase;                          /* Job phase (mainly for migrations) */
     unsigned long long mask;            /* Jobs allowed during async job */
     bool dump_memory_only;              /* use dump-guest-memory to do dump */
-- 
2.3.3




More information about the libvir-list mailing list