[libvirt] [PATCH 03/14] Fix crash when deleting monitor while a command is in progress

Daniel P. Berrange berrange at redhat.com
Mon Dec 7 14:18:44 UTC 2009


On Thu, Dec 03, 2009 at 11:12:40AM +0000, Daniel P. Berrange wrote:
> On Thu, Dec 03, 2009 at 12:47:02AM +0100, Matthias Bolte wrote:
> > 2009/11/26 Daniel P. Berrange <berrange at redhat.com>:
> > > If QEMU shuts down while we're in the middle of processing a
> > > monitor command, the monitor will be freed, and upon cleaning
> > > up we attempt to do  qemuMonitorUnlock(priv->mon) when priv->mon
> > > is NULL.
> > >
> > > To address this we introduce proper reference counting into
> > > the qemuMonitorPtr object, and hold an extra reference whenever
> > > executing a command.
> > >
> > > * src/qemu/qemu_driver.c: Hold a reference on the monitor while
> > >  executing commands, and only NULL-ify the priv->mon field when
> > >  the last reference is released
> > > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
> > >  counting to handle safe deletion of monitor objects
> > 
> > The locking pattern below results in destroying a locked mutex. It
> > this intended?
> 
> No, that's a bug, the qemuMonitorUnref() call itself should have
> called unlock if the ref count hit 0, before destroying it.
> 
> > qemuMonitorLock(mon);
> > [...]
> > if (qemuMonitorUnref(mon) > 0)
> >     qemuMonitorUnlock(mon);
> > 
> > Well, this patch makes the TCK deadlock for me, seems to be a lock
> > ordering issue combined with a race condition; it doesn't happen every
> > run. I don't understand all details of the locking and refcounting
> > scheme of the QEMU monitor yet, it's quite complex and gets even more
> > complex.
> 
> Yes, that problem shown by valgrind will indeed deadlock. I'll fix
> this. The domain object lock must never be acquired if the thread
> holds the monitor lock already. We must have strict ordering from
> top to bottom (driver -> domain object -> qemu monitor)


This is the patch I'm proposing instead. It removes the ref count adjust
of virDomainObjPtr from qemuMonitor, since that requires that you have
the bad lock ordering. Instead that extra ref count is added/removed by
the qemu driver code. This means the qemuMonitor never has to touch any
locks on virDomainObjPtr which complies with our strict top->bottom
ordering requirement.


commit c2b32f964b0eed861dea11e5037993e3a3c3fb3d
Author: Daniel P. Berrange <berrange at redhat.com>
Date:   Thu Nov 26 13:29:29 2009 +0000

    Fix crash when deleting monitor while a command is in progress
    
    If QEMU shuts down while we're in the middle of processing a
    monitor command, the monitor will be freed, and upon cleaning
    up we attempt to do  qemuMonitorUnlock(priv->mon) when priv->mon
    is NULL.
    
    To address this we introduce proper reference counting into
    the qemuMonitorPtr object, and hold an extra reference whenever
    executing a command.
    
    * src/qemu/qemu_driver.c: Hold a reference on the monitor while
      executing commands, and only NULL-ify the priv->mon field when
      the last reference is released
    * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
      counting to handle safe deletion of monitor objects

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 43d20ea..fd47dc0 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -272,6 +272,7 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
     qemuDomainObjPrivatePtr priv = obj->privateData;
 
     qemuMonitorLock(priv->mon);
+    qemuMonitorRef(priv->mon);
     virDomainObjUnlock(obj);
 }
 
@@ -283,9 +284,16 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
 static void qemuDomainObjExitMonitor(virDomainObjPtr obj)
 {
     qemuDomainObjPrivatePtr priv = obj->privateData;
+    int refs;
 
+    refs = qemuMonitorUnref(priv->mon);
     qemuMonitorUnlock(priv->mon);
     virDomainObjLock(obj);
+
+    if (refs == 0) {
+        virDomainObjUnref(obj);
+        priv->mon = NULL;
+    }
 }
 
 
@@ -302,6 +310,7 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
     qemuDomainObjPrivatePtr priv = obj->privateData;
 
     qemuMonitorLock(priv->mon);
+    qemuMonitorRef(priv->mon);
     virDomainObjUnlock(obj);
     qemuDriverUnlock(driver);
 }
@@ -315,10 +324,17 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
 static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj)
 {
     qemuDomainObjPrivatePtr priv = obj->privateData;
+    int refs;
 
+    refs = qemuMonitorUnref(priv->mon);
     qemuMonitorUnlock(priv->mon);
     qemuDriverLock(driver);
     virDomainObjLock(obj);
+
+    if (refs == 0) {
+        virDomainObjUnref(obj);
+        priv->mon = NULL;
+    }
 }
 
 
@@ -653,6 +669,10 @@ qemuConnectMonitor(virDomainObjPtr vm)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
 
+    /* Hold an extra reference because we can't allow 'vm' to be
+     * deleted while the monitor is active */
+    virDomainObjRef(vm);
+
     if ((priv->mon = qemuMonitorOpen(vm, qemuHandleMonitorEOF)) == NULL) {
         VIR_ERROR(_("Failed to connect monitor for %s\n"), vm->def->name);
         return -1;
@@ -2400,8 +2420,9 @@ static void qemudShutdownVMDaemon(virConnectPtr conn,
                              _("Failed to send SIGTERM to %s (%d)"),
                              vm->def->name, vm->pid);
 
-    if (priv->mon) {
-        qemuMonitorClose(priv->mon);
+    if (priv->mon &&
+        qemuMonitorClose(priv->mon) == 0) {
+        virDomainObjUnref(vm);
         priv->mon = NULL;
     }
 
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index f0ef81b..a2e10bc 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -42,7 +42,7 @@ struct _qemuMonitor {
     virMutex lock;
     virCond notify;
 
-    virDomainObjPtr dom;
+    int refs;
 
     int fd;
     int watch;
@@ -67,12 +67,14 @@ struct _qemuMonitor {
      * the next monitor msg */
     int lastErrno;
 
-    /* If the monitor callback is currently active */
+    /* If the monitor EOF callback is currently active (stops more commands being run) */
     unsigned eofcb: 1;
-    /* If the monitor callback should free the closed monitor */
+    /* If the monitor is in process of shutting down */
     unsigned closed: 1;
+
 };
 
+
 void qemuMonitorLock(qemuMonitorPtr mon)
 {
     virMutexLock(&mon->lock);
@@ -84,21 +86,33 @@ void qemuMonitorUnlock(qemuMonitorPtr mon)
 }
 
 
-static void qemuMonitorFree(qemuMonitorPtr mon, int lockDomain)
+static void qemuMonitorFree(qemuMonitorPtr mon)
 {
-    VIR_DEBUG("mon=%p, lockDomain=%d", mon, lockDomain);
-    if (mon->vm) {
-        if (lockDomain)
-            virDomainObjLock(mon->vm);
-        if (!virDomainObjUnref(mon->vm) && lockDomain)
-            virDomainObjUnlock(mon->vm);
-    }
+    VIR_DEBUG("mon=%p", mon);
     if (virCondDestroy(&mon->notify) < 0)
     {}
     virMutexDestroy(&mon->lock);
     VIR_FREE(mon);
 }
 
+int qemuMonitorRef(qemuMonitorPtr mon)
+{
+    mon->refs++;
+    return mon->refs;
+}
+
+int qemuMonitorUnref(qemuMonitorPtr mon)
+{
+    mon->refs--;
+
+    if (mon->refs == 0) {
+        qemuMonitorUnlock(mon);
+        qemuMonitorFree(mon);
+    }
+
+    return mon->refs;
+}
+
 
 static int
 qemuMonitorOpenUnix(const char *monitor)
@@ -348,6 +362,7 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
     int quit = 0, failed = 0;
 
     qemuMonitorLock(mon);
+    qemuMonitorRef(mon);
     VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events);
 
     if (mon->fd != fd || mon->watch != watch) {
@@ -407,23 +422,17 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
      * but is this safe ?  I think it is, because the callback
      * will try to acquire the virDomainObjPtr mutex next */
     if (failed || quit) {
+        qemuMonitorEOFNotify eofCB = mon->eofCB;
+        virDomainObjPtr vm = mon->vm;
         /* Make sure anyone waiting wakes up now */
         virCondSignal(&mon->notify);
-        mon->eofcb = 1;
-        qemuMonitorUnlock(mon);
-        VIR_DEBUG("Triggering EOF callback error? %d", failed);
-        mon->eofCB(mon, mon->vm, failed);
-
-        qemuMonitorLock(mon);
-        if (mon->closed) {
+        if (qemuMonitorUnref(mon) > 0)
             qemuMonitorUnlock(mon);
-            VIR_DEBUG("Delayed free of monitor %p", mon);
-            qemuMonitorFree(mon, 1);
-        } else {
-            qemuMonitorUnlock(mon);
-        }
+        VIR_DEBUG("Triggering EOF callback error? %d", failed);
+        (eofCB)(mon, vm, failed);
     } else {
-        qemuMonitorUnlock(mon);
+        if (qemuMonitorUnref(mon) > 0)
+            qemuMonitorUnlock(mon);
     }
 }
 
@@ -453,10 +462,10 @@ qemuMonitorOpen(virDomainObjPtr vm,
         return NULL;
     }
     mon->fd = -1;
+    mon->refs = 1;
     mon->vm = vm;
     mon->eofCB = eofCB;
     qemuMonitorLock(mon);
-    virDomainObjRef(vm);
 
     switch (vm->monitor_chr->type) {
     case VIR_DOMAIN_CHR_TYPE_UNIX:
@@ -512,10 +521,14 @@ cleanup:
 }
 
 
-void qemuMonitorClose(qemuMonitorPtr mon)
+int qemuMonitorClose(qemuMonitorPtr mon)
 {
+    int refs;
+
     if (!mon)
-        return;
+        return 0;
+
+    VIR_DEBUG("mon=%p", mon);
 
     qemuMonitorLock(mon);
     if (!mon->closed) {
@@ -523,18 +536,17 @@ void qemuMonitorClose(qemuMonitorPtr mon)
             virEventRemoveHandle(mon->watch);
         if (mon->fd != -1)
             close(mon->fd);
-        /* NB: don't reset  fd / watch fields, since active
-         * callback may still want them */
+        /* NB: ordinarily one might immediately set mon->watch to -1
+         * and mon->fd to -1, but there may be a callback active
+         * that is still relying on these fields being valid. So
+         * we merely close them, but not clear their values and
+         * use this explicit 'closed' flag to track this state */
         mon->closed = 1;
     }
 
-    if (mon->eofcb) {
-        VIR_DEBUG("Mark monitor to be deleted %p", mon);
+    if ((refs = qemuMonitorUnref(mon)) > 0)
         qemuMonitorUnlock(mon);
-    } else {
-        VIR_DEBUG("Delete monitor now %p", mon);
-        qemuMonitorFree(mon, 0);
-    }
+    return refs;
 }
 
 
@@ -552,7 +564,6 @@ int qemuMonitorSend(qemuMonitorPtr mon,
 
     if (mon->eofcb) {
         msg->lastErrno = EIO;
-        qemuMonitorUnlock(mon);
         return -1;
     }
 
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 71688cb..27b43b7 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -78,11 +78,14 @@ typedef int (*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon,
 qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
                                qemuMonitorEOFNotify eofCB);
 
-void qemuMonitorClose(qemuMonitorPtr mon);
+int qemuMonitorClose(qemuMonitorPtr mon);
 
 void qemuMonitorLock(qemuMonitorPtr mon);
 void qemuMonitorUnlock(qemuMonitorPtr mon);
 
+int qemuMonitorRef(qemuMonitorPtr mon);
+int qemuMonitorUnref(qemuMonitorPtr mon);
+
 void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr mon,
                                          qemuMonitorDiskSecretLookup secretCB);
 

-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list