[libvirt] [PATCH 03/14] Fix crash when deleting monitor while a command is in progress

Nikola Ciprich extmaillist at linuxbox.cz
Mon Dec 7 15:40:48 UTC 2009


Hi Daniel,
I'm giving it some heavy testing and seems to be rock solid! Thanks a
lot for Your effort!
I'm going to add few tens of guests to torture libvirt with multiple simultaneous
accesses even more...
regards
nik


On Mon, Dec 07, 2009 at 02:18:44PM +0000, Daniel P. Berrange wrote:
> On Thu, Dec 03, 2009 at 11:12:40AM +0000, Daniel P. Berrange wrote:
> > On Thu, Dec 03, 2009 at 12:47:02AM +0100, Matthias Bolte wrote:
> > > 2009/11/26 Daniel P. Berrange <berrange at redhat.com>:
> > > > If QEMU shuts down while we're in the middle of processing a
> > > > monitor command, the monitor will be freed, and upon cleaning
> > > > up we attempt to do  qemuMonitorUnlock(priv->mon) when priv->mon
> > > > is NULL.
> > > >
> > > > To address this we introduce proper reference counting into
> > > > the qemuMonitorPtr object, and hold an extra reference whenever
> > > > executing a command.
> > > >
> > > > * src/qemu/qemu_driver.c: Hold a reference on the monitor while
> > > >  executing commands, and only NULL-ify the priv->mon field when
> > > >  the last reference is released
> > > > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
> > > >  counting to handle safe deletion of monitor objects
> > > 
> > > The locking pattern below results in destroying a locked mutex. It
> > > this intended?
> > 
> > No, that's a bug, the qemuMonitorUnref() call itself should have
> > called unlock if the ref count hit 0, before destroying it.
> > 
> > > qemuMonitorLock(mon);
> > > [...]
> > > if (qemuMonitorUnref(mon) > 0)
> > >     qemuMonitorUnlock(mon);
> > > 
> > > Well, this patch makes the TCK deadlock for me, seems to be a lock
> > > ordering issue combined with a race condition; it doesn't happen every
> > > run. I don't understand all details of the locking and refcounting
> > > scheme of the QEMU monitor yet, it's quite complex and gets even more
> > > complex.
> > 
> > Yes, that problem shown by valgrind will indeed deadlock. I'll fix
> > this. The domain object lock must never be acquired if the thread
> > holds the monitor lock already. We must have strict ordering from
> > top to bottom (driver -> domain object -> qemu monitor)
> 
> 
> This is the patch I'm proposing instead. It removes the ref count adjust
> of virDomainObjPtr from qemuMonitor, since that requires that you have
> the bad lock ordering. Instead that extra ref count is added/removed by
> the qemu driver code. This means the qemuMonitor never has to touch any
> locks on virDomainObjPtr which complies with our strict top->bottom
> ordering requirement.
> 
> 
> commit c2b32f964b0eed861dea11e5037993e3a3c3fb3d
> Author: Daniel P. Berrange <berrange at redhat.com>
> Date:   Thu Nov 26 13:29:29 2009 +0000
> 
>     Fix crash when deleting monitor while a command is in progress
>     
>     If QEMU shuts down while we're in the middle of processing a
>     monitor command, the monitor will be freed, and upon cleaning
>     up we attempt to do  qemuMonitorUnlock(priv->mon) when priv->mon
>     is NULL.
>     
>     To address this we introduce proper reference counting into
>     the qemuMonitorPtr object, and hold an extra reference whenever
>     executing a command.
>     
>     * src/qemu/qemu_driver.c: Hold a reference on the monitor while
>       executing commands, and only NULL-ify the priv->mon field when
>       the last reference is released
>     * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
>       counting to handle safe deletion of monitor objects
> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 43d20ea..fd47dc0 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -272,6 +272,7 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
>      qemuDomainObjPrivatePtr priv = obj->privateData;
>  
>      qemuMonitorLock(priv->mon);
> +    qemuMonitorRef(priv->mon);
>      virDomainObjUnlock(obj);
>  }
>  
> @@ -283,9 +284,16 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
>  static void qemuDomainObjExitMonitor(virDomainObjPtr obj)
>  {
>      qemuDomainObjPrivatePtr priv = obj->privateData;
> +    int refs;
>  
> +    refs = qemuMonitorUnref(priv->mon);
>      qemuMonitorUnlock(priv->mon);
>      virDomainObjLock(obj);
> +
> +    if (refs == 0) {
> +        virDomainObjUnref(obj);
> +        priv->mon = NULL;
> +    }
>  }
>  
>  
> @@ -302,6 +310,7 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
>      qemuDomainObjPrivatePtr priv = obj->privateData;
>  
>      qemuMonitorLock(priv->mon);
> +    qemuMonitorRef(priv->mon);
>      virDomainObjUnlock(obj);
>      qemuDriverUnlock(driver);
>  }
> @@ -315,10 +324,17 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
>  static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj)
>  {
>      qemuDomainObjPrivatePtr priv = obj->privateData;
> +    int refs;
>  
> +    refs = qemuMonitorUnref(priv->mon);
>      qemuMonitorUnlock(priv->mon);
>      qemuDriverLock(driver);
>      virDomainObjLock(obj);
> +
> +    if (refs == 0) {
> +        virDomainObjUnref(obj);
> +        priv->mon = NULL;
> +    }
>  }
>  
>  
> @@ -653,6 +669,10 @@ qemuConnectMonitor(virDomainObjPtr vm)
>  {
>      qemuDomainObjPrivatePtr priv = vm->privateData;
>  
> +    /* Hold an extra reference because we can't allow 'vm' to be
> +     * deleted while the monitor is active */
> +    virDomainObjRef(vm);
> +
>      if ((priv->mon = qemuMonitorOpen(vm, qemuHandleMonitorEOF)) == NULL) {
>          VIR_ERROR(_("Failed to connect monitor for %s\n"), vm->def->name);
>          return -1;
> @@ -2400,8 +2420,9 @@ static void qemudShutdownVMDaemon(virConnectPtr conn,
>                               _("Failed to send SIGTERM to %s (%d)"),
>                               vm->def->name, vm->pid);
>  
> -    if (priv->mon) {
> -        qemuMonitorClose(priv->mon);
> +    if (priv->mon &&
> +        qemuMonitorClose(priv->mon) == 0) {
> +        virDomainObjUnref(vm);
>          priv->mon = NULL;
>      }
>  
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index f0ef81b..a2e10bc 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -42,7 +42,7 @@ struct _qemuMonitor {
>      virMutex lock;
>      virCond notify;
>  
> -    virDomainObjPtr dom;
> +    int refs;
>  
>      int fd;
>      int watch;
> @@ -67,12 +67,14 @@ struct _qemuMonitor {
>       * the next monitor msg */
>      int lastErrno;
>  
> -    /* If the monitor callback is currently active */
> +    /* If the monitor EOF callback is currently active (stops more commands being run) */
>      unsigned eofcb: 1;
> -    /* If the monitor callback should free the closed monitor */
> +    /* If the monitor is in process of shutting down */
>      unsigned closed: 1;
> +
>  };
>  
> +
>  void qemuMonitorLock(qemuMonitorPtr mon)
>  {
>      virMutexLock(&mon->lock);
> @@ -84,21 +86,33 @@ void qemuMonitorUnlock(qemuMonitorPtr mon)
>  }
>  
>  
> -static void qemuMonitorFree(qemuMonitorPtr mon, int lockDomain)
> +static void qemuMonitorFree(qemuMonitorPtr mon)
>  {
> -    VIR_DEBUG("mon=%p, lockDomain=%d", mon, lockDomain);
> -    if (mon->vm) {
> -        if (lockDomain)
> -            virDomainObjLock(mon->vm);
> -        if (!virDomainObjUnref(mon->vm) && lockDomain)
> -            virDomainObjUnlock(mon->vm);
> -    }
> +    VIR_DEBUG("mon=%p", mon);
>      if (virCondDestroy(&mon->notify) < 0)
>      {}
>      virMutexDestroy(&mon->lock);
>      VIR_FREE(mon);
>  }
>  
> +int qemuMonitorRef(qemuMonitorPtr mon)
> +{
> +    mon->refs++;
> +    return mon->refs;
> +}
> +
> +int qemuMonitorUnref(qemuMonitorPtr mon)
> +{
> +    mon->refs--;
> +
> +    if (mon->refs == 0) {
> +        qemuMonitorUnlock(mon);
> +        qemuMonitorFree(mon);
> +    }
> +
> +    return mon->refs;
> +}
> +
>  
>  static int
>  qemuMonitorOpenUnix(const char *monitor)
> @@ -348,6 +362,7 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>      int quit = 0, failed = 0;
>  
>      qemuMonitorLock(mon);
> +    qemuMonitorRef(mon);
>      VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events);
>  
>      if (mon->fd != fd || mon->watch != watch) {
> @@ -407,23 +422,17 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>       * but is this safe ?  I think it is, because the callback
>       * will try to acquire the virDomainObjPtr mutex next */
>      if (failed || quit) {
> +        qemuMonitorEOFNotify eofCB = mon->eofCB;
> +        virDomainObjPtr vm = mon->vm;
>          /* Make sure anyone waiting wakes up now */
>          virCondSignal(&mon->notify);
> -        mon->eofcb = 1;
> -        qemuMonitorUnlock(mon);
> -        VIR_DEBUG("Triggering EOF callback error? %d", failed);
> -        mon->eofCB(mon, mon->vm, failed);
> -
> -        qemuMonitorLock(mon);
> -        if (mon->closed) {
> +        if (qemuMonitorUnref(mon) > 0)
>              qemuMonitorUnlock(mon);
> -            VIR_DEBUG("Delayed free of monitor %p", mon);
> -            qemuMonitorFree(mon, 1);
> -        } else {
> -            qemuMonitorUnlock(mon);
> -        }
> +        VIR_DEBUG("Triggering EOF callback error? %d", failed);
> +        (eofCB)(mon, vm, failed);
>      } else {
> -        qemuMonitorUnlock(mon);
> +        if (qemuMonitorUnref(mon) > 0)
> +            qemuMonitorUnlock(mon);
>      }
>  }
>  
> @@ -453,10 +462,10 @@ qemuMonitorOpen(virDomainObjPtr vm,
>          return NULL;
>      }
>      mon->fd = -1;
> +    mon->refs = 1;
>      mon->vm = vm;
>      mon->eofCB = eofCB;
>      qemuMonitorLock(mon);
> -    virDomainObjRef(vm);
>  
>      switch (vm->monitor_chr->type) {
>      case VIR_DOMAIN_CHR_TYPE_UNIX:
> @@ -512,10 +521,14 @@ cleanup:
>  }
>  
>  
> -void qemuMonitorClose(qemuMonitorPtr mon)
> +int qemuMonitorClose(qemuMonitorPtr mon)
>  {
> +    int refs;
> +
>      if (!mon)
> -        return;
> +        return 0;
> +
> +    VIR_DEBUG("mon=%p", mon);
>  
>      qemuMonitorLock(mon);
>      if (!mon->closed) {
> @@ -523,18 +536,17 @@ void qemuMonitorClose(qemuMonitorPtr mon)
>              virEventRemoveHandle(mon->watch);
>          if (mon->fd != -1)
>              close(mon->fd);
> -        /* NB: don't reset  fd / watch fields, since active
> -         * callback may still want them */
> +        /* NB: ordinarily one might immediately set mon->watch to -1
> +         * and mon->fd to -1, but there may be a callback active
> +         * that is still relying on these fields being valid. So
> +         * we merely close them, but not clear their values and
> +         * use this explicit 'closed' flag to track this state */
>          mon->closed = 1;
>      }
>  
> -    if (mon->eofcb) {
> -        VIR_DEBUG("Mark monitor to be deleted %p", mon);
> +    if ((refs = qemuMonitorUnref(mon)) > 0)
>          qemuMonitorUnlock(mon);
> -    } else {
> -        VIR_DEBUG("Delete monitor now %p", mon);
> -        qemuMonitorFree(mon, 0);
> -    }
> +    return refs;
>  }
>  
>  
> @@ -552,7 +564,6 @@ int qemuMonitorSend(qemuMonitorPtr mon,
>  
>      if (mon->eofcb) {
>          msg->lastErrno = EIO;
> -        qemuMonitorUnlock(mon);
>          return -1;
>      }
>  
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 71688cb..27b43b7 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -78,11 +78,14 @@ typedef int (*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon,
>  qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
>                                 qemuMonitorEOFNotify eofCB);
>  
> -void qemuMonitorClose(qemuMonitorPtr mon);
> +int qemuMonitorClose(qemuMonitorPtr mon);
>  
>  void qemuMonitorLock(qemuMonitorPtr mon);
>  void qemuMonitorUnlock(qemuMonitorPtr mon);
>  
> +int qemuMonitorRef(qemuMonitorPtr mon);
> +int qemuMonitorUnref(qemuMonitorPtr mon);
> +
>  void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr mon,
>                                           qemuMonitorDiskSecretLookup secretCB);
>  
> 
> -- 
> |: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
> |: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
> |: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
> |: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
> 
> --
> Libvir-list mailing list
> Libvir-list at redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list
> 

-- 
-------------------------------------
Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:    +420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: servis at linuxbox.cz
-------------------------------------




More information about the libvir-list mailing list