[libvirt] [PATCH 03/14] Fix crash when deleting monitor while a command is in progress
Nikola Ciprich
extmaillist at linuxbox.cz
Mon Dec 7 15:40:48 UTC 2009
Hi Daniel,
I'm giving it some heavy testing and seems to be rock solid! Thanks a
lot for Your effort!
I'm going to add few tens of guests to torture libvirt with multiple simultaneous
accesses even more...
regards
nik
On Mon, Dec 07, 2009 at 02:18:44PM +0000, Daniel P. Berrange wrote:
> On Thu, Dec 03, 2009 at 11:12:40AM +0000, Daniel P. Berrange wrote:
> > On Thu, Dec 03, 2009 at 12:47:02AM +0100, Matthias Bolte wrote:
> > > 2009/11/26 Daniel P. Berrange <berrange at redhat.com>:
> > > > If QEMU shuts down while we're in the middle of processing a
> > > > monitor command, the monitor will be freed, and upon cleaning
> > > > up we attempt to do qemuMonitorUnlock(priv->mon) when priv->mon
> > > > is NULL.
> > > >
> > > > To address this we introduce proper reference counting into
> > > > the qemuMonitorPtr object, and hold an extra reference whenever
> > > > executing a command.
> > > >
> > > > * src/qemu/qemu_driver.c: Hold a reference on the monitor while
> > > > executing commands, and only NULL-ify the priv->mon field when
> > > > the last reference is released
> > > > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
> > > > counting to handle safe deletion of monitor objects
> > >
> > > The locking pattern below results in destroying a locked mutex. It
> > > this intended?
> >
> > No, that's a bug, the qemuMonitorUnref() call itself should have
> > called unlock if the ref count hit 0, before destroying it.
> >
> > > qemuMonitorLock(mon);
> > > [...]
> > > if (qemuMonitorUnref(mon) > 0)
> > > qemuMonitorUnlock(mon);
> > >
> > > Well, this patch makes the TCK deadlock for me, seems to be a lock
> > > ordering issue combined with a race condition; it doesn't happen every
> > > run. I don't understand all details of the locking and refcounting
> > > scheme of the QEMU monitor yet, it's quite complex and gets even more
> > > complex.
> >
> > Yes, that problem shown by valgrind will indeed deadlock. I'll fix
> > this. The domain object lock must never be acquired if the thread
> > holds the monitor lock already. We must have strict ordering from
> > top to bottom (driver -> domain object -> qemu monitor)
>
>
> This is the patch I'm proposing instead. It removes the ref count adjust
> of virDomainObjPtr from qemuMonitor, since that requires that you have
> the bad lock ordering. Instead that extra ref count is added/removed by
> the qemu driver code. This means the qemuMonitor never has to touch any
> locks on virDomainObjPtr which complies with our strict top->bottom
> ordering requirement.
>
>
> commit c2b32f964b0eed861dea11e5037993e3a3c3fb3d
> Author: Daniel P. Berrange <berrange at redhat.com>
> Date: Thu Nov 26 13:29:29 2009 +0000
>
> Fix crash when deleting monitor while a command is in progress
>
> If QEMU shuts down while we're in the middle of processing a
> monitor command, the monitor will be freed, and upon cleaning
> up we attempt to do qemuMonitorUnlock(priv->mon) when priv->mon
> is NULL.
>
> To address this we introduce proper reference counting into
> the qemuMonitorPtr object, and hold an extra reference whenever
> executing a command.
>
> * src/qemu/qemu_driver.c: Hold a reference on the monitor while
> executing commands, and only NULL-ify the priv->mon field when
> the last reference is released
> * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference
> counting to handle safe deletion of monitor objects
>
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 43d20ea..fd47dc0 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -272,6 +272,7 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> qemuMonitorLock(priv->mon);
> + qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> }
>
> @@ -283,9 +284,16 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> static void qemuDomainObjExitMonitor(virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> + int refs;
>
> + refs = qemuMonitorUnref(priv->mon);
> qemuMonitorUnlock(priv->mon);
> virDomainObjLock(obj);
> +
> + if (refs == 0) {
> + virDomainObjUnref(obj);
> + priv->mon = NULL;
> + }
> }
>
>
> @@ -302,6 +310,7 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> qemuMonitorLock(priv->mon);
> + qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> qemuDriverUnlock(driver);
> }
> @@ -315,10 +324,17 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir
> static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> + int refs;
>
> + refs = qemuMonitorUnref(priv->mon);
> qemuMonitorUnlock(priv->mon);
> qemuDriverLock(driver);
> virDomainObjLock(obj);
> +
> + if (refs == 0) {
> + virDomainObjUnref(obj);
> + priv->mon = NULL;
> + }
> }
>
>
> @@ -653,6 +669,10 @@ qemuConnectMonitor(virDomainObjPtr vm)
> {
> qemuDomainObjPrivatePtr priv = vm->privateData;
>
> + /* Hold an extra reference because we can't allow 'vm' to be
> + * deleted while the monitor is active */
> + virDomainObjRef(vm);
> +
> if ((priv->mon = qemuMonitorOpen(vm, qemuHandleMonitorEOF)) == NULL) {
> VIR_ERROR(_("Failed to connect monitor for %s\n"), vm->def->name);
> return -1;
> @@ -2400,8 +2420,9 @@ static void qemudShutdownVMDaemon(virConnectPtr conn,
> _("Failed to send SIGTERM to %s (%d)"),
> vm->def->name, vm->pid);
>
> - if (priv->mon) {
> - qemuMonitorClose(priv->mon);
> + if (priv->mon &&
> + qemuMonitorClose(priv->mon) == 0) {
> + virDomainObjUnref(vm);
> priv->mon = NULL;
> }
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index f0ef81b..a2e10bc 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -42,7 +42,7 @@ struct _qemuMonitor {
> virMutex lock;
> virCond notify;
>
> - virDomainObjPtr dom;
> + int refs;
>
> int fd;
> int watch;
> @@ -67,12 +67,14 @@ struct _qemuMonitor {
> * the next monitor msg */
> int lastErrno;
>
> - /* If the monitor callback is currently active */
> + /* If the monitor EOF callback is currently active (stops more commands being run) */
> unsigned eofcb: 1;
> - /* If the monitor callback should free the closed monitor */
> + /* If the monitor is in process of shutting down */
> unsigned closed: 1;
> +
> };
>
> +
> void qemuMonitorLock(qemuMonitorPtr mon)
> {
> virMutexLock(&mon->lock);
> @@ -84,21 +86,33 @@ void qemuMonitorUnlock(qemuMonitorPtr mon)
> }
>
>
> -static void qemuMonitorFree(qemuMonitorPtr mon, int lockDomain)
> +static void qemuMonitorFree(qemuMonitorPtr mon)
> {
> - VIR_DEBUG("mon=%p, lockDomain=%d", mon, lockDomain);
> - if (mon->vm) {
> - if (lockDomain)
> - virDomainObjLock(mon->vm);
> - if (!virDomainObjUnref(mon->vm) && lockDomain)
> - virDomainObjUnlock(mon->vm);
> - }
> + VIR_DEBUG("mon=%p", mon);
> if (virCondDestroy(&mon->notify) < 0)
> {}
> virMutexDestroy(&mon->lock);
> VIR_FREE(mon);
> }
>
> +int qemuMonitorRef(qemuMonitorPtr mon)
> +{
> + mon->refs++;
> + return mon->refs;
> +}
> +
> +int qemuMonitorUnref(qemuMonitorPtr mon)
> +{
> + mon->refs--;
> +
> + if (mon->refs == 0) {
> + qemuMonitorUnlock(mon);
> + qemuMonitorFree(mon);
> + }
> +
> + return mon->refs;
> +}
> +
>
> static int
> qemuMonitorOpenUnix(const char *monitor)
> @@ -348,6 +362,7 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
> int quit = 0, failed = 0;
>
> qemuMonitorLock(mon);
> + qemuMonitorRef(mon);
> VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events);
>
> if (mon->fd != fd || mon->watch != watch) {
> @@ -407,23 +422,17 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
> * but is this safe ? I think it is, because the callback
> * will try to acquire the virDomainObjPtr mutex next */
> if (failed || quit) {
> + qemuMonitorEOFNotify eofCB = mon->eofCB;
> + virDomainObjPtr vm = mon->vm;
> /* Make sure anyone waiting wakes up now */
> virCondSignal(&mon->notify);
> - mon->eofcb = 1;
> - qemuMonitorUnlock(mon);
> - VIR_DEBUG("Triggering EOF callback error? %d", failed);
> - mon->eofCB(mon, mon->vm, failed);
> -
> - qemuMonitorLock(mon);
> - if (mon->closed) {
> + if (qemuMonitorUnref(mon) > 0)
> qemuMonitorUnlock(mon);
> - VIR_DEBUG("Delayed free of monitor %p", mon);
> - qemuMonitorFree(mon, 1);
> - } else {
> - qemuMonitorUnlock(mon);
> - }
> + VIR_DEBUG("Triggering EOF callback error? %d", failed);
> + (eofCB)(mon, vm, failed);
> } else {
> - qemuMonitorUnlock(mon);
> + if (qemuMonitorUnref(mon) > 0)
> + qemuMonitorUnlock(mon);
> }
> }
>
> @@ -453,10 +462,10 @@ qemuMonitorOpen(virDomainObjPtr vm,
> return NULL;
> }
> mon->fd = -1;
> + mon->refs = 1;
> mon->vm = vm;
> mon->eofCB = eofCB;
> qemuMonitorLock(mon);
> - virDomainObjRef(vm);
>
> switch (vm->monitor_chr->type) {
> case VIR_DOMAIN_CHR_TYPE_UNIX:
> @@ -512,10 +521,14 @@ cleanup:
> }
>
>
> -void qemuMonitorClose(qemuMonitorPtr mon)
> +int qemuMonitorClose(qemuMonitorPtr mon)
> {
> + int refs;
> +
> if (!mon)
> - return;
> + return 0;
> +
> + VIR_DEBUG("mon=%p", mon);
>
> qemuMonitorLock(mon);
> if (!mon->closed) {
> @@ -523,18 +536,17 @@ void qemuMonitorClose(qemuMonitorPtr mon)
> virEventRemoveHandle(mon->watch);
> if (mon->fd != -1)
> close(mon->fd);
> - /* NB: don't reset fd / watch fields, since active
> - * callback may still want them */
> + /* NB: ordinarily one might immediately set mon->watch to -1
> + * and mon->fd to -1, but there may be a callback active
> + * that is still relying on these fields being valid. So
> + * we merely close them, but not clear their values and
> + * use this explicit 'closed' flag to track this state */
> mon->closed = 1;
> }
>
> - if (mon->eofcb) {
> - VIR_DEBUG("Mark monitor to be deleted %p", mon);
> + if ((refs = qemuMonitorUnref(mon)) > 0)
> qemuMonitorUnlock(mon);
> - } else {
> - VIR_DEBUG("Delete monitor now %p", mon);
> - qemuMonitorFree(mon, 0);
> - }
> + return refs;
> }
>
>
> @@ -552,7 +564,6 @@ int qemuMonitorSend(qemuMonitorPtr mon,
>
> if (mon->eofcb) {
> msg->lastErrno = EIO;
> - qemuMonitorUnlock(mon);
> return -1;
> }
>
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 71688cb..27b43b7 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -78,11 +78,14 @@ typedef int (*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon,
> qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
> qemuMonitorEOFNotify eofCB);
>
> -void qemuMonitorClose(qemuMonitorPtr mon);
> +int qemuMonitorClose(qemuMonitorPtr mon);
>
> void qemuMonitorLock(qemuMonitorPtr mon);
> void qemuMonitorUnlock(qemuMonitorPtr mon);
>
> +int qemuMonitorRef(qemuMonitorPtr mon);
> +int qemuMonitorUnref(qemuMonitorPtr mon);
> +
> void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr mon,
> qemuMonitorDiskSecretLookup secretCB);
>
>
> --
> |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :|
> |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :|
> |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
> |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
>
> --
> Libvir-list mailing list
> Libvir-list at redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list
>
--
-------------------------------------
Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava
tel.: +420 596 603 142
fax: +420 596 621 273
mobil: +420 777 093 799
www.linuxbox.cz
mobil servis: +420 737 238 656
email servis: servis at linuxbox.cz
-------------------------------------
More information about the libvir-list
mailing list