[libvirt] RFC regarding libvirtd deadlock

Daniel P. Berrangé berrange at redhat.com
Thu Mar 8 10:04:47 UTC 2018


On Wed, Mar 07, 2018 at 04:52:26PM -0700, Jim Fehlig wrote:
> Doing something foolish like
> 
> # while true; do killall -HUP libvirtd; sleep 1; done
> # virsh create vm.xml
> 
> can deadlock libvirtd. Threads of interest are
> 
> Thread 4 (Thread 0x7fc13b53e700 (LWP 64084)):
> #0  0x00007fc13fba10bf in pthread_cond_wait@@GLIBC_2.3.2 () from
> /lib64/libpthread.so.0
> #1  0x00007fc14310213c in virCondWait (c=0x7fc110017fa8, m=0x7fc110017f80)
>     at util/virthread.c:154
> #2  0x00007fc1280244e9 in qemuMonitorSend (mon=0x7fc110017f70, msg=0x7fc13b53d240)
>     at qemu/qemu_monitor.c:1083
> #3  0x00007fc12803bf5a in qemuMonitorJSONCommandWithFd (mon=0x7fc110017f70,
>     cmd=0x7fc110017700, scm_fd=-1, reply=0x7fc13b53d318) at
> qemu/qemu_monitor_json.c:305
> #4  0x00007fc12803c09c in qemuMonitorJSONCommand (mon=0x7fc110017f70,
> cmd=0x7fc110017700,
>     reply=0x7fc13b53d318) at qemu/qemu_monitor_json.c:335
> ---Type <return> to continue, or q <return> to quit---
> #5  0x00007fc12803f116 in qemuMonitorJSONSetCapabilities (mon=0x7fc110017f70)
>     at qemu/qemu_monitor_json.c:1298
> #6  0x00007fc128026e14 in qemuMonitorSetCapabilities (mon=0x7fc110017f70)
>     at qemu/qemu_monitor.c:1697
> #7  0x00007fc127ffe250 in qemuProcessInitMonitor (driver=0x7fc12004e1e0,
>     vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START) at qemu/qemu_process.c:1763
> #8  0x00007fc127ffe564 in qemuConnectMonitor (driver=0x7fc12004e1e0,
> vm=0x7fc110003d00,
>     asyncJob=6, logCtxt=0x7fc1100089c0) at qemu/qemu_process.c:1835
> #9  0x00007fc127fff386 in qemuProcessWaitForMonitor (driver=0x7fc12004e1e0,
>     vm=0x7fc110003d00, asyncJob=6, logCtxt=0x7fc1100089c0) at
> qemu/qemu_process.c:2180
> #10 0x00007fc128009269 in qemuProcessLaunch (conn=0x7fc1100009a0,
> driver=0x7fc12004e1e0,
>     vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START, incoming=0x0, snapshot=0x0,
>     vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=17) at qemu/qemu_process.c:6111
> #11 0x00007fc128009e85 in qemuProcessStart (conn=0x7fc1100009a0,
> driver=0x7fc12004e1e0,
>     vm=0x7fc110003d00, updatedCPU=0x0, asyncJob=QEMU_ASYNC_JOB_START,
> migrateFrom=0x0,
>     migrateFd=-1, migratePath=0x0, snapshot=0x0,
> vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
>     flags=17) at qemu/qemu_process.c:6334
> #12 0x00007fc1280552f1 in qemuDomainCreateXML (conn=0x7fc1100009a0,
>     xml=0x7fc110000ed0 "<!--\nWARNING: THIS IS AN AUTO-GENERATED FILE.
> CHANGES TO IT ARE LIKELY TO BE\nOVERWRITTEN AND LOST. Changes to this xml
> configuration should be made using:\n  virsh edit sles12sp2-kvm\nor other
> applicati"..., flags=0) at qemu/qemu_driver.c:1776
> ...
> 
> Thread 1 (Thread 0x7fc143c66880 (LWP 64081)):
> #0  0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from
> /lib64/libpthread.so.0
> #1  0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
>     at util/virthread.c:122
> #2  0x00007fc1431a394f in virNWFilterWriteLockFilterUpdates () at
> conf/nwfilter_conf.c:159
> #3  0x00007fc12a5230a0 in nwfilterStateReload () at nwfilter/nwfilter_driver.c:299
> #4  0x00007fc1433170c2 in virStateReload () at libvirt.c:829
> #5  0x0000558c522d5686 in daemonReloadHandler (dmn=0x558c5328b230,
> sig=0x7ffe0a831e30,
>     opaque=0x0) at remote/remote_daemon.c:724
> #6  0x00007fc14321e3c7 in virNetDaemonSignalEvent (watch=2, fd=9, events=1,
>     opaque=0x558c5328b230) at rpc/virnetdaemon.c:654
> #7  0x00007fc143085cc7 in virEventPollDispatchHandles (nfds=11, fds=0x558c532cd930)
>     at util/vireventpoll.c:508
> #8  0x00007fc143086586 in virEventPollRunOnce () at util/vireventpoll.c:657
> #9  0x00007fc143084312 in virEventRunDefaultImpl () at util/virevent.c:327
> #10 0x00007fc14321ecb8 in virNetDaemonRun (dmn=0x558c5328b230) at
> rpc/virnetdaemon.c:858
> #11 0x0000558c522d7add in main (argc=3, argv=0x7ffe0a832758) at
> remote/remote_daemon.c:1496
> (gdb) thr 1
> [Switching to thread 1 (Thread 0x7fc143c66880 (LWP 64081))]
> #0  0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from
> /lib64/libpthread.so.0
> (gdb) f 1
> #1  0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
>     at util/virthread.c:122
> 122	    pthread_rwlock_wrlock(&m->lock);
> (gdb) p updateLock
> $1 = {lock = {__data = {__lock = 0, __nr_readers = 1, __readers_wakeup = 0,
>       __writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 1,
> __writer = 0,
>       __shared = 0, __rwelision = 0 '\000', __pad1 = "\000\000\000\000\000\000",
>       __pad2 = 0, __flags = 0},
>     __size = "\000\000\000\000\001", '\000' <repeats 15 times>, "\001",
> '\000' <repeats 34 times>, __align = 4294967296}}
> 
> Reloading of the nwfilter driver is stuck waiting for a write lock, which
> already has a reader (from qemuDomainCreateXML) in the critical section.
> 
> I'm not really sure how to best fix this deadlock. One hack that avoids the
> deadlock is to load the nwfilter driver after the hypervisor drivers, as the
> below patch does. This is obviously problematic if loading a hypervisor
> driver requires the nwfilter one.

Yeah, that's not going to work for the reason you describe - the secondary
drivers all need to be loaded before HV drivers.

> Any suggestions on better fixes for this issue?

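IIUC, the key problem is that the QEMU thread is waiting for monitor
IO which is processed by the main event loop thread, and it is doing
so while still holding the nwfilter update lock as a reader. The HUP
reload is also run from the main event loop thread, where it blocks
waiting for the write lock on that same lock, which prevents the
monitor IO for QEMU from being processed.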

It might be sufficient to just move the virStateReload() call into
a background thread, so that it doesn't block ongoing work needed
by other threads.

> 
> Regards,
> Jim
> 
> diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
> index f8082f62f..7a6655a23 100644
> --- a/src/remote/remote_daemon.c
> +++ b/src/remote/remote_daemon.c
> @@ -327,9 +327,6 @@ static void daemonInitialize(void)
>  #ifdef WITH_SECRETS
>      VIR_DAEMON_LOAD_MODULE(secretRegister, "secret");
>  #endif
> -#ifdef WITH_NWFILTER
> -    VIR_DAEMON_LOAD_MODULE(nwfilterRegister, "nwfilter");
> -#endif
>  #ifdef WITH_XEN
>      VIR_DAEMON_LOAD_MODULE(xenRegister, "xen");
>  #endif
> @@ -354,6 +351,9 @@ static void daemonInitialize(void)
>  #ifdef WITH_VZ
>      VIR_DAEMON_LOAD_MODULE(vzRegister, "vz");
>  #endif
> +#ifdef WITH_NWFILTER
> +    VIR_DAEMON_LOAD_MODULE(nwfilterRegister, "nwfilter");
> +#endif
>  }
>  #undef VIR_DAEMON_LOAD_MODULE
> 
> --
> libvir-list mailing list
> libvir-list at redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|




More information about the libvir-list mailing list