[libvirt] [PATCHv3 1/4] util: fix libvirtd startup failure due to netlink error

Laine Stump laine at laine.org
Fri May 4 18:51:13 UTC 2012


This solves the problem detailed in:

  https://bugzilla.redhat.com/show_bug.cgi?id=816465

and further detailed in

  https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html

A short explanation is included in the comments of the patch itself.

Even with ACK, I will wait to push this until I have verification that
it does not break lldpad<-->libvirtd communication (if it does, I may
need to use the nl_handle allocated during virNetlinkStartup() for
virNetlinkEventServiceStart()).
---
 daemon/libvirtd.c        |    6 +++++
 src/libvirt_private.syms |    2 ++
 src/util/virnetlink.c    |   65 ++++++++++++++++++++++++++++++++++++++++++++++
 src/util/virnetlink.h    |    5 +++-
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c
index b098f6a..5d57b50 100644
--- a/daemon/libvirtd.c
+++ b/daemon/libvirtd.c
@@ -1007,6 +1007,11 @@ int main(int argc, char **argv) {
         goto cleanup;
     }
 
+    if (virNetlinkStartup() < 0) {
+        ret = VIR_DAEMON_ERR_INIT;
+        goto cleanup;
+    }
+
     if (!(srv = virNetServerNew(config->min_workers,
                                 config->max_workers,
                                 config->prio_workers,
@@ -1143,6 +1148,7 @@ cleanup:
     virNetServerProgramFree(qemuProgram);
     virNetServerClose(srv);
     virNetServerFree(srv);
+    virNetlinkShutdown();
     if (statuswrite != -1) {
         if (ret != 0) {
             /* Tell parent of daemon what failed */
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 88f8a21..9b20dd4 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1333,6 +1333,8 @@ virNetlinkEventRemoveClient;
 virNetlinkEventServiceIsRunning;
 virNetlinkEventServiceStop;
 virNetlinkEventServiceStart;
+virNetlinkShutdown;
+virNetlinkStartup;
 
 
 # virnetmessage.h
diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
index 2cbf32a..1b64033 100644
--- a/src/util/virnetlink.c
+++ b/src/util/virnetlink.c
@@ -98,10 +98,63 @@ static int nextWatch = 1;
 # define NETLINK_EVENT_ALLOC_EXTENT 10
 
 static virNetlinkEventSrvPrivatePtr server = NULL;
+static struct nl_handle *placeholder_nlhandle = NULL;
 
 /* Function definitions */
 
 /**
+ * virNetlinkStartup:
+ *
+ * Perform any initialization that needs to take place before the
+ * program starts up worker threads. This is currently used to ensure
+ * that an nl_handle is allocated prior to any attempts to bind a
+ * netlink socket. For a discussion of why this is necessary, please
+ * see the following email message:
+ *
+ *   https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html
+ *
+ * The short version is that, without this placeholder allocation of
+ * an nl_handle that is never used, it is possible for nl_connect() in
+ * one thread to collide with a direct bind() of a netlink socket in
+ * another thread, leading to failure of the operation (which could
+ * lead to failure of libvirtd to start). Since getaddrinfo() (used by
+ * libvirtd in virSocketAddrParse, which is called quite frequently
+ * during startup) directly calls bind() on a netlink socket, this is
+ * actually a very common occurrence (15-20% failure rate on some
+ * hardware).
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetlinkStartup(void)
+{
+    if (placeholder_nlhandle)
+        return 0;
+    placeholder_nlhandle = nl_handle_alloc();
+    if (!placeholder_nlhandle) {
+        virReportSystemError(errno, "%s",
+                             _("cannot allocate placeholder nlhandle for netlink"));
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * virNetlinkShutdown:
+ *
+ * Undo any initialization done by virNetlinkStartup. This currently
+ * destroys the placeholder nl_handle.
+ */
+void
+virNetlinkShutdown(void)
+{
+    if (placeholder_nlhandle) {
+        nl_handle_destroy(placeholder_nlhandle);
+        placeholder_nlhandle = NULL;
+    }
+}
+
+/**
  * virNetlinkCommand:
  * @nlmsg: pointer to netlink message
  * @respbuf: pointer to pointer where response buffer will be allocated
@@ -546,6 +599,18 @@ static const char *unsupported = N_("libnl was not available at build time");
 static const char *unsupported = N_("not supported on non-linux platforms");
 # endif
 
+int
+virNetlinkStartup(void)
+{
+    return 0;
+}
+
+void
+virNetlinkShutdown(void)
+{
+    return;
+}
+
 int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
            unsigned char **respbuf ATTRIBUTE_UNUSED,
            unsigned int *respbuflen ATTRIBUTE_UNUSED,
diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h
index a72612e..93df59a 100644
--- a/src/util/virnetlink.h
+++ b/src/util/virnetlink.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2011 Red Hat, Inc.
+ * Copyright (C) 2010-2012 Red Hat, Inc.
  * Copyright (C) 2010-2012 IBM Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -35,6 +35,9 @@ struct nlattr;
 
 # endif /* __linux__ */
 
+int virNetlinkStartup(void);
+void virNetlinkShutdown(void);
+
 int virNetlinkCommand(struct nl_msg *nl_msg,
                       unsigned char **respbuf, unsigned int *respbuflen,
                       int nl_pid);
-- 
1.7.10




More information about the libvir-list mailing list