[libvirt] [PATCHv2] util: fix libvirtd startup failure due to netlink error
Daniel P. Berrange
berrange at redhat.com
Thu May 3 15:48:25 UTC 2012
On Thu, May 03, 2012 at 11:10:49AM -0400, Laine Stump wrote:
> This solves the problem detailed in:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=816465
>
> and further detailed in
>
> https://www.redhat.com/archives/libvir-list/2012-May/msg00202.htm
>
> A short explanation is included in the comments of the patch itself.
>
> Even with ACK, I will wait to push this until I have verification that
> it does not break lldpad<-->libvirtd communication (if it does, I may
> need to use the nl_handle allocated during virNetlinkStartup() for
> virNetlinkEventServiceStart()).
> ---
> daemon/libvirtd.c | 6 +++++
> src/libvirt_private.syms | 2 ++
> src/util/virnetlink.c | 67 +++++++++++++++++++++++++++++++++++++++++++++-
> src/util/virnetlink.h | 5 +++-
> 4 files changed, 78 insertions(+), 2 deletions(-)
>
> diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c
> index b098f6a..5d57b50 100644
> --- a/daemon/libvirtd.c
> +++ b/daemon/libvirtd.c
> @@ -1007,6 +1007,11 @@ int main(int argc, char **argv) {
> goto cleanup;
> }
>
> + if (virNetlinkStartup() < 0) {
> + ret = VIR_DAEMON_ERR_INIT;
> + goto cleanup;
> + }
> +
> if (!(srv = virNetServerNew(config->min_workers,
> config->max_workers,
> config->prio_workers,
> @@ -1143,6 +1148,7 @@ cleanup:
> virNetServerProgramFree(qemuProgram);
> virNetServerClose(srv);
> virNetServerFree(srv);
> + virNetlinkShutdown();
> if (statuswrite != -1) {
> if (ret != 0) {
> /* Tell parent of daemon what failed */
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index d4038b2..e911774 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -1330,6 +1330,8 @@ virNetlinkEventRemoveClient;
> virNetlinkEventServiceIsRunning;
> virNetlinkEventServiceStop;
> virNetlinkEventServiceStart;
> +virNetlinkShutdown;
> +virNetlinkStartup;
>
>
> # virnetmessage.h
> diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
> index b2e9d51..a249e94 100644
> --- a/src/util/virnetlink.c
> +++ b/src/util/virnetlink.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2010-2011 Red Hat, Inc.
> + * Copyright (C) 2010-2012 Red Hat, Inc.
> * Copyright (C) 2010-2012 IBM Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -88,10 +88,63 @@ static int nextWatch = 1;
> # define NETLINK_EVENT_ALLOC_EXTENT 10
>
> static virNetlinkEventSrvPrivatePtr server = NULL;
> +static struct nl_handle *placeholder_nlhandle = NULL;
>
> /* Function definitions */
>
> /**
> + * virNetlinkStartup:
> + *
> + * Perform any initialization that needs to take place before the
> + * program starts up worker threads. This is currently used to assure
> + * that an nl_handle is allocated prior to any attempts to bind a
> + * netlink socket. For a discussion of why this is necessary, please
> + * see the following email message:
> + *
> + * https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html
> + *
> + * The short version is that, without this placeholder allocation of
> + * an nl_handle that is never used, it is possible for nl_connect() in
> + * one thread to collide with a direct bind() of a netlink socket in
> + * another thread, leading to failure of the operation (which could
> + * lead to failure of libvirtd to start). Since getaddrinfo() (used by
> + * libvirtd in virSocketAddrParse, which is called quite frequently
> + * during startup) directly calls bind() on a netlink socket, this is
> + * actually a very common occurence (15-20% failure rate on some
> + * hardware).
> + *
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetlinkStartup(void)
> +{
> + if (placeholder_nlhandle)
> + return 0;
> + placeholder_nlhandle = nl_handle_alloc();
> + if (!placeholder_nlhandle) {
> + virReportSystemError(errno, "%s",
> + _("cannot allocate placeholder nlhandle for netlink"));
> + return -1;
> + }
> + return 0;
> +}
> +
> +/**
> + * virNetlinkShutdown:
> + *
> + * Undo any initialization done by virNetlinkStartup. This currently
> + * destroys the placeholder nl_handle.
> + */
> +void
> +virNetlinkShutdown(void)
> +{
> + if (placeholder_nlhandle) {
> + nl_handle_destroy(placeholder_nlhandle);
> + placeholder_nlhandle = NULL;
> + }
> +}
> +
> +/**
> * virNetlinkCommand:
> * @nlmsg: pointer to netlink message
> * @respbuf: pointer to pointer where response buffer will be allocated
> @@ -535,6 +588,18 @@ static const char *unsupported = N_("libnl was not available at build time");
> static const char *unsupported = N_("not supported on non-linux platforms");
> # endif
>
> +int
> +virNetlinkStartup(void)
> +{
> + return 0;
> +}
> +
> +void
> +virNetlinkShutdown(void)
> +{
> + return;
> +}
> +
> int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
> unsigned char **respbuf ATTRIBUTE_UNUSED,
> unsigned int *respbuflen ATTRIBUTE_UNUSED,
> diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h
> index a72612e..93df59a 100644
> --- a/src/util/virnetlink.h
> +++ b/src/util/virnetlink.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2010-2011 Red Hat, Inc.
> + * Copyright (C) 2010-2012 Red Hat, Inc.
> * Copyright (C) 2010-2012 IBM Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -35,6 +35,9 @@ struct nlattr;
>
> # endif /* __linux__ */
>
> +int virNetlinkStartup(void);
> +void virNetlinkShutdown(void);
> +
> int virNetlinkCommand(struct nl_msg *nl_msg,
> unsigned char **respbuf, unsigned int *respbuflen,
> int nl_pid);
ACK, assuming real world testing confirms it fixes the problm
Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
More information about the libvir-list
mailing list