[libvirt] [PATCH v4 1/1] util: Add netlink event handling to virnetlink.c

Laine Stump laine at laine.org
Mon Feb 13 18:31:51 UTC 2012


On 02/09/2012 10:36 AM, D. Herrendoerfer wrote:
> From: "D. Herrendoerfer" <d.herrendoerfer at herrendoerfer.name>
>
> This code adds a netlink event interface to libvirt.
> It is based upon the event_poll code and makes use of
> it. An event is generated for each netlink message sent
> to the libvirt pid.
>
> Signed-off-by: D. Herrendoerfer <d.herrendoerfer at herrendoerfer.name>
> ---
>  daemon/libvirtd.c        |    8 +
>  src/libvirt_private.syms |    6 +
>  src/util/virnetlink.c    |  436 +++++++++++++++++++++++++++++++++++++++++++++-
>  src/util/virnetlink.h    |   29 +++
>  4 files changed, 478 insertions(+), 1 deletions(-)
>
> diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c
> index b1b542b..ca8074d 100644
> --- a/daemon/libvirtd.c
> +++ b/daemon/libvirtd.c
> @@ -47,6 +47,7 @@
>  #include "conf.h"
>  #include "memory.h"
>  #include "conf.h"
> +#include "virnetlink.h"
>  #include "virnetserver.h"
>  #include "threads.h"
>  #include "remote.h"
> @@ -1598,6 +1599,12 @@ int main(int argc, char **argv) {
>          goto cleanup;
>      }
>  
> +    /* Register the netlink event service */
> +    if (virNetlinkEventServiceStart() < 0) {
> +        ret = VIR_DAEMON_ERR_NETWORK;
> +        goto cleanup;
> +    }
> +
>      /* Run event loop. */
>      virNetServerRun(srv);
>  
> @@ -1607,6 +1614,7 @@ int main(int argc, char **argv) {
>                  0, "shutdown", NULL);
>  
>  cleanup:
> +    virNetlinkEventServiceStop();
>      virNetServerProgramFree(remoteProgram);
>      virNetServerProgramFree(qemuProgram);
>      virNetServerClose(srv);
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index d6ad36c..008470e 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -1258,6 +1258,12 @@ virNetDevVPortProfileOpTypeToString;
>  
>  #virnetlink.h
>  virNetlinkCommand;
> +virNetlinkEventServiceIsRunning;
> +virNetlinkEventServiceStop;
> +virNetlinkEventServiceStart;
> +virNetlinkEventAddClient;
> +virNetlinkEventRemoveClient;
> +
>  
>  
>  # virnetmessage.h
> diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
> index d03d171..7e70251 100644
> --- a/src/util/virnetlink.c
> +++ b/src/util/virnetlink.c
> @@ -35,7 +35,10 @@
>  #include <sys/types.h>
>  
>  #include "virnetlink.h"
> +#include "logging.h"
>  #include "memory.h"
> +#include "threads.h"
> +#include "virmacaddr.h"
>  #include "virterror_internal.h"
>  
>  #define VIR_FROM_THIS VIR_FROM_NET
> @@ -46,6 +49,54 @@
>  
>  #define NETLINK_ACK_TIMEOUT_S  2
>  
> +#if defined(__linux__) && defined(HAVE_LIBNL)
> +/* State for a single netlink event handle */
> +struct virNetlinkEventHandle {
> +    int watch;
> +    virNetlinkEventHandleCallback cb;
> +    void *opaque;
> +    unsigned char macaddr[VIR_MAC_BUFLEN];
> +    int deleted;
> +};
> +
> +/* State for the main netlink event loop */
> +struct virNetlinkEventLoop {
> +    virMutex lock;
> +    int handled;
> +    size_t handlesCount;
> +    size_t handlesAlloc;
> +    struct virNetlinkEventHandle *handles;
> +};
> +
> +typedef struct _virNetlinkEventSrvPrivate virNetlinkEventSrvPrivate;
> +typedef virNetlinkEventSrvPrivate *virNetlinkEventSrvPrivatePtr;
> +struct _virNetlinkEventSrvPrivate {
> +    virMutex lock;
> +    int eventwatch;
> +    int netlinkfd;
> +    struct nl_handle *netlinknh;
> +};

After looking through the code and spending some time trying to
understand it, I can't see any reason why these two structures need to
be separate, or to have separate locks. The design would be simpler if
there was a single structure and a single lock. (Can you point out any
instance where one thread would need one of the locks but not the other,
and another thread could benefit from having only that other lock?

What I can see is:


setupNetlinkEventServer (and therefore virNetlinkEventServiceStart)
virNetlinkEventServiceStop

   Since both of these should only be called when there's only a single thread
   active, it shouldn't really matter how many locks there are on what pieces
   of the data.

virNetlinkEventCallback

   This locks both (first srv->lock, then eventLoop.lock), and outside of the srv-lock
   just does the "actual" removal of a client with eventLoop unlocked.

virNetlinkEventAddClient
virNetlinkEventRemoveClient

   These lock only the eventloop data structure, and when an client is "removed"
   they just mark it to be deleted later. I don't see anything preventing these
   functions from locking srv->lock, in which case they could just directly move
   the client items into DELETED state. (this in turn would mean that everything in
   virNetlinkEventCallback could be put inside srv->lock, and thus the need for
   eventLoop.lock is eliminated).


Having the extra lock doesn't gain anything, and removing it makes for less code to maintain.


> +
> +enum virNetlinkDeleteMode {
> +    VIR_NETLINK_HANDLE_VALID,
> +    VIR_NETLINK_HANDLE_DELETE,
> +    VIR_NETLINK_HANDLE_FREE,
> +};
> +
> +/* Only have one event loop */
> +static struct virNetlinkEventLoop eventLoop;
> +
> +/* Unique ID for the next netlink watch to be registered */
> +static int nextWatch = 1;
> +
> +/* Allocate extra slots for virEventPollHandle/virEventPollTimeout
> + records in this multiple */
> +#define NETLINK_EVENT_ALLOC_EXTENT 10
> +
> +static virNetlinkEventSrvPrivatePtr server = 0;


Use NULL instead of 0 to initialize pointers (just makes it more obvious
that it's a pointer :-)

> +
> +/* Function definitions */
> +
>  /**
>   * virNetlinkCommand:
>   * @nlmsg: pointer to netlink message
> @@ -58,7 +109,6 @@
>   * Returns 0 on success, -1 on error. In case of error, no response
>   * buffer will be returned.
>   */
> -#if defined(__linux__) && defined(HAVE_LIBNL)
>  int virNetlinkCommand(struct nl_msg *nl_msg,
>                        unsigned char **respbuf, unsigned int *respbuflen,
>                        int nl_pid)
> @@ -138,6 +188,325 @@ err_exit:
>      return rc;
>  }
>  
> +static void
> +virNetlinkEventServerLock(virNetlinkEventSrvPrivatePtr driver) {
> +    virMutexLock(&driver->lock);
> +}
> +
> +static void
> +virNetlinkEventServerUnlock(virNetlinkEventSrvPrivatePtr driver) {
> +    virMutexUnlock(&driver->lock);
> +}
> +
> +static void
> +virNetlinkEventCallback(int watch,
> +                        int fd ATTRIBUTE_UNUSED,
> +                        int events ATTRIBUTE_UNUSED,
> +                        void *opaque) {
> +    virNetlinkEventSrvPrivatePtr srv = opaque;
> +    unsigned char *msg;
> +    struct sockaddr_nl peer;
> +    struct ucred *creds = NULL;
> +    int i, length, handled;
> +
> +    length = nl_recv(srv->netlinknh, &peer, &msg, &creds);

You need some error handling here. so you can avoid the vallback if the
recv failed for some reason.

> +
> +    virNetlinkEventServerLock(srv);
> +
> +    handled=0;

handled should be a bool and initialized to false, to make it easier to
read.

> +
> +    virMutexLock(&eventLoop.lock);
> +
> +    VIR_DEBUG("dispatching to max %d clients, called from event watch %d",
> +            (int)eventLoop.handlesCount, watch);
> +
> +    for (i = 0; i < eventLoop.handlesCount; i++) {
> +        if (eventLoop.handles[i].deleted != VIR_NETLINK_HANDLE_VALID) {
> +            continue;
> +        }
> +
> +        VIR_DEBUG("dispatching client %d.",i);
> +
> +        virNetlinkEventHandleCallback cb = eventLoop.handles[i].cb;
> +        void *cpopaque = eventLoop.handles[i].opaque;
> +        (cb)( msg, length, &peer, &handled, cpopaque);
> +    }
> +
> +    virMutexUnlock(&eventLoop.lock);
> +
> +    if (handled == 0) {
   if (!handled)
> +        VIR_DEBUG("nobody cared.");
> +    }
> +
> +    VIR_FREE(msg);
> +
> +    for (i = 0; i < eventLoop.handlesCount; i++) {
> +        if (eventLoop.handles[i].deleted == VIR_NETLINK_HANDLE_DELETE) {
> +            VIR_FREE(eventLoop.handles[i].opaque);
> +            eventLoop.handles[i].deleted = VIR_NETLINK_HANDLE_FREE;
> +        }
> +    }


I don't see the need for having this done in the event loop - just allow
the removeclient function to grab srv->lock (to verify that there's not
a callback currently trawling through the client list and potentially
using this data) and do the deletion directly at the time it's requested.

(Aside from that, in this case you're actually violating your own
locking rules, since you're modifying something in the eventLoop object
at a time when you don't have the eventLoop lock held. This is a big
warning flag that something is wrong with your locking model.)


> +    virNetlinkEventServerUnlock(srv);
> +}
> +
> +static int
> +setupNetlinkEventServer(virNetlinkEventSrvPrivatePtr srv) {
> +    int fd;
> +    int ret = -1;
> +
> +    /* Init the mutexes */
> +    if ( virMutexInit(&srv->lock) < 0)
> +        return -1;
> +
> +    if ( virMutexInit(&eventLoop.lock) < 0) {
> +        virMutexDestroy(&srv->lock);
> +        return -1;
> +    }
> +
> +    virNetlinkEventServerLock(srv);
> +
> +    /* Allocate a new socket and get fd */
> +    srv->netlinknh = nl_handle_alloc();
> +
> +    if (!srv->netlinknh) {
> +        netlinkError(errno,
> +                "%s", _("cannot allocate nlhandle for virNetlinkEvent server"));
> +        goto exit;
> +    }

You're not properly unwinding here (or later error exits) in case of an
error - according to the code above, srv->lock is destroyed in case of
error, so in these later error cases, srv->lock and eventLoop.lock
should be destroyed before returning.


> +
> +    if (nl_connect(srv->netlinknh, NETLINK_ROUTE) < 0) {
> +        netlinkError(errno,
> +                "%s", _("cannot connect to netlink socket"));
> +        goto exit_cleanup;
> +    }
> +
> +    fd = nl_socket_get_fd(srv->netlinknh);
> +    nl_socket_set_nonblocking(srv->netlinknh);


Is it possible that either of these fail? If not, put them inside
ignore_value(), otherwise check for failure and appropriately unwind.


> +
> +    if ((srv->eventwatch = virEventAddHandle(fd,
> +                            VIR_EVENT_HANDLE_READABLE,
> +                            virNetlinkEventCallback,
> +                            srv, NULL)) < 0) {
> +        netlinkError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                _("Failed to add netlink event handle watch"));
> +
> +        goto exit_cleanup;
> +    }
> +
> +    srv->netlinkfd = fd;
> +    VIR_DEBUG("netlink event listener on fd: %i",fd);
> +
> +    virNetlinkEventServerUnlock(srv);
> +    ret = 0;
> +    goto exit;

You've just unlocked srv->lock, and now you're going to exit:, which
will... unlock srv->lock. Maybe you meant to just return?

> +
> +exit_cleanup:
> +    nl_close(srv->netlinknh);
> +    nl_handle_destroy(srv->netlinknh);
> +exit:
> +    virNetlinkEventServerUnlock(srv);
> +    return ret;
> +}

Really I think you're only going to end up going to either of these
labels if you encounter an error, so it would be better to follow the
more standard libvirt practice of using the label "error:" rather than
"exit:" (yes, I know virnetlink.c already uses exit:, but that's the
exception rather than the norm).

> +
> +/**
> + * virNetlinkEventServiceStop:
> + *
> + * stop the monitor to receive netlink messages for libvirtd.
> + * This removes the netlink socket fd from the event handler.
> + *
> + * returns -1 if the monitor cannot be unregistered, 0 upon success
> + */
> +int
> +virNetlinkEventServiceStop(void) {
> +    virNetlinkEventSrvPrivatePtr srv = server;
> +
> +    VIR_INFO("stopping netlink event service");
> +
> +    if (!server) {
> +        return 0;
> +    }
> +
> +    virNetlinkEventServerLock(srv);
> +
> +    nl_close(srv->netlinknh);
> +    nl_handle_destroy(srv->netlinknh);
> +
> +    virEventRemoveHandle(srv->eventwatch);
> +    server=0;
> +
> +    virNetlinkEventServerUnlock(srv);
> +
> +    virMutexDestroy(&srv->lock);
> +    virMutexDestroy(&eventLoop.lock);

This function must also VIR_FREE(srv).


> +
> +    return 0;
> +}
> +
> +/**
> + * virNetlinkEventServiceIsRunning:
> + *
> + * returns if the netlink event service is running.
> + *
> + * returns 1 if the service is running, 0 if stopped.
> + */
> +int
> +virNetlinkEventServiceIsRunning(void) {
> +    if (server)
> +        return 1;
> +    return 0;


This function should return a bool (true or false). (We try to use bool
whenever appropriate, although there is still a lot of use of 0/1 in an
int or even a 1 bit bitfield).


> +}
> +
> +/**
> + * virNetlinkEventServiceStart:
> + *
> + * start a monitor to receive netlink messages for libvirtd.
> + * This registers a netlink socket with the event interface.
> + *
> + * returns -1 if the monitor cannot be registered, 0 upon success
> + */
> +int
> +virNetlinkEventServiceStart(void) {
> +    virNetlinkEventSrvPrivatePtr srv;
> +
> +    if (server) {
> +        return 0;
> +    }
> +
> +    VIR_INFO("starting netlink event service");
> +
> +    if (VIR_ALLOC(srv) < 0)
> +    goto no_memory;

Bad indentation.

> +
> +    if (setupNetlinkEventServer(srv)) {
> +        VIR_FREE(srv);
> +        goto error;
> +    }
> +
> +    VIR_DEBUG("netlink event service running");
> +
> +    server=srv;
> +    return 0;
> +
> +no_memory:
> +    virReportOOMError();
> +error:
> +    return -1;
> +}

As simple as this function is, I don't think it really needs the extra
labels at the bottom. For that matter, it looks simple enough that you
should probably just merge the code from setupNetlinkEventServer into
this function.


> +
> +/**
> + * virNetlinkEventAddClient:
> + *
> + * @cb: callback to invoke when an event occurs
> + * @opaque: user data to pass to callback
> + * @macaddr: macaddr to store with the data. Used to identify callers. May be null.
> + *
> + * register a callback for handling of netlink messages. The
> + * registered function receives the entire netlink message and
> + * may choose to act upon it.
> + *
> + * returns -1 if the file handle cannot be registered, number of monitor upon success
> + */
> +int
> +virNetlinkEventAddClient(virNetlinkEventHandleCallback cb,
> +                         void *opaque,
> +                         const unsigned char *macaddr) {
> +                         int i,r, result;
> +
> +    if ( cb == NULL )
> +        return -1;
> +
> +    virMutexLock(&eventLoop.lock);
> +
> +    VIR_DEBUG("adding client: %d.",nextWatch);
> +
> +    r = 0;
> +    /* first try to re-use deleted free slots */
> +    for (i = 0; i < eventLoop.handlesCount; i++) {
> +        if (eventLoop.handles[i].deleted == VIR_NETLINK_HANDLE_FREE) {
> +            r = i;
> +            goto addentry;
> +        }
> +    }
> +    /* Resize the eventLoop array if needed */
> +    if (eventLoop.handlesCount == eventLoop.handlesAlloc) {
> +        VIR_DEBUG("Used %zu handle slots, adding at least %d more",
> +                eventLoop.handlesAlloc, NETLINK_EVENT_ALLOC_EXTENT);
> +        if (VIR_RESIZE_N(eventLoop.handles, eventLoop.handlesAlloc,
> +                        eventLoop.handlesCount, NETLINK_EVENT_ALLOC_EXTENT) < 0) {
> +            result = -1;
> +            goto err_exit;
> +        }
> +    }
> +    r = eventLoop.handlesCount++;
> +
> +    addentry:

The label should be un-indented by 4 spaces so that it stands out.

> +    eventLoop.handles[r].watch = nextWatch;
> +    eventLoop.handles[r].cb = cb;
> +    eventLoop.handles[r].opaque = opaque;
> +    eventLoop.handles[r].deleted = VIR_NETLINK_HANDLE_VALID;
> +    if (!macaddr)
> +        memcpy(eventLoop.handles[r].macaddr, macaddr,VIR_MAC_BUFLEN);

You missed this from my previous review - this should be "if (macaddr)",
not "if (!macaddr)".


> +
> +    VIR_DEBUG("added client to loop slot: %d.",r);
> +
> +    result = nextWatch++;
> +
> +err_exit:


Would prefer "error:" instead of "err_exit:" for consistency in the
codebase (I realize there are several files that use err_exit instead,
all of them suspiciously associated with nwfilter and port profiles ;-)


> +    virMutexUnlock(&eventLoop.lock);
> +
> +    return result;
> +}
> +
> +/**
> + * virNetlinkEventRemoveClient:
> + *
> + * @watch: watch whose handle to remove
> + * @macaddr: macaddr whose handle to remove
> + *
> + * Unregister a callback from a netlink monitor.
> + * The handler function referenced will no longer receive netlink messages.
> + * Either watch or macaddr may be used, the other should be null.
> + *
> + * returns -1 if the file handle was not registered, 0 upon success
> + */
> +int
> +virNetlinkEventRemoveClient(int watch, const unsigned char *macaddr) {
> +    int i;
> +    int ret = -1;
> +
> +    if (watch <= 0 && macaddr == 0) {

For pointers, I prefer either "macaddr == NULL" or "!macaddr" so there's
one more visual clue that it's a pointer.


> +        VIR_WARN("Ignoring invalid netlink client id: %d", watch);
> +        return -1;
> +    }
> +
> +    virMutexLock(&eventLoop.lock);
> +    for (i = 0; i < eventLoop.handlesCount; i++) {
> +        if (eventLoop.handles[i].deleted != VIR_NETLINK_HANDLE_VALID)
> +        continue;
> +
> +        if (watch != 0 && eventLoop.handles[i].watch == watch) {
> +            eventLoop.handles[i].deleted = VIR_NETLINK_HANDLE_DELETE;
> +            VIR_DEBUG("removed client: %d by index.",
> +                    eventLoop.handles[i].watch);
> +            ret = 0;
> +            goto unlock_exit;
> +        }
> +        if (watch == 0 && memcmp(macaddr, eventLoop.handles[i].macaddr, VIR_MAC_BUFLEN)) {
> +            eventLoop.handles[i].deleted = VIR_NETLINK_HANDLE_DELETE;

Again, I think this function should be grabbing the one-and-only lock,
and just directly removing these, rather than marking them "to be deleted".

> +            VIR_DEBUG("removed client: %d by mac.",
> +                    eventLoop.handles[i].watch);
> +            ret = 0;
> +            goto unlock_exit;
> +        }
> +    }
> +    VIR_DEBUG("client not found to remove.");
> +
> +unlock_exit:
> +    virMutexUnlock(&eventLoop.lock);
> +    return ret;
> +}
> +
> +
>  #else
>  
>  int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
> @@ -154,4 +523,69 @@ int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
>      return -1;
>  }
>  
> +/**
> + * stopNetlinkEventServer: stop the monitor to receive netlink messages for libvirtd
> + */
> +int virNetlinkEventServiceStop(void) {
> +    netlinkError(VIR_ERR_INTERNAL_ERROR,
> +                "%s",
> +# if defined(__linux__) && !defined(HAVE_LIBNL)
> +                _("virNetlinkEventServiceStop is not supported since libnl was not available"));
> +# endif
> +    return 0;
> +}
> +
> +/**
> + * startNetlinkEventServer: start a monitor to receive netlink messages for libvirtd
> + */
> +int virNetlinkEventServiceStart(void) {
> +# if defined(__linux__) && !defined(HAVE_LIBNL)
> +    netlinkError(VIR_ERR_INTERNAL_ERROR,
> +                "%s",
> +                _("virNetlinkEventServiceStart is not supported since libnl was not available"));
> +# endif
> +    return 0;
> +}
> +
> +/**
> + * virNetlinkEventServiceIsRunning: returns if the netlink event service is running.
> + */
> +int virNetlinkEventServiceIsRunning(void) {
> +# if defined(__linux__) && !defined(HAVE_LIBNL)
> +    netlinkError(VIR_ERR_INTERNAL_ERROR,
> +                "%s",
> +                _("virNetlinkEventServiceIsRunning is not supported since libnl was not available"));
> +# endif
> +    return 0;
> +}
> +
> +/**
> + * virNetlinkEventAddClient: register a callback for handling of netlink messages
> + */
> +int virNetlinkEventAddClient(virNetlinkEventHandleCallback cb, void *opaque,
> +                             const unsigned char *macaddr) {
> +    netlinkError(VIR_ERR_INTERNAL_ERROR,
> +                "%s",
> +# if defined(__linux__) && !defined(HAVE_LIBNL)
> +                _("virNetlinkEventServiceAddClient is not supported since libnl was not available"));
> +# else
> +                _("virNetlinkEventServiceAddClient is not supported on non-linux platforms"));
> +# endif
> +    return -1;
> +}
> +
> +/**
> + * virNetlinkEventRemoveClient: unregister a callback from a netlink monitor
> + */
> +int virNetlinkEventRemoveClient(int watch, const unsigned char *macaddr) {
> +    netlinkError(VIR_ERR_INTERNAL_ERROR,
> +                "%s",
> +# if defined(__linux__) && !defined(HAVE_LIBNL)
> +                _("virNetlinkEventRemoveClient is not supported since libnl was not available"));
> +# else
> +                _("virNetlinkEventRemoveClient is not supported on non-linux platforms"));
> +# endif
> +    return -1;
> +}
> +
>  #endif /* __linux__ */
> diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h
> index a70abb6..e76c624 100644
> --- a/src/util/virnetlink.h
> +++ b/src/util/virnetlink.h
> @@ -21,6 +21,7 @@
>  # define __VIR_NETLINK_H__
>  
>  # include "config.h"
> +# include "internal.h"
>  
>  # if defined(__linux__) && defined(HAVE_LIBNL)
>  
> @@ -29,6 +30,7 @@
>  # else
>  
>  struct nl_msg;
> +struct sockaddr_nl;
>  
>  # endif /* __linux__ */
>  
> @@ -36,4 +38,31 @@ int virNetlinkCommand(struct nl_msg *nl_msg,
>                        unsigned char **respbuf, unsigned int *respbuflen,
>                        int nl_pid);
>  
> +typedef void (*virNetlinkEventHandleCallback)( unsigned char *msg, int length, struct sockaddr_nl *peer, int *handled, void *opaque);
> +
> +/**
> + * stopNetlinkEventServer: stop the monitor to receive netlink messages for libvirtd
> + */
> +int virNetlinkEventServiceStop(void);
> +
> +/**
> + * startNetlinkEventServer: start a monitor to receive netlink messages for libvirtd
> + */
> +int virNetlinkEventServiceStart(void);
> +
> +/**
> + * virNetlinkEventServiceIsRunning: returns if the netlink event service is running.
> + */
> +int virNetlinkEventServiceIsRunning(void);
> +
> +/**
> + * virNetlinkEventAddClient: register a callback for handling of netlink messages
> + */
> +int virNetlinkEventAddClient(virNetlinkEventHandleCallback cb, void *opaque, const unsigned char *macaddr);
> +
> +/**
> + * virNetlinkEventRemoveClient: unregister a callback from a netlink monitor
> + */
> +int virNetlinkEventRemoveClient(int watch, const unsigned char *macaddr);
> +
>  #endif /* __VIR_NETLINK_H__ */




More information about the libvir-list mailing list