[Libguestfs] [PATCH nbdkit] Add support for socket activation.

Richard W.M. Jones rjones at redhat.com
Tue Jan 31 16:11:48 UTC 2017


Socket activation (aka systemd socket activation) is a simple protocol
that lets you pass in an opened, listening socket to a server.
Supporting socket activation allows you to use a modern superserver to
serve infrequent NBD requests without needing nbdkit to be running the
whole time.

Although the protocol was invented by systemd, it has been implemented
in a few other places, and the protocol is almost trivially simple.
This implementation is based on the one in libvirt.

Thanks: Dan Berrange, libvirt team
---
 .gitignore                     |   1 +
 docs/nbdkit.pod                |  67 +++++++++++-
 src/main.c                     | 111 ++++++++++++++++++++
 tests/Makefile.am              |  10 +-
 tests/test-socket-activation.c | 226 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 411 insertions(+), 4 deletions(-)
 create mode 100644 tests/test-socket-activation.c

diff --git a/.gitignore b/.gitignore
index 4a51dea..f8918a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ Makefile.in
 /tests/test-perl
 /tests/test-python
 /tests/test-ruby
+/tests/test-socket-activation
 /tests/test-streaming
 /tests/test-xz
 /test-driver
diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod
index 2b11eed..ad02ec3 100644
--- a/docs/nbdkit.pod
+++ b/docs/nbdkit.pod
@@ -192,9 +192,11 @@ This option implies I<--foreground>.
 Don't fork.  Handle a single NBD connection on stdin/stdout.  After
 stdin closes, the server exits.
 
-You can use this option to run nbdkit from inetd, systemd or similar
+You can use this option to run nbdkit from inetd or similar
 superservers; or just for testing; or if you want to run nbdkit in a
-non-conventional way.
+non-conventional way.  Note that if you want to run nbdkit from
+systemd, then it may be better to use L</SOCKET ACTIVATION> instead of
+this option.
 
 This option implies I<--foreground>.
 
@@ -263,6 +265,48 @@ To list all the options supported by a plugin, do:
 
  nbdkit --help file
 
+=head1 SOCKET ACTIVATION
+
+nbdkit supports socket activation (sometimes called systemd socket
+activation).  This is a simple protocol where instead of nbdkit itself
+opening the listening socket(s), the parent process (typically
+systemd) passes in pre-opened file descriptors.  Socket activation
+lets you serve infrequent NBD requests using a superserver without
+needing nbdkit to be running the whole time.
+
+Socket activation is triggered when C<LISTEN_FDS> and C<LISTEN_PID>
+are both set in the environment and C<LISTEN_PID> is nbdkit's own
+process ID.  In this mode it is an error to use the I<-i>, I<-p>,
+I<--run>, I<-s> or I<-U> flags on the command line.  Also in this
+mode nbdkit does not fork into the background (i.e. I<-f> is implied).
+
+=head2 Using socket activation with systemd
+
+To use nbdkit with socket activation from systemd, create a unit file
+ending in C<.socket> (eg. C</etc/systemd/system/nbdkit.socket>)
+containing:
+
+ [Unit]
+ Description=NBDKit Network Block Device server
+ 
+ [Socket]
+ ListenStream=0.0.0.0:10809
+ 
+ [Install]
+ WantedBy=sockets.target
+
+There are various formats for the C<ListenStream> key.  See
+L<systemd.socket(5)> for more information.
+
+Also create a service unit (eg. C</etc/systemd/system/nbdkit.service>)
+containing:
+
+ [Service]
+ ExecStart=/usr/sbin/nbdkit file file=/path/to/serve
+
+For more information on systemd and socket activation, see
+L<http://0pointer.de/blog/projects/socket-activation.html>.
+
 =head1 CAPTIVE NBDKIT
 
 You can run nbdkit as a "captive process", using the I<--run> option.
@@ -382,8 +426,23 @@ This signal is ignored.
 
 =back
 
+=head1 ENVIRONMENT VARIABLES
+
+=over 4
+
+=item C<LISTEN_FDS>
+
+=item C<LISTEN_PID>
+
+If present in the environment when nbdkit starts up, these trigger
+L</SOCKET ACTIVATION>.
+
+=back
+
 =head1 SEE ALSO
 
+Other nbdkit manual pages:
+
 L<nbdkit-plugin(3)>,
 L<nbdkit-curl-plugin(1)>,
 L<nbdkit-example1-plugin(1)>,
@@ -398,6 +457,10 @@ L<nbdkit-python-plugin(3)>,
 L<nbdkit-vddk-plugin(1)>.
 L<nbdkit-xz-plugin(1)>.
 
+Other manual pages of interest:
+
+L<systemd.socket(5)>.
+
 =head1 AUTHORS
 
 Richard W.M. Jones
diff --git a/src/main.c b/src/main.c
index 9453cce..e935da2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -54,6 +54,8 @@
 #include "nbdkit-plugin.h"
 #include "internal.h"
 
+#define FIRST_SOCKET_ACTIVATION_FD 3 /* defined by systemd ABI */
+
 static char *make_random_fifo (void);
 static void open_plugin_so (const char *filename);
 static void start_serving (void);
@@ -64,6 +66,7 @@ static void write_pidfile (void);
 static void fork_into_background (void);
 static uid_t parseuser (const char *);
 static gid_t parsegroup (const char *);
+static unsigned int get_socket_activation (void);
 
 const char *exportname;         /* -e */
 int foreground;                 /* -f */
@@ -77,6 +80,7 @@ int listen_stdin;               /* -s */
 char *unixsocket;               /* -U */
 const char *user, *group;       /* -u & -g */
 int verbose;                    /* -v */
+unsigned int socket_activation; /* $LISTEN_FDS and $LISTEN_PID set */
 
 volatile int quit;
 
@@ -157,6 +161,9 @@ main (int argc, char *argv[])
 
   tls_init ();
 
+  /* Returns 0 if no socket activation, or the number of FDs. */
+  socket_activation = get_socket_activation ();
+
   for (;;) {
     c = getopt_long (argc, argv, short_options, long_options, &option_index);
     if (c == -1)
@@ -172,6 +179,11 @@ main (int argc, char *argv[])
         dump_plugin = 1;
       }
       else if (strcmp (long_options[option_index].name, "run") == 0) {
+        if (socket_activation) {
+          fprintf (stderr, "%s: cannot use socket activation with --run flag\n",
+                   program_name);
+          exit (EXIT_FAILURE);
+        }
         run = optarg;
         foreground = 1;
       }
@@ -196,6 +208,11 @@ main (int argc, char *argv[])
       break;
 
     case 'i':
+      if (socket_activation) {
+        fprintf (stderr, "%s: cannot use socket activation with -i flag\n",
+                 program_name);
+        exit (EXIT_FAILURE);
+      }
       ipaddr = optarg;
       break;
 
@@ -214,6 +231,11 @@ main (int argc, char *argv[])
       break;
 
     case 'p':
+      if (socket_activation) {
+        fprintf (stderr, "%s: cannot use socket activation with -p flag\n",
+                 program_name);
+        exit (EXIT_FAILURE);
+      }
       port = optarg;
       break;
 
@@ -222,10 +244,20 @@ main (int argc, char *argv[])
       break;
 
     case 's':
+      if (socket_activation) {
+        fprintf (stderr, "%s: cannot use socket activation with -s flag\n",
+                 program_name);
+        exit (EXIT_FAILURE);
+      }
       listen_stdin = 1;
       break;
 
     case 'U':
+      if (socket_activation) {
+        fprintf (stderr, "%s: cannot use socket activation with -U flag\n",
+                 program_name);
+        exit (EXIT_FAILURE);
+      }
       if (strcmp (optarg, "-") == 0)
         unixsocket = make_random_fifo ();
       else
@@ -454,6 +486,7 @@ start_serving (void)
 {
   int *socks;
   size_t nr_socks;
+  size_t i;
 
   /* If the user has mixed up -p/-U/-s options, then give an error.
    *
@@ -470,6 +503,27 @@ start_serving (void)
 
   set_up_signals ();
 
+  /* Socket activation -- we are handling connections on pre-opened
+   * file descriptors [FIRST_SOCKET_ACTIVATION_FD ..
+   * FIRST_SOCKET_ACTIVATION_FD+nr_socks-1].
+   */
+  if (socket_activation) {
+    nr_socks = socket_activation;
+    debug ("using socket activation, nr_socks = %zu", nr_socks);
+    socks = malloc (sizeof (int) * nr_socks);
+    if (socks == NULL) {
+      perror ("malloc");
+      exit (EXIT_FAILURE);
+    }
+    for (i = 0; i < nr_socks; ++i)
+      socks[i] = FIRST_SOCKET_ACTIVATION_FD + i;
+    change_user ();
+    write_pidfile ();
+    accept_incoming_connections (socks, nr_socks);
+    free_listening_sockets (socks, nr_socks); /* also closes them */
+    return;
+  }
+
   /* Handling a single connection on stdin/stdout. */
   if (listen_stdin) {
     change_user ();
@@ -752,3 +806,86 @@ parsegroup (const char *id)
 
   return grp->gr_gid;
 }
+
+/* Parse environment variable NAME as a plain unsigned decimal number.
+ *
+ * Returns 1 and stores the value in *R on success, 0 if NAME is not
+ * set, or -1 (after printing a warning) if the value is malformed.
+ */
+static int
+parse_env_uint (const char *name, unsigned int *r)
+{
+  const char *s;
+  char *end;
+  unsigned long v;
+
+  s = getenv (name);
+  if (s == NULL)
+    return 0;
+
+  /* strtoul on its own would accept leading whitespace, a sign and
+   * trailing junk (eg. " 1", "-1", "1x"), so insist on a leading digit
+   * and on the whole string being consumed.  The final test rejects
+   * values which would be truncated when narrowed to unsigned int.
+   */
+  v = strtoul (s, &end, 10);
+  if (*s < '0' || *s > '9' || *end != '\0' || (unsigned int) v != v) {
+    fprintf (stderr, "%s: malformed %s environment variable (ignored)\n",
+             program_name, name);
+    return -1;
+  }
+
+  *r = (unsigned int) v;
+  return 1;
+}
+
+/* Detect socket activation.
+ *
+ * Returns 0 if there is no socket activation ($LISTEN_PID or
+ * $LISTEN_FDS is unset or malformed, or $LISTEN_PID is not our PID),
+ * otherwise the number of FDs which were passed to us, starting at
+ * FIRST_SOCKET_ACTIVATION_FD.  Exits if FD_CLOEXEC cannot be set on
+ * one of those FDs.
+ *
+ * See also virGetListenFDs in libvirt.org:src/util/virutil.c
+ */
+static unsigned int
+get_socket_activation (void)
+{
+  unsigned int pid;
+  unsigned int nr_fds;
+  unsigned int i;
+  int fd;
+
+  if (parse_env_uint ("LISTEN_PID", &pid) != 1)
+    return 0;
+  if (pid != (unsigned int) getpid ()) {
+    fprintf (stderr, "%s: %s was not for us (ignored)\n",
+             program_name, "LISTEN_PID");
+    return 0;
+  }
+
+  if (parse_env_uint ("LISTEN_FDS", &nr_fds) != 1)
+    return 0;
+
+  /* So these are not passed to any child processes we might start. */
+  unsetenv ("LISTEN_FDS");
+  unsetenv ("LISTEN_PID");
+
+  /* So the file descriptors don't leak into child processes. */
+  for (i = 0; i < nr_fds; ++i) {
+    fd = FIRST_SOCKET_ACTIVATION_FD + i;
+    if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) {
+      /* If we cannot set FD_CLOEXEC then it probably means the file
+       * descriptor is invalid, so socket activation has gone wrong
+       * and we should exit.
+       */
+      fprintf (stderr, "%s: socket activation: "
+               "invalid file descriptor fd = %d: %m\n",
+               program_name, fd);
+      exit (EXIT_FAILURE);
+    }
+  }
+
+  return nr_fds;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index afb9975..389bcfa 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -83,7 +83,14 @@ TESTS = \
 	test-single.sh \
 	test-captive.sh \
 	test-random-sock.sh \
-	test-ipv4.sh
+	test-ipv4.sh \
+	test-socket-activation
+
+check_PROGRAMS = \
+	test-socket-activation
+
+test_socket_activation_SOURCES = test-socket-activation.c
+test_socket_activation_CFLAGS = $(WARNINGS_CFLAGS)
 
 # In-depth tests need libguestfs, since that is a convenient way to
 # drive qemu.
@@ -103,7 +110,6 @@ check_LTLIBRARIES = libtest.la
 libtest_la_SOURCES = test.c test.h
 libtest_la_CFLAGS = $(WARNINGS_CFLAGS)
 
-check_PROGRAMS =
 check_DATA =
 check_SCRIPTS =
 
diff --git a/tests/test-socket-activation.c b/tests/test-socket-activation.c
new file mode 100644
index 0000000..5ecdde1
--- /dev/null
+++ b/tests/test-socket-activation.c
@@ -0,0 +1,226 @@
+/* nbdkit
+ * Copyright (C) 2017 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Test socket activation.
+ *
+ * We cannot use the test framework for this since the framework
+ * always uses the -U flag which is incompatible with socket
+ * activation.  Unfortunately this does mean we duplicate some code
+ * from the test framework.
+ *
+ * It's *almost* possible to test this from a shell script
+ * (cf. test-ipv4.sh) but as far as I can tell setting LISTEN_PID
+ * correctly is impossible from shell.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#define FIRST_SOCKET_ACTIVATION_FD 3
+
+#define NBDKIT_START_TIMEOUT 30 /* seconds */
+
+#define NBDKIT_PLUGIN(name) \
+  "../plugins/" name "/.libs/nbdkit-" name "-plugin.so"
+
+/* Declare program_name. */
+#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME == 1
+#include <errno.h>
+#define program_name program_invocation_short_name
+#else
+#define program_name "nbdkit"
+#endif
+
+static char tmpdir[] =   "/tmp/nbdkitXXXXXX";      /* mkdtemp template */
+static char sockpath[] = "/tmp/nbdkitXXXXXX/sock"; /* prefix set in main */
+static char pidpath[] =  "/tmp/nbdkitXXXXXX/pid";  /* prefix set in main */
+
+static pid_t pid = 0;
+
+/* atexit handler: stop nbdkit (if one was started), delete temp files. */
+static void
+cleanup (void)
+{
+  const pid_t nbdkit_pid = pid;
+  if (nbdkit_pid > 0) kill (nbdkit_pid, SIGTERM);
+  unlink (pidpath);
+  unlink (sockpath);
+  rmdir (tmpdir);
+}
+
+/* Pass a pre-opened Unix domain listening socket to nbdkit on
+ * FIRST_SOCKET_ACTIVATION_FD and check that a client receives "NBDMAGIC".
+ */
+int
+main (int argc, char *argv[])
+{
+  int sock;
+  struct sockaddr_un addr;
+  char pid_str[16];
+  size_t i, len;
+  char magic[8];
+
+  if (mkdtemp (tmpdir) == NULL) {
+    perror ("mkdtemp");
+    exit (EXIT_FAILURE);
+  }
+  len = strlen (tmpdir);
+  memcpy (sockpath, tmpdir, len);
+  memcpy (pidpath, tmpdir, len);
+
+  atexit (cleanup);
+
+  /* Open the listening socket which will be passed into nbdkit. */
+  sock = socket (AF_UNIX, SOCK_STREAM /* NB do not use SOCK_CLOEXEC */, 0);
+  if (sock == -1) {
+    perror ("socket");
+    exit (EXIT_FAILURE);
+  }
+
+  addr.sun_family = AF_UNIX;
+  len = strlen (sockpath);
+  memcpy (addr.sun_path, sockpath, len+1 /* trailing \0 */);
+
+  if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    perror (sockpath);
+    exit (EXIT_FAILURE);
+  }
+
+  if (listen (sock, 1) == -1) {
+    perror ("listen");
+    exit (EXIT_FAILURE);
+  }
+
+  if (sock != FIRST_SOCKET_ACTIVATION_FD) {
+    if (dup2 (sock, FIRST_SOCKET_ACTIVATION_FD) == -1) {
+      perror ("dup2");
+      exit (EXIT_FAILURE);
+    }
+    close (sock);
+  }
+
+  /* Run nbdkit. */
+  pid = fork ();
+  if (pid == -1) {
+    perror ("fork");
+    exit (EXIT_FAILURE);
+  }
+  if (pid == 0) {
+    /* Run nbdkit in the child. */
+    setenv ("LISTEN_FDS", "1", 1);
+    snprintf (pid_str, sizeof pid_str, "%d", (int) getpid ());
+    setenv ("LISTEN_PID", pid_str, 1);
+
+    execlp ("../src/nbdkit", "nbdkit", "-P", pidpath, "-o", "-v",
+            NBDKIT_PLUGIN ("example1"), NULL);
+    perror ("exec: nbdkit");
+    _exit (EXIT_FAILURE);
+  }
+
+  /* We don't need the listening socket now.  NB 'sock' may be stale. */
+  close (FIRST_SOCKET_ACTIVATION_FD);
+
+  /* Wait for the pidfile to turn up, which indicates that nbdkit has
+   * started up successfully and is ready to serve requests.  However
+   * if 'pid' exits in this time it indicates a failure to start up.
+   * Give up after NBDKIT_START_TIMEOUT seconds in case nbdkit hangs.
+   */
+  for (i = 0; i < NBDKIT_START_TIMEOUT; ++i) {
+    if (waitpid (pid, NULL, WNOHANG) == pid)
+      goto early_exit;
+    if (kill (pid, 0) == -1) {
+      if (errno == ESRCH) {
+      early_exit:
+        fprintf (stderr,
+                 "%s FAILED: nbdkit exited before starting to serve files\n",
+                 program_name);
+        pid = 0;
+        exit (EXIT_FAILURE);
+      }
+      perror ("kill");
+    }
+    if (access (pidpath, F_OK) == 0)
+      break;
+    sleep (1);
+  }
+  if (i == NBDKIT_START_TIMEOUT) {
+    fprintf (stderr, "%s FAILED: nbdkit startup timed out\n", program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Now nbdkit is supposed to be listening on the Unix domain socket
+   * (which it got via the listening socket that we passed down to it,
+   * not from the path), so we should be able to connect to the Unix
+   * domain socket by its path and receive an NBD magic string.
+   */
+  sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (sock == -1) {
+    perror ("socket");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Reuse addr which was set up above. */
+  if (connect (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    perror (sockpath);
+    exit (EXIT_FAILURE);
+  }
+
+  if (read (sock, magic, 8) != 8) {
+    perror ("read");
+    exit (EXIT_FAILURE);
+  }
+
+  if (memcmp (magic, "NBDMAGIC", 8) != 0) {
+    fprintf (stderr, "%s FAILED: did not read magic string from server\n",
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  close (sock);
+
+  /* Test succeeded. */
+  exit (EXIT_SUCCESS);
+}
-- 
2.10.2




More information about the Libguestfs mailing list