[Libguestfs] [PATCH nbdkit v2] Add support for AF_VSOCK.

Richard W.M. Jones rjones at redhat.com
Fri Oct 18 17:05:09 UTC 2019


On platforms which support it (only Linux currently) nbdkit can act as
a vsock server.  Guests running on the host see a raw NBD socket which
they can connect to by opening an AF_VSOCK connection.  (However, at
present only libnbd supports this.)

The current limitations are:

 * There is no access control.  Any guest which has vsock enabled can
   open the socket.

 * nbdkit can only listen on either TCP/IP or AF_VSOCK, not both at
   the same time.  (The same currently applies to TCP/IP vs AF_UNIX so
   this is not a new restriction).

 * Lacks a test because you cannot use vsock to communicate host to
   host.

See: https://wiki.qemu.org/Features/VirtioVsock

Thanks: Stefan Hajnoczi and Eric Blake
---
 configure.ac            |  2 ++
 docs/nbdkit-service.pod | 47 +++++++++++++++++++++++++-
 docs/nbdkit.pod         |  5 +++
 docs/synopsis.txt       |  2 +-
 server/internal.h       |  2 ++
 server/main.c           | 37 +++++++++++++++++---
 server/options.h        |  2 ++
 server/sockets.c        | 75 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 165 insertions(+), 7 deletions(-)

diff --git a/configure.ac b/configure.ac
index 83eefb6..bd34b8f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -203,6 +203,8 @@ AC_CHECK_HEADERS([\
 	sys/prctl.h \
 	sys/procctl.h])
 
+AC_CHECK_HEADERS([linux/vm_sockets.h], [], [], [#include <sys/socket.h>])
+
 dnl Check for functions in libc, all optional.
 AC_CHECK_FUNCS([\
 	accept4 \
diff --git a/docs/nbdkit-service.pod b/docs/nbdkit-service.pod
index ad5ca50..e743628 100644
--- a/docs/nbdkit-service.pod
+++ b/docs/nbdkit-service.pod
@@ -82,6 +82,50 @@ Using I<--log=syslog> forces all messages to go to the system log.
 Debug messages (I<-v>/I<--verbose>) always go to standard error and
 are never sent to the system log.
 
+=head1 AF_VSOCK
+
+On Linux nbdkit supports the C<AF_VSOCK> address family / protocol.
+This allows you to serve NBD devices into virtual machines without
+using a regular network connection.
+
+B<Note> that this is different from the usual case where you present
+NBD as a virtual block device to a guest (which the guest sees as
+something like a SATA or virtio-scsi disk).  With C<AF_VSOCK> the
+virtual machine sees a raw NBD socket which it can connect to by
+opening an C<AF_VSOCK> connection.  Only libnbd supports C<AF_VSOCK>
+NBD client connections at the time of writing (2019).  For more about
+this protocol, see L<https://wiki.qemu.org/Features/VirtioVsock>.
+
+=head2 AF_VSOCK example
+
+To set up an C<AF_VSOCK> server, use for example:
+
+ nbdkit --vsock [--port PORT] memory 1G
+
+The optional I<-p>/I<--port> argument is used to change the
+C<AF_VSOCK> port number.  These port numbers exist in a different
+namespace from TCP/IP port numbers.  Also unlike TCP, the port numbers
+are 32 bit.  The default port is 10809.
+
+The guest that wishes to access nbdkit must be configured for
+virtio-vsock.  On the qemu command line use:
+
+ qemu ... -device vhost-vsock-pci,id=vhost-vsock-pci0
+
+For libvirt add this element to the C<<< <devices> >>> section:
+
+ <vsock/>
+
+If you see the error C<unable to open vhost-vsock device> then you may
+have to unload the VMCI transport on the host:
+
+ modprobe -r vmw_vsock_vmci_transport
+
+Once nbdkit and the guest are running, from inside the guest you can
+connect to nbdkit on the host using libnbd:
+
+ nbdsh -c 'h.connect_vsock(2, 10809)' -c 'print(h.get_size())'
+
 =head1 ENVIRONMENT VARIABLES
 
 =over 4
@@ -102,7 +146,8 @@ L<systemd(1)>,
 L<systemd.socket(5)>,
 L<syslog(3)>,
 L<rsyslogd(8)>,
-L<journalctl(1)>.
+L<journalctl(1)>,
+L<nbdsh(1)>.
 
 =head1 AUTHORS
 
diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod
index 0fa3061..094cfca 100644
--- a/docs/nbdkit.pod
+++ b/docs/nbdkit.pod
@@ -444,6 +444,11 @@ into the background (but not required).
 
 Print the version number of nbdkit and exit.
 
+=item B<--vsock>
+
+Use the AF_VSOCK protocol (instead of TCP/IP).  You can use I<-p>/I<--port>
+to change the port (default 10809).  See L<nbdkit-service(1)/AF_VSOCK>.
+
 =back
 
 =head1 PLUGIN NAME
diff --git a/docs/synopsis.txt b/docs/synopsis.txt
index 5fc57fd..a6b6028 100644
--- a/docs/synopsis.txt
+++ b/docs/synopsis.txt
@@ -12,7 +12,7 @@ nbdkit [-D|--debug PLUGIN|FILTER.FLAG=N]
        [--tls-certificates /path/to/certificates]
        [--tls-psk /path/to/pskfile] [--tls-verify-peer]
        [-U|--unix SOCKET] [-u|--user USER]
-       [-v|--verbose] [-V|--version]
+       [-v|--verbose] [-V|--version] [--vsock]
        PLUGIN [[KEY=]VALUE [KEY=VALUE [...]]]
 
 nbdkit --dump-config
diff --git a/server/internal.h b/server/internal.h
index 167da59..5e11e1a 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -454,6 +454,8 @@ extern int *bind_unix_socket (size_t *)
   __attribute__((__nonnull__ (1)));
 extern int *bind_tcpip_socket (size_t *)
   __attribute__((__nonnull__ (1)));
+extern int *bind_vsock (size_t *)
+  __attribute__((__nonnull__ (1)));
 extern void accept_incoming_connections (int *socks, size_t nr_socks)
   __attribute__((__nonnull__ (1)));
 extern void free_listening_sockets (int *socks, size_t nr_socks)
diff --git a/server/main.c b/server/main.c
index 5623149..115fa98 100644
--- a/server/main.c
+++ b/server/main.c
@@ -45,6 +45,11 @@
 #include <syslog.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/socket.h>
+
+#ifdef HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#endif
 
 #include <pthread.h>
 
@@ -85,6 +90,7 @@ bool tls_verify_peer;           /* --tls-verify-peer */
 char *unixsocket;               /* -U */
 const char *user, *group;       /* -u & -g */
 bool verbose;                   /* -v */
+bool vsock;                     /* --vsock */
 unsigned int socket_activation  /* $LISTEN_FDS and $LISTEN_PID set */;
 
 /* The currently loaded plugin. */
@@ -329,6 +335,16 @@ main (int argc, char *argv[])
       tls_verify_peer = true;
       break;
 
+    case VSOCK_OPTION:
+#ifdef AF_VSOCK
+      vsock = true;
+      break;
+#else
+      fprintf (stderr, "%s: AF_VSOCK is not supported on this platform\n",
+               program_name);
+      exit (EXIT_FAILURE);
+#endif
+
     case 'e':
       exportname = optarg;
       if (strnlen (exportname, NBD_MAX_STRING + 1) > NBD_MAX_STRING) {
@@ -826,15 +842,22 @@ start_serving (void)
   size_t nr_socks;
   size_t i;
 
-  /* If the user has mixed up -p/-U/-s options, then give an error.
+  /* If the user has mixed up -p/--run/-s/-U/--vsock options, then
+   * give an error.
    *
    * XXX Actually the server could easily be extended to handle both
    * TCP/IP and Unix sockets, or even multiple TCP/IP ports.
    */
-  if ((port && unixsocket) || (port && listen_stdin) ||
-      (unixsocket && listen_stdin) || (listen_stdin && run)) {
+  if ((port && unixsocket) ||
+      (port && listen_stdin) ||
+      (unixsocket && listen_stdin) ||
+      (listen_stdin && run) ||
+      (vsock && unixsocket) ||
+      (vsock && listen_stdin) ||
+      (vsock && run)) {
     fprintf (stderr,
-             "%s: -p, -U and -s options cannot appear at the same time\n",
+             "%s: -p, --run, -s, -U or --vsock options cannot be used "
+             "in this combination\n",
              program_name);
     exit (EXIT_FAILURE);
   }
@@ -873,9 +896,13 @@ start_serving (void)
     return;
   }
 
-  /* Handling multiple connections on TCP/IP or a Unix domain socket. */
+  /* Handling multiple connections on TCP/IP, Unix domain socket or
+   * AF_VSOCK.
+   */
   if (unixsocket)
     socks = bind_unix_socket (&nr_socks);
+  else if (vsock)
+    socks = bind_vsock (&nr_socks);
   else
     socks = bind_tcpip_socket (&nr_socks);
 
diff --git a/server/options.h b/server/options.h
index c74e0b8..56dda10 100644
--- a/server/options.h
+++ b/server/options.h
@@ -55,6 +55,7 @@ enum {
   TLS_CERTIFICATES_OPTION,
   TLS_PSK_OPTION,
   TLS_VERIFY_PEER_OPTION,
+  VSOCK_OPTION,
 };
 
 static const char *short_options = "D:e:fg:i:nop:P:rst:u:U:vV";
@@ -100,6 +101,7 @@ static const struct option long_options[] = {
   { "user",             required_argument, NULL, 'u' },
   { "verbose",          no_argument,       NULL, 'v' },
   { "version",          no_argument,       NULL, 'V' },
+  { "vsock",            no_argument,       NULL, VSOCK_OPTION },
   { NULL },
 };
 
diff --git a/server/sockets.c b/server/sockets.c
index 3514c69..2af5600 100644
--- a/server/sockets.c
+++ b/server/sockets.c
@@ -35,6 +35,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
+#include <inttypes.h>
 #include <string.h>
 #include <unistd.h>
 #include <poll.h>
@@ -47,6 +48,10 @@
 #include <netinet/tcp.h>
 #include <netdb.h>
 
+#ifdef HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#endif
+
 #ifdef HAVE_LIBSELINUX
 #include <selinux/selinux.h>
 #endif
@@ -247,6 +252,76 @@ bind_tcpip_socket (size_t *nr_socks)
   return socks;
 }
 
+/* Create and listen on an AF_VSOCK socket bound to any CID on --port
+ * (default 10809).  Returns a malloc'd array of one socket, setting
+ * *nr_socks to 1.  Exits the process on any error.
+ */
+int *
+bind_vsock (size_t *nr_socks)
+{
+#ifdef AF_VSOCK
+  uint32_t vsock_port;
+  int sock;
+  int *ret;
+  struct sockaddr_vm addr;
+
+  if (port == NULL)
+    vsock_port = 10809;
+  else {
+    /* --port must be numeric for vsock (no service name lookup). XXX */
+    if (nbdkit_parse_uint32_t ("port", port, &vsock_port) == -1)
+      exit (EXIT_FAILURE);
+  }
+
+  /* Any platform with AF_VSOCK also supports SOCK_CLOEXEC so there is
+   * no fallback path.
+   */
+  sock = socket (AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (sock == -1) {
+    perror ("bind_vsock: socket");
+    exit (EXIT_FAILURE);
+  }
+
+  memset (&addr, 0, sizeof addr);
+  addr.svm_family = AF_VSOCK;
+  addr.svm_cid = VMADDR_CID_ANY;
+  addr.svm_port = vsock_port;
+
+  if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    perror ("bind_vsock: bind");
+    exit (EXIT_FAILURE);
+  }
+
+  if (listen (sock, SOMAXCONN) == -1) {
+    perror ("listen");
+    exit (EXIT_FAILURE);
+  }
+
+  ret = malloc (sizeof *ret);
+  if (!ret) {
+    perror ("malloc");
+    exit (EXIT_FAILURE);
+  }
+  ret[0] = sock;
+  *nr_socks = 1;
+
+  /* It's not easy to get the actual CID here.
+   * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
+   * opening /dev/vsock which is not accessible to non-root users.
+   * bind above doesn't update the sockaddr.  Using getsockname
+   * doesn't work.
+   */
+  debug ("bound to vsock any:%" PRIu32, vsock_port);
+
+  return ret;
+#else
+  /* Can't happen: main() rejects --vsock when AF_VSOCK is undefined,
+   * so this function is never called.
+   */
+  abort ();
+#endif
+}
+
 void
 free_listening_sockets (int *socks, size_t nr_socks)
 {
-- 
2.23.0




More information about the Libguestfs mailing list