[Libguestfs] [PATCH nbdkit 2/2] server: Split out NBD protocol code from connections code.

Richard W.M. Jones rjones at redhat.com
Mon Mar 18 17:02:44 UTC 2019


The code handling the NBD protocol was located in the same file as the
code handling connections, for no real reason other than historical ones.

This is quite a large code movement which splits out the protocol code
into four new files:

server/protocol-handshake.c           initial handshake
server/protocol-handshake-newstyle.c     "        "     (newstyle)
server/protocol-handshake-oldstyle.c     "        "     (oldstyle)
server/protocol.c                     requests and replies

This also renames functions more logically and gets rid of functions
named with leading underscores.
---
 server/internal.h                    |   22 +
 server/connections.c                 | 1203 +-------------------------
 server/protocol-handshake-newstyle.c |  609 +++++++++++++
 server/protocol-handshake-oldstyle.c |   94 ++
 server/protocol-handshake.c          |  130 +++
 server/protocol.c                    |  515 +++++++++++
 server/Makefile.am                   |    4 +
 7 files changed, 1389 insertions(+), 1188 deletions(-)

diff --git a/server/internal.h b/server/internal.h
index 8427401..d40a82d 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -194,6 +194,28 @@ extern int connection_set_handle (struct connection *conn,
   __attribute__((__nonnull__ (1 /* not 3 */)));
 extern void *connection_get_handle (struct connection *conn, size_t i)
   __attribute__((__nonnull__ (1)));
+extern int connection_get_status (struct connection *conn)
+  __attribute__((__nonnull__ (1)));
+extern int connection_set_status (struct connection *conn, int value)
+  __attribute__((__nonnull__ (1)));
+
+/* protocol-handshake.c */
+extern int protocol_handshake (struct connection *conn)
+  __attribute__((__nonnull__ (1)));
+extern int protocol_compute_eflags (struct connection *conn, uint16_t *flags)
+  __attribute__((__nonnull__ (1, 2)));
+
+/* protocol-handshake-oldstyle.c */
+extern int protocol_handshake_oldstyle (struct connection *conn)
+  __attribute__((__nonnull__ (1)));
+
+/* protocol-handshake-newstyle.c */
+extern int protocol_handshake_newstyle (struct connection *conn)
+  __attribute__((__nonnull__ (1)));
+
+/* protocol.c */
+extern int protocol_recv_request_send_reply (struct connection *conn)
+  __attribute__((__nonnull__ (1)));
 
 /* crypto.c */
 #define root_tls_certificates_dir sysconfdir "/pki/" PACKAGE_NAME
diff --git a/server/connections.c b/server/connections.c
index 7be282f..4ded28d 100644
--- a/server/connections.c
+++ b/server/connections.c
@@ -39,25 +39,8 @@
 #include <inttypes.h>
 #include <string.h>
 #include <unistd.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <stddef.h>
-#include <assert.h>
-
-#include <pthread.h>
 
 #include "internal.h"
-#include "byte-swapping.h"
-#include "protocol.h"
-
-/* Maximum read or write request that we will handle. */
-#define MAX_REQUEST_SIZE (64 * 1024 * 1024)
-
-/* Maximum number of client options we allow before giving up. */
-#define MAX_NR_OPTIONS 32
-
-/* Maximum length of any option data (bytes). */
-#define MAX_OPTION_LENGTH 4096
 
 /* Default number of parallel requests. */
 #define DEFAULT_PARALLEL_REQUESTS 16
@@ -65,8 +48,6 @@
 static struct connection *new_connection (int sockin, int sockout,
                                           int nworkers);
 static void free_connection (struct connection *conn);
-static int negotiate_handshake (struct connection *conn);
-static int recv_request_send_reply (struct connection *conn);
 
 /* Don't call these raw socket functions directly.  Use conn->recv etc. */
 static int raw_recv (struct connection *, void *buf, size_t len);
@@ -106,8 +87,8 @@ connection_get_handle (struct connection *conn, size_t i)
     return NULL;
 }
 
-static int
-get_status (struct connection *conn)
+int
+connection_get_status (struct connection *conn)
 {
   int r;
 
@@ -120,9 +101,10 @@ get_status (struct connection *conn)
 }
 
 /* Update the status if the new value is lower than the existing value.
- * For convenience, return the incoming value. */
-static int
-set_status (struct connection *conn, int value)
+ * For convenience, return the incoming value.
+ */
+int
+connection_set_status (struct connection *conn, int value)
 {
   if (conn->nworkers)
     pthread_mutex_lock (&conn->status_lock);
@@ -150,8 +132,8 @@ connection_worker (void *data)
   threadlocal_set_name (name);
   free (worker);
 
-  while (!quit && get_status (conn) > 0)
-    recv_request_send_reply (conn);
+  while (!quit && connection_get_status (conn) > 0)
+    protocol_recv_request_send_reply (conn);
   debug ("exiting worker thread %s", threadlocal_get_name ());
   free (name);
   return NULL;
@@ -199,14 +181,14 @@ _handle_single_connection (int sockin, int sockout)
     goto done;
 
   /* Handshake. */
-  if (negotiate_handshake (conn) == -1)
+  if (protocol_handshake (conn) == -1)
     goto done;
 
   if (!nworkers) {
     /* No need for a separate thread. */
     debug ("handshake complete, processing requests serially");
-    while (!quit && get_status (conn) > 0)
-      recv_request_send_reply (conn);
+    while (!quit && connection_get_status (conn) > 0)
+      protocol_recv_request_send_reply (conn);
   }
   else {
     /* Create thread pool to process requests. */
@@ -224,12 +206,12 @@ _handle_single_connection (int sockin, int sockout)
 
       if (!worker) {
         perror ("malloc");
-        set_status (conn, -1);
+        connection_set_status (conn, -1);
         goto wait;
       }
       if (asprintf (&worker->name, "%s.%d", plugin_name, nworkers) < 0) {
         perror ("asprintf");
-        set_status (conn, -1);
+        connection_set_status (conn, -1);
         free (worker);
         goto wait;
       }
@@ -239,7 +221,7 @@ _handle_single_connection (int sockin, int sockout)
       if (err) {
         errno = err;
         perror ("pthread_create");
-        set_status (conn, -1);
+        connection_set_status (conn, -1);
         free (worker);
         goto wait;
       }
@@ -261,7 +243,7 @@ _handle_single_connection (int sockin, int sockout)
   if (r == -1)
     goto done;
 
-  ret = get_status (conn);
+  ret = connection_get_status (conn);
  done:
   free_connection (conn);
   return ret;
@@ -335,1161 +317,6 @@ free_connection (struct connection *conn)
   free (conn);
 }
 
-static int
-compute_eflags (struct connection *conn, uint16_t *flags)
-{
-  uint16_t eflags = NBD_FLAG_HAS_FLAGS;
-  int fl;
-
-  fl = backend->can_write (backend, conn);
-  if (fl == -1)
-    return -1;
-  if (readonly || !fl) {
-    eflags |= NBD_FLAG_READ_ONLY;
-    conn->readonly = true;
-  }
-  if (!conn->readonly) {
-    fl = backend->can_zero (backend, conn);
-    if (fl == -1)
-      return -1;
-    if (fl) {
-      eflags |= NBD_FLAG_SEND_WRITE_ZEROES;
-      conn->can_zero = true;
-    }
-
-    fl = backend->can_trim (backend, conn);
-    if (fl == -1)
-      return -1;
-    if (fl) {
-      eflags |= NBD_FLAG_SEND_TRIM;
-      conn->can_trim = true;
-    }
-
-    fl = backend->can_fua (backend, conn);
-    if (fl == -1)
-      return -1;
-    if (fl) {
-      eflags |= NBD_FLAG_SEND_FUA;
-      conn->can_fua = true;
-    }
-  }
-
-  fl = backend->can_flush (backend, conn);
-  if (fl == -1)
-    return -1;
-  if (fl) {
-    eflags |= NBD_FLAG_SEND_FLUSH;
-    conn->can_flush = true;
-  }
-
-  fl = backend->is_rotational (backend, conn);
-  if (fl == -1)
-    return -1;
-  if (fl) {
-    eflags |= NBD_FLAG_ROTATIONAL;
-    conn->is_rotational = true;
-  }
-
-  fl = backend->can_multi_conn (backend, conn);
-  if (fl == -1)
-    return -1;
-  if (fl) {
-    eflags |= NBD_FLAG_CAN_MULTI_CONN;
-    conn->can_multi_conn = true;
-  }
-
-  *flags = eflags;
-  return 0;
-}
-
-static int
-_negotiate_handshake_oldstyle (struct connection *conn)
-{
-  struct old_handshake handshake;
-  int64_t r;
-  uint64_t exportsize;
-  uint16_t gflags, eflags;
-
-  /* In --tls=require / FORCEDTLS mode, old style handshakes are
-   * rejected because they cannot support TLS.
-   */
-  if (tls == 2) {
-    nbdkit_error ("non-TLS client tried to connect in --tls=require mode");
-    return -1;
-  }
-
-  r = backend->get_size (backend, conn);
-  if (r == -1)
-    return -1;
-  if (r < 0) {
-    nbdkit_error (".get_size function returned invalid value "
-                  "(%" PRIi64 ")", r);
-    return -1;
-  }
-  exportsize = (uint64_t) r;
-  conn->exportsize = exportsize;
-
-  gflags = 0;
-  if (compute_eflags (conn, &eflags) < 0)
-    return -1;
-
-  debug ("oldstyle negotiation: flags: global 0x%x export 0x%x",
-         gflags, eflags);
-
-  memset (&handshake, 0, sizeof handshake);
-  memcpy (handshake.nbdmagic, "NBDMAGIC", 8);
-  handshake.version = htobe64 (OLD_VERSION);
-  handshake.exportsize = htobe64 (exportsize);
-  handshake.gflags = htobe16 (gflags);
-  handshake.eflags = htobe16 (eflags);
-
-  if (conn->send (conn, &handshake, sizeof handshake) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  return 0;
-}
-
-/* Receive newstyle options. */
-
-static int
-send_newstyle_option_reply (struct connection *conn,
-                            uint32_t option, uint32_t reply)
-{
-  struct fixed_new_option_reply fixed_new_option_reply;
-
-  fixed_new_option_reply.magic = htobe64 (NBD_REP_MAGIC);
-  fixed_new_option_reply.option = htobe32 (option);
-  fixed_new_option_reply.reply = htobe32 (reply);
-  fixed_new_option_reply.replylen = htobe32 (0);
-
-  if (conn->send (conn,
-                  &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  return 0;
-}
-
-static int
-send_newstyle_option_reply_exportname (struct connection *conn,
-                                       uint32_t option, uint32_t reply,
-                                       const char *exportname)
-{
-  struct fixed_new_option_reply fixed_new_option_reply;
-  size_t name_len = strlen (exportname);
-  uint32_t len;
-
-  fixed_new_option_reply.magic = htobe64 (NBD_REP_MAGIC);
-  fixed_new_option_reply.option = htobe32 (option);
-  fixed_new_option_reply.reply = htobe32 (reply);
-  fixed_new_option_reply.replylen = htobe32 (name_len + sizeof (len));
-
-  if (conn->send (conn,
-                  &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  len = htobe32 (name_len);
-  if (conn->send (conn, &len, sizeof len) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-  if (conn->send (conn, exportname, name_len) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  return 0;
-}
-
-static int
-send_newstyle_option_reply_info_export (struct connection *conn,
-                                        uint32_t option, uint32_t reply,
-                                        uint16_t info)
-{
-  struct fixed_new_option_reply fixed_new_option_reply;
-  struct fixed_new_option_reply_info_export export;
-
-  fixed_new_option_reply.magic = htobe64 (NBD_REP_MAGIC);
-  fixed_new_option_reply.option = htobe32 (option);
-  fixed_new_option_reply.reply = htobe32 (reply);
-  fixed_new_option_reply.replylen = htobe32 (sizeof export);
-  export.info = htobe16 (info);
-  export.exportsize = htobe64 (conn->exportsize);
-  export.eflags = htobe16 (conn->eflags);
-
-  if (conn->send (conn,
-                  &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply) == -1 ||
-      conn->send (conn, &export, sizeof export) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  return 0;
-}
-
-/* Sub-function during _negotiate_handshake_newstyle, to uniformly handle
- * a client hanging up on a message boundary.
- */
-static int __attribute__ ((format (printf, 4, 5)))
-conn_recv_full (struct connection *conn, void *buf, size_t len,
-                const char *fmt, ...)
-{
-  int r = conn->recv (conn, buf, len);
-  va_list args;
-
-  if (r == -1) {
-    va_start (args, fmt);
-    nbdkit_verror (fmt, args);
-    va_end (args);
-    return -1;
-  }
-  if (r == 0) {
-    /* During negotiation, client EOF on message boundary is less
-     * severe than failure in the middle of the buffer. */
-    debug ("client closed input socket, closing connection");
-    return -1;
-  }
-  return r;
-}
-
-/* Sub-function of _negotiate_handshake_newstyle_options below.  It
- * must be called on all non-error paths out of the options for-loop
- * in that function.
- */
-static int
-finish_newstyle_options (struct connection *conn)
-{
-  int64_t r;
-
-  r = backend->get_size (backend, conn);
-  if (r == -1)
-    return -1;
-  if (r < 0) {
-    nbdkit_error (".get_size function returned invalid value "
-                  "(%" PRIi64 ")", r);
-    return -1;
-  }
-  conn->exportsize = (uint64_t) r;
-
-  if (compute_eflags (conn, &conn->eflags) < 0)
-    return -1;
-
-  debug ("newstyle negotiation: flags: export 0x%x", conn->eflags);
-  return 0;
-}
-
-static int
-_negotiate_handshake_newstyle_options (struct connection *conn)
-{
-  struct new_option new_option;
-  size_t nr_options;
-  uint64_t version;
-  uint32_t option;
-  uint32_t optlen;
-  char data[MAX_OPTION_LENGTH+1];
-  struct new_handshake_finish handshake_finish;
-  const char *optname;
-
-  for (nr_options = 0; nr_options < MAX_NR_OPTIONS; ++nr_options) {
-    if (conn_recv_full (conn, &new_option, sizeof new_option,
-                        "reading option: conn->recv: %m") == -1)
-      return -1;
-
-    version = be64toh (new_option.version);
-    if (version != NEW_VERSION) {
-      nbdkit_error ("unknown option version %" PRIx64
-                    ", expecting %" PRIx64,
-                    version, NEW_VERSION);
-      return -1;
-    }
-
-    /* There is a maximum option length we will accept, regardless
-     * of the option type.
-     */
-    optlen = be32toh (new_option.optlen);
-    if (optlen > MAX_OPTION_LENGTH) {
-      nbdkit_error ("client option data too long (%" PRIu32 ")", optlen);
-      return -1;
-    }
-
-    option = be32toh (new_option.option);
-
-    /* In --tls=require / FORCEDTLS mode the only options allowed
-     * before TLS negotiation are NBD_OPT_ABORT and NBD_OPT_STARTTLS.
-     */
-    if (tls == 2 && !conn->using_tls &&
-        !(option == NBD_OPT_ABORT || option == NBD_OPT_STARTTLS)) {
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_TLS_REQD))
-        return -1;
-      continue;
-    }
-
-    switch (option) {
-    case NBD_OPT_EXPORT_NAME:
-      if (conn_recv_full (conn, data, optlen,
-                          "read: %s: %m", name_of_nbd_opt (option)) == -1)
-        return -1;
-      /* Apart from printing it, ignore the export name. */
-      data[optlen] = '\0';
-      debug ("newstyle negotiation: %s: "
-             "client requested export '%s' (ignored)",
-             name_of_nbd_opt (option), data);
-
-      /* We have to finish the handshake by sending handshake_finish. */
-      if (finish_newstyle_options (conn) == -1)
-        return -1;
-
-      memset (&handshake_finish, 0, sizeof handshake_finish);
-      handshake_finish.exportsize = htobe64 (conn->exportsize);
-      handshake_finish.eflags = htobe16 (conn->eflags);
-
-      if (conn->send (conn,
-                      &handshake_finish,
-                      (conn->cflags & NBD_FLAG_NO_ZEROES)
-                      ? offsetof (struct new_handshake_finish, zeroes)
-                      : sizeof handshake_finish) == -1) {
-        nbdkit_error ("write: %m");
-        return -1;
-      }
-      break;
-
-    case NBD_OPT_ABORT:
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-        return -1;
-      debug ("client sent %s to abort the connection",
-             name_of_nbd_opt (option));
-      return -1;
-
-    case NBD_OPT_LIST:
-      if (optlen != 0) {
-        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-            == -1)
-          return -1;
-        if (conn_recv_full (conn, data, optlen,
-                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
-          return -1;
-        continue;
-      }
-
-      /* Send back the exportname. */
-      debug ("newstyle negotiation: %s: advertising export '%s'",
-             name_of_nbd_opt (option), exportname);
-      if (send_newstyle_option_reply_exportname (conn, option, NBD_REP_SERVER,
-                                                 exportname) == -1)
-        return -1;
-
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-        return -1;
-      break;
-
-    case NBD_OPT_STARTTLS:
-      if (optlen != 0) {
-        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-            == -1)
-          return -1;
-        if (conn_recv_full (conn, data, optlen,
-                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
-          return -1;
-        continue;
-      }
-
-      if (tls == 0) {           /* --tls=off (NOTLS mode). */
-#ifdef HAVE_GNUTLS
-#define NO_TLS_REPLY NBD_REP_ERR_POLICY
-#else
-#define NO_TLS_REPLY NBD_REP_ERR_UNSUP
-#endif
-        if (send_newstyle_option_reply (conn, option, NO_TLS_REPLY) == -1)
-          return -1;
-      }
-      else /* --tls=on or --tls=require */ {
-        /* We can't upgrade to TLS twice on the same connection. */
-        if (conn->using_tls) {
-          if (send_newstyle_option_reply (conn, option,
-                                          NBD_REP_ERR_INVALID) == -1)
-            return -1;
-          continue;
-        }
-
-        /* We have to send the (unencrypted) reply before starting
-         * the handshake.
-         */
-        if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-          return -1;
-
-        /* Upgrade the connection to TLS.  Also performs access control. */
-        if (crypto_negotiate_tls (conn, conn->sockin, conn->sockout) == -1)
-          return -1;
-        conn->using_tls = true;
-        debug ("using TLS on this connection");
-      }
-      break;
-
-    case NBD_OPT_INFO:
-    case NBD_OPT_GO:
-      optname = name_of_nbd_opt (option);
-      if (conn_recv_full (conn, data, optlen,
-                          "read: %s: %m", optname) == -1)
-        return -1;
-
-      if (optlen < 6) { /* 32 bit export length + 16 bit nr info */
-        debug ("newstyle negotiation: %s option length < 6", optname);
-
-        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-            == -1)
-          return -1;
-        continue;
-      }
-
-      {
-        uint32_t exportnamelen;
-        uint16_t nrinfos;
-        uint16_t info;
-        size_t i;
-        CLEANUP_FREE char *requested_exportname = NULL;
-
-        /* Validate the name length and number of INFO requests. */
-        memcpy (&exportnamelen, &data[0], 4);
-        exportnamelen = be32toh (exportnamelen);
-        if (exportnamelen > optlen-6 /* NB optlen >= 6, see above */) {
-          debug ("newstyle negotiation: %s: export name too long", optname);
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-              == -1)
-            return -1;
-          continue;
-        }
-        memcpy (&nrinfos, &data[exportnamelen+4], 2);
-        nrinfos = be16toh (nrinfos);
-        if (optlen != 4 + exportnamelen + 2 + 2*nrinfos) {
-          debug ("newstyle negotiation: %s: "
-                 "number of information requests incorrect", optname);
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-              == -1)
-            return -1;
-          continue;
-        }
-
-        /* As with NBD_OPT_EXPORT_NAME we print the export name and then
-         * ignore it.
-         */
-        requested_exportname = malloc (exportnamelen+1);
-        if (requested_exportname == NULL) {
-          nbdkit_error ("malloc: %m");
-          return -1;
-        }
-        memcpy (requested_exportname, &data[4], exportnamelen);
-        requested_exportname[exportnamelen] = '\0';
-        debug ("newstyle negotiation: %s: "
-               "client requested export '%s' (ignored)",
-               optname, requested_exportname);
-
-        /* The spec is confusing, but it is required that we send back
-         * NBD_INFO_EXPORT, even if the client did not request it!
-         * qemu client in particular does not request this, but will
-         * fail if we don't send it.
-         */
-        if (finish_newstyle_options (conn) == -1)
-          return -1;
-
-        if (send_newstyle_option_reply_info_export (conn, option,
-                                                    NBD_REP_INFO,
-                                                    NBD_INFO_EXPORT) == -1)
-          return -1;
-
-        /* For now we ignore all other info requests (but we must
-         * ignore NBD_INFO_EXPORT if it was requested, because we
-         * replied already above).  Therefore this loop doesn't do
-         * much at the moment.
-         */
-        for (i = 0; i < nrinfos; ++i) {
-          memcpy (&info, &data[4 + exportnamelen + 2 + i*2], 2);
-          info = be16toh (info);
-          switch (info) {
-          case NBD_INFO_EXPORT: /* ignore - reply sent above */ break;
-          default:
-            debug ("newstyle negotiation: %s: "
-                   "ignoring NBD_INFO_* request %u (%s)",
-                   optname, (unsigned) info, name_of_nbd_info (info));
-            break;
-          }
-        }
-      }
-
-      /* Unlike NBD_OPT_EXPORT_NAME, NBD_OPT_GO sends back an ACK
-       * or ERROR packet.
-       */
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-        return -1;
-
-      break;
-
-    case NBD_OPT_STRUCTURED_REPLY:
-      if (optlen != 0) {
-        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-            == -1)
-          return -1;
-        if (conn_recv_full (conn, data, optlen,
-                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
-          return -1;
-        continue;
-      }
-
-      debug ("newstyle negotiation: %s: client requested structured replies",
-             name_of_nbd_opt (option));
-
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-        return -1;
-
-      conn->structured_replies = true;
-      break;
-
-    case NBD_OPT_LIST_META_CONTEXT:
-    case NBD_OPT_SET_META_CONTEXT:
-      {
-        uint32_t opt_index;
-        uint32_t exportnamelen;
-        uint32_t nr_queries;
-        uint32_t querylen;
-        const char *what;
-
-        optname = name_of_nbd_opt (option);
-        if (conn_recv_full (conn, data, optlen, "read: %s: %m", optname) == -1)
-          return -1;
-
-        if (!conn->structured_replies) {
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-              == -1)
-            return -1;
-          continue;
-        }
-
-        /* Minimum length of the option payload is:
-         *   32 bit export name length followed by empty export name
-         * + 32 bit number of queries followed by no queries
-         * = 8 bytes.
-         */
-        what = "optlen < 8";
-        if (optlen < 8) {
-        opt_meta_invalid_option_len:
-          debug ("newstyle negotiation: %s: invalid option length: %s",
-                 optname, what);
-
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
-              == -1)
-            return -1;
-          continue;
-        }
-
-        /* Discard the export name. */
-        memcpy (&exportnamelen, &data[0], 4);
-        exportnamelen = be32toh (exportnamelen);
-        opt_index = 4 + exportnamelen;
-
-        /* Read the number of queries. */
-        what = "reading number of queries";
-        if (opt_index+4 > optlen)
-          goto opt_meta_invalid_option_len;
-        memcpy (&nr_queries, &data[opt_index], 4);
-        nr_queries = be32toh (nr_queries);
-        opt_index += 4;
-
-        /* for LIST: nr_queries == 0 means return all meta contexts
-         * for SET: nr_queries == 0 means reset all contexts
-         */
-        if (nr_queries == 0) {
-          /* Nothing is supported now. */
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-            return -1;
-        }
-        else {
-          /* Read and answer each query. */
-          while (nr_queries > 0) {
-            what = "reading query string length";
-            if (opt_index+4 > optlen)
-              goto opt_meta_invalid_option_len;
-            memcpy (&querylen, &data[opt_index], 4);
-            querylen = be32toh (querylen);
-            opt_index += 4;
-            what = "reading query string";
-            if (opt_index + querylen > optlen)
-              goto opt_meta_invalid_option_len;
-
-            debug ("newstyle negotiation: %s: %s %.*s",
-                   optname,
-                   option == NBD_OPT_LIST_META_CONTEXT ? "query" : "set",
-                   (int) querylen, &data[opt_index]);
-
-            /* Ignore query - nothing is supported. */
-
-            opt_index += querylen;
-            nr_queries--;
-          }
-          if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
-            return -1;
-        }
-      }
-      break;
-
-    default:
-      /* Unknown option. */
-      if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1)
-        return -1;
-      if (conn_recv_full (conn, data, optlen,
-                          "reading unknown option data: conn->recv: %m") == -1)
-        return -1;
-    }
-
-    /* Note, since it's not very clear from the protocol doc, that the
-     * client must send NBD_OPT_EXPORT_NAME or NBD_OPT_GO last, and
-     * that ends option negotiation.
-     */
-    if (option == NBD_OPT_EXPORT_NAME || option == NBD_OPT_GO)
-      break;
-  }
-
-  if (nr_options >= MAX_NR_OPTIONS) {
-    nbdkit_error ("client exceeded maximum number of options (%d)",
-                  MAX_NR_OPTIONS);
-    return -1;
-  }
-
-  /* In --tls=require / FORCEDTLS mode, we must have upgraded to TLS
-   * by the time we finish option negotiation.  If not, give up.
-   */
-  if (tls == 2 && !conn->using_tls) {
-    nbdkit_error ("non-TLS client tried to connect in --tls=require mode");
-    return -1;
-  }
-
-  return 0;
-}
-
-static int
-_negotiate_handshake_newstyle (struct connection *conn)
-{
-  struct new_handshake handshake;
-  uint16_t gflags;
-
-  gflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
-
-  debug ("newstyle negotiation: flags: global 0x%x", gflags);
-
-  memcpy (handshake.nbdmagic, "NBDMAGIC", 8);
-  handshake.version = htobe64 (NEW_VERSION);
-  handshake.gflags = htobe16 (gflags);
-
-  if (conn->send (conn, &handshake, sizeof handshake) == -1) {
-    nbdkit_error ("write: %m");
-    return -1;
-  }
-
-  /* Client now sends us its 32 bit flags word ... */
-  if (conn_recv_full (conn, &conn->cflags, sizeof conn->cflags,
-                      "reading initial client flags: conn->recv: %m") == -1)
-    return -1;
-  conn->cflags = be32toh (conn->cflags);
-  /* ... which we check for accuracy. */
-  debug ("newstyle negotiation: client flags: 0x%x", conn->cflags);
-  if (conn->cflags & ~gflags) {
-    nbdkit_error ("client requested unknown flags 0x%x", conn->cflags);
-    return -1;
-  }
-
-  /* Receive newstyle options. */
-  if (_negotiate_handshake_newstyle_options (conn) == -1)
-    return -1;
-
-  return 0;
-}
-
-static int
-negotiate_handshake (struct connection *conn)
-{
-  int r;
-
-  lock_request (conn);
-  if (!newstyle)
-    r = _negotiate_handshake_oldstyle (conn);
-  else
-    r = _negotiate_handshake_newstyle (conn);
-  unlock_request (conn);
-
-  return r;
-}
-
-static bool
-valid_range (struct connection *conn, uint64_t offset, uint32_t count)
-{
-  uint64_t exportsize = conn->exportsize;
-
-  return count > 0 && offset <= exportsize && offset + count <= exportsize;
-}
-
-static bool
-validate_request (struct connection *conn,
-                  uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
-                  uint32_t *error)
-{
-  /* Readonly connection? */
-  if (conn->readonly &&
-      (cmd == NBD_CMD_WRITE || cmd == NBD_CMD_TRIM ||
-       cmd == NBD_CMD_WRITE_ZEROES)) {
-    nbdkit_error ("invalid request: %s: write request on readonly connection",
-                  name_of_nbd_cmd (cmd));
-    *error = EROFS;
-    return false;
-  }
-
-  /* Validate cmd, offset, count. */
-  switch (cmd) {
-  case NBD_CMD_READ:
-  case NBD_CMD_WRITE:
-  case NBD_CMD_TRIM:
-  case NBD_CMD_WRITE_ZEROES:
-    if (!valid_range (conn, offset, count)) {
-      /* XXX Allow writes to extend the disk? */
-      nbdkit_error ("invalid request: %s: offset and count are out of range: "
-                    "offset=%" PRIu64 " count=%" PRIu32,
-                    name_of_nbd_cmd (cmd), offset, count);
-      *error = (cmd == NBD_CMD_WRITE ||
-                cmd == NBD_CMD_WRITE_ZEROES) ? ENOSPC : EINVAL;
-      return false;
-    }
-    break;
-
-  case NBD_CMD_FLUSH:
-    if (offset != 0 || count != 0) {
-      nbdkit_error ("invalid request: %s: expecting offset and count = 0",
-                    name_of_nbd_cmd (cmd));
-      *error = EINVAL;
-      return false;
-    }
-    break;
-
-  default:
-    nbdkit_error ("invalid request: unknown command (%" PRIu32 ") ignored",
-                  cmd);
-    *error = EINVAL;
-    return false;
-  }
-
-  /* Validate flags */
-  if (flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
-    nbdkit_error ("invalid request: unknown flag (0x%x)", flags);
-    *error = EINVAL;
-    return false;
-  }
-  if ((flags & NBD_CMD_FLAG_NO_HOLE) &&
-      cmd != NBD_CMD_WRITE_ZEROES) {
-    nbdkit_error ("invalid request: NO_HOLE flag needs WRITE_ZEROES request");
-    *error = EINVAL;
-    return false;
-  }
-  if (!conn->can_fua && (flags & NBD_CMD_FLAG_FUA)) {
-    nbdkit_error ("invalid request: FUA flag not supported");
-    *error = EINVAL;
-    return false;
-  }
-
-  /* Refuse over-large read and write requests. */
-  if ((cmd == NBD_CMD_WRITE || cmd == NBD_CMD_READ) &&
-      count > MAX_REQUEST_SIZE) {
-    nbdkit_error ("invalid request: %s: data request is too large (%" PRIu32
-                  " > %d)",
-                  name_of_nbd_cmd (cmd), count, MAX_REQUEST_SIZE);
-    *error = ENOMEM;
-    return false;
-  }
-
-  /* Flush allowed? */
-  if (!conn->can_flush && cmd == NBD_CMD_FLUSH) {
-    nbdkit_error ("invalid request: %s: flush operation not supported",
-                  name_of_nbd_cmd (cmd));
-    *error = EINVAL;
-    return false;
-  }
-
-  /* Trim allowed? */
-  if (!conn->can_trim && cmd == NBD_CMD_TRIM) {
-    nbdkit_error ("invalid request: %s: trim operation not supported",
-                  name_of_nbd_cmd (cmd));
-    *error = EINVAL;
-    return false;
-  }
-
-  /* Zero allowed? */
-  if (!conn->can_zero && cmd == NBD_CMD_WRITE_ZEROES) {
-    nbdkit_error ("invalid request: %s: write zeroes operation not supported",
-                  name_of_nbd_cmd (cmd));
-    *error = EINVAL;
-    return false;
-  }
-
-  return true;                     /* Command validates. */
-}
-
-/* This is called with the request lock held to actually execute the
- * request (by calling the plugin).  Note that the request fields have
- * been validated already in 'validate_request' so we don't have to
- * check them again.  'buf' is either the data to be written or the
- * data to be returned, and points to a buffer of size 'count' bytes.
- *
- * In all cases, the return value is the system errno value that will
- * later be converted to the nbd error to send back to the client (0
- * for success).
- */
-static uint32_t
-handle_request (struct connection *conn,
-                uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
-                void *buf)
-{
-  uint32_t f = 0;
-  bool fua = conn->can_fua && (flags & NBD_CMD_FLAG_FUA);
-  int err = 0;
-
-  /* Clear the error, so that we know if the plugin calls
-   * nbdkit_set_error() or relied on errno.  */
-  threadlocal_set_error (0);
-
-  switch (cmd) {
-  case NBD_CMD_READ:
-    if (backend->pread (backend, conn, buf, count, offset, 0, &err) == -1)
-      return err;
-    break;
-
-  case NBD_CMD_WRITE:
-    if (fua)
-      f |= NBDKIT_FLAG_FUA;
-    if (backend->pwrite (backend, conn, buf, count, offset, f, &err) == -1)
-      return err;
-    break;
-
-  case NBD_CMD_FLUSH:
-    if (backend->flush (backend, conn, 0, &err) == -1)
-      return err;
-    break;
-
-  case NBD_CMD_TRIM:
-    if (fua)
-      f |= NBDKIT_FLAG_FUA;
-    if (backend->trim (backend, conn, count, offset, f, &err) == -1)
-      return err;
-    break;
-
-  case NBD_CMD_WRITE_ZEROES:
-    if (!(flags & NBD_CMD_FLAG_NO_HOLE))
-      f |= NBDKIT_FLAG_MAY_TRIM;
-    if (fua)
-      f |= NBDKIT_FLAG_FUA;
-    if (backend->zero (backend, conn, count, offset, f, &err) == -1)
-      return err;
-    break;
-
-  default:
-    abort ();
-  }
-
-  return 0;
-}
-
-static int
-skip_over_write_buffer (int sock, size_t count)
-{
-  char buf[BUFSIZ];
-  ssize_t r;
-
-  if (count > MAX_REQUEST_SIZE * 2) {
-    nbdkit_error ("write request too large to skip");
-    return -1;
-  }
-
-  while (count > 0) {
-    r = read (sock, buf, count > BUFSIZ ? BUFSIZ : count);
-    if (r == -1) {
-      nbdkit_error ("skipping write buffer: %m");
-      return -1;
-    }
-    if (r == 0)  {
-      nbdkit_error ("unexpected early EOF");
-      errno = EBADMSG;
-      return -1;
-    }
-    count -= r;
-  }
-  return 0;
-}
-
-/* Convert a system errno to an NBD_E* error code. */
-static int
-nbd_errno (int error)
-{
-  switch (error) {
-  case 0:
-    return NBD_SUCCESS;
-  case EROFS:
-  case EPERM:
-    return NBD_EPERM;
-  case EIO:
-    return NBD_EIO;
-  case ENOMEM:
-    return NBD_ENOMEM;
-#ifdef EDQUOT
-  case EDQUOT:
-#endif
-  case EFBIG:
-  case ENOSPC:
-    return NBD_ENOSPC;
-#ifdef ESHUTDOWN
-  case ESHUTDOWN:
-    return NBD_ESHUTDOWN;
-#endif
-  case EINVAL:
-  default:
-    return NBD_EINVAL;
-  }
-}
-
-static int
-send_simple_reply (struct connection *conn,
-                   uint64_t handle, uint16_t cmd,
-                   const char *buf, uint32_t count,
-                   uint32_t error)
-{
-  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
-  struct simple_reply reply;
-  int r;
-
-  reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
-  reply.handle = handle;
-  reply.error = htobe32 (nbd_errno (error));
-
-  r = conn->send (conn, &reply, sizeof reply);
-  if (r == -1) {
-    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
-    return set_status (conn, -1);
-  }
-
-  /* Send the read data buffer. */
-  if (cmd == NBD_CMD_READ && !error) {
-    r = conn->send (conn, buf, count);
-    if (r == -1) {
-      nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
-      return set_status (conn, -1);
-    }
-  }
-
-  return 1;                     /* command processed ok */
-}
-
-static int
-send_structured_reply_read (struct connection *conn,
-                            uint64_t handle, uint16_t cmd,
-                            const char *buf, uint32_t count, uint64_t offset)
-{
-  /* Once we are really using structured replies and sending data back
-   * in chunks, we'll be able to grab the write lock for each chunk,
-   * allowing other threads to interleave replies.  As we're not doing
-   * that yet we acquire the lock for the whole function.
-   */
-  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
-  struct structured_reply reply;
-  struct structured_reply_offset_data offset_data;
-  int r;
-
-  assert (cmd == NBD_CMD_READ);
-
-  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
-  reply.handle = handle;
-  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
-  reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
-  reply.length = htobe32 (count + sizeof offset_data);
-
-  r = conn->send (conn, &reply, sizeof reply);
-  if (r == -1) {
-    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
-    return set_status (conn, -1);
-  }
-
-  /* Send the offset + read data buffer. */
-  offset_data.offset = htobe64 (offset);
-  r = conn->send (conn, &offset_data, sizeof offset_data);
-  if (r == -1) {
-    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
-    return set_status (conn, -1);
-  }
-
-  r = conn->send (conn, buf, count);
-  if (r == -1) {
-    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
-    return set_status (conn, -1);
-  }
-
-  return 1;                     /* command processed ok */
-}
-
-static int
-send_structured_reply_error (struct connection *conn,
-                             uint64_t handle, uint16_t cmd, uint32_t error)
-{
-  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
-  struct structured_reply reply;
-  struct structured_reply_error error_data;
-  int r;
-
-  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
-  reply.handle = handle;
-  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
-  reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
-  reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);
-
-  r = conn->send (conn, &reply, sizeof reply);
-  if (r == -1) {
-    nbdkit_error ("write error reply: %m");
-    return set_status (conn, -1);
-  }
-
-  /* Send the error. */
-  error_data.error = htobe32 (error);
-  error_data.len = htobe16 (0);
-  r = conn->send (conn, &error_data, sizeof error_data);
-  if (r == -1) {
-    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
-    return set_status (conn, -1);
-  }
-  /* No human readable error message at the moment. */
-
-  return 1;                     /* command processed ok */
-}
-
-static int
-recv_request_send_reply (struct connection *conn)
-{
-  int r;
-  struct request request;
-  uint16_t cmd, flags;
-  uint32_t magic, count, error = 0;
-  uint64_t offset;
-  CLEANUP_FREE char *buf = NULL;
-
-  /* Read the request packet. */
-  {
-    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->read_lock);
-    r = get_status (conn);
-    if (r <= 0)
-      return r;
-    r = conn->recv (conn, &request, sizeof request);
-    if (r == -1) {
-      nbdkit_error ("read request: %m");
-      return set_status (conn, -1);
-    }
-    if (r == 0) {
-      debug ("client closed input socket, closing connection");
-      return set_status (conn, 0);                   /* disconnect */
-    }
-
-    magic = be32toh (request.magic);
-    if (magic != NBD_REQUEST_MAGIC) {
-      nbdkit_error ("invalid request: 'magic' field is incorrect (0x%x)",
-                    magic);
-      return set_status (conn, -1);
-    }
-
-    flags = be16toh (request.flags);
-    cmd = be16toh (request.type);
-
-    offset = be64toh (request.offset);
-    count = be32toh (request.count);
-
-    if (cmd == NBD_CMD_DISC) {
-      debug ("client sent %s, closing connection", name_of_nbd_cmd (cmd));
-      return set_status (conn, 0);                   /* disconnect */
-    }
-
-    /* Validate the request. */
-    if (!validate_request (conn, cmd, flags, offset, count, &error)) {
-      if (cmd == NBD_CMD_WRITE &&
-          skip_over_write_buffer (conn->sockin, count) < 0)
-        return set_status (conn, -1);
-      goto send_reply;
-    }
-
-    /* Allocate the data buffer used for either read or write requests. */
-    if (cmd == NBD_CMD_READ || cmd == NBD_CMD_WRITE) {
-      buf = malloc (count);
-      if (buf == NULL) {
-        perror ("malloc");
-        error = ENOMEM;
-        if (cmd == NBD_CMD_WRITE &&
-            skip_over_write_buffer (conn->sockin, count) < 0)
-          return set_status (conn, -1);
-        goto send_reply;
-      }
-    }
-
-    /* Receive the write data buffer. */
-    if (cmd == NBD_CMD_WRITE) {
-      r = conn->recv (conn, buf, count);
-      if (r == 0) {
-        errno = EBADMSG;
-        r = -1;
-      }
-      if (r == -1) {
-        nbdkit_error ("read data: %s: %m", name_of_nbd_cmd (cmd));
-        return set_status (conn, -1);
-      }
-    }
-  }
-
-  /* Perform the request.  Only this part happens inside the request lock. */
-  if (quit || !get_status (conn)) {
-    error = ESHUTDOWN;
-  }
-  else {
-    lock_request (conn);
-    error = handle_request (conn, cmd, flags, offset, count, buf);
-    assert ((int) error >= 0);
-    unlock_request (conn);
-  }
-
-  /* Send the reply packet. */
- send_reply:
-  if (get_status (conn) < 0)
-    return -1;
-
-  if (error != 0) {
-    /* Since we're about to send only the limited NBD_E* errno to the
-     * client, don't lose the information about what really happened
-     * on the server side.  Make sure there is a way for the operator
-     * to retrieve the real error.
-     */
-    debug ("sending error reply: %s", strerror (error));
-  }
-
-  /* Currently we prefer to send simple replies for everything except
-   * where we have to (ie. NBD_CMD_READ when structured_replies have
-   * been negotiated).  However this prevents us from sending
-   * human-readable error messages to the client, so we should
-   * reconsider this in future.
-   */
-  if (conn->structured_replies && cmd == NBD_CMD_READ) {
-    if (!error)
-      return send_structured_reply_read (conn, request.handle, cmd,
-                                         buf, count, offset);
-    else
-      return send_structured_reply_error (conn, request.handle, cmd, error);
-  }
-  else
-    return send_simple_reply (conn, request.handle, cmd, buf, count, error);
-}
-
 /* Write buffer to conn->sockout and either succeed completely
  * (returns 0) or fail (returns -1).
  */
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
new file mode 100644
index 0000000..db01f7b
--- /dev/null
+++ b/server/protocol-handshake-newstyle.c
@@ -0,0 +1,609 @@
+/* nbdkit
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+#include "byte-swapping.h"
+#include "protocol.h"
+
+/* Maximum number of client options we allow before giving up. */
+#define MAX_NR_OPTIONS 32
+
+/* Maximum length of any option data (bytes). */
+#define MAX_OPTION_LENGTH 4096
+
+/* Send a fixed newstyle option reply header that has no payload
+ * (replylen is 0).  Returns 0 on success, -1 if the send failed.
+ */
+static int
+send_newstyle_option_reply (struct connection *conn,
+                            uint32_t option, uint32_t reply)
+{
+  struct fixed_new_option_reply hdr;
+
+  /* Every field goes out big-endian. */
+  hdr.magic = htobe64 (NBD_REP_MAGIC);
+  hdr.option = htobe32 (option);
+  hdr.reply = htobe32 (reply);
+  hdr.replylen = htobe32 (0);
+
+  if (conn->send (conn, &hdr, sizeof hdr) != -1)
+    return 0;
+
+  nbdkit_error ("write: %m");
+  return -1;
+}
+
+/* Send an option reply whose payload is a length-prefixed export
+ * name: the reply header, then the 32 bit name length, then the name
+ * itself (no trailing NUL).  Returns 0 on success, -1 on send failure.
+ */
+static int
+send_newstyle_option_reply_exportname (struct connection *conn,
+                                       uint32_t option, uint32_t reply,
+                                       const char *exportname)
+{
+  const size_t name_len = strlen (exportname);
+  const uint32_t len = htobe32 (name_len);
+  struct fixed_new_option_reply hdr;
+
+  hdr.magic = htobe64 (NBD_REP_MAGIC);
+  hdr.option = htobe32 (option);
+  hdr.reply = htobe32 (reply);
+  /* Payload is the length word plus the name bytes. */
+  hdr.replylen = htobe32 (name_len + sizeof (len));
+
+  /* Header first, then the name length, then the name. */
+  if (conn->send (conn, &hdr, sizeof hdr) == -1)
+    goto write_failed;
+  if (conn->send (conn, &len, sizeof len) == -1)
+    goto write_failed;
+  if (conn->send (conn, exportname, name_len) == -1)
+    goto write_failed;
+
+  return 0;
+
+ write_failed:
+  nbdkit_error ("write: %m");
+  return -1;
+}
+
+/* Send an option reply whose payload describes the export, taken from
+ * conn->exportsize and conn->eflags.  Returns 0 on success, -1 on error.
+ */
+static int
+send_newstyle_option_reply_info_export (struct connection *conn,
+                                        uint32_t option, uint32_t reply,
+                                        uint16_t info)
+{
+  struct fixed_new_option_reply hdr;
+  struct fixed_new_option_reply_info_export payload;
+
+  hdr.magic = htobe64 (NBD_REP_MAGIC);
+  hdr.option = htobe32 (option);
+  hdr.reply = htobe32 (reply);
+  hdr.replylen = htobe32 (sizeof payload);
+
+  payload.info = htobe16 (info);
+  payload.exportsize = htobe64 (conn->exportsize);
+  payload.eflags = htobe16 (conn->eflags);
+
+  if (conn->send (conn, &hdr, sizeof hdr) != -1 &&
+      conn->send (conn, &payload, sizeof payload) != -1)
+    return 0;
+  nbdkit_error ("write: %m");
+  return -1;
+}
+
+/* Helper for the newstyle handshake, to uniformly handle a client
+ * hanging up on a message boundary.  Returns -1 on error or EOF.
+ */
+static int __attribute__ ((format (printf, 4, 5)))
+conn_recv_full (struct connection *conn, void *buf, size_t len,
+                const char *fmt, ...)
+{
+  va_list ap;
+  const int nread = conn->recv (conn, buf, len);
+
+  if (nread == 0) {
+    /* EOF on a message boundary during negotiation is less severe
+     * than a failure mid-buffer, so it is only logged as debug. */
+    debug ("client closed input socket, closing connection");
+    return -1;
+  }
+  if (nread != -1)
+    return nread;
+
+  va_start (ap, fmt);
+  nbdkit_verror (fmt, ap);
+  va_end (ap);
+  return -1;
+}
+
+/* Sub-function of negotiate_handshake_newstyle_options below, which
+ * must call it on every non-error path out of its options for-loop.
+ * It caches the export size and export flags in 'conn'.
+ */
+static int
+finish_newstyle_options (struct connection *conn)
+{
+  const int64_t size = backend->get_size (backend, conn);
+
+  if (size < 0) {
+    /* -1 is the ordinary failure return; other negatives are bogus. */
+    if (size != -1)
+      nbdkit_error (".get_size function returned invalid value "
+                    "(%" PRIi64 ")", size);
+    return -1;
+  }
+  conn->exportsize = (uint64_t) size;
+
+  if (protocol_compute_eflags (conn, &conn->eflags) < 0)
+    return -1;
+
+  debug ("newstyle negotiation: flags: export 0x%x", conn->eflags);
+
+  return 0;
+}
+
+/* Read and answer client options until NBD_OPT_EXPORT_NAME or
+ * NBD_OPT_GO ends the negotiation.  Returns 0 on success, or -1 on
+ * error, on NBD_OPT_ABORT, after MAX_NR_OPTIONS options, or if TLS
+ * was required but never negotiated.
+ */
+static int
+negotiate_handshake_newstyle_options (struct connection *conn)
+{
+  struct new_option new_option;
+  size_t nr_options;
+  uint64_t version;
+  uint32_t option, optlen;
+  char data[MAX_OPTION_LENGTH+1];
+  struct new_handshake_finish handshake_finish;
+  const char *optname;
+
+  for (nr_options = 0; nr_options < MAX_NR_OPTIONS; ++nr_options) {
+    if (conn_recv_full (conn, &new_option, sizeof new_option,
+                        "reading option: conn->recv: %m") == -1)
+      return -1;
+
+    version = be64toh (new_option.version);
+    if (version != NEW_VERSION) {
+      nbdkit_error ("unknown option version %" PRIx64
+                    ", expecting %" PRIx64,
+                    version, NEW_VERSION);
+      return -1;
+    }
+
+    /* There is a maximum option length we will accept, regardless
+     * of the option type.
+     */
+    optlen = be32toh (new_option.optlen);
+    if (optlen > MAX_OPTION_LENGTH) {
+      nbdkit_error ("client option data too long (%" PRIu32 ")", optlen);
+      return -1;
+    }
+
+    option = be32toh (new_option.option);
+
+    /* In --tls=require / FORCEDTLS mode the only options allowed
+     * before TLS negotiation are NBD_OPT_ABORT and NBD_OPT_STARTTLS.
+     */
+    if (tls == 2 && !conn->using_tls &&
+        !(option == NBD_OPT_ABORT || option == NBD_OPT_STARTTLS)) {
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_TLS_REQD))
+        return -1;
+      /* Consume the refused option's data to keep the stream in sync. */
+      if (conn_recv_full (conn, data, optlen,
+                          "read: %s: %m", name_of_nbd_opt (option)) == -1)
+        return -1;
+      continue;
+    }
+
+    switch (option) {
+    case NBD_OPT_EXPORT_NAME:
+      if (conn_recv_full (conn, data, optlen,
+                          "read: %s: %m", name_of_nbd_opt (option)) == -1)
+        return -1;
+      /* Apart from printing it, ignore the export name. */
+      data[optlen] = '\0';
+      debug ("newstyle negotiation: %s: "
+             "client requested export '%s' (ignored)",
+             name_of_nbd_opt (option), data);
+
+      /* We have to finish the handshake by sending handshake_finish. */
+      if (finish_newstyle_options (conn) == -1)
+        return -1;
+
+      memset (&handshake_finish, 0, sizeof handshake_finish);
+      handshake_finish.exportsize = htobe64 (conn->exportsize);
+      handshake_finish.eflags = htobe16 (conn->eflags);
+
+      if (conn->send (conn,
+                      &handshake_finish,
+                      (conn->cflags & NBD_FLAG_NO_ZEROES)
+                      ? offsetof (struct new_handshake_finish, zeroes)
+                      : sizeof handshake_finish) == -1) {
+        nbdkit_error ("write: %m");
+        return -1;
+      }
+      break;
+
+    case NBD_OPT_ABORT:
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+        return -1;
+      debug ("client sent %s to abort the connection",
+             name_of_nbd_opt (option));
+      return -1;
+
+    case NBD_OPT_LIST:
+      if (optlen != 0) {
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+            == -1)
+          return -1;
+        if (conn_recv_full (conn, data, optlen,
+                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
+          return -1;
+        continue;
+      }
+
+      /* Send back the exportname. */
+      debug ("newstyle negotiation: %s: advertising export '%s'",
+             name_of_nbd_opt (option), exportname);
+      if (send_newstyle_option_reply_exportname (conn, option, NBD_REP_SERVER,
+                                                 exportname) == -1)
+        return -1;
+
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+        return -1;
+      break;
+
+    case NBD_OPT_STARTTLS:
+      if (optlen != 0) {
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+            == -1)
+          return -1;
+        if (conn_recv_full (conn, data, optlen,
+                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
+          return -1;
+        continue;
+      }
+
+      if (tls == 0) {           /* --tls=off (NOTLS mode). */
+#ifdef HAVE_GNUTLS
+#define NO_TLS_REPLY NBD_REP_ERR_POLICY
+#else
+#define NO_TLS_REPLY NBD_REP_ERR_UNSUP
+#endif
+        if (send_newstyle_option_reply (conn, option, NO_TLS_REPLY) == -1)
+          return -1;
+      }
+      else /* --tls=on or --tls=require */ {
+        /* We can't upgrade to TLS twice on the same connection. */
+        if (conn->using_tls) {
+          if (send_newstyle_option_reply (conn, option,
+                                          NBD_REP_ERR_INVALID) == -1)
+            return -1;
+          continue;
+        }
+
+        /* We have to send the (unencrypted) reply before starting
+         * the handshake.
+         */
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+          return -1;
+
+        /* Upgrade the connection to TLS.  Also performs access control. */
+        if (crypto_negotiate_tls (conn, conn->sockin, conn->sockout) == -1)
+          return -1;
+        conn->using_tls = true;
+        debug ("using TLS on this connection");
+      }
+      break;
+
+    case NBD_OPT_INFO:
+    case NBD_OPT_GO:
+      optname = name_of_nbd_opt (option);
+      if (conn_recv_full (conn, data, optlen,
+                          "read: %s: %m", optname) == -1)
+        return -1;
+
+      if (optlen < 6) { /* 32 bit export length + 16 bit nr info */
+        debug ("newstyle negotiation: %s option length < 6", optname);
+
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+            == -1)
+          return -1;
+        continue;
+      }
+
+      {
+        uint32_t exportnamelen;
+        uint16_t nrinfos;
+        uint16_t info;
+        size_t i;
+        CLEANUP_FREE char *requested_exportname = NULL;
+
+        /* Validate the name length and number of INFO requests. */
+        memcpy (&exportnamelen, &data[0], 4);
+        exportnamelen = be32toh (exportnamelen);
+        if (exportnamelen > optlen-6 /* NB optlen >= 6, see above */) {
+          debug ("newstyle negotiation: %s: export name too long", optname);
+          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+              == -1)
+            return -1;
+          continue;
+        }
+        memcpy (&nrinfos, &data[exportnamelen+4], 2);
+        nrinfos = be16toh (nrinfos);
+        if (optlen != 4 + exportnamelen + 2 + 2*nrinfos) {
+          debug ("newstyle negotiation: %s: "
+                 "number of information requests incorrect", optname);
+          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+              == -1)
+            return -1;
+          continue;
+        }
+
+        /* As with NBD_OPT_EXPORT_NAME we print the export name and then
+         * ignore it.
+         */
+        requested_exportname = malloc (exportnamelen+1);
+        if (requested_exportname == NULL) {
+          nbdkit_error ("malloc: %m");
+          return -1;
+        }
+        memcpy (requested_exportname, &data[4], exportnamelen);
+        requested_exportname[exportnamelen] = '\0';
+        debug ("newstyle negotiation: %s: "
+               "client requested export '%s' (ignored)",
+               optname, requested_exportname);
+
+        /* The spec is confusing, but it is required that we send back
+         * NBD_INFO_EXPORT, even if the client did not request it!
+         * qemu client in particular does not request this, but will
+         * fail if we don't send it.
+         */
+        if (finish_newstyle_options (conn) == -1)
+          return -1;
+
+        if (send_newstyle_option_reply_info_export (conn, option,
+                                                    NBD_REP_INFO,
+                                                    NBD_INFO_EXPORT) == -1)
+          return -1;
+
+        /* For now we ignore all other info requests (but we must
+         * ignore NBD_INFO_EXPORT if it was requested, because we
+         * replied already above).  Therefore this loop doesn't do
+         * much at the moment.
+         */
+        for (i = 0; i < nrinfos; ++i) {
+          memcpy (&info, &data[4 + exportnamelen + 2 + i*2], 2);
+          info = be16toh (info);
+          switch (info) {
+          case NBD_INFO_EXPORT: /* ignore - reply sent above */ break;
+          default:
+            debug ("newstyle negotiation: %s: "
+                   "ignoring NBD_INFO_* request %u (%s)",
+                   optname, (unsigned) info, name_of_nbd_info (info));
+            break;
+          }
+        }
+      }
+
+      /* Unlike NBD_OPT_EXPORT_NAME, NBD_OPT_GO sends back an ACK
+       * or ERROR packet.
+       */
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+        return -1;
+
+      break;
+
+    case NBD_OPT_STRUCTURED_REPLY:
+      if (optlen != 0) {
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+            == -1)
+          return -1;
+        if (conn_recv_full (conn, data, optlen,
+                            "read: %s: %m", name_of_nbd_opt (option)) == -1)
+          return -1;
+        continue;
+      }
+
+      debug ("newstyle negotiation: %s: client requested structured replies",
+             name_of_nbd_opt (option));
+
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+        return -1;
+
+      conn->structured_replies = true;
+      break;
+
+    case NBD_OPT_LIST_META_CONTEXT:
+    case NBD_OPT_SET_META_CONTEXT:
+      {
+        uint32_t opt_index;
+        uint32_t exportnamelen;
+        uint32_t nr_queries;
+        uint32_t querylen;
+        const char *what;
+
+        optname = name_of_nbd_opt (option);
+        if (conn_recv_full (conn, data, optlen, "read: %s: %m", optname) == -1)
+          return -1;
+
+        if (!conn->structured_replies) {
+          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+              == -1)
+            return -1;
+          continue;
+        }
+
+        /* Minimum length of the option payload is:
+         *   32 bit export name length followed by empty export name
+         * + 32 bit number of queries followed by no queries
+         * = 8 bytes.
+         */
+        what = "optlen < 8";
+        if (optlen < 8) {
+        opt_meta_invalid_option_len:
+          debug ("newstyle negotiation: %s: invalid option length: %s",
+                 optname, what);
+
+          if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+              == -1)
+            return -1;
+          continue;
+        }
+
+        /* Discard the export name. */
+        memcpy (&exportnamelen, &data[0], 4);
+        exportnamelen = be32toh (exportnamelen);
+
+        /* Read the number of queries. */
+        what = "reading number of queries";
+        if (exportnamelen > optlen-8 /* NB optlen >= 8, see above */)
+          goto opt_meta_invalid_option_len;
+        opt_index = 4 + exportnamelen;
+        memcpy (&nr_queries, &data[opt_index], 4);
+        nr_queries = be32toh (nr_queries);
+        opt_index += 4;
+
+        /* for LIST: nr_queries == 0 means return all meta contexts
+         * for SET: nr_queries == 0 means reset all contexts
+         * Nothing is supported now, so in that case the loop below is
+         * simply skipped and we only send the final NBD_REP_ACK.
+         */
+        while (nr_queries > 0) {
+          what = "reading query string length";
+          if (opt_index+4 > optlen)
+            goto opt_meta_invalid_option_len;
+          memcpy (&querylen, &data[opt_index], 4);
+          querylen = be32toh (querylen);
+          opt_index += 4;
+          what = "reading query string";
+          if (querylen > optlen-opt_index /* NB opt_index <= optlen */)
+            goto opt_meta_invalid_option_len;
+
+          debug ("newstyle negotiation: %s: %s %.*s",
+                 optname,
+                 option == NBD_OPT_LIST_META_CONTEXT ? "query" : "set",
+                 (int) querylen, &data[opt_index]);
+          /* Ignore query - nothing is supported. */
+          opt_index += querylen;
+          nr_queries--;
+        }
+        if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+          return -1;
+      }
+      break;
+
+    default:
+      /* Unknown option. */
+      if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1)
+        return -1;
+      if (conn_recv_full (conn, data, optlen,
+                          "reading unknown option data: conn->recv: %m") == -1)
+        return -1;
+    }
+
+    /* Note, since it's not very clear from the protocol doc, that the
+     * client must send NBD_OPT_EXPORT_NAME or NBD_OPT_GO last, and
+     * that ends option negotiation.
+     */
+    if (option == NBD_OPT_EXPORT_NAME || option == NBD_OPT_GO)
+      break;
+  }
+
+  if (nr_options >= MAX_NR_OPTIONS) {
+    nbdkit_error ("client exceeded maximum number of options (%d)",
+                  MAX_NR_OPTIONS);
+    return -1;
+  }
+
+  /* In --tls=require / FORCEDTLS mode, we must have upgraded to TLS
+   * by the time we finish option negotiation.  If not, give up.
+   */
+  if (tls == 2 && !conn->using_tls) {
+    nbdkit_error ("non-TLS client tried to connect in --tls=require mode");
+    return -1;
+  }
+
+  return 0;
+}
+
+/* Perform the newstyle handshake: send the server greeting, read and
+ * validate the client flags, then negotiate options.  Returns 0 on
+ * success or -1 on any failure.
+ */
+int
+protocol_handshake_newstyle (struct connection *conn)
+{
+  const uint16_t gflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
+  struct new_handshake greeting;
+
+  debug ("newstyle negotiation: flags: global 0x%x", gflags);
+
+  /* The greeting carries the magic, the version and our global flags. */
+  memcpy (greeting.nbdmagic, "NBDMAGIC", 8);
+  greeting.version = htobe64 (NEW_VERSION);
+  greeting.gflags = htobe16 (gflags);
+  if (conn->send (conn, &greeting, sizeof greeting) == -1) {
+    nbdkit_error ("write: %m");
+    return -1;
+  }
+
+  /* The client answers with its 32 bit flags word (big-endian). */
+  if (conn_recv_full (conn, &conn->cflags, sizeof conn->cflags,
+                      "reading initial client flags: conn->recv: %m") == -1)
+    return -1;
+  conn->cflags = be32toh (conn->cflags);
+  debug ("newstyle negotiation: client flags: 0x%x", conn->cflags);
+
+  /* Reject any client flag which we did not advertise. */
+  if ((conn->cflags & ~gflags) != 0) {
+    nbdkit_error ("client requested unknown flags 0x%x", conn->cflags);
+    return -1;
+  }
+
+  /* Hand over to the option negotiation loop. */
+  return negotiate_handshake_newstyle_options (conn) == -1 ? -1 : 0;
+}
diff --git a/server/protocol-handshake-oldstyle.c b/server/protocol-handshake-oldstyle.c
new file mode 100644
index 0000000..c5dd256
--- /dev/null
+++ b/server/protocol-handshake-oldstyle.c
@@ -0,0 +1,94 @@
+/* nbdkit
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+#include "byte-swapping.h"
+#include "protocol.h"
+
+int
+protocol_handshake_oldstyle (struct connection *conn)
+{
+  const uint16_t gflags = 0;    /* no global flags are set here */
+  struct old_handshake reply;
+  uint64_t exportsize;
+  uint16_t eflags;
+  int64_t size;
+
+  /* Oldstyle cannot be upgraded to TLS, so when TLS is mandatory
+   * (--tls=require / FORCEDTLS) the client is turned away at once.
+   */
+  if (tls == 2) {
+    nbdkit_error ("non-TLS client tried to connect in --tls=require mode");
+    return -1;
+  }
+
+  /* -1 is passed up silently; any other negative size is reported. */
+  size = backend->get_size (backend, conn);
+  if (size == -1)
+    return -1;
+  if (size < 0) {
+    nbdkit_error (".get_size function returned invalid value "
+                  "(%" PRIi64 ")", size);
+    return -1;
+  }
+  exportsize = (uint64_t) size;
+  conn->exportsize = exportsize;
+
+  if (protocol_compute_eflags (conn, &eflags) < 0)
+    return -1;
+
+  debug ("oldstyle negotiation: flags: global 0x%x export 0x%x",
+         gflags, eflags);
+
+  memset (&reply, 0, sizeof reply);
+  memcpy (reply.nbdmagic, "NBDMAGIC", 8);
+  reply.version = htobe64 (OLD_VERSION);
+  reply.exportsize = htobe64 (exportsize);
+  reply.gflags = htobe16 (gflags);
+  reply.eflags = htobe16 (eflags);
+  if (conn->send (conn, &reply, sizeof reply) == -1) {
+    nbdkit_error ("write: %m");
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/server/protocol-handshake.c b/server/protocol-handshake.c
new file mode 100644
index 0000000..79a5999
--- /dev/null
+++ b/server/protocol-handshake.c
@@ -0,0 +1,130 @@
+/* nbdkit
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+#include "byte-swapping.h"
+#include "protocol.h"
+
+/* Compute the export flags (same for oldstyle and newstyle) into *flags,
+ * noting each in 'conn'; returns 0, or -1 if a backend callback fails.
+ */
+int
+protocol_compute_eflags (struct connection *conn, uint16_t *flags)
+{
+  uint16_t eflags = NBD_FLAG_HAS_FLAGS;
+  int fl;
+
+  fl = backend->can_write (backend, conn);
+  if (fl == -1)
+    return -1;
+  if (readonly || !fl) {        /* 'readonly' setting or backend cannot write */
+    eflags |= NBD_FLAG_READ_ONLY;
+    conn->readonly = true;
+  }
+  if (!conn->readonly) {        /* zero/trim/FUA are probed only if writable */
+    fl = backend->can_zero (backend, conn);
+    if (fl == -1)
+      return -1;
+    if (fl) {
+      eflags |= NBD_FLAG_SEND_WRITE_ZEROES;
+      conn->can_zero = true;
+    }
+
+    fl = backend->can_trim (backend, conn);
+    if (fl == -1)
+      return -1;
+    if (fl) {
+      eflags |= NBD_FLAG_SEND_TRIM;
+      conn->can_trim = true;
+    }
+
+    fl = backend->can_fua (backend, conn);
+    if (fl == -1)
+      return -1;
+    if (fl) {
+      eflags |= NBD_FLAG_SEND_FUA;
+      conn->can_fua = true;
+    }
+  }
+
+  fl = backend->can_flush (backend, conn);     /* probed even if readonly */
+  if (fl == -1)
+    return -1;
+  if (fl) {
+    eflags |= NBD_FLAG_SEND_FLUSH;
+    conn->can_flush = true;
+  }
+
+  fl = backend->is_rotational (backend, conn);
+  if (fl == -1)
+    return -1;
+  if (fl) {
+    eflags |= NBD_FLAG_ROTATIONAL;
+    conn->is_rotational = true;
+  }
+
+  fl = backend->can_multi_conn (backend, conn);
+  if (fl == -1)
+    return -1;
+  if (fl) {
+    eflags |= NBD_FLAG_CAN_MULTI_CONN;
+    conn->can_multi_conn = true;
+  }
+
+  *flags = eflags;              /* only written on success */
+  return 0;
+}
+
+int
+protocol_handshake (struct connection *conn)
+{
+  int status;
+
+  /* The request lock is held for the whole of the handshake. */
+  lock_request (conn);
+  status = newstyle
+    ? protocol_handshake_newstyle (conn)
+    : protocol_handshake_oldstyle (conn);
+  unlock_request (conn);
+
+  return status;
+}
diff --git a/server/protocol.c b/server/protocol.c
new file mode 100644
index 0000000..f117d42
--- /dev/null
+++ b/server/protocol.c
@@ -0,0 +1,515 @@
+/* nbdkit
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "internal.h"
+#include "byte-swapping.h"
+#include "protocol.h"
+
+/* Maximum read or write request that we will handle. */
+#define MAX_REQUEST_SIZE (64 * 1024 * 1024)
+
+static bool
+valid_range (struct connection *conn, uint64_t offset, uint32_t count)
+{
+  const uint64_t size = conn->exportsize; /* non-empty and inside the export */
+
+  return !(count == 0 || offset > size || offset + count > size);
+}
+
+static bool
+validate_request (struct connection *conn,
+                  uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
+                  uint32_t *error)
+{ /* Returns true if the request may run; else false with *error = errno. */
+  /* Readonly connection? */
+  if (conn->readonly &&
+      (cmd == NBD_CMD_WRITE || cmd == NBD_CMD_TRIM ||
+       cmd == NBD_CMD_WRITE_ZEROES)) {
+    nbdkit_error ("invalid request: %s: write request on readonly connection",
+                  name_of_nbd_cmd (cmd));
+    *error = EROFS;
+    return false;
+  }
+
+  /* Validate cmd, offset, count. */
+  switch (cmd) {
+  case NBD_CMD_READ:
+  case NBD_CMD_WRITE:
+  case NBD_CMD_TRIM:
+  case NBD_CMD_WRITE_ZEROES:
+    if (!valid_range (conn, offset, count)) {
+      /* XXX Allow writes to extend the disk? */
+      nbdkit_error ("invalid request: %s: offset and count are out of range: "
+                    "offset=%" PRIu64 " count=%" PRIu32,
+                    name_of_nbd_cmd (cmd), offset, count);
+      *error = (cmd == NBD_CMD_WRITE ||
+                cmd == NBD_CMD_WRITE_ZEROES) ? ENOSPC : EINVAL;
+      return false;
+    }
+    break;
+
+  case NBD_CMD_FLUSH:
+    if (offset != 0 || count != 0) {
+      nbdkit_error ("invalid request: %s: expecting offset and count = 0",
+                    name_of_nbd_cmd (cmd));
+      *error = EINVAL;
+      return false;
+    }
+    break;
+
+  default:                      /* 'cmd' is uint16_t, hence PRIu16 */
+    nbdkit_error ("invalid request: unknown command (%" PRIu16 ") ignored",
+                  cmd);
+    *error = EINVAL;
+    return false;
+  }
+
+  /* Validate flags */
+  if (flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
+    nbdkit_error ("invalid request: unknown flag (0x%x)", flags);
+    *error = EINVAL;
+    return false;
+  }
+  if ((flags & NBD_CMD_FLAG_NO_HOLE) &&
+      cmd != NBD_CMD_WRITE_ZEROES) {
+    nbdkit_error ("invalid request: NO_HOLE flag needs WRITE_ZEROES request");
+    *error = EINVAL;
+    return false;
+  }
+  if (!conn->can_fua && (flags & NBD_CMD_FLAG_FUA)) {
+    nbdkit_error ("invalid request: FUA flag not supported");
+    *error = EINVAL;
+    return false;
+  }
+
+  /* Refuse over-large read and write requests. */
+  if ((cmd == NBD_CMD_WRITE || cmd == NBD_CMD_READ) &&
+      count > MAX_REQUEST_SIZE) {
+    nbdkit_error ("invalid request: %s: data request is too large (%" PRIu32
+                  " > %d)",
+                  name_of_nbd_cmd (cmd), count, MAX_REQUEST_SIZE);
+    *error = ENOMEM;
+    return false;
+  }
+
+  /* Flush allowed? */
+  if (!conn->can_flush && cmd == NBD_CMD_FLUSH) {
+    nbdkit_error ("invalid request: %s: flush operation not supported",
+                  name_of_nbd_cmd (cmd));
+    *error = EINVAL;
+    return false;
+  }
+
+  /* Trim allowed? */
+  if (!conn->can_trim && cmd == NBD_CMD_TRIM) {
+    nbdkit_error ("invalid request: %s: trim operation not supported",
+                  name_of_nbd_cmd (cmd));
+    *error = EINVAL;
+    return false;
+  }
+
+  /* Zero allowed? */
+  if (!conn->can_zero && cmd == NBD_CMD_WRITE_ZEROES) {
+    nbdkit_error ("invalid request: %s: write zeroes operation not supported",
+                  name_of_nbd_cmd (cmd));
+    *error = EINVAL;
+    return false;
+  }
+
+  return true;                     /* Command validates. */
+}
+
+/* This is called with the request lock held to actually execute the
+ * request (by calling the plugin).  Note that the request fields have
+ * been validated already in 'validate_request' so we don't have to
+ * check them again.  'buf' is either the data to be written or the
+ * data to be returned, and points to a buffer of size 'count' bytes.
+ *
+ * In all cases, the return value is the system errno value that will
+ * later be converted to the nbd error to send back to the client (0
+ * for success).
+ */
+static uint32_t
+handle_request (struct connection *conn,
+                uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
+                void *buf)
+{
+  uint32_t f = 0;               /* NBDKIT_FLAG_* passed to the backend */
+  bool fua = conn->can_fua && (flags & NBD_CMD_FLAG_FUA);
+  int err = 0;                  /* filled in by the backend on failure */
+
+  /* Clear the error, so that we know if the plugin calls
+   * nbdkit_set_error() or relies on errno.  */
+  threadlocal_set_error (0);
+
+  switch (cmd) {
+  case NBD_CMD_READ:
+    if (backend->pread (backend, conn, buf, count, offset, 0, &err) == -1)
+      return err;
+    break;
+
+  case NBD_CMD_WRITE:
+    if (fua)
+      f |= NBDKIT_FLAG_FUA;
+    if (backend->pwrite (backend, conn, buf, count, offset, f, &err) == -1)
+      return err;
+    break;
+
+  case NBD_CMD_FLUSH:
+    if (backend->flush (backend, conn, 0, &err) == -1)
+      return err;
+    break;
+
+  case NBD_CMD_TRIM:
+    if (fua)
+      f |= NBDKIT_FLAG_FUA;
+    if (backend->trim (backend, conn, count, offset, f, &err) == -1)
+      return err;
+    break;
+
+  case NBD_CMD_WRITE_ZEROES:
+    if (!(flags & NBD_CMD_FLAG_NO_HOLE))        /* trimming allowed */
+      f |= NBDKIT_FLAG_MAY_TRIM;
+    if (fua)
+      f |= NBDKIT_FLAG_FUA;
+    if (backend->zero (backend, conn, count, offset, f, &err) == -1)
+      return err;
+    break;
+
+  default:                      /* any other cmd should have been rejected */
+    abort ();
+  }
+
+  return 0;
+}
+
+static int
+skip_over_write_buffer (int sock, size_t count)
+{ /* Read and discard 'count' bytes from 'sock'; returns 0, or -1 on error. */
+  char buf[BUFSIZ];
+  ssize_t r;
+
+  if (count > MAX_REQUEST_SIZE * 2) {   /* refuse to drain huge payloads */
+    nbdkit_error ("write request too large to skip");
+    return -1;
+  }
+
+  while (count > 0) { /* NOTE(review): raw read(), not conn->recv - confirm */
+    r = read (sock, buf, count > BUFSIZ ? BUFSIZ : count);
+    if (r == -1) {
+      nbdkit_error ("skipping write buffer: %m");
+      return -1;
+    }
+    if (r == 0)  {              /* EOF before 'count' bytes were consumed */
+      nbdkit_error ("unexpected early EOF");
+      errno = EBADMSG;
+      return -1;
+    }
+    count -= r;
+  }
+  return 0;
+}
+
+/* Convert a system errno to an NBD_E* code (unrecognised -> NBD_EINVAL). */
+static int
+nbd_errno (int error)
+{
+  switch (error) {
+  case 0:
+    return NBD_SUCCESS;
+  case EROFS:
+  case EPERM:
+    return NBD_EPERM;
+  case EIO:
+    return NBD_EIO;
+  case ENOMEM:
+    return NBD_ENOMEM;
+#ifdef EDQUOT
+  case EDQUOT:
+#endif
+  case EFBIG:
+  case ENOSPC:
+    return NBD_ENOSPC;
+#ifdef ESHUTDOWN
+  case ESHUTDOWN:
+    return NBD_ESHUTDOWN;
+#endif
+  case EINVAL:
+  default:                      /* everything else is sent as NBD_EINVAL */
+    return NBD_EINVAL;
+  }
+}
+
+static int
+send_simple_reply (struct connection *conn,
+                   uint64_t handle, uint16_t cmd,
+                   const char *buf, uint32_t count,
+                   uint32_t error)
+{ /* Send a simple reply header, plus 'buf' for a successful read. */
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+  struct simple_reply reply;
+  int r;
+
+  reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
+  reply.handle = handle;        /* copied as-is, no byte swap */
+  reply.error = htobe32 (nbd_errno (error));    /* errno -> NBD_E* */
+
+  r = conn->send (conn, &reply, sizeof reply);
+  if (r == -1) {
+    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
+    return connection_set_status (conn, -1);
+  }
+
+  /* Send the read data buffer. */
+  if (cmd == NBD_CMD_READ && !error) {
+    r = conn->send (conn, buf, count);
+    if (r == -1) {
+      nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+      return connection_set_status (conn, -1);
+    }
+  }
+
+  return 1;                     /* command processed ok */
+}
+
+static int
+send_structured_reply_read (struct connection *conn,
+                            uint64_t handle, uint16_t cmd,
+                            const char *buf, uint32_t count, uint64_t offset)
+{ /* Send one OFFSET_DATA chunk (flagged DONE) carrying the read data. */
+  /* Once we are really using structured replies and sending data back
+   * in chunks, we'll be able to grab the write lock for each chunk,
+   * allowing other threads to interleave replies.  As we're not doing
+   * that yet we acquire the lock for the whole function.
+   */
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+  struct structured_reply reply;
+  struct structured_reply_offset_data offset_data;
+  int r;
+
+  assert (cmd == NBD_CMD_READ);
+
+  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
+  reply.handle = handle;        /* copied as-is, no byte swap */
+  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
+  reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
+  reply.length = htobe32 (count + sizeof offset_data); /* payload + offset */
+
+  r = conn->send (conn, &reply, sizeof reply);
+  if (r == -1) {
+    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
+    return connection_set_status (conn, -1);
+  }
+
+  /* Send the offset + read data buffer. */
+  offset_data.offset = htobe64 (offset);
+  r = conn->send (conn, &offset_data, sizeof offset_data);
+  if (r == -1) {
+    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+    return connection_set_status (conn, -1);
+  }
+
+  r = conn->send (conn, buf, count);
+  if (r == -1) {
+    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+    return connection_set_status (conn, -1);
+  }
+
+  return 1;                     /* command processed ok */
+}
+
+static int
+send_structured_reply_error (struct connection *conn,
+                             uint64_t handle, uint16_t cmd, uint32_t error)
+{ /* Send an NBD_REPLY_TYPE_ERROR chunk; 'error' is a system errno. */
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+  struct structured_reply reply;
+  struct structured_reply_error error_data;
+  int r;
+
+  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
+  reply.handle = handle;        /* copied as-is, no byte swap */
+  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
+  reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
+  reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);
+
+  r = conn->send (conn, &reply, sizeof reply);
+  if (r == -1) {
+    nbdkit_error ("write error reply: %m");
+    return connection_set_status (conn, -1);
+  }
+
+  /* Send the error, converted from the system errno to an NBD_E* code. */
+  error_data.error = htobe32 (nbd_errno (error));
+  error_data.len = htobe16 (0);
+  r = conn->send (conn, &error_data, sizeof error_data);
+  if (r == -1) {
+    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+    return connection_set_status (conn, -1);
+  }
+  /* No human readable error message at the moment. */
+
+  return 1;                     /* command processed ok */
+}
+
+int
+protocol_recv_request_send_reply (struct connection *conn)
+{ /* Receive one request, execute it and send the reply to the client. */
+  int r;
+  struct request request;
+  uint16_t cmd, flags;
+  uint32_t magic, count, error = 0;
+  uint64_t offset;
+  CLEANUP_FREE char *buf = NULL;
+
+  /* Read the request packet. */
+  {
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->read_lock);
+    r = connection_get_status (conn);
+    if (r <= 0)
+      return r;
+    r = conn->recv (conn, &request, sizeof request);
+    if (r == -1) {
+      nbdkit_error ("read request: %m");
+      return connection_set_status (conn, -1);
+    }
+    if (r == 0) {
+      debug ("client closed input socket, closing connection");
+      return connection_set_status (conn, 0); /* disconnect */
+    }
+
+    magic = be32toh (request.magic);
+    if (magic != NBD_REQUEST_MAGIC) {
+      nbdkit_error ("invalid request: 'magic' field is incorrect (0x%x)",
+                    magic);
+      return connection_set_status (conn, -1);
+    }
+
+    flags = be16toh (request.flags);
+    cmd = be16toh (request.type);
+
+    offset = be64toh (request.offset);
+    count = be32toh (request.count);
+
+    if (cmd == NBD_CMD_DISC) {
+      debug ("client sent %s, closing connection", name_of_nbd_cmd (cmd));
+      return connection_set_status (conn, 0); /* disconnect */
+    }
+
+    /* Validate the request.  A rejected write still has its payload
+     * on the wire, which is skipped before the error reply is sent. */
+    if (!validate_request (conn, cmd, flags, offset, count, &error)) {
+      if (cmd == NBD_CMD_WRITE &&
+          skip_over_write_buffer (conn->sockin, count) < 0)
+        return connection_set_status (conn, -1);
+      goto send_reply;
+    }
+
+    /* Allocate the data buffer used for either read or write requests. */
+    if (cmd == NBD_CMD_READ || cmd == NBD_CMD_WRITE) {
+      buf = malloc (count);
+      if (buf == NULL) {
+        nbdkit_error ("malloc: %m");
+        error = ENOMEM;
+        if (cmd == NBD_CMD_WRITE &&
+            skip_over_write_buffer (conn->sockin, count) < 0)
+          return connection_set_status (conn, -1);
+        goto send_reply;
+      }
+    }
+
+    /* Receive the write data buffer. */
+    if (cmd == NBD_CMD_WRITE) {
+      r = conn->recv (conn, buf, count);
+      if (r == 0) {             /* EOF in mid-request is treated as an error */
+        errno = EBADMSG;
+        r = -1;
+      }
+      if (r == -1) {
+        nbdkit_error ("read data: %s: %m", name_of_nbd_cmd (cmd));
+        return connection_set_status (conn, -1);
+      }
+    }
+  }
+
+  /* Perform the request.  Only this part happens inside the request lock. */
+  if (quit || !connection_get_status (conn)) {
+    error = ESHUTDOWN;
+  }
+  else {
+    lock_request (conn);
+    error = handle_request (conn, cmd, flags, offset, count, buf);
+    assert ((int) error >= 0);
+    unlock_request (conn);
+  }
+
+  /* Send the reply packet. */
+ send_reply:
+  if (connection_get_status (conn) < 0)
+    return -1;
+
+  if (error != 0) {
+    /* Since we're about to send only the limited NBD_E* errno to the
+     * client, don't lose the information about what really happened
+     * on the server side.  Make sure there is a way for the operator
+     * to retrieve the real error.
+     */
+    debug ("sending error reply: %s", strerror (error));
+  }
+
+  /* Currently we prefer to send simple replies for everything except
+   * where we have to (ie. NBD_CMD_READ when structured_replies have
+   * been negotiated).  However this prevents us from sending
+   * human-readable error messages to the client, so we should
+   * reconsider this in future.
+   */
+  if (conn->structured_replies && cmd == NBD_CMD_READ) {
+    if (!error)
+      return send_structured_reply_read (conn, request.handle, cmd,
+                                         buf, count, offset);
+    else
+      return send_structured_reply_error (conn, request.handle, cmd, error);
+  }
+  else
+    return send_simple_reply (conn, request.handle, cmd, buf, count, error);
+}
diff --git a/server/Makefile.am b/server/Makefile.am
index b198afb..5eb575e 100644
--- a/server/Makefile.am
+++ b/server/Makefile.am
@@ -52,6 +52,10 @@ nbdkit_SOURCES = \
 	main.c \
 	options.h \
 	plugins.c \
+	protocol.c \
+	protocol-handshake.c \
+	protocol-handshake-oldstyle.c \
+	protocol-handshake-newstyle.c \
 	protocol.h \
 	protostrings.c \
 	quit.c \
-- 
2.20.1




More information about the Libguestfs mailing list