[Libguestfs] [nbdkit PATCH v2 2/2] server: Group related transmission send()s

Eric Blake eblake at redhat.com
Fri Jun 7 14:15:37 UTC 2019


We disabled Nagle's algorithm to allow less latency in our responses
reaching the client; but as a side effect, it leads to more network
overhead when we send a reply split across more than one write().
Take advantage of various means for grouping related writes (Linux'
MSG_MORE for sockets, gnutls' corking for TLS) to send a larger
packet, and adjust callers to pass in our internal SEND_MORE flag as a
hint when to use the improvements.  I tested with appropriate
combinations from:

$ nbdkit {-p 10810,-U -} \
  {--tls=require --tls-verify-peer --tls-psk=./keys.psk,} memory size=64m \
  --run './aio-parallel-load{-tls,} {$unixsocket,nbd://localhost:10810}'

with the following IOPS measurements averaged over multiple runs:

               pre       post      gain
unix plain:    802627.8  822944.1  2.53%
unix tls:      121871.6  125538.0  3.01%
tcp plain:     656400.1  685795.0  4.48%
tcp tls:       114552.1  120197.3  4.93%

which looks like an overall improvement, even though it is still close
to the margins for being in the noise.

Signed-off-by: Eric Blake <eblake at redhat.com>
---
 server/internal.h                    |  4 ++++
 server/connections.c                 | 10 ++++++++--
 server/crypto.c                      |  7 +++++++
 server/protocol-handshake-newstyle.c | 10 +++++-----
 server/protocol.c                    | 16 +++++++++-------
 5 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/server/internal.h b/server/internal.h
index 50525f3..2281937 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -139,6 +139,10 @@ extern void change_user (void);
 /* connections.c */
 struct connection;

+enum {
+  SEND_MORE = 1, /* Hint to use MSG_MORE/corking to group send()s */
+};
+
 typedef int (*connection_recv_function) (struct connection *,
                                          void *buf, size_t len)
   __attribute__((__nonnull__ (1, 2)));
diff --git a/server/connections.c b/server/connections.c
index 1a749b6..f56590c 100644
--- a/server/connections.c
+++ b/server/connections.c
@@ -330,7 +330,8 @@ free_connection (struct connection *conn)
 }

 /* Write buffer to conn->sockout with send() and either succeed completely
- * (returns 0) or fail (returns -1). flags is ignored for now.
+ * (returns 0) or fail (returns -1). flags may include SEND_MORE as a hint
+ * that this send will be followed by related data.
  */
 static int
 raw_send_socket (struct connection *conn, const void *vbuf, size_t len,
@@ -339,9 +340,14 @@ raw_send_socket (struct connection *conn, const void *vbuf, size_t len,
   int sock = conn->sockout;
   const char *buf = vbuf;
   ssize_t r;
+  int f = 0;

+#ifdef MSG_MORE
+  if (flags & SEND_MORE)
+    f |= MSG_MORE;
+#endif
   while (len > 0) {
-    r = send (sock, buf, len, 0);
+    r = send (sock, buf, len, f);
     if (r == -1) {
       if (errno == EINTR || errno == EAGAIN)
         continue;
diff --git a/server/crypto.c b/server/crypto.c
index 3f87944..3f3d96b 100644
--- a/server/crypto.c
+++ b/server/crypto.c
@@ -357,6 +357,9 @@ crypto_send (struct connection *conn, const void *vbuf, size_t len, int flags)

   assert (session != NULL);

+  if (flags & SEND_MORE)
+    gnutls_record_cork (session);
+
   while (len > 0) {
     r = gnutls_record_send (session, buf, len);
     if (r < 0) {
@@ -368,6 +371,10 @@ crypto_send (struct connection *conn, const void *vbuf, size_t len, int flags)
     len -= r;
   }

+  if (!(flags & SEND_MORE) &&
+      gnutls_record_uncork (session, GNUTLS_RECORD_WAIT) < 0)
+    return -1;
+
   return 0;
 }

diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index 6993c8e..e0136de 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -94,13 +94,13 @@ send_newstyle_option_reply_exportname (struct connection *conn,

   if (conn->send (conn,
                   &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply, 0) == -1) {
+                  sizeof fixed_new_option_reply, SEND_MORE) == -1) {
     nbdkit_error ("write: %s: %m", name_of_nbd_opt (option));
     return -1;
   }

   len = htobe32 (name_len);
-  if (conn->send (conn, &len, sizeof len, 0) == -1) {
+  if (conn->send (conn, &len, sizeof len, SEND_MORE) == -1) {
     nbdkit_error ("write: %s: %s: %m",
                   name_of_nbd_opt (option), "sending length");
     return -1;
@@ -132,7 +132,7 @@ send_newstyle_option_reply_info_export (struct connection *conn,

   if (conn->send (conn,
                   &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply, 0) == -1 ||
+                  sizeof fixed_new_option_reply, SEND_MORE) == -1 ||
       conn->send (conn, &export, sizeof export, 0) == -1) {
     nbdkit_error ("write: %s: %m", name_of_nbd_opt (option));
     return -1;
@@ -161,8 +161,8 @@ send_newstyle_option_reply_meta_context (struct connection *conn,

   if (conn->send (conn,
                   &fixed_new_option_reply,
-                  sizeof fixed_new_option_reply, 0) == -1 ||
-      conn->send (conn, &context, sizeof context, 0) == -1 ||
+                  sizeof fixed_new_option_reply, SEND_MORE) == -1 ||
+      conn->send (conn, &context, sizeof context, SEND_MORE) == -1 ||
       conn->send (conn, name, namelen, 0) == -1) {
     nbdkit_error ("write: %s: %m", name_of_nbd_opt (option));
     return -1;
diff --git a/server/protocol.c b/server/protocol.c
index 0e054ee..5967622 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -393,12 +393,13 @@ send_simple_reply (struct connection *conn,
   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
   struct simple_reply reply;
   int r;
+  int f = (cmd == NBD_CMD_READ && !error) ? SEND_MORE : 0;

   reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
   reply.handle = handle;
   reply.error = htobe32 (nbd_errno (error, false));

-  r = conn->send (conn, &reply, sizeof reply, 0);
+  r = conn->send (conn, &reply, sizeof reply, f);
   if (r == -1) {
     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
     return connection_set_status (conn, -1);
@@ -439,7 +440,7 @@ send_structured_reply_read (struct connection *conn,
   reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
   reply.length = htobe32 (count + sizeof offset_data);

-  r = conn->send (conn, &reply, sizeof reply, 0);
+  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
   if (r == -1) {
     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
     return connection_set_status (conn, -1);
@@ -447,7 +448,7 @@ send_structured_reply_read (struct connection *conn,

   /* Send the offset + read data buffer. */
   offset_data.offset = htobe64 (offset);
-  r = conn->send (conn, &offset_data, sizeof offset_data, 0);
+  r = conn->send (conn, &offset_data, sizeof offset_data, SEND_MORE);
   if (r == -1) {
     nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
     return connection_set_status (conn, -1);
@@ -573,7 +574,7 @@ send_structured_reply_block_status (struct connection *conn,
   reply.length = htobe32 (sizeof context_id +
                           nr_blocks * sizeof (struct block_descriptor));

-  r = conn->send (conn, &reply, sizeof reply, 0);
+  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
   if (r == -1) {
     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
     return connection_set_status (conn, -1);
@@ -581,7 +582,7 @@ send_structured_reply_block_status (struct connection *conn,

   /* Send the base:allocation context ID. */
   context_id = htobe32 (base_allocation_id);
-  r = conn->send (conn, &context_id, sizeof context_id, 0);
+  r = conn->send (conn, &context_id, sizeof context_id, SEND_MORE);
   if (r == -1) {
     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
     return connection_set_status (conn, -1);
@@ -589,7 +590,8 @@ send_structured_reply_block_status (struct connection *conn,

   /* Send each block descriptor. */
   for (i = 0; i < nr_blocks; ++i) {
-    r = conn->send (conn, &blocks[i], sizeof blocks[i], 0);
+    r = conn->send (conn, &blocks[i], sizeof blocks[i],
+                    i == nr_blocks - 1 ? 0 : SEND_MORE);
     if (r == -1) {
       nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
       return connection_set_status (conn, -1);
@@ -615,7 +617,7 @@ send_structured_reply_error (struct connection *conn,
   reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
   reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);

-  r = conn->send (conn, &reply, sizeof reply, 0);
+  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
   if (r == -1) {
     nbdkit_error ("write error reply: %m");
     return connection_set_status (conn, -1);
-- 
2.20.1




More information about the Libguestfs mailing list