[Libguestfs] [libnbd PATCH 1/2] api: Add LIBNBD_STRICT_PAYLOAD to honor qemu 32M write limit

Eric Blake eblake at redhat.com
Wed Nov 9 23:26:06 UTC 2022


Modern qemu tends to advertise a strict 32M payload limit.  Yet we
default to allowing the user to attempt up to 64M in pwrite.  For
pread, qemu will reply with EINVAL but keep the connection up; but for
pwrite, an overlarge buffer is fatal.  It's time to teach libnbd to
honor qemu's max buffer by default, while still allowing the user to
disable the strict mode setting if they want to try 64M anyways.

This also involves advertising a new LIBNBD_SIZE_PAYLOAD via
nbd_get_block_size, which is more reliable than asking the user to
manually obey LIBNBD_SIZE_MAXIMUM which may not be set or may be
larger than 64M.
---
 lib/internal.h                              |  4 +++
 generator/API.ml                            | 39 ++++++++++++++++-----
 generator/states-newstyle-opt-export-name.c |  1 +
 generator/states-newstyle-opt-go.c          |  1 +
 generator/states-oldstyle.c                 |  1 +
 lib/flags.c                                 | 25 +++++++++++++
 lib/rw.c                                    |  8 ++++-
 7 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/lib/internal.h b/lib/internal.h
index 22f500b4..bbbd2639 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -146,6 +146,8 @@ struct nbd_handle {
   uint32_t block_minimum;
   uint32_t block_preferred;
   uint32_t block_maximum;
+  /* Payload cap, always non-zero, based on block_maximum when feasible */
+  uint32_t payload_maximum;

   /* Server canonical name and description, or NULL if not advertised. */
   char *canonical_name;
@@ -448,6 +450,8 @@ extern int nbd_internal_set_size_and_flags (struct nbd_handle *h,
 extern int nbd_internal_set_block_size (struct nbd_handle *h, uint32_t min,
                                         uint32_t pref, uint32_t max)
   LIBNBD_ATTRIBUTE_NONNULL(1);
+extern void nbd_internal_set_payload (struct nbd_handle *h)
+  LIBNBD_ATTRIBUTE_NONNULL(1);

 /* is-state.c */
 extern bool nbd_internal_is_state_created (enum state state);
diff --git a/generator/API.ml b/generator/API.ml
index 8d0d9d15..0ebc9298 100644
--- a/generator/API.ml
+++ b/generator/API.ml
@@ -184,6 +184,7 @@ let block_size_enum =
     "MINIMUM",   0;
     "PREFERRED", 1;
     "MAXIMUM",   2;
+    "PAYLOAD",   3;
   ]
 }
 let all_enums = [ tls_enum; block_size_enum ]
@@ -221,6 +222,7 @@ let strict_flags =
     "BOUNDS",         1 lsl 2;
     "ZERO_SIZE",      1 lsl 3;
     "ALIGN",          1 lsl 4;
+    "PAYLOAD",        1 lsl 5;
   ]
 }
 let allow_transport_flags = {
@@ -1060,10 +1062,21 @@   "set_strict_mode", {
 =item C<LIBNBD_STRICT_ALIGN> = 5

 If set, and the server provided minimum block sizes (see
-L<nbd_get_block_size(3)>, this flag rejects client requests that
-do not have length and offset aligned to the server's minimum
-requirements.  If clear, unaligned requests are sent to the server,
-where it is up to the server whether to honor or reject the request.
+C<LIBNBD_SIZE_MINIMUM> for L<nbd_get_block_size(3)>), this
+flag rejects client requests that do not have length and offset
+aligned to the server's minimum requirements.  If clear,
+unaligned requests are sent to the server, where it is up to
+the server whether to honor or reject the request.
+
+=item C<LIBNBD_STRICT_PAYLOAD> = 6
+
+If set, the client refuses to send a command to the server
+with more than libnbd's outgoing payload maximum (see
+C<LIBNBD_SIZE_PAYLOAD> for L<nbd_get_block_size(3)>), whether
+or not the server advertised a block size maximum.  If clear,
+oversize requests up to 64MiB may be attempted, although
+requests larger than 32MiB are liable to cause some servers to
+disconnect.

 =back

@@ -2286,6 +2299,15 @@   "get_block_size", {
 itself imposes a maximum of 64M.  If zero, some NBD servers will
 abruptly disconnect if a transaction involves more than 32M.

+=item C<LIBNBD_SIZE_PAYLOAD> = 3
+
+This value is not advertised by the server, but rather represents
+the maximum outgoing payload size for a given connection that
+libnbd will enforce unless C<LIBNBD_STRICT_PAYLOAD> is cleared
+in C<nbd_set_strict_mode(3)>.  It is always non-zero: never
+smaller than 1M, never larger than 64M, and matches
+C<LIBNBD_SIZE_MAXIMUM> when possible.
+
 =back

 Future NBD extensions may result in additional C<size_type> values.
@@ -2449,10 +2471,11 @@   "pwrite", {
 acknowledged by the server, or there is an error.  Note this will
 generally return an error if L<nbd_is_read_only(3)> is true.

-Note that libnbd currently enforces a maximum write buffer of 64MiB,
-even if the server would permit a larger buffer in a single transaction;
-attempts to exceed this will result in an C<ERANGE> error.  The server
-may enforce a smaller limit, which can be learned with
+Note that libnbd defaults to enforcing a maximum write buffer
+of the lesser of 64MiB or any maximum payload size advertised
+by the server; attempts to exceed this will generally result in
+a client-side C<ERANGE> error, rather than a server-side
+disconnection.  The actual limit can be learned with
 L<nbd_get_block_size(3)>.

 The C<flags> parameter may be C<0> for no flags, or may contain
diff --git a/generator/states-newstyle-opt-export-name.c b/generator/states-newstyle-opt-export-name.c
index 398aa680..88296297 100644
--- a/generator/states-newstyle-opt-export-name.c
+++ b/generator/states-newstyle-opt-export-name.c
@@ -70,6 +70,7 @@  NEWSTYLE.OPT_EXPORT_NAME.CHECK_REPLY:
     SET_NEXT_STATE (%.DEAD);
     return 0;
   }
+  nbd_internal_set_payload (h);
   SET_NEXT_STATE (%^FINISHED);
   CALL_CALLBACK (h->opt_cb.completion, &err);
   nbd_internal_free_option (h);
diff --git a/generator/states-newstyle-opt-go.c b/generator/states-newstyle-opt-go.c
index f2d9f846..ac9613d4 100644
--- a/generator/states-newstyle-opt-go.c
+++ b/generator/states-newstyle-opt-go.c
@@ -276,6 +276,7 @@  NEWSTYLE.OPT_GO.CHECK_REPLY:
     err = nbd_get_errno () ? : ENOTSUP;
     break;
   case NBD_REP_ACK:
+    nbd_internal_set_payload (h);
     err = 0;
     break;
   }
diff --git a/generator/states-oldstyle.c b/generator/states-oldstyle.c
index 7f668936..1eb5e940 100644
--- a/generator/states-oldstyle.c
+++ b/generator/states-oldstyle.c
@@ -68,6 +68,7 @@  OLDSTYLE.CHECK:
     SET_NEXT_STATE (%.DEAD);
     return 0;
   }
+  nbd_internal_set_payload (h);

   h->protocol = "oldstyle";

diff --git a/lib/flags.c b/lib/flags.c
index 5def16e8..eaf4ff85 100644
--- a/lib/flags.c
+++ b/lib/flags.c
@@ -26,6 +26,7 @@
 #include <errno.h>

 #include "internal.h"
+#include "minmax.h"

 /* Reset connection data.  Called after swapping export name, after
  * failed OPT_GO/OPT_INFO, or after successful OPT_STARTTLS.
@@ -38,6 +39,7 @@ nbd_internal_reset_size_and_flags (struct nbd_handle *h)
   h->block_minimum = 0;
   h->block_preferred = 0;
   h->block_maximum = 0;
+  h->payload_maximum = 0;
   free (h->canonical_name);
   h->canonical_name = NULL;
   free (h->description);
@@ -118,6 +120,27 @@ nbd_internal_set_block_size (struct nbd_handle *h, uint32_t min,
   return 0; /* Use return -1 if we want to reject such servers */
 }

+/* Set the payload size constraint on the handle.
+ * This is called from the state machine after all other information
+ * about an export is available, regardless of oldstyle or newstyle.
+ */
+void
+nbd_internal_set_payload (struct nbd_handle *h)
+{
+  /* The NBD protocol says we should not assume the server will allow
+   * more than 32M payload if it did not advertise anything.  If it
+   * advertised more than 64M, we still cap at the maximum buffer size
+   * we are willing to allocate as a client; if it advertised smaller
+   * than 1M (presumably because the export itself was small), the NBD
+   * protocol says we can still send payloads that large.
+   */
+  if (h->block_maximum)
+    h->payload_maximum = MIN (MAX_REQUEST_SIZE,
+                              MAX (1024 * 1024, h->block_maximum));
+  else
+    h->payload_maximum = 32 * 1024 * 1024;
+}
+
 static int
 get_flag (struct nbd_handle *h, uint16_t flag)
 {
@@ -237,6 +260,8 @@ nbd_unlocked_get_block_size (struct nbd_handle *h, int type)
     return h->block_preferred;
   case LIBNBD_SIZE_MAXIMUM:
     return h->block_maximum;
+  case LIBNBD_SIZE_PAYLOAD:
+    return h->payload_maximum;
   }
   return 0;
 }
diff --git a/lib/rw.c b/lib/rw.c
index 2ab85f3d..6120edd0 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -207,8 +207,14 @@ nbd_internal_command_common (struct nbd_handle *h,

   switch (type) {
     /* Commands which send or receive data are limited to MAX_REQUEST_SIZE. */
-  case NBD_CMD_READ:
   case NBD_CMD_WRITE:
+    if (h->strict & LIBNBD_STRICT_PAYLOAD && count > h->payload_maximum) {
+      set_error (ERANGE, "request too large: maximum payload size is %d",
+                 h->payload_maximum);
+      goto err;
+    }
+    /* fallthrough */
+  case NBD_CMD_READ:
     if (count > MAX_REQUEST_SIZE) {
       set_error (ERANGE, "request too large: maximum request size is %d",
                  MAX_REQUEST_SIZE);
-- 
2.38.1



More information about the Libguestfs mailing list