[Libguestfs] [PATCH v3 08/14] nbd/server: Support 64-bit block status

Vladimir Sementsov-Ogievskiy vsementsov at yandex-team.ru
Wed May 31 14:10:37 UTC 2023


On 15.05.23 22:53, Eric Blake wrote:
> The NBD spec states that if the client negotiates extended headers,
> the server must avoid NBD_REPLY_TYPE_BLOCK_STATUS and instead use
> NBD_REPLY_TYPE_BLOCK_STATUS_EXT which supports 64-bit lengths, even if
> the reply does not need more than 32 bits.  As of this patch,
> client->header_style is still never NBD_HEADER_EXTENDED, so the code
> added here does not take effect until the next patch enables
> negotiation.
> 
> For now, all metacontexts that we know how to export never populate
> more than 32 bits of information, so we don't have to worry about
> NBD_REP_ERR_EXT_HEADER_REQD or filtering during handshake, and we
> always send all zeroes for the upper 32 bits of status during
> NBD_CMD_BLOCK_STATUS.
> 
> Note that we previously had some interesting size-juggling on call
> chains, such as:
> 
> nbd_co_send_block_status(uint32_t length)
> -> blockstatus_to_extents(uint32_t bytes)
>    -> bdrv_block_status_above(bytes, &uint64_t num)
>    -> nbd_extent_array_add(uint64_t num)
>      -> store num in 32-bit length
> 
> But we were lucky that it never overflowed: bdrv_block_status_above
> never sets num larger than bytes, and we had previously been capping
> 'bytes' at 32 bits (since the protocol does not allow sending a larger
> request without extended headers).  This patch adds some assertions
> that ensure we continue to avoid overflowing 32 bits for a narrow
> client, while fully utilizing 64-bits all the way through when the
> client understands that.
> 
> Signed-off-by: Eric Blake <eblake at redhat.com>
> ---
>   nbd/server.c | 86 +++++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 62 insertions(+), 24 deletions(-)
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index ffab51efd26..b4c15ae1a14 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -2073,7 +2073,15 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
>   }
> 
>   typedef struct NBDExtentArray {
> -    NBDExtent *extents;
> +    NBDHeaderStyle style;           /* 32- or 64-bit extent descriptions */
> +    union {
> +        NBDStructuredMeta id;       /* style == NBD_HEADER_STRUCTURED */
> +        NBDStructuredMetaExt meta;  /* style == NBD_HEADER_EXTENDED */
> +    };
> +    union {
> +        NBDExtent *narrow;          /* style == NBD_HEADER_STRUCTURED */
> +        NBDExtentExt *extents;      /* style == NBD_HEADER_EXTENDED */
> +    };
>       unsigned int nb_alloc;
>       unsigned int count;
>       uint64_t total_length;
> @@ -2081,12 +2089,15 @@ typedef struct NBDExtentArray {
>       bool converted_to_be;
>   } NBDExtentArray;
> 
> -static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
> +static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
> +                                            NBDHeaderStyle style)
>   {
>       NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
> 
> +    assert(style >= NBD_HEADER_STRUCTURED);
>       ea->nb_alloc = nb_alloc;
> -    ea->extents = g_new(NBDExtent, nb_alloc);
> +    ea->extents = g_new(NBDExtentExt, nb_alloc);
> +    ea->style = style;
>       ea->can_add = true;
> 
>       return ea;
> @@ -2100,17 +2111,37 @@ static void nbd_extent_array_free(NBDExtentArray *ea)
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
> 
>   /* Further modifications of the array after conversion are abandoned */
> -static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
> +static void nbd_extent_array_convert_to_be(NBDExtentArray *ea,
> +                                           uint32_t context_id,
> +                                           struct iovec *iov)
>   {
>       int i;
> 
>       assert(!ea->converted_to_be);
> +    assert(iov[0].iov_base == &ea->meta);
> +    assert(iov[1].iov_base == ea->extents);

Hmm. Maybe just pass unitialized iov, and set bases here? It would be more clear interface.

>       ea->can_add = false;
>       ea->converted_to_be = true;
> 
> -    for (i = 0; i < ea->count; i++) {
> -        ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
> -        ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
> +    stl_be_p(&ea->meta.context_id, context_id);
> +    if (ea->style >= NBD_HEADER_EXTENDED) {
> +        stl_be_p(&ea->meta.count, ea->count);
> +        for (i = 0; i < ea->count; i++) {
> +            ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
> +            ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
> +        }
> +        iov[0].iov_len = sizeof(ea->meta);
> +        iov[1].iov_len = ea->count * sizeof(ea->extents[0]);
> +    } else {
> +        /* Conversion reduces memory usage, order of iteration matters */
> +        for (i = 0; i < ea->count; i++) {
> +            assert(ea->extents[i].length <= UINT32_MAX);
> +            assert((uint32_t) ea->extents[i].flags == ea->extents[i].flags);
> +            ea->narrow[i].length = cpu_to_be32(ea->extents[i].length);
> +            ea->narrow[i].flags = cpu_to_be32(ea->extents[i].flags);

IMHO, this union-magic significantly increases the complexity of the code..

For example, if simply swap these two lines:

            ea->narrow[i].flags = cpu_to_be32(ea->extents[i].flags);
            ea->narrow[i].length = cpu_to_be32(ea->extents[i].length);

that would be a bug, which's not simple to find.


I have an idea:

1. rewrite the common logic to work with new extended structures
2. add a separate function, which produces old style array, allocating it in flight.

let me try. Something like this (applied on top of this patch). This way we can avoid both unions and passing half-initialized iovs:

diff --git a/nbd/server.c b/nbd/server.c
index 70cc1808c4..b0075dd1ee 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2073,14 +2073,7 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
  
  typedef struct NBDExtentArray {
      NBDHeaderStyle style;           /* 32- or 64-bit extent descriptions */
-    union {
-        NBDStructuredMeta id;       /* style == NBD_HEADER_STRUCTURED */
-        NBDStructuredMetaExt meta;  /* style == NBD_HEADER_EXTENDED */
-    };
-    union {
-        NBDExtent *narrow;          /* style == NBD_HEADER_STRUCTURED */
-        NBDExtentExt *extents;      /* style == NBD_HEADER_EXTENDED */
-    };
+    NBDExtentExt *extents;
      unsigned int nb_alloc;
      unsigned int count;
      uint64_t total_length;
@@ -2110,38 +2103,35 @@ static void nbd_extent_array_free(NBDExtentArray *ea)
  G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
  
  /* Further modifications of the array after conversion are abandoned */
-static void nbd_extent_array_convert_to_be(NBDExtentArray *ea,
-                                           uint32_t context_id,
-                                           struct iovec *iov)
+static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
  {
      int i;
  
      assert(!ea->converted_to_be);
-    assert(iov[0].iov_base == &ea->meta);
-    assert(iov[1].iov_base == ea->extents);
      ea->can_add = false;
      ea->converted_to_be = true;
  
-    stl_be_p(&ea->meta.context_id, context_id);
-    if (ea->style >= NBD_HEADER_EXTENDED) {
-        stl_be_p(&ea->meta.count, ea->count);
-        for (i = 0; i < ea->count; i++) {
-            ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
-            ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
-        }
-        iov[0].iov_len = sizeof(ea->meta);
-        iov[1].iov_len = ea->count * sizeof(ea->extents[0]);
-    } else {
-        /* Conversion reduces memory usage, order of iteration matters */
-        for (i = 0; i < ea->count; i++) {
-            assert(ea->extents[i].length <= UINT32_MAX);
-            assert((uint32_t) ea->extents[i].flags == ea->extents[i].flags);
-            ea->narrow[i].length = cpu_to_be32(ea->extents[i].length);
-            ea->narrow[i].flags = cpu_to_be32(ea->extents[i].flags);
-        }
-        iov[0].iov_len = sizeof(ea->id);
-        iov[1].iov_len = ea->count * sizeof(ea->narrow[0]);
+    for (i = 0; i < ea->count; i++) {
+        ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
+        ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
+    }
+}
+
+static NBDExtent *nbd_extent_array_make_old_style_extents(NBDExtentArray *ea)
+{
+    int i;
+    NBDExtent *extents = g_new(NBDExtent, ea->count);
+
+    assert(!ea->converted_to_be);
+
+    for (i = 0; i < ea->count; i++) {
+        assert(ea->extents[i].length <= UINT32_MAX);
+        assert((uint32_t) ea->extents[i].flags == ea->extents[i].flags);
+        ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
+        ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
      }
+
+    return extents;
  }
  
  /*
@@ -2244,7 +2234,7 @@ static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
  /*
   * nbd_co_send_extents
   *
- * @ea is converted to BE by the function
+ * @ea may be converted to BE by the function
   * @last controls whether NBD_REPLY_FLAG_DONE is sent.
   */
  static int coroutine_fn
@@ -2252,15 +2242,35 @@ nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
                      bool last, uint32_t context_id, Error **errp)
  {
      NBDReply hdr;
-    struct iovec iov[] = {
-        {.iov_base = &hdr},
-        {.iov_base = &ea->meta},
-        {.iov_base = ea->extents}
-    };
-    uint16_t type = client->header_style == NBD_HEADER_EXTENDED ?
-        NBD_REPLY_TYPE_BLOCK_STATUS_EXT : NBD_REPLY_TYPE_BLOCK_STATUS;
+    NBDStructuredMeta meta;
+    NBDStructuredMetaExt meta_ext;
+    g_autofree NBDExtent *extents = NULL;
+    uint16_t type;
+    struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
+
+    if (client->header_style == NBD_HEADER_EXTENDED) {
+        type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+
+        iov[1].iov_base = &meta_ext;
+        iov[1].iov_len = sizeof(meta_ext);
+        stl_be_p(&meta_ext.context_id, context_id);
+        stl_be_p(&meta_ext.count, ea->count);
+
+        nbd_extent_array_convert_to_be(ea);
+        iov[2].iov_base = ea->extents;
+        iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
+    } else {
+        type = NBD_REPLY_TYPE_BLOCK_STATUS;
+
+        iov[1].iov_base = &meta;
+        iov[1].iov_len = sizeof(meta);
+        stl_be_p(&meta.context_id, context_id);
+
+        extents = nbd_extent_array_make_old_style_extents(ea);
+        iov[2].iov_base = extents;
+        iov[2].iov_len = ea->count * sizeof(extents[0]);
+    }
  
-    nbd_extent_array_convert_to_be(ea, context_id, &iov[1]);
  
      trace_nbd_co_send_extents(request->handle, ea->count, context_id,
                                ea->total_length, last);



-- 
Best regards,
Vladimir



More information about the Libguestfs mailing list