[Libguestfs] [libnbd PATCH v4 2/6] block_status: Refactor array storage

Laszlo Ersek lersek at redhat.com
Mon Jul 24 09:52:39 UTC 2023


On 7/21/23 18:08, Eric Blake wrote:
> For 32-bit block status, we were able to cheat and use an array with
> an odd number of elements, with array[0] holding the context id, and
> passing &array[1] to the user's callback.  But once we have 64-bit
> extents, we can no longer abuse array element 0 like that, for two
> reasons: 64-bit extents contain uint64_t which might not be
> alignment-compatible with an array of uint32_t on all architectures,
> and the new NBD_REPLY_TYPE_BLOCK_STATUS_EXT adds an additional count
> field before the array.  What's more, there's no need to byte-swap the
> entries if we won't be calling the user's callback, when the server
> sent us an unknown context id.
> 
> Split out a new state CHUNK_REPLY.BS_HEADER to receive the context id
> (and eventually the new count field for 64-bit replies) separately
> from the extents array.  Add another type nbd_chunk_block_status_32 in
> the payload section for tracking it (with a name anticipating the
> 64-bit counterpart type under extended headers).  Add a helper
> function bs_reply_length_ok() to enable more assertions that our
> memory management is sane without quite so much open-coding of magic
> values.  With these changes, the byteswap of the entries can be
> delayed until we know we need to call the user's callback.  No
> behavioral change, other than the rare possibility of landing in the
> new state.
> 
> Signed-off-by: Eric Blake <eblake at redhat.com>
> ---
> 
> v4: add more assertions, add bs_reply_length_ok() to factor out
> computation of length checks [Laszlo], rebase to state machine rename,
> do byte-swap later, add bs->count for tracking number of descriptors
> ---
>  lib/internal.h                 |   2 +
>  lib/nbd-protocol.h             |  16 +++--
>  generator/state_machine.ml     |   9 ++-
>  generator/states-reply-chunk.c | 108 ++++++++++++++++++++++++---------
>  4 files changed, 98 insertions(+), 37 deletions(-)
> 
> diff --git a/lib/internal.h b/lib/internal.h
> index b38ae524..c7793f1f 100644
> --- a/lib/internal.h
> +++ b/lib/internal.h
> @@ -254,6 +254,7 @@ struct nbd_handle {
>          uint64_t align_; /* Start reply.payload on an 8-byte alignment */
>          struct nbd_chunk_offset_data offset_data;
>          struct nbd_chunk_offset_hole offset_hole;
> +        struct nbd_chunk_block_status_32 bs_hdr_32;
>          struct {
>            struct nbd_chunk_error error;
>            char msg[NBD_MAX_STRING]; /* Common to all error types */
> @@ -308,6 +309,7 @@ struct nbd_handle {
>    size_t payload_left;
> 
>    /* When receiving block status, this is used. */
> +  size_t bs_count; /* count of block descriptors (not array entries!) */
>    uint32_t *bs_entries;
> 
>    /* Commands which are waiting to be issued [meaning the request
> diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
> index c967c840..58583d1d 100644
> --- a/lib/nbd-protocol.h
> +++ b/lib/nbd-protocol.h
> @@ -182,12 +182,6 @@ struct nbd_fixed_new_option_reply_meta_context {
>    /* followed by a string */
>  } NBD_ATTRIBUTE_PACKED;
> 
> -/* NBD_REPLY_TYPE_BLOCK_STATUS block descriptor. */
> -struct nbd_block_descriptor {
> -  uint32_t length;              /* length of block */
> -  uint32_t status_flags;        /* block type (hole etc) */
> -} NBD_ATTRIBUTE_PACKED;
> -
>  /* Request (client -> server). */
>  struct nbd_request {
>    uint32_t magic;               /* NBD_REQUEST_MAGIC. */
> @@ -224,6 +218,16 @@ struct nbd_chunk_offset_hole {
>    uint32_t length;              /* Length of hole. */
>  } NBD_ATTRIBUTE_PACKED;
> 
> +struct nbd_chunk_block_status_32 {
> +  uint32_t context_id;          /* metadata context ID */
> +  /* followed by array of nbd_block_descriptor_32 extents */
> +} NBD_ATTRIBUTE_PACKED;
> +
> +struct nbd_block_descriptor_32 {
> +  uint32_t length;              /* length of block */
> +  uint32_t status_flags;        /* block type (hole etc) */
> +} NBD_ATTRIBUTE_PACKED;
> +
>  struct nbd_chunk_error {
>    uint32_t error;               /* NBD_E* error number */
>    uint16_t len;                 /* Length of human readable error. */
> diff --git a/generator/state_machine.ml b/generator/state_machine.ml
> index 3a912508..7a5bc59b 100644
> --- a/generator/state_machine.ml
> +++ b/generator/state_machine.ml
> @@ -864,10 +864,17 @@ and
>      external_events = [];
>    };
> 
> +  State {
> +    default_state with
> +    name = "RECV_BS_HEADER";
> +    comment = "Receive header of a chunk reply block-status payload";
> +    external_events = [];
> +  };
> +
>    State {
>      default_state with
>      name = "RECV_BS_ENTRIES";
> -    comment = "Receive a chunk reply block-status payload";
> +    comment = "Receive entries array of chunk reply block-status payload";
>      external_events = [];
>    };
> 
> diff --git a/generator/states-reply-chunk.c b/generator/states-reply-chunk.c
> index da5fc426..0d76c159 100644
> --- a/generator/states-reply-chunk.c
> +++ b/generator/states-reply-chunk.c
> @@ -43,6 +43,28 @@ structured_reply_in_bounds (uint64_t offset, uint32_t length,
>    return true;
>  }
> 
> +/* Return true if payload length of block status reply is valid.
> + */
> +static bool
> +bs_reply_length_ok (uint16_t type, uint32_t length)
> +{
> +  /* TODO support 64-bit replies */
> +  size_t prefix_len = sizeof (struct nbd_chunk_block_status_32);
> +  size_t extent_len = sizeof (struct nbd_block_descriptor_32);
> +  assert (type == NBD_REPLY_TYPE_BLOCK_STATUS);
> +
> +  /* At least one descriptor is needed after id prefix */
> +  if (length < prefix_len + extent_len)
> +    return false;
> +
> +  /* There must be an integral number of extents */
> +  length -= prefix_len;
> +  if (length % extent_len != 0)
> +    return false;
> +
> +  return true;
> +}
> +
>  STATE_MACHINE {
>   REPLY.CHUNK_REPLY.START:
>    struct command *cmd = h->reply_cmd;
> @@ -105,22 +127,13 @@  REPLY.CHUNK_REPLY.START:
>    case NBD_REPLY_TYPE_BLOCK_STATUS:
>      if (cmd->type != NBD_CMD_BLOCK_STATUS ||
>          !h->meta_valid || h->meta_contexts.len == 0 ||
> -        length < 12 || ((length-4) & 7) != 0)
> +        !bs_reply_length_ok (type, length))
>        goto resync;
>      assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent));
> -    /* We read the context ID followed by all the entries into a
> -     * single array and deal with it at the end.
> -     */
> -    free (h->bs_entries);
> -    h->bs_entries = malloc (length);
> -    if (h->bs_entries == NULL) {
> -      SET_NEXT_STATE (%.DEAD);
> -      set_error (errno, "malloc");
> -      break;
> -    }
> -    h->rbuf = h->bs_entries;
> -    h->rlen = length;
> -    SET_NEXT_STATE (%RECV_BS_ENTRIES);
> +    /* Start by reading the context ID. */
> +    h->rbuf = &h->sbuf.reply.payload.bs_hdr_32;
> +    h->rlen = sizeof h->sbuf.reply.payload.bs_hdr_32;
> +    SET_NEXT_STATE (%RECV_BS_HEADER);
>      break;
> 
>    default:
> @@ -400,10 +413,46 @@  REPLY.CHUNK_REPLY.RECV_OFFSET_HOLE:
>    }
>    return 0;
> 
> + REPLY.CHUNK_REPLY.RECV_BS_HEADER:
> +  struct command *cmd = h->reply_cmd;
> +  uint16_t type;
> +
> +  switch (recv_into_rbuf (h)) {
> +  case -1: SET_NEXT_STATE (%.DEAD); return 0;
> +  case 1:
> +    save_reply_state (h);
> +    SET_NEXT_STATE (%.READY);
> +    return 0;
> +  case 0:
> +    type = be16toh (h->sbuf.reply.hdr.structured.type);
> +
> +    assert (cmd); /* guaranteed by CHECK */
> +    assert (cmd->type == NBD_CMD_BLOCK_STATUS);
> +    assert (bs_reply_length_ok (type, h->payload_left));
> +    STATIC_ASSERT (sizeof (struct nbd_block_descriptor_32) ==
> +                   2 * sizeof *h->bs_entries,
> +                   _block_desc_is_multiple_of_bs_entry);
> +    h->payload_left -= sizeof h->sbuf.reply.payload.bs_hdr_32;
> +    assert (h->payload_left % sizeof (struct nbd_block_descriptor_32) == 0);
> +    h->bs_count = h->payload_left / sizeof (struct nbd_block_descriptor_32);
> +
> +    free (h->bs_entries);
> +    h->bs_entries = malloc (h->payload_left);
> +    if (h->bs_entries == NULL) {
> +      SET_NEXT_STATE (%.DEAD);
> +      set_error (errno, "malloc");
> +      return 0;
> +    }
> +    h->rbuf = h->bs_entries;
> +    h->rlen = h->payload_left;
> +    h->payload_left = 0;
> +    SET_NEXT_STATE (%RECV_BS_ENTRIES);
> +  }
> +  return 0;
> +
>   REPLY.CHUNK_REPLY.RECV_BS_ENTRIES:
>    struct command *cmd = h->reply_cmd;
>    size_t i;
> -  size_t count;
>    uint32_t context_id;
> 
>    switch (recv_into_rbuf (h)) {
> @@ -416,31 +465,30 @@  REPLY.CHUNK_REPLY.RECV_BS_ENTRIES:
>      assert (cmd); /* guaranteed by CHECK */
>      assert (cmd->type == NBD_CMD_BLOCK_STATUS);
>      assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent));
> -    assert (h->bs_entries);
> -    assert (h->payload_left >= 12);
> +    assert (h->bs_count && h->bs_entries);
>      assert (h->meta_valid);
> 
> -    /* Need to byte-swap the entries returned, but apart from that we
> -     * don't validate them.
> -     */
> -    for (i = 0; i < h->payload_left / sizeof *h->bs_entries; ++i)
> -      h->bs_entries[i] = be32toh (h->bs_entries[i]);
> -    count = (h->payload_left / sizeof *h->bs_entries) - 1;
> -    h->payload_left = 0;
> -
>      /* Look up the context ID. */
> -    context_id = h->bs_entries[0];
> +    context_id = be32toh (h->sbuf.reply.payload.bs_hdr_32.context_id);
>      for (i = 0; i < h->meta_contexts.len; ++i)
>        if (context_id == h->meta_contexts.ptr[i].context_id)
>          break;
> 
>      if (i < h->meta_contexts.len) {
> -      /* Call the caller's extent function. */
>        int error = cmd->error;
> +      const char *name = h->meta_contexts.ptr[i].name;
> 
> -      if (CALL_CALLBACK (cmd->cb.fn.extent,
> -                         h->meta_contexts.ptr[i].name, cmd->offset,
> -                         &h->bs_entries[1], count, &error) == -1)
> +      /* Need to byte-swap the entries returned, but apart from that
> +       * we don't validate them.  Yes, our 32-bit public API foolishly
> +       * tracks the number of uint32_t instead of block descriptors;
> +       * see _block_desc_is_multiple_of_bs_entry above.
> +       */
> +      for (i = 0; i < h->bs_count * 2; ++i)
> +        h->bs_entries[i] = be32toh (h->bs_entries[i]);
> +
> +      /* Call the caller's extent function.  */
> +      if (CALL_CALLBACK (cmd->cb.fn.extent, name, cmd->offset,
> +                         h->bs_entries, h->bs_count * 2, &error) == -1)
>          if (cmd->error == 0)
>            cmd->error = error ? error : EPROTO;
>      }

Acked-by: Laszlo Ersek <lersek at redhat.com>



More information about the Libguestfs mailing list