[Libguestfs] [PATCH v3 10/14] nbd/client: Initial support for extended headers
Vladimir Sementsov-Ogievskiy
vsementsov at yandex-team.ru
Wed May 31 15:26:17 UTC 2023
On 15.05.23 22:53, Eric Blake wrote:
> Update the client code to be able to send an extended request, and
> parse an extended header from the server. Note that since we reject
> any structured reply with a too-large payload, we can always normalize
> a valid header back into the compact form, so that the caller need not
> deal with two branches of a union. Still, until a later patch lets
> the client negotiate extended headers, the code added here should not
> be reached. Note that because of the different magic numbers, it is
> just as easy to trace and then tolerate a non-compliant server sending
> the wrong header reply as it would be to insist that the server is
> compliant.
>
> The only caller to nbd_receive_reply() always passed NULL for errp;
> since we are changing the signature anyways, I decided to sink the
> decision to ignore errors one layer lower.
This way nbd_receive_simple_reply() and nbd_receive_structured_reply_chunk() are called now only with explicit NULL last argument.. And we start to drop all errors.
Also, actually, we'd better add errp parameter to the caller - nbd_receive_replies(), because its caller (nbd_co_do_receive_one_chunk()) will benefit of it, as it already has errp.
>
> Signed-off-by: Eric Blake <eblake at redhat.com>
> ---
> include/block/nbd.h | 2 +-
> block/nbd.c | 3 +-
> nbd/client.c | 86 +++++++++++++++++++++++++++++++--------------
> nbd/trace-events | 1 +
> 4 files changed, 63 insertions(+), 29 deletions(-)
>
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index d753fb8006f..865bb4ee2e1 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -371,7 +371,7 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
> Error **errp);
> int nbd_send_request(QIOChannel *ioc, NBDRequest *request, NBDHeaderStyle hdr);
> int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
> - NBDReply *reply, Error **errp);
> + NBDReply *reply, NBDHeaderStyle hdr);
> int nbd_client(int fd);
> int nbd_disconnect(int fd);
> int nbd_errno_to_system_errno(int err);
> diff --git a/block/nbd.c b/block/nbd.c
> index 6ad6a4f5ecd..d6caea44928 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -458,7 +458,8 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
>
> /* We are under mutex and handle is 0. We have to do the dirty work. */
> assert(s->reply.handle == 0);
> - ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL);
> + ret = nbd_receive_reply(s->bs, s->ioc, &s->reply,
> + s->info.header_style);
> if (ret <= 0) {
> ret = ret ? ret : -EIO;
> nbd_channel_error(s, ret);
> diff --git a/nbd/client.c b/nbd/client.c
> index 17d1f57da60..e5db3c8b79d 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -1350,22 +1350,29 @@ int nbd_disconnect(int fd)
>
> int nbd_send_request(QIOChannel *ioc, NBDRequest *request, NBDHeaderStyle hdr)
> {
> - uint8_t buf[NBD_REQUEST_SIZE];
> + uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
> + size_t len;
>
> - assert(hdr < NBD_HEADER_EXTENDED);
> - assert(request->len <= UINT32_MAX);
> trace_nbd_send_request(request->from, request->len, request->handle,
> request->flags, request->type,
> nbd_cmd_lookup(request->type));
>
> - stl_be_p(buf, NBD_REQUEST_MAGIC);
> stw_be_p(buf + 4, request->flags);
> stw_be_p(buf + 6, request->type);
> stq_be_p(buf + 8, request->handle);
> stq_be_p(buf + 16, request->from);
> - stl_be_p(buf + 24, request->len);
> + if (hdr >= NBD_HEADER_EXTENDED) {
> + stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC);
> + stq_be_p(buf + 24, request->len);
> + len = NBD_EXTENDED_REQUEST_SIZE;
> + } else {
> + assert(request->len <= UINT32_MAX);
> + stl_be_p(buf, NBD_REQUEST_MAGIC);
> + stl_be_p(buf + 24, request->len);
> + len = NBD_REQUEST_SIZE;
> + }
>
> - return nbd_write(ioc, buf, sizeof(buf), NULL);
> + return nbd_write(ioc, buf, len, NULL);
> }
>
> /* nbd_receive_simple_reply
> @@ -1394,28 +1401,34 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
>
> /* nbd_receive_structured_reply_chunk
> * Read structured reply chunk except magic field (which should be already
> - * read).
> + * read). Normalize into the compact form.
> * Payload is not read.
> */
> -static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
> - NBDStructuredReplyChunk *chunk,
> +static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, NBDReply *chunk,
> Error **errp)
Hmm, _structured_or_extened_ ? Or at least in comment above the function we should mention this.
> {
> int ret;
> + size_t len;
> + uint64_t payload_len;
>
> - assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
> + if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
> + len = sizeof(chunk->structured);
> + } else {
> + assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC);
> + len = sizeof(chunk->extended);
> + }
>
> ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
> - sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
Would be good to print "extended chunk" in error message for EXTENDED case.
> + len - sizeof(chunk->magic), "structured chunk",
> errp);
> if (ret < 0) {
> return ret;
> }
>
> - chunk->flags = be16_to_cpu(chunk->flags);
> - chunk->type = be16_to_cpu(chunk->type);
> - chunk->handle = be64_to_cpu(chunk->handle);
> - chunk->length = be32_to_cpu(chunk->length);
> + /* flags, type, and handle occupy same space between forms */
> + chunk->structured.flags = be16_to_cpu(chunk->structured.flags);
> + chunk->structured.type = be16_to_cpu(chunk->structured.type);
> + chunk->structured.handle = be64_to_cpu(chunk->structured.handle);
>
> /*
> * Because we use BLOCK_STATUS with REQ_ONE, and cap READ requests
> @@ -1423,11 +1436,20 @@ static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
> * this. Even if we stopped using REQ_ONE, sane servers will cap
> * the number of extents they return for block status.
> */
> - if (chunk->length > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
> + if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
> + payload_len = be32_to_cpu(chunk->structured.length);
> + } else {
> + /* For now, we are ignoring the extended header offset. */
> + payload_len = be64_to_cpu(chunk->extended.length);
> + chunk->magic = NBD_STRUCTURED_REPLY_MAGIC;
> + }
> + if (payload_len > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
> error_setg(errp, "server chunk %" PRIu32 " (%s) payload is too long",
> - chunk->type, nbd_rep_lookup(chunk->type));
> + chunk->structured.type,
> + nbd_rep_lookup(chunk->structured.type));
> return -EINVAL;
> }
> + chunk->structured.length = payload_len;
>
> return 0;
> }
> @@ -1474,30 +1496,35 @@ nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
>
> /* nbd_receive_reply
> *
> - * Decreases bs->in_flight while waiting for a new reply. This yield is where
> - * we wait indefinitely and the coroutine must be able to be safely reentered
> - * for nbd_client_attach_aio_context().
> + * Wait for a new reply. If this yields, the coroutine must be able to be
> + * safely reentered for nbd_client_attach_aio_context(). @hdr determines
> + * which reply magic we are expecting, although this normalizes the result
> + * so that the caller only has to work with compact headers.
> *
> * Returns 1 on success
> - * 0 on eof, when no data was read (errp is not set)
> - * negative errno on failure (errp is set)
> + * 0 on eof, when no data was read
> + * negative errno on failure
> */
> int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
> - NBDReply *reply, Error **errp)
> + NBDReply *reply, NBDHeaderStyle hdr)
> {
> int ret;
> const char *type;
>
> - ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
> + ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), NULL);
> if (ret <= 0) {
> return ret;
> }
>
> reply->magic = be32_to_cpu(reply->magic);
>
> + /* Diagnose but accept wrong-width header */
> switch (reply->magic) {
> case NBD_SIMPLE_REPLY_MAGIC:
> - ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
> + if (hdr >= NBD_HEADER_EXTENDED) {
> + trace_nbd_receive_wrong_header(reply->magic);
maybe, trace also expected style
> + }
> + ret = nbd_receive_simple_reply(ioc, &reply->simple, NULL);
> if (ret < 0) {
> break;
> }
> @@ -1506,7 +1533,12 @@ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
> reply->handle);
> break;
> case NBD_STRUCTURED_REPLY_MAGIC:
> - ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
> + case NBD_EXTENDED_REPLY_MAGIC:
> + if ((hdr >= NBD_HEADER_EXTENDED) !=
> + (reply->magic == NBD_EXTENDED_REPLY_MAGIC)) {
> + trace_nbd_receive_wrong_header(reply->magic);
> + }
> + ret = nbd_receive_structured_reply_chunk(ioc, reply, NULL);
> if (ret < 0) {
> break;
> }
> @@ -1517,7 +1549,7 @@ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
> reply->structured.length);
> break;
> default:
> - error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
> + trace_nbd_receive_wrong_header(reply->magic);
> return -EINVAL;
> }
> if (ret < 0) {
> diff --git a/nbd/trace-events b/nbd/trace-events
> index adf5666e207..c20df33a431 100644
> --- a/nbd/trace-events
> +++ b/nbd/trace-events
> @@ -34,6 +34,7 @@ nbd_client_clear_socket(void) "Clearing NBD socket"
> nbd_send_request(uint64_t from, uint64_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu64 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
> nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }"
> nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, const char *name, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d (%s), handle = %" PRIu64 ", length = %" PRIu32 " }"
> +nbd_receive_wrong_header(uint32_t magic) "Server sent unexpected magic 0x%" PRIx32
>
> # common.c
> nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
--
Best regards,
Vladimir
More information about the Libguestfs
mailing list