[Libguestfs] [libnbd PATCH v2 3/5] states: Add nbd_pread_structured API

Eric Blake eblake at redhat.com
Fri Jun 21 01:45:06 UTC 2019


Time to wire up user access to react to structured reads as the chunks
come in, exposing the framework added in the last patch.  I chose to
use Int for the callback status rather than an enum for ease of wiring
things up to the various language bindings.

It's easy to test callback behavior for LIBNBD_READ_DATA/HOLE (the
next patch will add interop tests with qemu-nbd), but at present, I
don't know of any server that responds with
NBD_REPLY_TYPE_ERROR_OFFSET, which in turn makes testing
LIBNBD_READ_ERROR difficult. I used the following hack on top of
qemu.git commit d1bf88e5 to trigger an offset error for local testing:

| diff --git i/nbd/server.c w/nbd/server.c
| index 10faedcfc55d..804dced0bc8d 100644
| --- i/nbd/server.c
| +++ w/nbd/server.c
| @@ -1838,12 +1838,30 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
|
|              trace_nbd_co_send_structured_read_hole(handle, offset + progress,
|                                                     pnum);
| -            set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
| +            set_be_chunk(&chunk.h, 0,
|                           NBD_REPLY_TYPE_OFFSET_HOLE,
|                           handle, sizeof(chunk) - sizeof(chunk.h));
|              stq_be_p(&chunk.offset, offset + progress);
|              stl_be_p(&chunk.length, pnum);
|              ret = nbd_co_send_iov(client, iov, 1, errp);
| +            if (ret == 0) {
| +                NBDStructuredError chunk;
| +                int64_t off;
| +                struct iovec iov[] = {
| +                    {.iov_base = &chunk, .iov_len = sizeof(chunk)},
| +                    {.iov_base = (char*)"MSG", .iov_len = 3},
| +                    {.iov_base = &off, .iov_len = sizeof(off)},
| +                };
| +
| +                set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
| +                             NBD_REPLY_TYPE_ERROR_OFFSET,
| +                             handle, sizeof(chunk) - sizeof(chunk.h) +
| +                             3 + sizeof(off));
| +                stl_be_p(&chunk.error, NBD_EPERM);
| +                stw_be_p(&chunk.message_length, 3);
| +                stq_be_p(&off, offset + progress);
| +                ret = nbd_co_send_iov(client, iov, 3, errp);
| +            }
|          } else {
|              ret = blk_pread(exp->blk, offset + progress + exp->dev_offset,
|                              data + progress, pnum);
---
 generator/generator | 94 ++++++++++++++++++++++++++++++++++++++++++++-
 lib/rw.c            | 32 +++++++++++++++
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/generator/generator b/generator/generator
index b408509..cf9876f 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1305,7 +1305,79 @@ Issue a read command to the NBD server for the range starting
 at C<offset> and ending at C<offset> + C<count> - 1.  NBD
 can only read all or nothing using this call.  The call
 returns when the data has been read fully into C<buf> or there is an
-error.
+error.  See also C<nbd_pread_structured>, if finer visibility is
+required into the server's replies.
+
+The C<flags> parameter must be C<0> for now (it exists for future NBD
+protocol extensions).";
+  };
+
+  "pread_structured", {
+    default_call with
+    args = [ BytesOut ("buf", "count"); UInt64 "offset";
+             Opaque "data";
+             Callback ("chunk", [ Opaque "data"; BytesIn ("subbuf", "count");
+                                  UInt64 "offset"; Int "error";
+                                  Int "status" ]);
+             Flags "flags" ];
+    ret = RErr;
+    permitted_states = [ Connected ];
+    shortdesc = "read from the NBD server";
+    longdesc = "\
+Issue a read command to the NBD server for the range starting
+at C<offset> and ending at C<offset> + C<count> - 1.  The server's
+response may be subdivided into chunks which may arrive out of order
+before reassembly into the original buffer; the C<chunk> callback
+is used for notification after each chunk arrives, and may perform
+additional sanity checking on the server's reply. The callback cannot
+call C<nbd_*> APIs on the same handle since it holds the handle lock
+and will cause a deadlock.  If the callback returns C<-1>, and no
+earlier error has been detected, then the overall read command will
+fail with the same value of C<errno> left after the failing callback;
+but any further chunks will still invoke the callback.
+
+The C<chunk> function is called once per chunk of data received.  The
+C<subbuf> and C<count> parameters represent the subset of the original
+buffer which has just been populated by results from the server (in C,
+C<subbuf> always points within the original C<buf>; but this guarantee
+may not extend to other language bindings). The C<offset> parameter
+represents the absolute offset at which C<subbuf> begins within the
+image (note that this is not the relative offset of C<subbuf> within
+the original buffer C<buf>). The C<error> parameter is controlled by
+the C<status> parameter, which is one of
+
+=over 4
+
+=item C<LIBNBD_READ_DATA> = 1
+
+C<subbuf> was populated with C<count> bytes of data. C<error> is the
+errno value of any earlier detected error, or zero.
+
+=item C<LIBNBD_READ_HOLE> = 2
+
+C<subbuf> represents a hole, and contains C<count> NUL bytes. C<error>
+is the errno value of any earlier detected error, or zero.
+
+=item C<LIBNBD_READ_ERROR> = 3
+
+C<count> is 0, so C<subbuf> is unusable. C<error> is the errno value
+reported by the server as occurring while reading that C<offset>.
+
+=back
+
+Future NBD extensions may permit other values for C<status>, but those
+will not be returned to a client that has not opted in to requesting
+such extensions. If the server is non-compliant, it is possible for
+the C<chunk> function to be called more times than you expect or with
+C<count> 0 for C<LIBNBD_READ_DATA> or C<LIBNBD_READ_HOLE>. It is also
+possible that the C<chunk> function is not called at all (in
+particular, C<LIBNBD_READ_ERROR> is used only when an error is
+associated with a particular offset, and not when the server reports a
+generic error), but you are guaranteed that the callback was called at
+least once if the overall read succeeds. Libnbd does not validate that
+the server obeyed the requirement that a read call must not have
+overlapping chunks and must not succeed without enough chunks to cover
+the entire request.

 The C<flags> parameter must be C<0> for now (it exists for future NBD
 protocol extensions).";
@@ -1591,6 +1663,26 @@ C<buf> is valid until the command has completed.  Other
 parameters behave as documented in C<nbd_pread>.";
   };

+  "aio_pread_structured", {
+    default_call with
+    args = [ BytesPersistOut ("buf", "count"); UInt64 "offset";
+             Opaque "data";
+             CallbackPersist ("chunk", [ Opaque "data";
+                                         BytesIn ("subbuf", "count");
+                                         UInt64 "offset"; Int "error";
+                                         Int "status" ]);
+             Flags "flags" ];
+    ret = RInt64;
+    permitted_states = [ Connected ];
+    shortdesc = "read from the NBD server";
+    longdesc = "\
+Issue a read command to the NBD server.  This returns the
+unique positive 64 bit handle for this command, or C<-1> on
+error.  To check if the command completed, call
+C<nbd_aio_command_completed>.  Parameters behave as documented
+in C<nbd_pread_structured>.";
+  };
+
   "aio_pwrite", {
     default_call with
     args = [ BytesPersistIn ("buf", "count"); UInt64 "offset"; Flags "flags" ];
diff --git a/lib/rw.c b/lib/rw.c
index dc81c57..24dbc4e 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -55,6 +55,22 @@ nbd_unlocked_pread (struct nbd_handle *h, void *buf,
   return wait_for_command (h, ch);
 }

+/* Issue a read command with callbacks and wait for the reply. */
+int
+nbd_unlocked_pread_structured (struct nbd_handle *h, void *buf,
+                               size_t count, uint64_t offset,
+                               void *opaque, read_fn read, uint32_t flags)
+{
+  int64_t ch;
+
+  ch = nbd_unlocked_aio_pread_structured (h, buf, count, offset,
+                                          opaque, read, flags);
+  if (ch == -1)
+    return -1;
+
+  return wait_for_command (h, ch);
+}
+
 /* Issue a write command and wait for the reply. */
 int
 nbd_unlocked_pwrite (struct nbd_handle *h, const void *buf,
@@ -231,6 +247,22 @@ nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
                                       buf, NULL);
 }

+int64_t
+nbd_unlocked_aio_pread_structured (struct nbd_handle *h, void *buf,
+                                   size_t count, uint64_t offset,
+                                   void *opaque, read_fn read, uint32_t flags)
+{
+  struct command_cb cb = { .opaque = opaque, .fn.read = read, };
+
+  if (flags != 0) {
+    set_error (EINVAL, "invalid flag: %" PRIu32, flags);
+    return -1;
+  }
+
+  return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+                                      buf, &cb);
+}
+
 int64_t
 nbd_unlocked_aio_pwrite (struct nbd_handle *h, const void *buf,
                          size_t count, uint64_t offset,
-- 
2.20.1




More information about the Libguestfs mailing list