[Libguestfs] [libnbd PATCH v2 5/5] states: Add DF flag support for pread

Eric Blake eblake at redhat.com
Fri Jun 21 01:45:08 UTC 2019


When structured replies are negotiated, the server may advertise
support for the DF flag (the server promises to return at most one
data/hole chunk, or to fail with NBD_EOVERFLOW if the chunk would be
too large). As both nbdkit and qemu-nbd support this flag (the former
only trivially, but the latter by not compressing holes over the
wire), it is worth exposing to clients, if only for testing purposes.

I chose to specifically reject the flag for plain nbd_[aio_]pread
(about all it can do is cause a read to fail that would otherwise
succeed) and only accept it for nbd_[aio_]pread_structured, but it
would be easy enough to change the one to forward to the other if we
wanted.
---
 generator/generator              | 22 +++++++++++++++++++---
 interop/structured-read.c        | 31 ++++++++++++++++++++++++-------
 lib/flags.c                      | 11 +++++++++++
 lib/nbd-protocol.h               |  1 +
 lib/protocol.c                   |  1 +
 lib/rw.c                         | 14 ++++++++++++--
 python/t/405-pread-structured.py |  6 ++++++
 7 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/generator/generator b/generator/generator
index cf9876f..da60486 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1243,6 +1243,17 @@ Returns true if the server supports the zero command
 the server does not.";
   };

+  "can_df", {
+    default_call with
+    args = []; ret = RBool;
+    shortdesc = "does the server support the don't fragment flag to pread?";
+    longdesc = "\
+Returns true if the server supports structured reads with an
+ability to request a non-fragmented read (see C<nbd_pread_structured>,
+C<nbd_aio_pread_structured>).  Returns false if the server either lacks
+structured reads or if it does not support a non-fragmented read request.";
+  };
+
   "can_multi_conn", {
     default_call with
     args = []; ret = RBool;
@@ -1306,7 +1317,8 @@ at C<offset> and ending at C<offset> + C<count> - 1.  NBD
 can only read all or nothing using this call.  The call
 returns when the data has been read fully into C<buf> or there is an
 error.  See also C<nbd_pread_structured>, if finer visibility is
-required into the server's replies.
+required into the server's replies, or if you want to use
+C<LIBNBD_CMD_FLAG_DF>.

 The C<flags> parameter must be C<0> for now (it exists for future NBD
 protocol extensions).";
@@ -1379,8 +1391,11 @@ the server obeyed the requirement that a read call must not have
 overlapping chunks and must not succeed without enough chunks to cover
 the entire request.

-The C<flags> parameter must be C<0> for now (it exists for future NBD
-protocol extensions).";
+The C<flags> parameter may be C<0> for no flags, or may contain
+C<LIBNBD_CMD_FLAG_DF> meaning that the server should not reply with
+more than one fragment (if that is supported - some servers cannot do
+this, see C<nbd_can_df>). Libnbd does not validate that the server
+actually obeys the flag.";
   };

   "pwrite", {
@@ -2025,6 +2040,7 @@ let constants = [

   "CMD_FLAG_FUA",        1 lsl 0;
   "CMD_FLAG_NO_HOLE",    1 lsl 1;
+  "CMD_FLAG_DF",         1 lsl 2;
   "CMD_FLAG_REQ_ONE",    1 lsl 3;

   "READ_DATA",           1;
diff --git a/interop/structured-read.c b/interop/structured-read.c
index ea47254..cf8b893 100644
--- a/interop/structured-read.c
+++ b/interop/structured-read.c
@@ -38,7 +38,7 @@ static const char *unixsocket;
 static char rbuf[1024];

 struct data {
-  //XXX  bool df;    /* input: true if DF flag was passed to request */
+  bool df;         /* input: true if DF flag was passed to request */
   int count;       /* input: count of expected remaining calls */
   bool fail;       /* input: true to return failure */
   bool seen_hole;  /* output: true if hole encountered */
@@ -60,15 +60,24 @@ read_cb (void *opaque, const void *bufv, size_t count, uint64_t offset,

   switch (status) {
   case LIBNBD_READ_DATA:
-    // XXX if (df...)
-    assert (buf == rbuf + 512);
-    assert (count == 512);
-    assert (offset == 2048 + 512);
-    assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    if (data->df) {
+      assert (buf == rbuf);
+      assert (count == 1024);
+      assert (offset == 2048);
+      assert (buf[0] == 0 && memcmp (buf, buf + 1, 511) == 0);
+      assert (buf[512] == 1 && memcmp (buf + 512, buf + 513, 511) == 0);
+    }
+    else {
+      assert (buf == rbuf + 512);
+      assert (count == 512);
+      assert (offset == 2048 + 512);
+      assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    }
     assert (!data->seen_data);
     data->seen_data = true;
     break;
   case LIBNBD_READ_HOLE:
+    assert (!data->df); /* Relies on qemu-nbd's behavior */
     assert (buf == rbuf);
     assert (count == 512);
     assert (offset == 2048);
@@ -134,7 +143,15 @@ main (int argc, char *argv[])
   }
   assert (data.seen_data && data.seen_hole);

-  // XXX Repeat with DF flag
+  /* Repeat with DF flag. */
+  memset (rbuf, 2, sizeof rbuf);
+  data = (struct data) { .df = true, .count = 1, };
+  if (nbd_pread_structured (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+                            LIBNBD_CMD_FLAG_DF) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (data.seen_data && !data.seen_hole);

   /* Trigger a failed callback, to prove connection stays up. With
    * reads, all chunks trigger a callback even after failure, but the
diff --git a/lib/flags.c b/lib/flags.c
index 421a7d2..cdbc28f 100644
--- a/lib/flags.c
+++ b/lib/flags.c
@@ -42,6 +42,11 @@ nbd_internal_set_size_and_flags (struct nbd_handle *h,
     return -1;
   }

+  if (eflags & NBD_FLAG_SEND_DF && !h->structured_replies) {
+    debug (h, "server lacks structured replies, ignoring claim of df");
+    eflags &= ~NBD_FLAG_SEND_DF;
+  }
+
   h->exportsize = exportsize;
   h->eflags = eflags;
   return 0;
@@ -95,6 +100,12 @@ nbd_unlocked_can_zero (struct nbd_handle *h)
   return get_flag (h, NBD_FLAG_SEND_WRITE_ZEROES);
 }

+int
+nbd_unlocked_can_df (struct nbd_handle *h)
+{
+  return get_flag (h, NBD_FLAG_SEND_DF);
+}
+
 int
 nbd_unlocked_can_multi_conn (struct nbd_handle *h)
 {
diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
index 071971e..405af3e 100644
--- a/lib/nbd-protocol.h
+++ b/lib/nbd-protocol.h
@@ -245,6 +245,7 @@ struct nbd_structured_reply_error {
 #define NBD_ENOMEM     12
 #define NBD_EINVAL     22
 #define NBD_ENOSPC     28
+#define NBD_EOVERFLOW  75
 #define NBD_ESHUTDOWN 108

 #endif /* NBD_PROTOCOL_H */
diff --git a/lib/protocol.c b/lib/protocol.c
index d3ac0b4..6087887 100644
--- a/lib/protocol.c
+++ b/lib/protocol.c
@@ -35,6 +35,7 @@ nbd_internal_errno_of_nbd_error (uint32_t error)
   case NBD_ENOMEM: return ENOMEM;
   case NBD_EINVAL: return EINVAL;
   case NBD_ENOSPC: return ENOSPC;
+  case NBD_EOVERFLOW: return EOVERFLOW;
   case NBD_ESHUTDOWN: return ESHUTDOWN;
   default: return EINVAL;
   }
diff --git a/lib/rw.c b/lib/rw.c
index 24dbc4e..2dc60de 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -238,6 +238,10 @@ int64_t
 nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
                         size_t count, uint64_t offset, uint32_t flags)
 {
+  /* We could silently accept flag DF, but it really only makes sense
+   * with callbacks, because otherwise there is no observable change
+   * except that the server may fail where it would otherwise succeed.
+   */
   if (flags != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
@@ -254,12 +258,18 @@ nbd_unlocked_aio_pread_structured (struct nbd_handle *h, void *buf,
 {
   struct command_cb cb = { .opaque = opaque, .fn.read = read, };

-  if (flags != 0) {
+  if ((flags & ~LIBNBD_CMD_FLAG_DF) != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
   }

-  return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+  if ((flags & LIBNBD_CMD_FLAG_DF) != 0 &&
+      nbd_unlocked_can_df (h) != 1) {
+    set_error (EINVAL, "server does not support the DF flag");
+    return -1;
+  }
+
+  return nbd_internal_command_common (h, flags, NBD_CMD_READ, offset, count,
                                       buf, &cb);
 }

diff --git a/python/t/405-pread-structured.py b/python/t/405-pread-structured.py
index 51f0144..1bfa162 100644
--- a/python/t/405-pread-structured.py
+++ b/python/t/405-pread-structured.py
@@ -35,3 +35,9 @@ buf = h.pread_structured (512, 0, 42, f)
 print ("%r" % buf)

 assert buf == expected
+
+buf = h.pread_structured (512, 0, 42, f, nbd.CMD_FLAG_DF)
+
+print ("%r" % buf)
+
+assert buf == expected
-- 
2.20.1




More information about the Libguestfs mailing list