[Libguestfs] [libnbd PATCH 8/8] states: Add DF flag support for pread

Eric Blake eblake at redhat.com
Tue Jun 18 00:07:58 UTC 2019


When structured replies are negotiated, the server may advertise
support for the DF flag (the server promises to return at most one
data/hole chunk, or to fail with NBD_EOVERFLOW if the chunk would be
too large). As both nbdkit and qemu-nbd support this flag (the former
only trivially, but the latter by not compressing holes over the
wire), it is worth exposing to clients, if only for testing purposes.

I chose to specifically reject the flag for plain nbd_[aio_]pread
(about all it can do is cause a read to fail that would otherwise
succeed) and only accept it for nbd_[aio_]pread_callback.
---
 generator/generator            | 22 ++++++++++++++++++----
 interop/structured-read.c      | 31 ++++++++++++++++++++++++-------
 lib/flags.c                    | 11 +++++++++++
 lib/nbd-protocol.h             |  1 +
 lib/protocol.c                 |  1 +
 lib/rw.c                       | 14 ++++++++++++--
 python/t/405-pread-callback.py |  6 ++++++
 7 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/generator/generator b/generator/generator
index ce77f17..7a32570 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1243,6 +1243,16 @@ Returns true if the server supports the zero command
 the server does not.";
   };

+  "can_df", {
+    default_call with
+    args = []; ret = RBool;
+    shortdesc = "does the server support the don't fragment flag to pread?";
+    longdesc = "\
+Returns true if the server supports structured reads with an
+ability to request a non-fragmented read (see C<nbd_pread_callback>,
+C<nbd_aio_pread_callback>).  Returns false if the server does not.";
+  };
+
   "can_multi_conn", {
     default_call with
     args = []; ret = RBool;
@@ -1306,9 +1316,10 @@ at C<offset> and ending at C<offset> + C<count> - 1.  NBD
 can only read all or nothing using this call.  The call
 returns when the data has been read fully into C<buf> or there is an
 error.  See also C<nbd_pread_callback>, if finer visibility is
-required into the server's replies.
+required into the server's replies, or if you want to use
+C<LIBNBD_CMD_FLAG_DF>.

-The C<flags> parameter must be C<0> for now (it exists for future NBD
+The C<flags> parameter should be C<0> for now (it exists for future NBD
 protocol extensions).";
   };

@@ -1372,8 +1383,10 @@ validate that the server obeyed the requirement that a read call must
 not have overlapping chunks and must not succeed without enough chunks
 to cover the entire request.

-The C<flags> parameter must be C<0> for now (it exists for future NBD
-protocol extensions).";
+The C<flags> parameter may be C<0> for no flags, or may contain
+C<LIBNBD_CMD_FLAG_DF> meaning that the server should not reply with
+more than one fragment (if that is supported - some servers cannot do
+this, see C<nbd_can_df>).";
   };

   "pwrite", {
@@ -2018,6 +2031,7 @@ let constants = [

   "CMD_FLAG_FUA",        1 lsl 0;
   "CMD_FLAG_NO_HOLE",    1 lsl 1;
+  "CMD_FLAG_DF",         1 lsl 2;
   "CMD_FLAG_REQ_ONE",    1 lsl 3;

   "READ_DATA",           1;
diff --git a/interop/structured-read.c b/interop/structured-read.c
index b740e98..0d87aa8 100644
--- a/interop/structured-read.c
+++ b/interop/structured-read.c
@@ -38,7 +38,7 @@ static const char *unixsocket;
 static char rbuf[1024];

 struct data {
-  //XXX  bool df;    /* input: true if DF flag was passed to request */
+  bool df;         /* input: true if DF flag was passed to request */
   int count;       /* input: count of expected remaining calls */
   bool fail;       /* input: true to return failure */
   bool seen_hole;  /* output: true if hole encountered */
@@ -59,15 +59,24 @@ read_cb (void *opaque, const void *bufv, size_t count, uint64_t offset,

   switch (status) {
   case LIBNBD_READ_DATA:
-    // XXX if (df...)
-    assert (buf == rbuf + 512);
-    assert (count == 512);
-    assert (offset == 2048 + 512);
-    assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    if (data->df) {
+      assert (buf == rbuf);
+      assert (count == 1024);
+      assert (offset == 2048);
+      assert (buf[0] == 0 && memcmp (buf, buf + 1, 511) == 0);
+      assert (buf[512] == 1 && memcmp (buf + 512, buf + 513, 511) == 0);
+    }
+    else {
+      assert (buf == rbuf + 512);
+      assert (count == 512);
+      assert (offset == 2048 + 512);
+      assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    }
     assert (!data->seen_data);
     data->seen_data = true;
     break;
   case LIBNBD_READ_HOLE:
+    assert (!data->df); /* Relies on qemu-nbd's behavior */
     assert (buf == rbuf);
     assert (count == 512);
     assert (offset == 2048);
@@ -133,7 +142,15 @@ main (int argc, char *argv[])
   }
   assert (data.seen_data && data.seen_hole);

-  // XXX Repeat with DF flag
+  /* Repeat with DF flag. */
+  memset (rbuf, 2, sizeof rbuf);
+  data = (struct data) { .df = true, .count = 1, };
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+                          LIBNBD_CMD_FLAG_DF) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (data.seen_data && !data.seen_hole);

   /* Trigger a failed callback, to prove connection stays up. With
    * reads, all chunks trigger a callback even after failure, but the
diff --git a/lib/flags.c b/lib/flags.c
index 421a7d2..cdbc28f 100644
--- a/lib/flags.c
+++ b/lib/flags.c
@@ -42,6 +42,11 @@ nbd_internal_set_size_and_flags (struct nbd_handle *h,
     return -1;
   }

+  if (eflags & NBD_FLAG_SEND_DF && !h->structured_replies) {
+    debug (h, "server lacks structured replies, ignoring claim of df");
+    eflags &= ~NBD_FLAG_SEND_DF;
+  }
+
   h->exportsize = exportsize;
   h->eflags = eflags;
   return 0;
@@ -95,6 +100,12 @@ nbd_unlocked_can_zero (struct nbd_handle *h)
   return get_flag (h, NBD_FLAG_SEND_WRITE_ZEROES);
 }

+int
+nbd_unlocked_can_df (struct nbd_handle *h)
+{
+  return get_flag (h, NBD_FLAG_SEND_DF);
+}
+
 int
 nbd_unlocked_can_multi_conn (struct nbd_handle *h)
 {
diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
index 071971e..405af3e 100644
--- a/lib/nbd-protocol.h
+++ b/lib/nbd-protocol.h
@@ -245,6 +245,7 @@ struct nbd_structured_reply_error {
 #define NBD_ENOMEM     12
 #define NBD_EINVAL     22
 #define NBD_ENOSPC     28
+#define NBD_EOVERFLOW  75
 #define NBD_ESHUTDOWN 108

 #endif /* NBD_PROTOCOL_H */
diff --git a/lib/protocol.c b/lib/protocol.c
index d3ac0b4..6087887 100644
--- a/lib/protocol.c
+++ b/lib/protocol.c
@@ -35,6 +35,7 @@ nbd_internal_errno_of_nbd_error (uint32_t error)
   case NBD_ENOMEM: return ENOMEM;
   case NBD_EINVAL: return EINVAL;
   case NBD_ENOSPC: return ENOSPC;
+  case NBD_EOVERFLOW: return EOVERFLOW;
   case NBD_ESHUTDOWN: return ESHUTDOWN;
   default: return EINVAL;
   }
diff --git a/lib/rw.c b/lib/rw.c
index 669987e..d30575b 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -238,6 +238,10 @@ int64_t
 nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
                         size_t count, uint64_t offset, uint32_t flags)
 {
+  /* We could silently accept flag DF, but it really only makes sense
+   * with callbacks, because otherwise there is no observable change
+   * except that the server may fail where it would otherwise succeed.
+   */
   if (flags != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
@@ -254,12 +258,18 @@ nbd_unlocked_aio_pread_callback (struct nbd_handle *h, void *buf,
 {
   struct command_cb cb = { .opaque = opaque, .fn.read = read, };

-  if (flags != 0) {
+  if ((flags & ~LIBNBD_CMD_FLAG_DF) != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
   }

-  return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+  if ((flags & LIBNBD_CMD_FLAG_DF) != 0 &&
+      nbd_unlocked_can_df (h) != 1) {
+    set_error (EINVAL, "server does not support the DF flag");
+    return -1;
+  }
+
+  return nbd_internal_command_common (h, flags, NBD_CMD_READ, offset, count,
                                       buf, &cb);
 }

diff --git a/python/t/405-pread-callback.py b/python/t/405-pread-callback.py
index 7946dac..af90765 100644
--- a/python/t/405-pread-callback.py
+++ b/python/t/405-pread-callback.py
@@ -34,3 +34,9 @@ buf = h.pread_callback (512, 0, 42, f)
 print ("%r" % buf)

 assert buf == expected
+
+buf = h.pread_callback (512, 0, 42, f, nbd.CMD_FLAG_DF)
+
+print ("%r" % buf)
+
+assert buf == expected
-- 
2.20.1




More information about the Libguestfs mailing list