[Libguestfs] [libnbd PATCH 2/2] api: Recover from block status callback failure

Eric Blake eblake at redhat.com
Tue Jun 4 17:12:09 UTC 2019


If the user's extent_fn callback fails, we want to fail the overall
block status command with the same errno, but not give up on the
server.  Do this by skipping any further context chunks from the
server, and relying on the previous patch's ability to preserve the
first error encountered.

Update the interop test with qemu's dirty bitmap context to provoke
callback failure, as well as prove that the connection is still valid.

I did not go as far as documenting EPROTO as our default errno if the
callback returns -1 with errno unchanged.
---
 generator/generator                 |  5 ++-
 generator/states-reply-structured.c | 57 +++++++++++++------------
 interop/dirty-bitmap.c              | 65 +++++++++++++++++++++++------
 3 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/generator/generator b/generator/generator
index 91545ce..1261d54 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1414,7 +1414,10 @@ supported by the server (see C<nbd_can_meta_context>) this call
 returns information about extents by calling back to the
 extent function.  The callback cannot call C<nbd_*> APIs on the
 same handle since it holds the handle lock and will
-cause a deadlock.
+cause a deadlock.  If the callback returns C<-1>, any remaining
+contexts will be ignored and the overall block status command
+will fail with the same value of C<errno> left after the
+failing callback.

 The extent function is called once per type of metadata
 available.  The C<metacontext> parameter is a string
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index c7ba892..5791360 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -357,34 +357,37 @@
     assert (h->bs_entries);
     assert (length >= 12);

-    /* Need to byte-swap the entries returned, but apart from that we
-     * don't validate them.
-     */
-    for (i = 0; i < length/4; ++i)
-      h->bs_entries[i] = be32toh (h->bs_entries[i]);
-
-    /* Look up the context ID. */
-    context_id = h->bs_entries[0];
-    for (meta_context = h->meta_contexts;
-         meta_context;
-         meta_context = meta_context->next)
-      if (context_id == meta_context->context_id)
-        break;
-
-    if (meta_context) {
-      /* Call the caller's extent function. */
-      if (cmd->extent_fn (cmd->data, meta_context->name, cmd->offset,
-                          &h->bs_entries[1], (length-4) / 4) == -1) {
-        SET_NEXT_STATE (%.DEAD); /* XXX We should be able to recover. */
-        if (errno == 0) errno = EPROTO;
-        set_error (errno, "extent function failed");
-        return -1;
-      }
-    }
-    else
+    if (cmd->error)
       /* Emit a debug message, but ignore it. */
-      debug (h, "server sent unexpected meta context ID %" PRIu32,
-             context_id);
+      debug (h, "ignoring meta context ID %" PRIu32 " after callback failure",
+             be32toh (h->bs_entries[0]));
+    else {
+      /* Need to byte-swap the entries returned, but apart from that we
+       * don't validate them.
+       */
+      for (i = 0; i < length/4; ++i)
+        h->bs_entries[i] = be32toh (h->bs_entries[i]);
+
+      /* Look up the context ID. */
+      context_id = h->bs_entries[0];
+      for (meta_context = h->meta_contexts;
+           meta_context;
+           meta_context = meta_context->next)
+        if (context_id == meta_context->context_id)
+          break;
+
+      if (meta_context) {
+        /* Call the caller's extent function. */
+        errno = 0;
+        if (cmd->extent_fn (cmd->data, meta_context->name, cmd->offset,
+                            &h->bs_entries[1], (length-4) / 4) == -1)
+          cmd->error = errno ? errno : EPROTO;
+      }
+      else
+        /* Emit a debug message, but ignore it. */
+        debug (h, "server sent unexpected meta context ID %" PRIu32,
+               context_id);
+    }

     if (flags & NBD_REPLY_FLAG_DONE)
       SET_NEXT_STATE (%^FINISH_COMMAND);
diff --git a/interop/dirty-bitmap.c b/interop/dirty-bitmap.c
index 8d34173..59be0c8 100644
--- a/interop/dirty-bitmap.c
+++ b/interop/dirty-bitmap.c
@@ -23,6 +23,8 @@
 #include <string.h>
 #include <unistd.h>
 #include <assert.h>
+#include <stdbool.h>
+#include <errno.h>

 #include <libnbd.h>

@@ -30,21 +32,31 @@ static const char *unixsocket;
 static const char *bitmap;
 static const char *base_allocation = "base:allocation";

-static int calls; /* Track which contexts passed through callback */
+struct data {
+  bool req_one;    /* input: true if req_one was passed to request */
+  int count;       /* input: count of expected remaining calls */
+  bool fail;       /* input: true to return failure */
+  bool seen_base;  /* output: true if base:allocation encountered */
+  bool seen_dirty; /* output: true if qemu:dirty-bitmap encountered */
+};

 static int
-cb (void *data, const char *metacontext, uint64_t offset,
+cb (void *opaque, const char *metacontext, uint64_t offset,
     uint32_t *entries, size_t len)
 {
-  /* Hack: data is non-NULL if request included REQ_ONE flag */
+  struct data *data = opaque;
+
   assert (offset == 0);
+  assert (data->count-- > 0);
+
   if (strcmp (metacontext, base_allocation) == 0) {
-    calls += 0x1;
-    assert (len == (data ? 2 : 8));
+    assert (!data->seen_base);
+    data->seen_base = true;
+    assert (len == (data->req_one ? 2 : 8));

     /* Data block offset 0 size 128k */
     assert (entries[0] == 131072); assert (entries[1] == 0);
-    if (!data) {
+    if (!data->req_one) {
       /* hole|zero offset 128k size 384k */
       assert (entries[2] == 393216); assert (entries[3] == 3);
       /* allocated zero offset 512k size 64k */
@@ -54,11 +66,12 @@ cb (void *data, const char *metacontext, uint64_t offset,
     }
   }
   else if (strcmp (metacontext, bitmap) == 0) {
-    calls += 0x10;
-    assert (len == (data ? 2 : 10));
+    assert (!data->seen_dirty);
+    data->seen_dirty = true;
+    assert (len == (data->req_one ? 2 : 10));

     assert (entries[0] ==  65536); assert (entries[1] == 0);
-    if (!data) {
+    if (!data->req_one) {
       /* dirty block offset 64K size 64K */
       assert (entries[2] ==  65536); assert (entries[3] == 1);
       assert (entries[4] == 393216); assert (entries[5] == 0);
@@ -72,6 +85,10 @@ cb (void *data, const char *metacontext, uint64_t offset,
     exit (EXIT_FAILURE);
   }

+  if (data->fail) {
+    errno = EPROTO; /* Something NBD servers can't send */
+    return -1;
+  }
   return 0;
 }

@@ -80,6 +97,8 @@ main (int argc, char *argv[])
 {
   struct nbd_handle *nbd;
   int64_t exportsize;
+  struct data data;
+  char c;

   if (argc != 3) {
     fprintf (stderr, "%s unixsocket bitmap\n", argv[0]);
@@ -108,17 +127,37 @@ main (int argc, char *argv[])
     exit (EXIT_FAILURE);
   }

-  if (nbd_block_status (nbd, exportsize, 0, NULL, cb, 0) == -1) {
+  data = (struct data) { .count = 2, };
+  if (nbd_block_status (nbd, exportsize, 0, &data, cb, 0) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
   }
-  assert (calls == 0x11);
-  if (nbd_block_status (nbd, exportsize, 0, &exportsize, cb,
+  assert (data.seen_base && data.seen_dirty);
+
+  data = (struct data) { .req_one = true, .count = 2, };
+  if (nbd_block_status (nbd, exportsize, 0, &data, cb,
                         LIBNBD_CMD_FLAG_REQ_ONE) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
   }
-  assert (calls == 0x22);
+  assert (data.seen_base && data.seen_dirty);
+
+  /* Trigger a failed callback, to prove connection stays up. No way
+   * to know which context will respond first, but we can check that
+   * the other one did not get visited.
+   */
+  data = (struct data) { .count = 1, .fail = true, };
+  if (nbd_block_status (nbd, exportsize, 0, &data, cb, 0) != -1) {
+    fprintf (stderr, "unexpected block status success\n");
+    exit (EXIT_FAILURE);
+  }
+  assert (nbd_get_errno () == EPROTO && nbd_aio_is_ready (nbd));
+  assert (data.seen_base ^ data.seen_dirty);
+
+  if (nbd_pread (nbd, &c, 1, 0, 0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }

   if (nbd_shutdown (nbd) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
-- 
2.20.1




More information about the Libguestfs mailing list