[Libguestfs] [nbdkit PATCH v3 16/16] multi-conn: Knob to limit consistency emulation by export name

Fri Mar 5 23:31:20 UTC 2021

We can't enable the knob by default since many nbdkit plugins don't
care about export name, but it is certainly more efficient to limit
consistency to just clients visiting the same export.  Thankfully,
NBD_CMD_FLUSH does not take size and offset, and therefore is safe
(although wasting CPU cycles) to use even when a different export has
a different size.

Done by adding another layer of indirection: all connections are
combined into a group (a singleton when the knob is off, or by name
when the knob is on), moving fast dirty tracking into that group.
---
 .../multi-conn/nbdkit-multi-conn-filter.pod   |  27 +++-
 tests/Makefile.am                             |   2 +
 filters/multi-conn/multi-conn.c               | 118 +++++++++++++++---
 tests/test-multi-conn-name.sh                 |  88 +++++++++++++
 tests/test-multi-conn-plugin.sh               |  63 ++++++----
 5 files changed, 255 insertions(+), 43 deletions(-)
 create mode 100755 tests/test-multi-conn-name.sh

diff --git a/filters/multi-conn/nbdkit-multi-conn-filter.pod b/filters/multi-conn/nbdkit-multi-conn-filter.pod
index c8f334b3..efeba570 100644
--- a/filters/multi-conn/nbdkit-multi-conn-filter.pod
+++ b/filters/multi-conn/nbdkit-multi-conn-filter.pod
@@ -5,7 +5,7 @@ nbdkit-multi-conn-filter - nbdkit multi-conn filter
 =head1 SYNOPSIS

  nbdkit --filter=multi-conn plugin [multi-conn-mode=MODE] \
-   [multi-conn-track-dirty=LEVEL] [plugin-args...]
+   [multi-conn-track-dirty=LEVEL] [multi-conn-exportname=BOOL] [plugin-args...]

 =head1 DESCRIPTION

@@ -26,7 +26,8 @@ it also has additional modes useful for evaluating performance and
 correctness of client and plugin multi-conn behaviors.  This filter
 assumes that multiple connections to a plugin will eventually share
 data, other than any caching effects; it is not suitable for use with
-a plugin that produces completely independent data per connection.
+a plugin that produces completely independent data per connection from
+the same export name.

 Additional control over the behavior of client flush commands is
 possible by combining this filter with L<nbdkit-fua-filter(1)>.  Note
@@ -125,6 +126,27 @@ B<multi-conn-mode=emulate>, a client which disregards
 NBD_FLAG_MULTI_CONN by flushing on each connection itself results in a
 quadratic number of flush operations on the plugin.

+=item B<multi-conn-exportname=false>
+
+The exportname switch defaults to false for safety, and causes the
+filter to flush across all active connections regardless of the export
+name in use by that connection when doing emulation.  However, when a
+plugin supports distinct data according to export name, this behavior
+will penalize the performance of clients visiting an unrelated export
+by spending time on replicated flush operations not actually relevant
+to that export.
+
+=item B<multi-conn-exportname=true>
+
+Setting the exportname switch to true causes the filter to only
+synchronize flushes to connections visiting the same export name.
+This avoids penalizing clients visiting an unrelated export name (such
+as L<nbdkit-file-plugin(1)> in B<dir=> mode), but is unsafe when used
+with a plugin that serves shared content across all connections
+regardless of the export name requested by the client, if that plugin
+is not already multi-conn consistent (such as
+L<nbdkit-vddk-plugin(1)>).
+
 =back

 =head1 EXAMPLES
@@ -161,6 +183,7 @@ C<nbdkit-multi-conn-filter> first appeared in nbdkit 1.26.

 L<nbdkit(1)>,
 L<nbdkit-file-plugin(1)>,
+L<nbdkit-vddk-plugin(1)>,
 L<nbdkit-filter(3)>,
 L<nbdkit-cache-filter(1)>,
 L<nbdkit-fua-filter(1)>,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 22cafa21..631d5014 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1547,10 +1547,12 @@ EXTRA_DIST += \
 # multi-conn filter test.
 TESTS += \
 	test-multi-conn.sh \
+	test-multi-conn-name.sh \
 	$(NULL)
 EXTRA_DIST += \
 	test-multi-conn-plugin.sh \
 	test-multi-conn.sh \
+	test-multi-conn-name.sh \
 	$(NULL)

 # nofilter test.
diff --git a/filters/multi-conn/multi-conn.c b/filters/multi-conn/multi-conn.c
index ed17f7c6..1b1c9d8c 100644
--- a/filters/multi-conn/multi-conn.c
+++ b/filters/multi-conn/multi-conn.c
@@ -60,6 +60,8 @@ static enum TrackDirtyMode {
   OFF,
 } track;

+static bool byname = false;
+
 enum dirty {
   WRITE = 1, /* A write may have populated a cache */
   READ = 2, /* A read may have populated a cache */
@@ -73,12 +75,21 @@ struct handle {
   nbdkit_next *next;
   enum MultiConnMode mode; /* Runtime resolution of mode==AUTO */
   enum dirty dirty; /* What aspects of this connection are dirty */
+  char *name; /* Used when byname==true to assign group */
+  struct group *group; /* All connections grouped with this one */
 };
 DEFINE_VECTOR_TYPE(conns_vector, struct handle *);
-static conns_vector conns = empty_vector;
-static bool dirty; /* True if any connection is dirty */
+struct group {
+  conns_vector conns;
+  char *name;
+  bool dirty; /* True if any connection in group is dirty */
+};
+DEFINE_VECTOR_TYPE(group_vector, struct group *);
+static group_vector groups = empty_vector;

-/* Accept 'multi-conn-mode=mode' and 'multi-conn-track-dirty=level' */
+/* Accept 'multi-conn-mode=mode', 'multi-conn-track-dirty=level', and
+ * 'multi-conn-exportname=bool'.
+ */
 static int
 multi_conn_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
                    const char *key, const char *value)
@@ -114,13 +125,24 @@ multi_conn_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
     }
     return 0;
   }
+  else if (strcmp (key, "multi-conn-exportname") == 0 ||
+           strcmp (key, "multi-conn-export-name") == 0) {
+    int r;
+
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    byname = r;
+    return 0;
+  }
   return next (nxdata, key, value);
 }

 #define multi_conn_config_help \
   "multi-conn-mode=<MODE>          'auto' (default), 'emulate', 'plugin',\n" \
   "                                'disable', or 'unsafe'.\n" \
-  "multi-conn-track-dirty=<LEVEL>  'conn' (default), 'fast', or 'off'.\n"
+  "multi-conn-track-dirty=<LEVEL>  'conn' (default), 'fast', or 'off'.\n" \
+  "multi-conn-exportname=<BOOL>    true to limit emulation by export name.\n"

 static int
 multi_conn_get_ready (nbdkit_next_get_ready *next, nbdkit_backend *nxdata,
@@ -147,6 +169,13 @@ multi_conn_open (nbdkit_next_open *next, nbdkit_context *nxdata,
     nbdkit_error ("calloc: %m");
     return NULL;
   }
+  if (byname) {
+    h->name = strdup (exportname);
+    if (h->name == NULL) {
+      nbdkit_error ("strdup: %m");
+      return NULL;
+    }
+  }
   return h;
 }

@@ -154,6 +183,8 @@ static int
 multi_conn_prepare (nbdkit_next *next, void *handle, int readonly)
 {
   struct handle *h = handle;
+  struct group *g;
+  bool new_group = false;
   int r;

   h->next = next;
@@ -174,7 +205,39 @@ multi_conn_prepare (nbdkit_next *next, void *handle, int readonly)
   }

   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
-  return conns_vector_append (&conns, h);
+  if (byname) {
+    g = NULL;
+    for (size_t i = 0; i < groups.size; i++)
+      if (strcmp (groups.ptr[i]->name, h->name) == 0) {
+        g = groups.ptr[i];
+        break;
+      }
+  }
+  else
+    g = groups.size ? groups.ptr[0] : NULL;
+
+  if (!g) {
+    g = calloc (1, sizeof *g);
+    if (g == NULL) {
+      nbdkit_error ("calloc: %m");
+      return -1;
+    }
+    if (group_vector_append (&groups, g) == -1)
+      return -1;
+    g->name = h->name;
+    h->name = NULL;
+    new_group = true;
+  }
+  if (conns_vector_append (&g->conns, h) == -1) {
+    if (new_group) {
+      group_vector_remove (&groups, groups.size - 1);
+      free (g->name);
+      free (g);
+    }
+    return -1;
+  }
+  h->group = g;
+  return 0;
 }

 static int
@@ -184,21 +247,36 @@ multi_conn_finalize (nbdkit_next *next, void *handle)

   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
   assert (h->next == next);
+  assert (h->group);

   /* XXX should we add a config param to flush if the client forgot? */
-  for (size_t i = 0; i < conns.size; i++) {
-    if (conns.ptr[i] == h) {
-      conns_vector_remove (&conns, i);
+  for (size_t i = 0; i < h->group->conns.size; i++) {
+    if (h->group->conns.ptr[i] == h) {
+      conns_vector_remove (&h->group->conns, i);
       break;
     }
   }
+  if (h->group->conns.size == 0) {
+    for (size_t i = 0; i < groups.size; i++)
+      if (groups.ptr[i] == h->group) {
+        group_vector_remove (&groups, i);
+        free (h->group->name);
+        free (h->group);
+        break;
+      }
+  }
+  h->group = NULL;
   return 0;
 }

 static void
 multi_conn_close (void *handle)
 {
-  free (handle);
+  struct handle *h = handle;
+
+  assert (h->group == NULL);
+  free (h->name);
+  free (h);
 }

 static int
@@ -252,7 +330,7 @@ mark_dirty (struct handle *h, bool is_write)
     /* fallthrough */
   case FAST:
     if (is_write)
-      dirty = true;
+      h->group->dirty = true;
     break;
   case OFF:
     break;
@@ -369,22 +447,24 @@ multi_conn_flush (nbdkit_next *next,
   struct handle *h = handle, *h2;
   size_t i;

+  assert (h->group);
   if (h->mode == EMULATE) {
     ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
-    for (i = 0; i < conns.size; i++) {
-      h2 = conns.ptr[i];
-      if (track == OFF || (dirty && (track == FAST || h2->dirty & READ)) ||
+    for (i = 0; i < h->group->conns.size; i++) {
+      h2 = h->group->conns.ptr[i];
+      if (track == OFF || (h->group->dirty &&
+                           (track == FAST || h2->dirty & READ)) ||
           h2->dirty & WRITE) {
         if (h2->next->flush (h2->next, flags, err) == -1)
           return -1;
         h2->dirty = 0;
       }
     }
-    dirty = 0;
+    h->group->dirty = 0;
   }
   else {
     /* !EMULATE: Check if the image is clean, allowing us to skip a flush. */
-    if (track != OFF && !dirty)
+    if (track != OFF && !h->group->dirty)
       return 0;
     /* Perform the flush, then update dirty tracking. */
     if (next->flush (next, flags, err) == -1)
@@ -393,15 +473,15 @@ multi_conn_flush (nbdkit_next *next,
     case CONN:
       if (next->can_multi_conn (next) == 1) {
         ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
-        for (i = 0; i < conns.size; i++)
-          conns.ptr[i]->dirty = 0;
-        dirty = 0;
+        for (i = 0; i < h->group->conns.size; i++)
+          h->group->conns.ptr[i]->dirty = 0;
+        h->group->dirty = 0;
       }
       else
         h->dirty = 0;
       break;
     case FAST:
-      dirty = false;
+      h->group->dirty = false;
       break;
     case OFF:
       break;
diff --git a/tests/test-multi-conn-name.sh b/tests/test-multi-conn-name.sh
new file mode 100755
index 00000000..698d386e
--- /dev/null
+++ b/tests/test-multi-conn-name.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Demonstrate effect of multi-conn-exportname config flag
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin sh
+requires_nbdsh_uri
+requires nbdsh -c 'print(h.set_opt_mode)'
+requires dd iflag=count_bytes </dev/null
+
+files="test-multi-conn-name.out"
+rm -f $files
+cleanup_fn rm -f $files
+
+fail=0
+export script='
+import os
+
+uri = os.environ["uri"]
+h = {}
+for name in ["a", "b"]:
+  for conn in [1, 2]:
+    key = "%s%d" % (name, conn)
+    h[key] = nbd.NBD()
+    h[key].set_opt_mode(True)
+    h[key].connect_uri(uri)
+    h[key].set_export_name(name)
+    h[key].opt_go()
+h["a1"].pread(1, 0)
+h["a2"].pwrite(b"A", 0)
+h["b1"].pread(1, 0)
+h["b2"].pwrite(b"B", 0, nbd.CMD_FLAG_FUA)
+print(h["a1"].pread(1, 0))
+print(h["b1"].pread(1, 0))
+'
+
+# Without the knob we flush all exports
+nbdkit -vf -U - sh test-multi-conn-plugin.sh --filter=multi-conn \
+  --run 'export uri; nbdsh -c "$script"' > test-multi-conn-name.out || fail=1
+diff -u <(cat <<\EOF
+b'A'
+b'B'
+EOF
+         ) test-multi-conn-name.out || fail=1
+# But with the knob, our flush is specific to the correct export
+nbdkit -vf -U - sh test-multi-conn-plugin.sh --filter=multi-conn \
+  multi-conn-exportname=true \
+  --run 'export uri; nbdsh -c "$script"' > test-multi-conn-name.out || fail=1
+diff -u <(cat <<\EOF
+b'I'
+b'B'
+EOF
+         ) test-multi-conn-name.out || fail=1
+
+exit $fail
diff --git a/tests/test-multi-conn-plugin.sh b/tests/test-multi-conn-plugin.sh
index 7262a348..5d627a07 100755
--- a/tests/test-multi-conn-plugin.sh
+++ b/tests/test-multi-conn-plugin.sh
@@ -39,30 +39,36 @@
 # it ever sends overlapping writes without coordinating flushes and still
 # expects any particular write to occur last).

+get_export() {
+    case $1 in
+        */*) export="$tmpdir/$(dirname $1)" conn=$(basename $1) ;;
+        *) export="$tmpdir" conn=$1 ;;
+    esac
+}
 fill_cache() {
-    if test ! -f "$tmpdir/$1"; then
-        cp "$tmpdir/0" "$tmpdir/$1"
+    if test ! -f "$export/$conn"; then
+        cp "$export/0" "$export/$conn"
     fi
 }
 do_fua() {
-    case ,$4, in
+    case ,$3, in
         *,fua,*)
             if test -f "$tmpdir/strictfua"; then
-                dd of="$tmpdir/0" if="$tmpdir/$1" skip=$3 seek=$3 count=$2 \
+                dd of="$export/0" if="$export/$conn" skip=$2 seek=$2 count=$1 \
                   conv=notrunc iflag=count_bytes,skip_bytes oflag=seek_bytes
             else
-                do_flush $1
+                do_flush
             fi ;;
     esac
 }
 do_flush() {
-    if test -f "$tmpdir/$1-replay"; then
+    if test -f "$export/$conn-replay"; then
         while read cnt off; do
-            dd of="$tmpdir/0" if="$tmpdir/$1" skip=$off seek=$off count=$cnt \
+            dd of="$export/0" if="$export/$conn" skip=$off seek=$off count=$cnt \
                conv=notrunc iflag=count_bytes,skip_bytes oflag=seek_bytes
-        done < "$tmpdir/$1-replay"
+        done < "$export/$conn-replay"
     fi
-    rm -f "$tmpdir/$1" "$tmpdir/$1-replay"
+    rm -f "$export/$conn" "$export/$conn-replay"
 }
 case "$1" in
     config)
@@ -91,30 +97,43 @@ case "$1" in
         ;;
     open)
         read i < "$tmpdir/counter"
-        echo $((i+1)) | tee "$tmpdir/counter"
+        i=$((i+1))
+        echo $i > "$tmpdir/counter"
+        if test -z "$3"; then
+            echo $i
+        else
+            mkdir -p "$tmpdir/$3" || exit 1
+            cp "$tmpdir/0" "$tmpdir/$3/0"
+            echo "$3/$i"
+        fi
         ;;
     pread)
-        fill_cache $2
-        dd if="$tmpdir/$2" skip=$4 count=$3 iflag=count_bytes,skip_bytes
+        get_export $2
+        fill_cache
+        dd if="$export/$conn" skip=$4 count=$3 iflag=count_bytes,skip_bytes
         ;;
     cache)
-        fill_cache $2
+        get_export $2
+        fill_cache
         ;;
     pwrite)
-        fill_cache $2
-        dd of="$tmpdir/$2" seek=$4 conv=notrunc oflag=seek_bytes
-        echo $3 $4 >> "$tmpdir/$2-replay"
-        do_fua $2 $3 $4 $5
+        get_export $2
+        fill_cache
+        dd of="$export/$conn" seek=$4 conv=notrunc oflag=seek_bytes
+        echo $3 $4 >> "$export/$conn-replay"
+        do_fua $3 $4 $5
         ;;
     zero | trim)
-        fill_cache $2
-        dd of="$tmpdir/$2" if="/dev/zero" count=$3 seek=$4 conv=notrunc\
+        get_export $2
+        fill_cache
+        dd of="$export/$conn" if="/dev/zero" count=$3 seek=$4 conv=notrunc\
            oflag=seek_bytes iflag=count_bytes
-        echo $3 $4 >> "$tmpdir/$2-replay"
-        do_fua $2 $3 $4 $5
+        echo $3 $4 >> "$export/$conn-replay"
+        do_fua $3 $4 $5
         ;;
     flush)
-        do_flush $2
+        get_export $2
+        do_flush
         ;;
     *)
         exit 2
-- 
2.30.1