[Libguestfs] [nbdkit PATCH v2 15/17] sh: Enable parallel thread model, when possible

Eric Blake eblake@redhat.com
Fri Aug 2 19:26:16 UTC 2019


When we first created the sh plugin, we copied the
SERIALIZE_ALL_REQUESTS thread model used in most other language
bindings, because it was easier to reason about and there was no way
for a script to override our choice.  However, we've since added
support for scripts to request a tighter model when needed, and we've
also just fixed the remaining bugs where fds were inadvertently
leaked into the shell script (at least when pipe2 and accept4 are
supported).  The shell itself is also sufficiently expressive (even
if not necessarily the most efficient) for a script to enforce mutual
exclusion between concurrent invocations: for example, a script can
use the success or failure of 'mkdir' on a coordinated filename to
learn whether it was first past the gate, or set up FIFOs for
interprocess communication.
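
As an illustration (not part of this patch), here is a minimal sketch
of the mkdir approach; the data file, lock directory name and polling
interval are arbitrary choices for the example, and only pwrite is
serialized:

  #!/usr/bin/env bash
  # Illustrative sh plugin script: serialize writes by hand using an
  # atomic mkdir as a lock.  File and lock names are arbitrary.
  f=/var/tmp/disk.img
  lock=/var/tmp/disk.img.lock
  case "$1" in
    get_size) stat -L -c %s "$f" || exit 1 ;;
    can_write) ;;
    pread) dd iflag=skip_bytes,count_bytes skip="$4" count="$3" \
           if="$f" || exit 1 ;;
    pwrite)
      # Only one concurrent invocation can create the directory, so
      # the others poll until the winner removes it again.
      until mkdir "$lock" 2>/dev/null; do sleep 1; done
      dd oflag=seek_bytes conv=notrunc seek="$4" of="$f"
      rc=$?
      rmdir "$lock"
      exit $rc
      ;;
    *) exit 2 ;;
  esac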

Thus, it is time to promote the sh plugin to fully parallel, when
supported by the underlying system.  (Thanks to .fork_safe, Haiku
remains serialized, so as to avoid one thread fork()ing while another
thread might be leaking an fd.)

The new test copies heavily from test-parallel-file.sh, and also
checks for some of the leaks plugged in recent previous patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 plugins/sh/nbdkit-sh-plugin.pod |  11 ++--
 plugins/sh/sh.c                 |   2 +-
 tests/Makefile.am               |   2 +
 tests/test-dump-plugin.sh       |  11 ++--
 tests/test-parallel-sh.sh       | 108 ++++++++++++++++++++++++++++++++
 5 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100755 tests/test-parallel-sh.sh

diff --git a/plugins/sh/nbdkit-sh-plugin.pod b/plugins/sh/nbdkit-sh-plugin.pod
index 41b9e7c7..7c1781cd 100644
--- a/plugins/sh/nbdkit-sh-plugin.pod
+++ b/plugins/sh/nbdkit-sh-plugin.pod
@@ -189,12 +189,11 @@ one of C<"serialize_connections">, C<"serialize_all_requests">,
 C<"serialize_requests">, or C<"parallel">.

 This method is I<not> required; if omitted, then the plugin will be
-executed under the default sh thread model (currently
-C<"serialize_all_requests">, which implies this callback only makes a
-difference with an output of C<"serialize_connections">; look for
-'max_thread_model' in C<nbdkit --dump-plugin sh>).  If an error
-occurs, the script should output an error message and exit with status
-C<1>; unrecognized output is ignored.
+executed under the default sh thread model (either C<"parallel"> or
+C<"serialize_all_requests">, based on whether the platform supports
+L<pipe2(2)>; look for 'max_thread_model' in C<nbdkit --dump-plugin
+sh>).  If an error occurs, the script should output an error message
+and exit with status C<1>; unrecognized output is ignored.

 =item C<open>

diff --git a/plugins/sh/sh.c b/plugins/sh/sh.c
index 79c9a4ac..6c7954fe 100644
--- a/plugins/sh/sh.c
+++ b/plugins/sh/sh.c
@@ -264,7 +264,7 @@ sh_config_complete (void)
 }

 /* See also .fork_safe and the comments in call.c:call3() */
-#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL

 static int
 sh_thread_model (void)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8b8a6557..3d78e7a2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -91,6 +91,7 @@ EXTRA_DIST = \
 	test-offset-extents.sh \
 	test-parallel-file.sh \
 	test-parallel-nbd.sh \
+	test-parallel-sh.sh \
 	test-partition1.sh \
 	test-partition2.sh \
 	test-partitioning1.sh \
@@ -342,6 +343,7 @@ file-data: generate-file-data.sh
 TESTS += \
 	test-parallel-file.sh \
 	test-parallel-nbd.sh \
+	test-parallel-sh.sh \
 	$(NULL)

 # Most in-depth tests need libguestfs, since that is a convenient way to
diff --git a/tests/test-dump-plugin.sh b/tests/test-dump-plugin.sh
index 982d5062..8f02f659 100755
--- a/tests/test-dump-plugin.sh
+++ b/tests/test-dump-plugin.sh
@@ -72,8 +72,9 @@ foreach_plugin do_test
 # Test that --dump-plugin can be used to introspect a resulting dynamic
 # thread model.
 out=$({
-    # sh does not yet support parallel
-    nbdkit --dump-plugin sh
+    # Here, thread_model depends on atomic CLOEXEC support
+    nbdkit --dump-plugin sh | \
+        sed 's/^thread_model=parallel/thread_model=serialize_all_requests/'
     # Here, the script further reduces things
     nbdkit --dump-plugin sh - <<\EOF
 case $1 in
@@ -85,13 +86,13 @@ EOF
     # Here, the filter further reduces things
     nbdkit --dump-plugin --filter=noparallel sh serialize=connections
 } | grep thread_model)
-exp="max_thread_model=serialize_all_requests
+exp="max_thread_model=parallel
 thread_model=serialize_all_requests
 has_thread_model=1
-max_thread_model=serialize_all_requests
+max_thread_model=parallel
 thread_model=serialize_connections
 has_thread_model=1
-max_thread_model=serialize_all_requests
+max_thread_model=parallel
 thread_model=serialize_connections
 has_thread_model=1"
 if [ "$out" != "$exp" ]; then
diff --git a/tests/test-parallel-sh.sh b/tests/test-parallel-sh.sh
new file mode 100755
index 00000000..21bc010a
--- /dev/null
+++ b/tests/test-parallel-sh.sh
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2017-2019 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+
+# Check file-data was created by Makefile and qemu-io exists.
+requires test -f file-data
+requires qemu-io --version
+
+nbdkit --dump-plugin sh | grep -q ^thread_model=parallel ||
+    { echo "nbdkit lacks support for parallel requests"; exit 77; }
+
+cleanup_fn rm -f test-parallel-sh.data test-parallel-sh.out test-parallel-sh.script
+
+# Populate file, and sanity check that qemu-io can issue parallel requests
+printf '%1024s' . > test-parallel-sh.data
+qemu-io -f raw -c "aio_write -P 1 0 512" -c "aio_write -P 2 512 512" \
+         -c aio_flush test-parallel-sh.data ||
+    { echo "'qemu-io' can't drive parallel requests"; exit 77; }
+
+# Set up the sh plugin to delay both reads and writes (for a good
+# chance that parallel requests are in flight), and with writes longer
+# than reads (to more easily detect if out-of-order completion
+# happens). This test may have spurious failures under heavy loads on
+# the test machine, where tuning the delays may help.
+
+cat > test-parallel-sh.script <<EOF
+#!/usr/bin/env bash
+f=test-parallel-sh.data
+if ! test -f \$f; then
+  echo "can't locate test-parallel-sh.data" >&2; exit 5
+fi
+case \$1 in
+  get_size) stat -L -c %s \$f || exit 1 ;;
+  pread) dd iflag=skip_bytes,count_bytes skip=\$4 count=\$3 if=\$f || exit 1 ;;
+  pwrite) dd oflag=seek_bytes conv=notrunc seek=\$4 of=\$f || exit 1 ;;
+  can_write) ;;
+  *) exit 2 ;;
+esac
+exit 0
+EOF
+chmod +x test-parallel-sh.script
+
+# With --threads=1, the write should complete first because it was issued first
+nbdkit -v -t 1 -U - --filter=delay sh test-parallel-sh.script \
+  wdelay=2 rdelay=1 --run 'qemu-io -f raw -c "aio_write -P 2 512 512" \
+                           -c "aio_read -P 1 0 512" -c aio_flush $nbd' |
+    tee test-parallel-sh.out
+if test "$(grep '512/512' test-parallel-sh.out)" != \
+"wrote 512/512 bytes at offset 512
+read 512/512 bytes at offset 0"; then
+  exit 1
+fi
+
+# With default --threads, the faster read should complete first
+nbdkit -v -U - --filter=delay sh test-parallel-sh.script \
+  wdelay=2 rdelay=1 --run 'qemu-io -f raw -c "aio_write -P 2 512 512" \
+                           -c "aio_read -P 1 0 512" -c aio_flush $nbd' |
+    tee test-parallel-sh.out
+if test "$(grep '512/512' test-parallel-sh.out)" != \
+"read 512/512 bytes at offset 0
+wrote 512/512 bytes at offset 512"; then
+  exit 1
+fi
+
+# With --filter=noparallel, the write should complete first because it was
+# issued first. Also test that the log filter doesn't leak an fd
+nbdkit -v -U - --filter=noparallel --filter=log --filter=delay \
+  sh test-parallel-sh.script logfile=/dev/null \
+  wdelay=2 rdelay=1 --run 'qemu-io -f raw -c "aio_write -P 2 512 512" \
+                           -c "aio_read -P 1 0 512" -c aio_flush $nbd' |
+    tee test-parallel-sh.out
+if test "$(grep '512/512' test-parallel-sh.out)" != \
+"wrote 512/512 bytes at offset 512
+read 512/512 bytes at offset 0"; then
+  exit 1
+fi
+
+exit 0
-- 
2.20.1
