[Libguestfs] [nbdkit PATCH 5/5] ext2: Support parallel requests and connections

Fri May 7 01:59:40 UTC 2021

Instead of serializing connections (which in turn serializes requests)
because it is unsafe to open more than one handle into the filesystem
at the same time, use the recently-added APIs to open the filesystem
during .after_fork, then use the fact that ext2 code then supports
parallel access to files within the single system handle.
---
 filters/ext2/ext2.c | 225 ++++++++++++++++++++++++++------------------
 1 file changed, 131 insertions(+), 94 deletions(-)

diff --git a/filters/ext2/ext2.c b/filters/ext2/ext2.c
index cc80458f..5b047246 100644
--- a/filters/ext2/ext2.c
+++ b/filters/ext2/ext2.c
@@ -36,6 +36,7 @@
 #include <stdlib.h>
 #include <inttypes.h>
 #include <errno.h>
+#include <stdbool.h>

 /* Inlining is broken in the ext2fs header file.  Disable it by
  * defining the following:
@@ -55,6 +56,14 @@
  */
 static const char *file;

+/* Filesystem handle, shared between all client connections. */
+static ext2_filsys fs;
+
+/* Plugin access shared between all client connections, and doubles as
+ * the "name" parameter for ext2fs_open.
+ */
+static nbdkit_next *plugin;
+
 static void
 ext2_load (void)
 {
@@ -89,7 +98,8 @@ ext2_config_complete (nbdkit_next_config_complete *next, nbdkit_backend *nxdata)
   if (strcmp (file, "exportname") == 0)
     file = NULL;
   else if (file[0] != '/') {
-    nbdkit_error ("the file parameter must refer to an absolute path");
+    nbdkit_error ("the file parameter must be 'exportname' or refer to "
+                  "an absolute path");
     return -1;
   }

@@ -100,13 +110,89 @@ ext2_config_complete (nbdkit_next_config_complete *next, nbdkit_backend *nxdata)
   "ext2file=<FILENAME>  (required) Absolute name of file to serve inside the\n" \
   "                     disk image, or 'exportname' for client choice."

+/* Opening more than one instance of the filesystem in parallel is a
+ * recipe for disaster, so instead we open a single instance during
+ * after_fork to share among all client connections.  If the
+ * underlying plugin supports parallel requests, then we do too since
+ * ext2 code is re-entrant through our one open handle.
+ */
+static int
+ext2_after_fork (nbdkit_backend *nxdata)
+{
+  CLEANUP_FREE char *name = NULL;
+  int fs_flags;
+  int64_t r;
+  errcode_t err;
+
+  /* It would be desirable for ‘nbdkit -r’ to behave the same way as
+   * ‘mount -o ro’.  But we don't know the state of the readonly flag
+   * until ext2_open is called, so for now we can't do that.  We could
+   * add a knob during .config if desired; but until then, we blindly
+   * request write access to the underlying plugin, for journal
+   * replay.
+   *
+   * Similarly, there is no sane way to pass the client's exportname
+   * through to the plugin (whether or not .config was set to serve a
+   * single file or to let the client choose by exportname), so
+   * blindly ask for "" and rely on the plugin's default.
+   */
+  plugin = nbdkit_next_context_open (nxdata, 0, "", true);
+  if (plugin == NULL) {
+    nbdkit_error ("unable to open plugin");
+    return -1;
+  }
+  if (plugin->prepare (plugin) == -1)
+    goto fail;
+
+  fs_flags = 0;
+#ifdef EXT2_FLAG_64BITS
+  fs_flags |= EXT2_FLAG_64BITS;
+#endif
+  r = plugin->get_size (plugin);
+  if (r == -1)
+    goto fail;
+  /* XXX See note above about a knob for read-only */
+  r = plugin->can_write (plugin);
+  if (r == -1)
+    goto fail;
+  if (r == 1)
+    fs_flags |= EXT2_FLAG_RW;
+
+  name = nbdkit_io_encode (plugin);
+  if (!name) {
+    nbdkit_error ("nbdkit_io_encode: %m");
+    goto fail;
+  }
+
+  err = ext2fs_open (name, fs_flags, 0, 0, nbdkit_io_manager, &fs);
+  if (err != 0) {
+    nbdkit_error ("open: %s", error_message (err));
+    goto fail;
+  }
+
+  return 0;
+
+ fail:
+  plugin->finalize (plugin);
+  nbdkit_next_context_close (plugin);
+  return -1;
+}
+
+static void
+ext2_cleanup (nbdkit_backend *nxdata)
+{
+  if (fs)
+    ext2fs_close (fs);
+  plugin->finalize (plugin);
+  nbdkit_next_context_close (plugin);
+}
+
 /* The per-connection handle. */
 struct handle {
   const char *exportname;       /* Client export name. */
-  ext2_filsys fs;               /* Filesystem handle. */
   ext2_ino_t ino;               /* Inode of open file. */
   ext2_file_t file;             /* File handle. */
-  nbdkit_next *next;            /* "name" parameter to ext2fs_open. */
+  nbdkit_context *context;
 };

 /* Export list. */
@@ -117,18 +203,17 @@ ext2_list_exports (nbdkit_next_list_exports *next, nbdkit_backend *nxdata,
   /* If we are honoring export names, the default export "" won't
    * work, and we must not leak export names from the underlying
    * plugin.  Advertising all filenames within the ext2 image could be
-   * huge, and even if we wanted to, it would require that we could
-   * open the plugin prior to the client reaching our .open.  So leave
-   * the list empty instead.
+   * huge, although we could do it if we wanted to since the
+   * filesystem was already opened in .after_fork.
    */
   if (!file)
     return 0;

   /* If we are serving a specific ext2file, we don't care what export
-   * name the user passes, but the underlying plugin might; there's no
-   * harm in advertising that list.
+   * name the user passes, but it's too late to pass that on to the
+   * underlying plugin, so advertise just "".
    */
-  return next (nxdata, readonly, exports);
+  return nbdkit_use_default_export (exports);
 }

 /* Default export. */
@@ -170,17 +255,7 @@ ext2_open (nbdkit_next_open *next, nbdkit_context *nxdata,
     return NULL;
   }

-  /* If file == NULL (ie. using exportname) then don't
-   * pass the client exportname to the lower layers.
-   */
-  exportname = file ? exportname : "";
-
-  /* Request write access to the underlying plugin, for journal replay. */
-  if (next (nxdata, 0, exportname) == -1) {
-    free (h);
-    return NULL;
-  }
-
+  h->context = nxdata;
   return h;
 }

@@ -189,36 +264,12 @@ ext2_prepare (nbdkit_next *next, void *handle, int readonly)
 {
   struct handle *h = handle;
   errcode_t err;
-  int fs_flags;
   int file_flags;
   struct ext2_inode inode;
-  int64_t r;
   CLEANUP_FREE char *name = NULL;
   const char *fname = file ?: h->exportname;
   CLEANUP_FREE char *absname = NULL;
-
-  fs_flags = 0;
-#ifdef EXT2_FLAG_64BITS
-  fs_flags |= EXT2_FLAG_64BITS;
-#endif
-  r = next->get_size (next);
-  if (r == -1)
-    return -1;
-  r = next->can_write (next);
-  if (r == -1)
-    return -1;
-  if (r == 0)
-    readonly = 1;
-
-  if (!readonly)
-    fs_flags |= EXT2_FLAG_RW;
-
-  h->next = next;
-  name = nbdkit_io_encode (next);
-  if (!name) {
-    nbdkit_error ("nbdkit_io_encode: %m");
-    return -1;
-  }
+  nbdkit_next *old;

   if (fname[0] != '/') {
     if (asprintf (&absname, "/%s", fname) < 0) {
@@ -228,53 +279,58 @@ ext2_prepare (nbdkit_next *next, void *handle, int readonly)
     fname = absname;
   }

-  err = ext2fs_open (name, fs_flags, 0, 0, nbdkit_io_manager, &h->fs);
-  if (err != 0) {
-    nbdkit_error ("open: %s", error_message (err));
-    goto err0;
-  }
-
   if (strcmp (fname, "/") == 0)
     /* probably gonna fail, but we'll catch it later */
     h->ino = EXT2_ROOT_INO;
   else {
-    err = ext2fs_namei (h->fs, EXT2_ROOT_INO, EXT2_ROOT_INO,
+    err = ext2fs_namei (fs, EXT2_ROOT_INO, EXT2_ROOT_INO,
                         &fname[1], &h->ino);
     if (err != 0) {
       nbdkit_error ("%s: namei: %s", fname, error_message (err));
-      goto err1;
+      return -1;
     }
   }

   /* Check that fname is a regular file.
    * XXX This won't follow symlinks, we'd have to do that manually.
    */
-  err = ext2fs_read_inode (h->fs, h->ino, &inode);
+  err = ext2fs_read_inode (fs, h->ino, &inode);
   if (err != 0) {
     nbdkit_error ("%s: inode: %s", fname, error_message (err));
-    goto err1;
+    return -1;
   }
   if (!LINUX_S_ISREG (inode.i_mode)) {
     nbdkit_error ("%s: must be a regular file in the disk image", fname);
-    goto err1;
+    return -1;
   }

   file_flags = 0;
   if (!readonly)
     file_flags |= EXT2_FILE_WRITE;
-  err = ext2fs_file_open2 (h->fs, h->ino, NULL, file_flags, &h->file);
+  err = ext2fs_file_open2 (fs, h->ino, NULL, file_flags, &h->file);
   if (err != 0) {
     nbdkit_error ("%s: open: %s", fname, error_message (err));
-    goto err1;
+    return -1;
   }

+  /* Associate our shared backend with this connection, so we don't
+   * have to override every single callback function.
+   */
+  old = nbdkit_context_set_next (h->context, plugin);
+  assert (old == NULL);
   return 0;
+}

- err1:
-  ext2fs_close (h->fs);
-  h->fs = NULL;
- err0:
-  return -1;
+static int
+ext2_finalize (nbdkit_next *next, void *handle)
+{
+  struct handle *h = handle;
+  nbdkit_next *old;
+
+  /* Ensure our shared plugin handle survives past this connection. */
+  old = nbdkit_context_set_next (h->context, NULL);
+  assert (old == next);
+  return 0;
 }

 /* Free up the per-connection handle. */
@@ -283,10 +339,8 @@ ext2_close (void *handle)
 {
   struct handle *h = handle;

-  if (h->fs) {
+  if (h->file)
     ext2fs_file_close (h->file);
-    ext2fs_close (h->fs);
-  }
   free (h);
 }

@@ -306,12 +360,10 @@ ext2_can_cache (nbdkit_next *next, void *handle)
 static int
 ext2_can_multi_conn (nbdkit_next *next, void *handle)
 {
-  /* Since we do not permit parallel connections, it does not matter
-   * what we advertise here, and we could just as easily inherit the
-   * plugin's .can_multi_conn.  But realistically, if we adjust
-   * .thread_model, we cannot advertise support unless .flush is
-   * consistent, and that would require inspecting the ext2 source
-   * code, so for now, we hard-code a safe answer.
+  /* Although we permit parallel client connections multiplexed into
+   * the single shared filesystem handle, we would have to audit the
+   * ext2 source to learn if it is cache-consistent.  So for now,
+   * hard-code a safe answer.
    */
   return 0;
 }
@@ -324,7 +376,7 @@ ext2_can_flush (nbdkit_next *next, void *handle)
    * plugin ability, since ext2 wants to flush the filesystem into
    * permanent storage when possible.
    */
-  if (next->can_flush (next) == -1)
+  if (plugin->can_flush (plugin) == -1)
     return -1;
   return 1;
 }
@@ -339,7 +391,7 @@ ext2_can_zero (nbdkit_next *next, void *handle)
   /* For now, tell nbdkit to call .pwrite instead of any optimization.
    * However, we also want to cache the underlying plugin support.
    */
-  if (next->can_zero (next) == -1)
+  if (plugin->can_zero (plugin) == -1)
     return -1;
   return NBDKIT_ZERO_EMULATE;
 }
@@ -350,28 +402,11 @@ ext2_can_trim (nbdkit_next *next, void *handle)
   /* For now, tell nbdkit to never call .trim.  However, we also want
    * to cache the underlying plugin support.
    */
-  if (next->can_trim (next) == -1)
+  if (plugin->can_trim (plugin) == -1)
     return -1;
   return 0;
 }

-/* It might be possible to relax this, but it's complicated.
- *
- * It's desirable for ‘nbdkit -r’ to behave the same way as
- * ‘mount -o ro’.  But we don't know the state of the readonly flag
- * until ext2_open is called (because the NBD client can also request
- * a readonly connection).  So we could not set the "ro" flag if we
- * opened the filesystem any earlier (eg in ext2_config).
- *
- * So out of necessity we have one ext2_filsys handle per connection,
- * but if we allowed parallel work on those handles then we would get
- * data corruption, so we need to serialize connections.
- */
-static int ext2_thread_model (void)
-{
-  return NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS;
-}
-
 /* Description. */
 static const char *
 ext2_export_description (nbdkit_next *next, void *handle)
@@ -379,7 +414,7 @@ ext2_export_description (nbdkit_next *next, void *handle)
   struct handle *h = handle;
   const char *fname = file ?: h->exportname;
   const char *slash = fname[0] == '/' ? "" : "/";
-  const char *base = next->export_description (next);
+  const char *base = plugin->export_description (plugin);

   if (!base)
     return NULL;
@@ -506,11 +541,13 @@ static struct nbdkit_filter filter = {
   .config             = ext2_config,
   .config_complete    = ext2_config_complete,
   .config_help        = ext2_config_help,
-  .thread_model       = ext2_thread_model,
+  .after_fork         = ext2_after_fork,
+  .cleanup            = ext2_cleanup,
   .list_exports       = ext2_list_exports,
   .default_export     = ext2_default_export,
   .open               = ext2_open,
   .prepare            = ext2_prepare,
+  .finalize           = ext2_finalize,
   .close              = ext2_close,
   .can_fua            = ext2_can_fua,
   .can_cache          = ext2_can_cache,
-- 
2.31.1