[Libguestfs] [PATCH nbdkit 2/7] cache, cow: Add blk_read_multiple function

Richard W.M. Jones rjones at redhat.com
Mon Jul 26 17:28:55 UTC 2021


Currently the cache and cow filters break up large requests into many
single block-sized requests to the underlying plugin.  For some
plugins (e.g. curl) this is very inefficient and causes huge
slowdowns.

For example, I tested nbdkit + curl alone versus nbdkit + cache + curl
against a slow, remote VMware server.  A simple run of virt-inspector
was at least 6-7 times slower.  (It was so slow that I didn't actually
let it run to completion; I estimated the slowdown factor from interim
debug messages.)

As part of fixing this, introduce a blk_read_multiple primitive.  At
the moment the implementation is simply a loop that calls blk_read,
so this won't make any performance difference yet.  This change is
just refactoring.
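
For illustration only (not part of the patch): a tiny self-contained
program sketching how a client read at a given offset/count is split
by these filters into an unaligned head, an aligned body of whole
blocks, and an unaligned tail.  The aligned body is the part that now
goes through blk_read_multiple as a single call (still a loop
internally after this patch).  The 64K block size and the split()
helper are made up for the example; the real filters use their own
block size and open-coded logic in cache_pread/cow_pread.

/* Illustration only -- not part of this patch.  BLKSIZE is an example
 * value and split() is a hypothetical helper, not filter code.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define BLKSIZE 65536                   /* example block size */

static void
split (uint64_t offset, uint64_t count)
{
  uint64_t blknum  = offset / BLKSIZE;  /* first block touched */
  uint64_t blkoffs = offset % BLKSIZE;  /* offset within that block */
  uint64_t head = 0, body, tail;

  if (blkoffs > 0) {                    /* unaligned head */
    head = BLKSIZE - blkoffs;
    if (head > count) head = count;
    count -= head;
    blknum++;
  }
  body = count / BLKSIZE;               /* whole blocks: the aligned body */
  tail = count % BLKSIZE;               /* unaligned tail */

  printf ("head %" PRIu64 " bytes, "
          "body %" PRIu64 " block(s) starting at block %" PRIu64
          " (one blk_read_multiple call instead of %" PRIu64
          " blk_read calls), tail %" PRIu64 " bytes\n",
          head, body, blknum, body, tail);
}

int
main (void)
{
  split (0, 2 * 1024 * 1024);           /* aligned 2M read -> 32-block body */
  split (1000, 300000);                 /* unaligned read -> head+body+tail */
  return 0;
}

The interface takes a block count rather than a byte count, so callers
only ever pass whole, aligned blocks, matching the existing blk_read
contract.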
---
 filters/cache/blk.h   |  6 ++++++
 filters/cow/blk.h     |  6 ++++++
 filters/cache/blk.c   | 16 ++++++++++++++++
 filters/cache/cache.c | 21 ++++++++-------------
 filters/cow/blk.c     | 20 ++++++++++++++++++--
 filters/cow/cow.c     | 21 ++++++++-------------
 6 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/filters/cache/blk.h b/filters/cache/blk.h
index 87c753e2..1ee33ed7 100644
--- a/filters/cache/blk.h
+++ b/filters/cache/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
                      uint64_t blknum, uint8_t *block, int *err)
   __attribute__((__nonnull__ (1, 3, 4)));
 
+/* As above, but read multiple blocks. */
+extern int blk_read_multiple (nbdkit_next *next,
+                              uint64_t blknum, uint64_t nrblocks,
+                              uint8_t *block, int *err)
+  __attribute__((__nonnull__ (1, 4, 5)));
+
 /* If a single block is not cached, copy it from the plugin. */
 extern int blk_cache (nbdkit_next *next,
                       uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index e6fd7417..b066c602 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
                      uint64_t blknum, uint8_t *block, int *err)
   __attribute__((__nonnull__ (1, 3, 4)));
 
+/* Read multiple blocks from the overlay or plugin. */
+extern int blk_read_multiple (nbdkit_next *next,
+                              uint64_t blknum, uint64_t nrblocks,
+                              uint8_t *block, int *err)
+  __attribute__((__nonnull__ (1, 4, 5)));
+
 /* Cache mode for blocks not already in overlay */
 enum cache_mode {
   BLK_CACHE_IGNORE,      /* Do nothing */
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index f52f30e3..0d15411b 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -256,6 +256,22 @@ blk_read (nbdkit_next *next,
   }
 }
 
+int
+blk_read_multiple (nbdkit_next *next,
+                   uint64_t blknum, uint64_t nrblocks,
+                   uint8_t *block, int *err)
+{
+  while (nrblocks > 0) {
+    if (blk_read (next, blknum, block, err) == -1)
+      return -1;
+    blknum++;
+    nrblocks--;
+    block += blksize;
+  }
+
+  return 0;
+}
+
 int
 blk_cache (nbdkit_next *next,
            uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 499aec68..9c081948 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -313,7 +313,7 @@ cache_pread (nbdkit_next *next,
              uint32_t flags, int *err)
 {
   CLEANUP_FREE uint8_t *block = NULL;
-  uint64_t blknum, blkoffs;
+  uint64_t blknum, blkoffs, nrblocks;
   int r;
 
   assert (!flags);
@@ -348,22 +348,17 @@ cache_pread (nbdkit_next *next,
   }
 
   /* Aligned body */
-  /* XXX This breaks up large read requests into smaller ones, which
-   * is a problem for plugins which have a large, fixed per-request
-   * overhead (hello, curl).  We should try to keep large requests
-   * together as much as possible, but that requires us to be much
-   * smarter here.
-   */
-  while (count >= blksize) {
+  nrblocks = count / blksize;
+  if (nrblocks > 0) {
     ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
-    r = blk_read (next, blknum, buf, err);
+    r = blk_read_multiple (next, blknum, nrblocks, buf, err);
     if (r == -1)
       return -1;
 
-    buf += blksize;
-    count -= blksize;
-    offset += blksize;
-    blknum++;
+    buf += nrblocks * blksize;
+    count -= nrblocks * blksize;
+    offset += nrblocks * blksize;
+    blknum += nrblocks;
   }
 
   /* Unaligned tail */
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 0f12d510..9d6e3e67 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -223,8 +223,8 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
   *trimmed = state == BLOCK_TRIMMED;
 }
 
-/* These are the block operations.  They always read or write a single
- * whole block of size ‘blksize’.
+/* These are the block operations.  They always read or write whole
+ * blocks of size ‘blksize’.
  */
 int
 blk_read (nbdkit_next *next,
@@ -280,6 +280,22 @@ blk_read (nbdkit_next *next,
   }
 }
 
+int
+blk_read_multiple (nbdkit_next *next,
+                   uint64_t blknum, uint64_t nrblocks,
+                   uint8_t *block, int *err)
+{
+  while (nrblocks > 0) {
+    if (blk_read (next, blknum, block, err) == -1)
+      return -1;
+    blknum++;
+    nrblocks--;
+    block += BLKSIZE;
+  }
+
+  return 0;
+}
+
 int
 blk_cache (nbdkit_next *next,
            uint64_t blknum, uint8_t *block, enum cache_mode mode, int *err)
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 3bd09399..f74c0a34 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -210,7 +210,7 @@ cow_pread (nbdkit_next *next,
            uint32_t flags, int *err)
 {
   CLEANUP_FREE uint8_t *block = NULL;
-  uint64_t blknum, blkoffs;
+  uint64_t blknum, blkoffs, nrblocks;
   int r;
 
   if (!IS_ALIGNED (count | offset, BLKSIZE)) {
@@ -243,21 +243,16 @@ cow_pread (nbdkit_next *next,
   }
 
   /* Aligned body */
-  /* XXX This breaks up large read requests into smaller ones, which
-   * is a problem for plugins which have a large, fixed per-request
-   * overhead (hello, curl).  We should try to keep large requests
-   * together as much as possible, but that requires us to be much
-   * smarter here.
-   */
-  while (count >= BLKSIZE) {
-    r = blk_read (next, blknum, buf, err);
+  nrblocks = count / BLKSIZE;
+  if (nrblocks > 0) {
+    r = blk_read_multiple (next, blknum, nrblocks, buf, err);
     if (r == -1)
       return -1;
 
-    buf += BLKSIZE;
-    count -= BLKSIZE;
-    offset += BLKSIZE;
-    blknum++;
+    buf += nrblocks * BLKSIZE;
+    count -= nrblocks * BLKSIZE;
+    offset += nrblocks * BLKSIZE;
+    blknum += nrblocks;
   }
 
   /* Unaligned tail */
-- 
2.32.0



