[Libguestfs] [PATCH nbdkit 3/7] cache, cow: Implement blk_read_multiple efficiently

Richard W.M. Jones rjones at redhat.com
Mon Jul 26 17:28:56 UTC 2021


Continuing from the previous commit, reimplement blk_read_multiple so
that it does not break up "runs" of blocks which all have the same
cache state.
---
 filters/cache/blk.c | 83 ++++++++++++++++++++++++++++++---------------
 filters/cow/blk.c   | 67 ++++++++++++++++++++++--------------
 2 files changed, 96 insertions(+), 54 deletions(-)

diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 0d15411b..f85ada35 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -44,6 +44,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <errno.h>
 
 #ifdef HAVE_SYS_STATVFS_H
@@ -193,26 +194,40 @@ blk_set_size (uint64_t new_size)
   return 0;
 }
 
-int
-blk_read (nbdkit_next *next,
-          uint64_t blknum, uint8_t *block, int *err)
+static int
+_blk_read_multiple (nbdkit_next *next,
+                    uint64_t blknum, uint64_t nrblocks,
+                    uint8_t *block, int *err)
 {
   off_t offset = blknum * blksize;
-  enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
+  bool not_cached =
+    bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+  uint64_t b, runblocks;
 
-  reclaim (fd, &bm);
+  assert (nrblocks > 0);
 
   if (cache_debug_verbose)
-    nbdkit_debug ("cache: blk_read block %" PRIu64
+    nbdkit_debug ("cache: blk_read_multiple block %" PRIu64
                   " (offset %" PRIu64 ") is %s",
                   blknum, (uint64_t) offset,
-                  state == BLOCK_NOT_CACHED ? "not cached" :
-                  state == BLOCK_CLEAN ? "clean" :
-                  state == BLOCK_DIRTY ? "dirty" :
-                  "unknown");
+                  not_cached ? "not cached" : "cached");
 
-  if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
-    unsigned n = blksize, tail = 0;
+  /* Find out how many of the following blocks form a "run" with the
+   * same cached/not-cached state.  We can process that many blocks in
+   * one go.
+   */
+  for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+    bool s =
+      bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+    if (not_cached != s)
+      break;
+  }
+
+  if (not_cached) {             /* Read underlying plugin. */
+    unsigned n, tail = 0;
+
+    assert (blksize * runblocks <= UINT_MAX);
+    n = blksize * runblocks;
 
     if (offset + n > size) {
       tail = offset + n - size;
@@ -228,32 +243,44 @@ blk_read (nbdkit_next *next,
      */
     memset (block + n, 0, tail);
 
-    /* If cache-on-read, copy the block to the cache. */
+    /* If cache-on-read, copy the blocks to the cache. */
     if (cache_on_read) {
       if (cache_debug_verbose)
         nbdkit_debug ("cache: cache-on-read block %" PRIu64
                       " (offset %" PRIu64 ")",
                       blknum, (uint64_t) offset);
 
-      if (pwrite (fd, block, blksize, offset) == -1) {
+      if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
         *err = errno;
         nbdkit_error ("pwrite: %m");
         return -1;
       }
-      bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
-      lru_set_recently_accessed (blknum);
+      for (b = 0; b < runblocks; ++b) {
+        bitmap_set_blk (&bm, blknum + b, BLOCK_CLEAN);
+        lru_set_recently_accessed (blknum + b);
+      }
     }
-    return 0;
   }
   else {                        /* Read cache. */
-    if (pread (fd, block, blksize, offset) == -1) {
+    if (pread (fd, block, blksize * runblocks, offset) == -1) {
       *err = errno;
       nbdkit_error ("pread: %m");
       return -1;
     }
-    lru_set_recently_accessed (blknum);
-    return 0;
+    for (b = 0; b < runblocks; ++b)
+      lru_set_recently_accessed (blknum + b);
   }
+
+  /* If all done, return. */
+  if (runblocks == nrblocks)
+    return 0;
+
+  /* Recurse to read remaining blocks. */
+  return _blk_read_multiple (next,
+                             blknum + runblocks,
+                             nrblocks - runblocks,
+                             block + blksize * runblocks,
+                             err);
 }
 
 int
@@ -261,15 +288,15 @@ blk_read_multiple (nbdkit_next *next,
                    uint64_t blknum, uint64_t nrblocks,
                    uint8_t *block, int *err)
 {
-  while (nrblocks > 0) {
-    if (blk_read (next, blknum, block, err) == -1)
-      return -1;
-    blknum++;
-    nrblocks--;
-    block += blksize;
-  }
+  reclaim (fd, &bm);
+  return _blk_read_multiple (next, blknum, nrblocks, block, err);
+}
 
-  return 0;
+int
+blk_read (nbdkit_next *next,
+          uint64_t blknum, uint8_t *block, int *err)
+{
+  return blk_read_multiple (next, blknum, 1, block, err);
 }
 
 int
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 9d6e3e67..9e6c8879 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -79,6 +79,7 @@
 #include <inttypes.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <errno.h>
 #include <sys/types.h>
 
@@ -227,29 +228,44 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
  * blocks of size ‘blksize’.
  */
 int
-blk_read (nbdkit_next *next,
-          uint64_t blknum, uint8_t *block, int *err)
+blk_read_multiple (nbdkit_next *next,
+                   uint64_t blknum, uint64_t nrblocks,
+                   uint8_t *block, int *err)
 {
   off_t offset = blknum * BLKSIZE;
   enum bm_entry state;
+  uint64_t b, runblocks;
 
-  /* The state might be modified from another thread - for example
-   * another thread might write (BLOCK_NOT_ALLOCATED ->
-   * BLOCK_ALLOCATED) while we are reading from the plugin, returning
-   * the old data.  However a read issued after the write returns
-   * should always return the correct data.
+  /* Find out how many of the following blocks form a "run" with the
+   * same state.  We can process that many blocks in one go.
+   *
+   * About the locking: The state might be modified from another
+   * thread - for example another thread might write
+   * (BLOCK_NOT_ALLOCATED -> BLOCK_ALLOCATED) while we are reading
+   * from the plugin, returning the old data.  However a read issued
+   * after the write returns should always return the correct data.
    */
   {
     ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
     state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
+
+    for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+      enum bm_entry s = bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_ALLOCATED);
+      if (state != s)
+        break;
+    }
   }
 
   if (cow_debug_verbose)
-    nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
+    nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
+                  " (offset %" PRIu64 ") is %s",
                   blknum, (uint64_t) offset, state_to_string (state));
 
   if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
-    unsigned n = BLKSIZE, tail = 0;
+    unsigned n, tail = 0;
+
+    assert (BLKSIZE * runblocks <= UINT_MAX);
+    n = BLKSIZE * runblocks;
 
     if (offset + n > size) {
       tail = offset + n - size;
@@ -264,36 +280,35 @@ blk_read (nbdkit_next *next,
      * zeroing the tail.
      */
     memset (block + n, 0, tail);
-    return 0;
   }
   else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
-    if (pread (fd, block, BLKSIZE, offset) == -1) {
+    if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
       *err = errno;
       nbdkit_error ("pread: %m");
       return -1;
     }
-    return 0;
   }
   else /* state == BLOCK_TRIMMED */ {
-    memset (block, 0, BLKSIZE);
+    memset (block, 0, BLKSIZE * runblocks);
+  }
+
+  /* If all done, return. */
+  if (runblocks == nrblocks)
     return 0;
-  }
+
+  /* Recurse to read remaining blocks. */
+  return blk_read_multiple (next,
+                            blknum + runblocks,
+                            nrblocks - runblocks,
+                            block + BLKSIZE * runblocks,
+                            err);
 }
 
 int
-blk_read_multiple (nbdkit_next *next,
-                   uint64_t blknum, uint64_t nrblocks,
-                   uint8_t *block, int *err)
+blk_read (nbdkit_next *next,
+          uint64_t blknum, uint8_t *block, int *err)
 {
-  while (nrblocks > 0) {
-    if (blk_read (next, blknum, block, err) == -1)
-      return -1;
-    blknum++;
-    nrblocks--;
-    block += BLKSIZE;
-  }
-
-  return 0;
+  return blk_read_multiple (next, blknum, 1, block, err);
 }
 
 int
-- 
2.32.0




More information about the Libguestfs mailing list