[Libguestfs] [PATCH] nbdcopy: Speed up copying of raw images

Nir Soffer nirsof at gmail.com
Thu Feb 18 23:32:55 UTC 2021


When exporting a sparse raw image, qemu-nbd reports unallocated areas as
zero:

$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
    --format raw empty-6g.raw --cache=none --aio=native

$ nbdinfo --map nbd+unix:///?socket=/tmp/nbd.sock
         0  6442450944    2  zero

When using a qcow2 image, it reports unallocated areas as a hole:

$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
    --format qcow2 empty-6g.qcow2 --cache=none --aio=native

$ nbdinfo --map nbd+unix:///?socket=/tmp/nbd.sock
         0  6442450944    3  hole,zero
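
For illustration only (not part of this patch), a minimal libnbd client can
dump these flags directly; nbdcopy's add_extent callback in copy/nbd-ops.c
receives the same (length, flags) pairs.  The sketch below only inspects the
first 1 GiB window and uses the socket path from the tests above:

/* extents.c - illustrative sketch: dump base:allocation extents with libnbd.
 * Build (assuming libnbd is installed):
 *   cc extents.c -o extents $(pkg-config --cflags --libs libnbd)
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <libnbd.h>

/* Called with a flat array of (length, flags) pairs for each reply. */
static int
dump_extents (void *user_data, const char *metacontext,
              uint64_t offset, uint32_t *entries, size_t nr_entries,
              int *error)
{
  size_t i;

  for (i = 0; i < nr_entries; i += 2) {
    printf ("offset=%" PRIu64 " length=%" PRIu32 " hole=%d zero=%d\n",
            offset, entries[i],
            (entries[i+1] & LIBNBD_STATE_HOLE) != 0,
            (entries[i+1] & LIBNBD_STATE_ZERO) != 0);
    offset += entries[i];
  }
  return 0;
}

int
main (void)
{
  struct nbd_handle *nbd;
  int64_t size;
  uint64_t count;

  nbd = nbd_create ();
  if (nbd == NULL) goto error;

  /* The metadata context must be requested before connecting. */
  if (nbd_add_meta_context (nbd, LIBNBD_CONTEXT_BASE_ALLOCATION) == -1)
    goto error;
  if (nbd_connect_unix (nbd, "/tmp/nbd.sock") == -1)
    goto error;

  size = nbd_get_size (nbd);
  if (size == -1) goto error;

  /* Only the first 1 GiB window; a real client loops over the whole
   * disk and handles servers that return a partial answer.
   */
  count = size > 1024*1024*1024 ? 1024*1024*1024 : (uint64_t) size;
  if (nbd_block_status (nbd, count, 0,
                        (nbd_extent_callback) { .callback = dump_extents },
                        0) == -1)
    goto error;

  nbd_shutdown (nbd, 0);
  nbd_close (nbd);
  return 0;

 error:
  fprintf (stderr, "%s\n", nbd_get_error ());
  exit (EXIT_FAILURE);
}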

Since nbdcopy ignores the ZERO flag and uses only the HOLE flag, copying
raw images is extremely slow:

$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
  Time (mean ± σ):      1.595 s ±  0.034 s    [User: 2.284 s, System: 3.565 s]
  Range (min … max):    1.522 s …  1.630 s    10 runs

This is 69 times slower than qemu-img:

$ hyperfine -w3 "qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock \
    'json:{\"file.driver\":\"null-co\",\"file.size\":\"6g\"}'"
Benchmark #1: qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock 'json:{"file.driver":"null-co","file.size":"6g"}'
  Time (mean ± σ):      23.1 ms ±   0.5 ms    [User: 6.3 ms, System: 16.5 ms]
  Range (min … max):    22.6 ms …  25.5 ms    124 runs

Using ZERO instead of HOLE, nbdcopy does not read zero extents from the
server, so it can copy this image 165 times faster:

$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
  Time (mean ± σ):       9.8 ms ±   0.8 ms    [User: 6.7 ms, System: 5.2 ms]
  Range (min … max):     9.2 ms …  15.4 ms    287 runs

Real images show a smaller speedup, only 2 times faster:

$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
    --format raw fedora-32.raw --cache=none --aio=native

Before:

$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
  Time (mean ± σ):      1.613 s ±  0.181 s    [User: 1.843 s, System: 2.820 s]
  Range (min … max):    1.407 s …  1.829 s    10 runs

After:

$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
  Time (mean ± σ):     795.5 ms ±  78.7 ms    [User: 198.3 ms, System: 743.1 ms]
  Range (min … max):   743.3 ms … 1012.0 ms    10 runs

For reference, copying the same image with qemu-img:

$ hyperfine -w3 "qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock \
    'json:{\"file.driver\":\"null-co\",\"file.size\":\"6g\"}'"
Benchmark #1: qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock 'json:{"file.driver":"null-co","file.size":"6g"}'
  Time (mean ± σ):      1.046 s ±  0.028 s    [User: 122.3 ms, System: 354.5 ms]
  Range (min … max):    1.026 s …  1.121 s    10 runs

This issue does not exist when copying from a file, since in that case we
detect unallocated areas as holes.
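
For reference, hole detection on the file side is based on lseek(2) with
SEEK_DATA and SEEK_HOLE.  The sketch below only illustrates that technique
on Linux; it is not the actual copy/file-ops.c code:

/* Illustrative sketch: walk a local file and print data/hole extents
 * using SEEK_DATA/SEEK_HOLE (Linux).  Not the actual nbdcopy code.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int
main (int argc, char *argv[])
{
  int fd;
  off_t end, pos = 0, next;

  if (argc != 2) {
    fprintf (stderr, "usage: %s FILE\n", argv[0]);
    exit (EXIT_FAILURE);
  }

  fd = open (argv[1], O_RDONLY);
  if (fd == -1) { perror ("open"); exit (EXIT_FAILURE); }

  end = lseek (fd, 0, SEEK_END);

  while (pos < end) {
    /* Find the next allocated byte at or after pos; everything before
     * it is a hole which reads back as zeroes.
     */
    next = lseek (fd, pos, SEEK_DATA);
    if (next == -1) next = end;                 /* trailing hole */
    if (next > pos)
      printf ("%jd %jd hole\n", (intmax_t) pos, (intmax_t) (next - pos));
    pos = next;
    if (pos >= end) break;

    /* Find the next hole at or after pos; the bytes in between are
     * allocated data which has to be copied.
     */
    next = lseek (fd, pos, SEEK_HOLE);
    printf ("%jd %jd data\n", (intmax_t) pos, (intmax_t) (next - pos));
    pos = next;
  }

  close (fd);
  return 0;
}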

Signed-off-by: Nir Soffer <nsoffer at redhat.com>
---
 copy/file-ops.c             |  4 ++--
 copy/main.c                 |  2 +-
 copy/multi-thread-copying.c |  4 ++--
 copy/nbd-ops.c              | 10 ++++++++--
 copy/nbdcopy.h              |  2 +-
 copy/synch-copying.c        |  2 +-
 6 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/copy/file-ops.c b/copy/file-ops.c
index f61b67e..2a239d0 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -250,7 +250,7 @@ file_get_extents (struct rw *rw, uintptr_t index,
       if (pos > offset) {
         e.offset = offset;
         e.length = pos - offset;
-        e.hole = true;
+        e.zero = true;
         if (extent_list_append (ret, e) == -1) {
           perror ("realloc");
           exit (EXIT_FAILURE);
@@ -271,7 +271,7 @@ file_get_extents (struct rw *rw, uintptr_t index,
       if (pos > offset) {
         e.offset = offset;
         e.length = pos - offset;
-        e.hole = false;
+        e.zero = false;
         if (extent_list_append (ret, e) == -1) {
           perror ("realloc");
           exit (EXIT_FAILURE);
diff --git a/copy/main.c b/copy/main.c
index cfecb32..68a6030 100644
--- a/copy/main.c
+++ b/copy/main.c
@@ -667,7 +667,7 @@ default_get_extents (struct rw *rw, uintptr_t index,
 
   e.offset = offset;
   e.length = count;
-  e.hole = false;
+  e.zero = false;
   if (extent_list_append (ret, e) == -1) {
     perror ("realloc");
     exit (EXIT_FAILURE);
diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c
index 4576119..98b4056 100644
--- a/copy/multi-thread-copying.c
+++ b/copy/multi-thread-copying.c
@@ -157,8 +157,8 @@ worker_thread (void *indexp)
       char *data;
       size_t len;
 
-      if (exts.ptr[i].hole) {
-        /* The source is a hole so we can proceed directly to
+      if (exts.ptr[i].zero) {
+        /* The source is zero so we can proceed directly to
          * skipping, trimming or writing zeroes at the destination.
          */
         command = calloc (1, sizeof *command);
diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
index f7dc37c..0bcf29b 100644
--- a/copy/nbd-ops.c
+++ b/copy/nbd-ops.c
@@ -190,8 +190,14 @@ add_extent (void *vp, const char *metacontext,
 
     e.offset = offset;
     e.length = entries[i];
-    /* Note we deliberately don't care about the ZERO flag. */
-    e.hole = (entries[i+1] & LIBNBD_STATE_HOLE) != 0;
+
+    /*
+     * Note we deliberately don't care about the HOLE flag.  There is no
+     * need to read an extent that reads as zeroes.  We will convert it
+     * to a hole or an allocated extent based on the command line arguments.
+     */
+    e.zero = (entries[i+1] & LIBNBD_STATE_ZERO) != 0;
+
     if (extent_list_append (ret, e) == -1) {
       perror ("realloc");
       exit (EXIT_FAILURE);
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index f586fc5..69fac2a 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -100,7 +100,7 @@ struct command {
 struct extent {
   uint64_t offset;
   uint64_t length;
-  bool hole;
+  bool zero;
 };
 DEFINE_VECTOR_TYPE(extent_list, struct extent);
 
diff --git a/copy/synch-copying.c b/copy/synch-copying.c
index 043893f..2712c10 100644
--- a/copy/synch-copying.c
+++ b/copy/synch-copying.c
@@ -68,7 +68,7 @@ synch_copying (void)
       for (i = 0; i < exts.size; ++i) {
         assert (exts.ptr[i].length <= count);
 
-        if (exts.ptr[i].hole) {
+        if (exts.ptr[i].zero) {
           if (!dst.ops->synch_trim (&dst, offset, exts.ptr[i].length) &&
               !dst.ops->synch_zero (&dst, offset, exts.ptr[i].length)) {
             /* If neither trimming nor efficient zeroing are possible,
-- 
2.26.2
