[Libguestfs] [PATCH libnbd v3 1/2] copy: Do not use trim for zeroing

Nir Soffer nirsof at gmail.com
Mon Feb 22 20:58:51 UTC 2021


Current code uses NBD_CMD_TRIM for writing zeroes without allocating
space (--allocate not specified). This incorrect and can lead to
corrupting the target image.

NBD manual warns about NBD_CMD_TRIM:

    After issuing this command, a client MUST NOT make any assumptions
    about the contents of the export affected by this command, until
    overwriting it again with NBD_CMD_WRITE or NBD_CMD_WRITE_ZEROES

Add allocate flag to the zero operation, to allow zeroing by punching
hole (default) or zeroing by allocating space.

nbd-ops use NBD_CMD_WRITE_ZEROES with or without NBD_FLAG_NO_HOLE
flag. file-ops use fallocate with PUNCH_HOLE or ZERO_RANGE flag.

The copy-sparse tests was modified to require zero operation with
may_trim flag.

Since trimming is not used now, remove synch_trim, asynch_trim, can_trim
and update the comments.

Signed-off-by: Nir Soffer <nsoffer at redhat.com>
---
 copy/copy-sparse.sh         | 66 +++++++++++++--------------
 copy/file-ops.c             | 90 ++++++++++++++++++-------------------
 copy/multi-thread-copying.c | 24 ++++------
 copy/nbd-ops.c              | 51 +++------------------
 copy/nbdcopy.h              | 20 +++------
 copy/null-ops.c             | 28 ++----------
 copy/pipe-ops.c             | 12 +++--
 copy/synch-copying.c        |  8 ++--
 8 files changed, 111 insertions(+), 188 deletions(-)

diff --git a/copy/copy-sparse.sh b/copy/copy-sparse.sh
index 846767b..c43b41a 100755
--- a/copy/copy-sparse.sh
+++ b/copy/copy-sparse.sh
@@ -33,7 +33,7 @@ cleanup_fn rm -f $out
 
 # Copy from a sparse data disk to an nbdkit-eval-plugin instance which
 # is logging everything.  This allows us to see exactly what nbdcopy
-# is writing, to ensure it is writing and trimming the target as
+# is writing, to ensure it is writing and zeroing the target as
 # expected.
 $VG nbdcopy -S 0 -- \
     [ nbdkit --exit-with-parent data data='
@@ -60,38 +60,38 @@ if [ "$(cat $out)" != "pwrite 1 4294967296
 pwrite 32768 0
 pwrite 32768 1073709056
 pwrite 32768 4294934528
-trim 134184960 32768
-trim 134184960 4160749568
-trim 134184960 939524096
-trim 134217728 1073741824
-trim 134217728 1207959552
-trim 134217728 134217728
-trim 134217728 1342177280
-trim 134217728 1476395008
-trim 134217728 1610612736
-trim 134217728 1744830464
-trim 134217728 1879048192
-trim 134217728 2013265920
-trim 134217728 2147483648
-trim 134217728 2281701376
-trim 134217728 2415919104
-trim 134217728 2550136832
-trim 134217728 268435456
-trim 134217728 2684354560
-trim 134217728 2818572288
-trim 134217728 2952790016
-trim 134217728 3087007744
-trim 134217728 3221225472
-trim 134217728 3355443200
-trim 134217728 3489660928
-trim 134217728 3623878656
-trim 134217728 3758096384
-trim 134217728 3892314112
-trim 134217728 402653184
-trim 134217728 4026531840
-trim 134217728 536870912
-trim 134217728 671088640
-trim 134217728 805306368" ]; then
+zero 134184960 32768 may_trim
+zero 134184960 4160749568 may_trim
+zero 134184960 939524096 may_trim
+zero 134217728 1073741824 may_trim
+zero 134217728 1207959552 may_trim
+zero 134217728 1342177280 may_trim
+zero 134217728 134217728 may_trim
+zero 134217728 1476395008 may_trim
+zero 134217728 1610612736 may_trim
+zero 134217728 1744830464 may_trim
+zero 134217728 1879048192 may_trim
+zero 134217728 2013265920 may_trim
+zero 134217728 2147483648 may_trim
+zero 134217728 2281701376 may_trim
+zero 134217728 2415919104 may_trim
+zero 134217728 2550136832 may_trim
+zero 134217728 2684354560 may_trim
+zero 134217728 268435456 may_trim
+zero 134217728 2818572288 may_trim
+zero 134217728 2952790016 may_trim
+zero 134217728 3087007744 may_trim
+zero 134217728 3221225472 may_trim
+zero 134217728 3355443200 may_trim
+zero 134217728 3489660928 may_trim
+zero 134217728 3623878656 may_trim
+zero 134217728 3758096384 may_trim
+zero 134217728 3892314112 may_trim
+zero 134217728 4026531840 may_trim
+zero 134217728 402653184 may_trim
+zero 134217728 536870912 may_trim
+zero 134217728 671088640 may_trim
+zero 134217728 805306368 may_trim" ]; then
     echo "$0: output does not match expected"
     exit 1
 fi
diff --git a/copy/file-ops.c b/copy/file-ops.c
index 2860764..1ae1cfc 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -221,11 +221,9 @@ file_synch_write (struct rw *rw,
 }
 
 static bool
-file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
+file_punch_hole(int fd, uint64_t offset, uint64_t count)
 {
 #ifdef FALLOC_FL_PUNCH_HOLE
-  struct rw_file *rwf = (struct rw_file *)rw;
-  int fd = rwf->fd;
   int r;
 
   r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
@@ -235,42 +233,58 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
     exit (EXIT_FAILURE);
   }
   return true;
-#else /* !FALLOC_FL_PUNCH_HOLE */
-  return false;
 #endif
+  return false;
 }
 
 static bool
-file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+file_zero_range(int fd, uint64_t offset, uint64_t count)
 {
-  struct rw_file *rwf = (struct rw_file *)rw;
-
-  if (! rwf->is_block) {
 #ifdef FALLOC_FL_ZERO_RANGE
-    int fd = rwf->fd;
-    int r;
+  int r;
 
-    r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
-    if (r == -1) {
-      perror ("fallocate: FALLOC_FL_ZERO_RANGE");
-      exit (EXIT_FAILURE);
-    }
-    return true;
-#endif
+  r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
+  if (r == -1) {
+    perror ("fallocate: FALLOC_FL_ZERO_RANGE");
+    exit (EXIT_FAILURE);
   }
-  else if (rwf->is_block && IS_ALIGNED (offset | count, rwf->sector_size)) {
+  return true;
+#endif
+  return false;
+}
+
+static bool
+file_zeroout(int fd, uint64_t offset, uint64_t count)
+{
 #ifdef BLKZEROOUT
-    int fd = rwf->fd;
-    int r;
-    uint64_t range[2] = {offset, count};
+  int r;
+  uint64_t range[2] = {offset, count};
 
-    r = ioctl (fd, BLKZEROOUT, &range);
-    if (r == -1) {
-      perror ("ioctl: BLKZEROOUT");
-      exit (EXIT_FAILURE);
-    }
-    return true;
+  r = ioctl (fd, BLKZEROOUT, &range);
+  if (r == -1) {
+    perror ("ioctl: BLKZEROOUT");
+    exit (EXIT_FAILURE);
+  }
+  return true;
 #endif
+    return false;
+}
+
+static bool
+file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
+{
+  struct rw_file *rwf = (struct rw_file *)rw;
+
+  if (!rwf->is_block) {
+    if (allocate) {
+        return file_zero_range (rwf->fd, offset, count);
+    } else {
+        return file_punch_hole (rwf->fd, offset, count);
+    }
+  }
+  else if (IS_ALIGNED (offset | count, rwf->sector_size)) {
+    /* Always allocate, discard and gurantee zeroing. */
+    return file_zeroout (rwf->fd, offset, count);
   }
 
   return false;
@@ -304,25 +318,11 @@ file_asynch_write (struct rw *rw,
   }
 }
 
-static bool
-file_asynch_trim (struct rw *rw, struct command *command,
-                  nbd_completion_callback cb)
-{
-  if (!file_synch_trim (rw, command->offset, command->slice.len))
-    return false;
-  errno = 0;
-  if (cb.callback (cb.user_data, &errno) == -1) {
-    perror (rw->name);
-    exit (EXIT_FAILURE);
-  }
-  return true;
-}
-
 static bool
 file_asynch_zero (struct rw *rw, struct command *command,
-                  nbd_completion_callback cb)
+                  nbd_completion_callback cb, bool allocate)
 {
-  if (!file_synch_zero (rw, command->offset, command->slice.len))
+  if (!file_synch_zero (rw, command->offset, command->slice.len, allocate))
     return false;
   errno = 0;
   if (cb.callback (cb.user_data, &errno) == -1) {
@@ -437,11 +437,9 @@ static struct rw_ops file_ops = {
   .flush = file_flush,
   .synch_read = file_synch_read,
   .synch_write = file_synch_write,
-  .synch_trim = file_synch_trim,
   .synch_zero = file_synch_zero,
   .asynch_read = file_asynch_read,
   .asynch_write = file_asynch_write,
-  .asynch_trim = file_asynch_trim,
   .asynch_zero = file_asynch_zero,
   .in_flight = file_in_flight,
   .get_polling_fd = get_polling_fd_not_supported,
diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c
index 4f57054..c36a165 100644
--- a/copy/multi-thread-copying.c
+++ b/copy/multi-thread-copying.c
@@ -160,8 +160,8 @@ worker_thread (void *indexp)
       size_t len;
 
       if (exts.ptr[i].zero) {
-        /* The source is zero so we can proceed directly to
-         * skipping, trimming or writing zeroes at the destination.
+        /* The source is zero so we can proceed directly to skipping,
+         * fast zeroing, or writing zeroes at the destination.
          */
         command = calloc (1, sizeof *command);
         if (command == NULL) {
@@ -487,11 +487,12 @@ finished_read (void *vp, int *error)
  * --destination-is-zero:
  *                 do nothing
  *
- * --allocated:    we must write zeroes either using an efficient
+ * --allocated:    write zeroes allocating space using an efficient
  *                 zeroing command or writing a command of zeroes
  *
- * (neither flag)  try trimming if supported, else write zeroes
- *                 as above
+ * (neither flag)  write zeroes punching a hole using an efficient
+ *                 zeroing command or fallback to writing a command
+ *                 of zeroes.
  *
  * This takes over ownership of the command and frees it eventually.
  */
@@ -503,22 +504,13 @@ fill_dst_range_with_zeroes (struct command *command)
   if (destination_is_zero)
     goto free_and_return;
 
-  if (!allocated) {
-    /* Try trimming. */
-    if (dst->ops->asynch_trim (dst, command,
-                               (nbd_completion_callback) {
-                                 .callback = free_command,
-                                 .user_data = command,
-                               }))
-      return;
-  }
-
   /* Try efficient zeroing. */
   if (dst->ops->asynch_zero (dst, command,
                              (nbd_completion_callback) {
                                .callback = free_command,
                                .user_data = command,
-                             }))
+                             },
+                             allocated))
     return;
 
   /* Fall back to loop writing zeroes.  This is going to be slow
diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
index 0f52587..05b7b21 100644
--- a/copy/nbd-ops.c
+++ b/copy/nbd-ops.c
@@ -45,7 +45,7 @@ struct rw_nbd {
   direction d;
 
   handles handles;              /* One handle per connection. */
-  bool can_trim, can_zero;      /* Cached nbd_can_trim, nbd_can_zero. */
+  bool can_zero;                /* Cached nbd_can_zero. */
 };
 
 static void
@@ -91,7 +91,6 @@ open_one_nbd_handle (struct rw_nbd *rwn)
    * the same way.
    */
   if (rwn->handles.size == 0) {
-    rwn->can_trim = nbd_can_trim (nbd) > 0;
     rwn->can_zero = nbd_can_zero (nbd) > 0;
     rwn->rw.size = nbd_get_size (nbd);
     if (rwn->rw.size == -1) {
@@ -261,30 +260,16 @@ nbd_ops_synch_write (struct rw *rw,
 }
 
 static bool
-nbd_ops_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
-{
-  struct rw_nbd *rwn = (struct rw_nbd *) rw;
-
-  if (!rwn->can_trim)
-    return false;
-
-  if (nbd_trim (rwn->handles.ptr[0], count, offset, 0) == -1) {
-    fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
-    exit (EXIT_FAILURE);
-  }
-  return true;
-}
-
-static bool
-nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count,
+                    bool allocate)
 {
   struct rw_nbd *rwn = (struct rw_nbd *) rw;
 
   if (!rwn->can_zero)
     return false;
 
-  if (nbd_zero (rwn->handles.ptr[0],
-                count, offset, LIBNBD_CMD_FLAG_NO_HOLE) == -1) {
+  if (nbd_zero (rwn->handles.ptr[0], count, offset,
+                allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) {
     fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
     exit (EXIT_FAILURE);
   }
@@ -323,29 +308,9 @@ nbd_ops_asynch_write (struct rw *rw,
   }
 }
 
-static bool
-nbd_ops_asynch_trim (struct rw *rw, struct command *command,
-                     nbd_completion_callback cb)
-{
-  struct rw_nbd *rwn = (struct rw_nbd *) rw;
-
-  if (!rwn->can_trim)
-    return false;
-
-  assert (command->slice.len <= UINT32_MAX);
-
-  if (nbd_aio_trim (rwn->handles.ptr[command->index],
-                    command->slice.len, command->offset,
-                    cb, 0) == -1) {
-    fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
-    exit (EXIT_FAILURE);
-  }
-  return true;
-}
-
 static bool
 nbd_ops_asynch_zero (struct rw *rw, struct command *command,
-                     nbd_completion_callback cb)
+                     nbd_completion_callback cb, bool allocate)
 {
   struct rw_nbd *rwn = (struct rw_nbd *) rw;
 
@@ -356,7 +321,7 @@ nbd_ops_asynch_zero (struct rw *rw, struct command *command,
 
   if (nbd_aio_zero (rwn->handles.ptr[command->index],
                     command->slice.len, command->offset,
-                    cb, LIBNBD_CMD_FLAG_NO_HOLE) == -1) {
+                    cb, allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) {
     fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
     exit (EXIT_FAILURE);
   }
@@ -536,11 +501,9 @@ static struct rw_ops nbd_ops = {
   .flush = nbd_ops_flush,
   .synch_read = nbd_ops_synch_read,
   .synch_write = nbd_ops_synch_write,
-  .synch_trim = nbd_ops_synch_trim,
   .synch_zero = nbd_ops_synch_zero,
   .asynch_read = nbd_ops_asynch_read,
   .asynch_write = nbd_ops_asynch_write,
-  .asynch_trim = nbd_ops_asynch_trim,
   .asynch_zero = nbd_ops_asynch_zero,
   .in_flight = nbd_ops_in_flight,
   .get_polling_fd = nbd_ops_get_polling_fd,
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index 2db0383..4496722 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -79,11 +79,11 @@ struct slice {
 
 /* Commands for asynchronous operations in flight.
  *
- * We don't store the command type (read/write/trim/etc) because it is
+ * We don't store the command type (read/write/zero/etc) because it is
  * implicit in the function being called and because commands
- * naturally change from read -> write/trim/etc as they progress.
+ * naturally change from read -> write/zero/etc as they progress.
  *
- * slice.buffer may be NULL for commands (like trim) that have no
+ * slice.buffer may be NULL for commands (like zero) that have no
  * associated data.
  *
  * A separate set of commands, slices and buffers is maintained per
@@ -151,11 +151,9 @@ struct rw_ops {
   void (*synch_write) (struct rw *rw,
                        const void *data, size_t len, uint64_t offset);
 
-  /* Synchronously trim.  If not possible, returns false. */
-  bool (*synch_trim) (struct rw *rw, uint64_t offset, uint64_t count);
-
   /* Synchronously zero.  If not possible, returns false. */
-  bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count);
+  bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count,
+                      bool allocate);
 
   /* Asynchronous I/O operations.  These start the operation and call
    * 'cb' on completion.
@@ -173,17 +171,11 @@ struct rw_ops {
                         struct command *command,
                         nbd_completion_callback cb);
 
-  /* Asynchronously trim.  command->slice.buffer is not used.  If not
-   * possible, returns false.
-   */
-  bool (*asynch_trim) (struct rw *rw, struct command *command,
-                       nbd_completion_callback cb);
-
   /* Asynchronously zero.  command->slice.buffer is not used.  If not possible,
    * returns false.
    */
   bool (*asynch_zero) (struct rw *rw, struct command *command,
-                       nbd_completion_callback cb);
+                       nbd_completion_callback cb, bool allocate);
 
   /* Number of asynchronous commands in flight for a particular thread. */
   unsigned (*in_flight) (struct rw *rw, uintptr_t index);
diff --git a/copy/null-ops.c b/copy/null-ops.c
index b75ca1f..a38666d 100644
--- a/copy/null-ops.c
+++ b/copy/null-ops.c
@@ -26,8 +26,8 @@
 #include "nbdcopy.h"
 
 /* This sinks writes and aborts on any read-like operations.  It
- * should be faster than using /dev/null because it "supports" trim
- * and fast zeroing.
+ * should be faster than using /dev/null because it "supports" fast
+ * zeroing.
  */
 
 static struct rw_ops null_ops;
@@ -99,13 +99,7 @@ null_synch_write (struct rw *rw,
 }
 
 static bool
-null_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
-{
-  return true;
-}
-
-static bool
-null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
 {
   return true;
 }
@@ -130,21 +124,9 @@ null_asynch_write (struct rw *rw,
   }
 }
 
-static bool
-null_asynch_trim (struct rw *rw, struct command *command,
-                  nbd_completion_callback cb)
-{
-  errno = 0;
-  if (cb.callback (cb.user_data, &errno) == -1) {
-    perror (rw->name);
-    exit (EXIT_FAILURE);
-  }
-  return true;
-}
-
 static bool
 null_asynch_zero (struct rw *rw, struct command *command,
-                  nbd_completion_callback cb)
+                  nbd_completion_callback cb, bool allocate)
 {
   errno = 0;
   if (cb.callback (cb.user_data, &errno) == -1) {
@@ -178,11 +160,9 @@ static struct rw_ops null_ops = {
   .flush = null_flush,
   .synch_read = null_synch_read,
   .synch_write = null_synch_write,
-  .synch_trim = null_synch_trim,
   .synch_zero = null_synch_zero,
   .asynch_read = null_asynch_read,
   .asynch_write = null_asynch_write,
-  .asynch_trim = null_asynch_trim,
   .asynch_zero = null_asynch_zero,
   .in_flight = null_in_flight,
   .get_polling_fd = get_polling_fd_not_supported,
diff --git a/copy/pipe-ops.c b/copy/pipe-ops.c
index 08f7211..f9b8599 100644
--- a/copy/pipe-ops.c
+++ b/copy/pipe-ops.c
@@ -125,7 +125,7 @@ pipe_synch_write (struct rw *rw,
 }
 
 static bool
-pipe_synch_trim_zero (struct rw *rw, uint64_t offset, uint64_t count)
+pipe_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
 {
   return false; /* not supported by pipes */
 }
@@ -147,8 +147,8 @@ pipe_asynch_write (struct rw *rw,
 }
 
 static bool
-pipe_asynch_trim_zero (struct rw *rw, struct command *command,
-                       nbd_completion_callback cb)
+pipe_asynch_zero (struct rw *rw, struct command *command,
+                       nbd_completion_callback cb, bool allocate)
 {
   return false; /* not supported by pipes */
 }
@@ -173,8 +173,7 @@ static struct rw_ops pipe_ops = {
 
   .synch_read = pipe_synch_read,
   .synch_write = pipe_synch_write,
-  .synch_trim = pipe_synch_trim_zero,
-  .synch_zero = pipe_synch_trim_zero,
+  .synch_zero = pipe_synch_zero,
 
   /* Asynch pipe read/write operations are not defined.  These should
    * never be called because pipes/streams/sockets force synchronous
@@ -185,8 +184,7 @@ static struct rw_ops pipe_ops = {
   .asynch_read = pipe_asynch_read,
   .asynch_write = pipe_asynch_write,
 
-  .asynch_trim = pipe_asynch_trim_zero,
-  .asynch_zero = pipe_asynch_trim_zero,
+  .asynch_zero = pipe_asynch_zero,
   .in_flight = pipe_in_flight,
 
   .get_polling_fd = get_polling_fd_not_supported,
diff --git a/copy/synch-copying.c b/copy/synch-copying.c
index 985f005..17bda16 100644
--- a/copy/synch-copying.c
+++ b/copy/synch-copying.c
@@ -69,10 +69,10 @@ synch_copying (void)
         assert (exts.ptr[i].length <= count);
 
         if (exts.ptr[i].zero) {
-          if (!dst->ops->synch_trim (dst, offset, exts.ptr[i].length) &&
-              !dst->ops->synch_zero (dst, offset, exts.ptr[i].length)) {
-            /* If neither trimming nor efficient zeroing are possible,
-             * write zeroes the hard way.
+          if (!dst->ops->synch_zero (dst, offset, exts.ptr[i].length, false) &&
+              !dst->ops->synch_zero (dst, offset, exts.ptr[i].length, true)) {
+            /* If efficient zeroing (punching a hole or allocating
+             * space) are possible, write zeroes the hard way.
              */
             memset (buf, 0, exts.ptr[i].length);
             dst->ops->synch_write (dst, buf, exts.ptr[i].length, offset);
-- 
2.26.2




More information about the Libguestfs mailing list