[Libguestfs] [PATCH libnbd v2 3/3] copy/file-ops.c: Port zero strategy from nbdkit

Nir Soffer nirsof at gmail.com
Mon Feb 22 19:34:23 UTC 2021


Port the zero strategy from nbdkit file plugin, improving the
reliability and compatibility with block devices on modern kernels.

Local rw now has capability flags: can_punch_hole, can_zero_range,
can_fallocate, and can_zeroout. The flags are initialized based on the
type of the file descriptor and compile-time checks:

- For a regular file, we enable can_punch_hole, can_zero_range, and
  can_fallocate.
- For a block device, we enable can_punch_hole, can_zero_range, and
  can_zeroout.
- For pipes and sockets, we don't enable anything.

When calling zero() for the first time, we try the following methods,
returning on the first success:

- If we don't need to allocate, try to punch a hole.
- Try to zero the range
- Try to combine punching a hole and fallocate
- Try BLKZEROOUT ioctl

If a method is not supported, we disable the capability flag, so the
next call can try only what works.

The fallocate and ioctl wrappers return false when the call is not
supported by the underlying storage so we can disable the capability.
Previously the process would exit with an error.

Signed-off-by: Nir Soffer <nsoffer at redhat.com>
---
 copy/file-ops.c | 103 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 95 insertions(+), 8 deletions(-)

diff --git a/copy/file-ops.c b/copy/file-ops.c
index d22273a..970e81e 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -44,6 +44,12 @@ struct rw_file {
   bool is_block;
   bool seek_hole_supported;
   int sector_size;
+
+  /* We try to use the most efficient zeroing first. If an efficient zero
+   * method is not available, we disable the flag so next time we use
+   * the working method.
+   */
+  bool can_punch_hole, can_zero_range, can_fallocate, can_zeroout;
 };
 
 static bool
@@ -84,12 +90,30 @@ file_create (const char *name, int fd, off_t st_size, bool is_block)
 #ifdef BLKSSZGET
     if (ioctl (fd, BLKSSZGET, &rwf->sector_size))
       fprintf (stderr, "warning: cannot get sector size: %s: %m", name);
+#endif
+    /* Possible efficient zero methods for block device. */
+#ifdef FALLOC_FL_PUNCH_HOLE
+    rwf->can_punch_hole = true;
+#endif
+#ifdef FALLOC_FL_ZERO_RANGE
+    rwf->can_zero_range = true;
+#endif
+#ifdef BLKZEROOUT
+    rwf->can_zeroout = true;
 #endif
   }
   else {
     /* Regular file. */
     rwf->rw.size = st_size;
     rwf->seek_hole_supported = seek_hole_supported (fd);
+    /* Possible efficient zero methods for regular file. */
+#ifdef FALLOC_FL_PUNCH_HOLE
+    rwf->can_punch_hole = true;
+#endif
+#ifdef FALLOC_FL_ZERO_RANGE
+    rwf->can_zero_range = true;
+#endif
+    rwf->can_fallocate = true;
   }
 
   return &rwf->rw;
@@ -220,6 +244,12 @@ file_synch_write (struct rw *rw,
   }
 }
 
+static inline bool
+is_not_supported (int err)
+{
+  return err == ENOTSUP || err == EOPNOTSUPP;
+}
+
 static bool
 file_punch_hole(int fd, uint64_t offset, uint64_t count)
 {
@@ -229,6 +259,9 @@ file_punch_hole(int fd, uint64_t offset, uint64_t count)
   r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  offset, count);
   if (r == -1) {
+    if (is_not_supported (errno))
+      return false;
+
     perror ("fallocate: FALLOC_FL_PUNCH_HOLE");
     exit (EXIT_FAILURE);
   }
@@ -245,6 +278,9 @@ file_zero_range(int fd, uint64_t offset, uint64_t count)
 
   r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
   if (r == -1) {
+    if (is_not_supported (errno))
+      return false;
+
     perror ("fallocate: FALLOC_FL_ZERO_RANGE");
     exit (EXIT_FAILURE);
   }
@@ -262,6 +298,9 @@ file_zeroout(int fd, uint64_t offset, uint64_t count)
 
   r = ioctl (fd, BLKZEROOUT, &range);
   if (r == -1) {
+    if (errno == ENOTTY)
+      return false;
+
     perror ("ioctl: BLKZEROOUT");
     exit (EXIT_FAILURE);
   }
@@ -275,7 +314,14 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
 {
   struct rw_file *rwf = (struct rw_file *)rw;
 
-  return file_punch_hole (rwf->fd, offset, count);
+  if (rwf->can_punch_hole) {
+    if (file_punch_hole (rwf->fd, offset, count))
+      return true;
+
+    rwf->can_punch_hole = false;
+  }
+
+  return false;
 }
 
 static bool
@@ -283,16 +329,57 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
 {
   struct rw_file *rwf = (struct rw_file *)rw;
 
-  if (!rwf->is_block) {
-    if (allocate) {
-        return file_zero_range (rwf->fd, offset, count);
+  /* The first call will try several options, discovering the
+   * capabilities of the underlying storage, and disabling non working
+   * options. The next calls will try only what works.
+   *
+   * If we don't need to allocate try to punch a hole. This works for
+   * both files and block devices with modern kernels.
+   */
+
+  if (!allocate && rwf->can_punch_hole) {
+    if (file_punch_hole (rwf->fd, offset, count))
+      return true;
+
+    rwf->can_punch_hole = false;
+  }
+
+  /* Try to zero the range. This works for both files and block devices
+   * with modern kernels.
+   */
+
+  if (rwf->can_zero_range) {
+    if (file_zero_range (rwf->fd, offset, count))
+      return true;
+
+    rwf->can_zero_range = false;
+  }
+
+  /* If we can punch a hole and fallocate, we can combine both
+   * operations. This is expected to be more efficient than actually
+   * writing zeroes. This works only for files.
+   */
+
+  if (rwf->can_punch_hole && rwf->can_fallocate) {
+    if (file_punch_hole (rwf->fd, offset, count)) {
+      if (fallocate (rwf->fd, 0, offset, count))
+          return true;
+
+      rwf->can_fallocate = false;
     } else {
-        return file_punch_hole (rwf->fd, offset, count);
+      rwf->can_punch_hole = false;
     }
   }
-  else if (IS_ALIGNED (offset | count, rwf->sector_size)) {
-    /* Always allocate, discard and gurantee zeroing. */
-    return file_zeroout (rwf->fd, offset, count);
+
+  /* Finally try BLKZEROOUT. This works only for block device if offset
+   * and count are aligned to device sector size.
+   */
+  else if (rwf->can_zeroout &&
+           IS_ALIGNED (offset | count, rwf->sector_size)) {
+    if (file_zeroout (rwf->fd, offset, count))
+      return true;
+
+    rwf->can_zeroout = false;
   }
 
   return false;
-- 
2.26.2




More information about the Libguestfs mailing list