[Libguestfs] [PATCH] file: Zero support for block devices and NFS 4.2

Nir Soffer nsoffer at redhat.com
Thu Aug 2 19:09:18 UTC 2018


Oops, please ignore this; it was already sent and reviewed here:
https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html

The patch was hiding in my tree and selected by a careless glob :-)

On Thu, Aug 2, 2018 at 10:06 PM Nir Soffer <nirsof at gmail.com> wrote:

> If we may not trim, we tried ZERO_RANGE, but this is not well supported
> yet; for example, it is not available on NFS 4.2. ZERO_RANGE and
> PUNCH_HOLE are supported now on block devices, but not on RHEL 7, so we
> fall back to slow manual zeroing there.
>
> Change the logic to support block devices on RHEL 7, and file systems
> that do not support ZERO_RANGE.
>
> The new logic:
> - If we may trim, try PUNCH_HOLE
> - If we can zero range, try ZERO_RANGE
> - If we can punch hole and fallocate, try fallocate(PUNCH_HOLE) followed
>   by fallocate(0).
> - If underlying file is a block device, try ioctl(BLKZEROOUT)
> - Otherwise fall back to manual zeroing
>
> The handle now keeps the underlying file capabilities, so once we
> discover that an operation is not supported, we never try it again.
>
> Here are example runs on a server based on Intel(R) Xeon(R) CPU E5-2630
> v4 @ 2.20GHz, using XtremIO storage via 4G FC HBA and 4 paths to
> storage.
>
> $ export SOCK=/tmp/nbd.sock
> $ export
> BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15
>
> $ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U
> $SOCK
>
> $ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
> nbd:unix:$SOCK
>
> real    0m2.741s
> user    0m0.224s
> sys     0m0.634s
>
> $ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
> nbd:unix:$SOCK
>
> real    0m1.920s
> user    0m0.163s
> sys     0m0.735s
>
> Issues:
> - ioctl(BLKZEROOUT) will fail if offset or count are not aligned to
>   logical sector size. I'm not sure if nbdkit or qemu-img ensure this.
> - Need testing with NFS
> ---
>  plugins/file/file.c | 126 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 103 insertions(+), 23 deletions(-)
>
> diff --git a/plugins/file/file.c b/plugins/file/file.c
> index fb20622..bce2ed1 100644
> --- a/plugins/file/file.c
> +++ b/plugins/file/file.c
> @@ -33,6 +33,7 @@
>
>  #include <config.h>
>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -42,6 +43,8 @@
>  #include <sys/stat.h>
>  #include <errno.h>
>  #include <linux/falloc.h>   /* For FALLOC_FL_* on RHEL, glibc < 2.18 */
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
>
>  #include <nbdkit-plugin.h>
>
> @@ -116,6 +119,10 @@ file_config_complete (void)
>  /* The per-connection handle. */
>  struct handle {
>    int fd;
> +  bool is_block_device;
> +  bool can_punch_hole;
> +  bool can_zero_range;
> +  bool can_fallocate;
>  };
>
>  /* Create the per-connection handle. */
> @@ -123,6 +130,7 @@ static void *
>  file_open (int readonly)
>  {
>    struct handle *h;
> +  struct stat statbuf;
>    int flags;
>
>    h = malloc (sizeof *h);
> @@ -144,6 +152,23 @@ file_open (int readonly)
>      return NULL;
>    }
>
> +  if (fstat (h->fd, &statbuf) == -1) {
> +    nbdkit_error ("fstat: %s: %m", filename);
> +    free (h);
> +    return NULL;
> +  }
> +
> +  h->is_block_device = S_ISBLK(statbuf.st_mode);
> +
> +  /* These flags will disabled if an operation is not supported. */
> +#ifdef FALLOC_FL_PUNCH_HOLE
> +  h->can_punch_hole = true;
> +#endif
> +#ifdef FALLOC_FL_ZERO_RANGE
> +  h->can_zero_range = true;
> +#endif
> +  h->can_fallocate = true;
> +
>    return h;
>  }
>
> @@ -164,27 +189,29 @@ static int64_t
>  file_get_size (void *handle)
>  {
>    struct handle *h = handle;
> -  struct stat statbuf;
>
> -  if (fstat (h->fd, &statbuf) == -1) {
> -    nbdkit_error ("stat: %m");
> -    return -1;
> -  }
> -
> -  if (S_ISBLK (statbuf.st_mode)) {
> +  if (h->is_block_device) {
> +    /* Block device, so st_size will not be the true size. */
>      off_t size;
>
> -    /* Block device, so st_size will not be the true size. */
>      size = lseek (h->fd, 0, SEEK_END);
>      if (size == -1) {
>        nbdkit_error ("lseek (to find device size): %m");
>        return -1;
>      }
> +
>      return size;
> -  }
> +  } else {
> +    /* Regular file. */
> +    struct stat statbuf;
> +
> +    if (fstat (h->fd, &statbuf) == -1) {
> +      nbdkit_error ("fstat: %m");
> +      return -1;
> +    }
>
> -  /* Else regular file. */
> -  return statbuf.st_size;
> +    return statbuf.st_size;
> +  }
>  }
>
>  static int
> @@ -250,33 +277,86 @@ file_pwrite (void *handle, const void *buf, uint32_t
> count, uint64_t offset)
>  static int
>  file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
>  {
> -#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
>    struct handle *h = handle;
> -#endif
>    int r = -1;
>
>  #ifdef FALLOC_FL_PUNCH_HOLE
> -  if (may_trim) {
> +  /* If we can and may trim, punching hole is our best option. */
> +  if (h->can_punch_hole && may_trim) {
>      r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>                        offset, count);
> -    if (r == -1 && errno != EOPNOTSUPP) {
> +    if (r == 0)
> +        return 0;
> +
> +    if (errno != EOPNOTSUPP) {
>        nbdkit_error ("zero: %m");
> +      return r;
>      }
> -    /* PUNCH_HOLE is older; if it is not supported, it is likely that
> -       ZERO_RANGE will not work either, so fall back to write. */
> -    return r;
> +
> +    h->can_punch_hole = false;
>    }
>  #endif
>
>  #ifdef FALLOC_FL_ZERO_RANGE
> -  r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
> -  if (r == -1 && errno != EOPNOTSUPP) {
> -    nbdkit_error ("zero: %m");
> +  /* ZERO_RANGE is not well supported yet, but it the next best option. */
> +  if (h->can_zero_range) {
> +    r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
> +    if (r == 0)
> +      return 0;
> +
> +    if (errno != EOPNOTSUPP) {
> +      nbdkit_error ("zero: %m");
> +      return r;
> +    }
> +
> +    h->can_zero_range = false;
>    }
> -#else
> +#endif
> +
> +#ifdef FALLOC_FL_PUNCH_HOLE
> +  /* If we can punch hole but may not trim, we can combine punching hole
> and
> +     fallocate to zero a range. This is much more efficient than writing
> zeros
> +     manually. */
> +  if (h->can_punch_hole && h->can_fallocate) {
> +    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                      offset, count);
> +    if (r == 0) {
> +      r = do_fallocate(h->fd, 0, offset, count);
> +      if (r == 0)
> +        return 0;
> +
> +      if (errno != EOPNOTSUPP) {
> +        nbdkit_error ("zero: %m");
> +        return r;
> +      }
> +
> +      h->can_fallocate = false;
> +    } else {
> +      if (errno != EOPNOTSUPP) {
> +        nbdkit_error ("zero: %m");
> +        return r;
> +      }
> +
> +      h->can_punch_hole = false;
> +    }
> +  }
> +#endif
> +
> +  /* For block devices, we can use BLKZEROOUT.
> +     NOTE: count and offset must be aligned to logical block size. */
> +  if (h->is_block_device) {
> +    uint64_t range[2] = {offset, count};
> +
> +    r = ioctl(h->fd, BLKZEROOUT, &range);
> +    if (r == 0)
> +      return 0;
> +
> +    nbdkit_error("zero: %m");
> +    return r;
> +  }
> +
>    /* Trigger a fall back to writing */
>    errno = EOPNOTSUPP;
> -#endif
>
>    return r;
>  }
> --
> 2.17.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20180802/52095a36/attachment.htm>


More information about the Libguestfs mailing list