[Libguestfs] [PATCH] file: Zero support for block devices and NFS 4.2
Nir Soffer
nsoffer at redhat.com
Thu Aug 2 19:09:18 UTC 2018
Oops, please ignore this, this was already sent and reviewed here:
https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html
The patch was hiding in my tree and selected by a careless glob :-)
On Thu, Aug 2, 2018 at 10:06 PM Nir Soffer <nirsof at gmail.com> wrote:
> If we may not trim, we tried ZERO_RANGE, but this is not well supported
> yet, for example it is not available on NFS 4.2. ZERO_RANGE and
> PUNCH_HOLE are supported now on block devices, but not on RHEL 7, so we
> fall back to slow manual zeroing there.
>
> Change the logic to support block devices on RHEL 7, and file systems
> that do not support ZERO_RANGE.
>
> The new logic:
> - If we may trim, try PUNCH_HOLE
> - If we can zero range, try ZERO_RANGE
> - If we can punch hole and fallocate, try fallocate(PUNCH_HOLE) followed
> by fallocate(0).
> - If underlying file is a block device, try ioctl(BLKZEROOUT)
> - Otherwise fall back to manual zeroing
>
> The handle now keeps the underlying file capabilities, so once we
> discover that an operation is not supported, we never try it again.
>
> Here are example runs on a server based on Intel(R) Xeon(R) CPU E5-2630
> v4 @ 2.20GHz, using XtremIO storage via 4G FC HBA and 4 paths to
> storage.
>
> $ export SOCK=/tmp/nbd.sock
> $ export
> BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15
>
> $ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U
> $SOCK
>
> $ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
> nbd:unix:$SOCK
>
> real 0m2.741s
> user 0m0.224s
> sys 0m0.634s
>
> $ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
> nbd:unix:$SOCK
>
> real 0m1.920s
> user 0m0.163s
> sys 0m0.735s
>
> Issues:
> - ioctl(BLKZEROOUT) will fail if offset or count are not aligned to
> logical sector size. I'm not sure if nbdkit or qemu-img ensure this.
> - Need testing with NFS
> ---
> plugins/file/file.c | 126 ++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 103 insertions(+), 23 deletions(-)
>
> diff --git a/plugins/file/file.c b/plugins/file/file.c
> index fb20622..bce2ed1 100644
> --- a/plugins/file/file.c
> +++ b/plugins/file/file.c
> @@ -33,6 +33,7 @@
>
> #include <config.h>
>
> +#include <stdbool.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> @@ -42,6 +43,8 @@
> #include <sys/stat.h>
> #include <errno.h>
> #include <linux/falloc.h> /* For FALLOC_FL_* on RHEL, glibc < 2.18 */
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
>
> #include <nbdkit-plugin.h>
>
> @@ -116,6 +119,10 @@ file_config_complete (void)
> /* The per-connection handle. */
> struct handle {
> int fd;
> + bool is_block_device;
> + bool can_punch_hole;
> + bool can_zero_range;
> + bool can_fallocate;
> };
>
> /* Create the per-connection handle. */
> @@ -123,6 +130,7 @@ static void *
> file_open (int readonly)
> {
> struct handle *h;
> + struct stat statbuf;
> int flags;
>
> h = malloc (sizeof *h);
> @@ -144,6 +152,23 @@ file_open (int readonly)
> return NULL;
> }
>
> + if (fstat (h->fd, &statbuf) == -1) {
> + nbdkit_error ("fstat: %s: %m", filename);
> + free (h);
> + return NULL;
> + }
> +
> + h->is_block_device = S_ISBLK(statbuf.st_mode);
> +
> + /* These flags will be disabled if an operation is not supported. */
> +#ifdef FALLOC_FL_PUNCH_HOLE
> + h->can_punch_hole = true;
> +#endif
> +#ifdef FALLOC_FL_ZERO_RANGE
> + h->can_zero_range = true;
> +#endif
> + h->can_fallocate = true;
> +
> return h;
> }
>
> @@ -164,27 +189,29 @@ static int64_t
> file_get_size (void *handle)
> {
> struct handle *h = handle;
> - struct stat statbuf;
>
> - if (fstat (h->fd, &statbuf) == -1) {
> - nbdkit_error ("stat: %m");
> - return -1;
> - }
> -
> - if (S_ISBLK (statbuf.st_mode)) {
> + if (h->is_block_device) {
> + /* Block device, so st_size will not be the true size. */
> off_t size;
>
> - /* Block device, so st_size will not be the true size. */
> size = lseek (h->fd, 0, SEEK_END);
> if (size == -1) {
> nbdkit_error ("lseek (to find device size): %m");
> return -1;
> }
> +
> return size;
> - }
> + } else {
> + /* Regular file. */
> + struct stat statbuf;
> +
> + if (fstat (h->fd, &statbuf) == -1) {
> + nbdkit_error ("fstat: %m");
> + return -1;
> + }
>
> - /* Else regular file. */
> - return statbuf.st_size;
> + return statbuf.st_size;
> + }
> }
>
> static int
> @@ -250,33 +277,86 @@ file_pwrite (void *handle, const void *buf, uint32_t
> count, uint64_t offset)
> static int
> file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
> {
> -#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
> struct handle *h = handle;
> -#endif
> int r = -1;
>
> #ifdef FALLOC_FL_PUNCH_HOLE
> - if (may_trim) {
> + /* If we can and may trim, punching hole is our best option. */
> + if (h->can_punch_hole && may_trim) {
> r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> offset, count);
> - if (r == -1 && errno != EOPNOTSUPP) {
> + if (r == 0)
> + return 0;
> +
> + if (errno != EOPNOTSUPP) {
> nbdkit_error ("zero: %m");
> + return r;
> }
> - /* PUNCH_HOLE is older; if it is not supported, it is likely that
> - ZERO_RANGE will not work either, so fall back to write. */
> - return r;
> +
> + h->can_punch_hole = false;
> }
> #endif
>
> #ifdef FALLOC_FL_ZERO_RANGE
> - r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
> - if (r == -1 && errno != EOPNOTSUPP) {
> - nbdkit_error ("zero: %m");
> + /* ZERO_RANGE is not well supported yet, but it is the next best option. */
> + if (h->can_zero_range) {
> + r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
> + if (r == 0)
> + return 0;
> +
> + if (errno != EOPNOTSUPP) {
> + nbdkit_error ("zero: %m");
> + return r;
> + }
> +
> + h->can_zero_range = false;
> }
> -#else
> +#endif
> +
> +#ifdef FALLOC_FL_PUNCH_HOLE
> + /* If we can punch hole but may not trim, we can combine punching hole
> and
> + fallocate to zero a range. This is much more efficient than writing
> zeros
> + manually. */
> + if (h->can_punch_hole && h->can_fallocate) {
> + r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> + offset, count);
> + if (r == 0) {
> + r = do_fallocate(h->fd, 0, offset, count);
> + if (r == 0)
> + return 0;
> +
> + if (errno != EOPNOTSUPP) {
> + nbdkit_error ("zero: %m");
> + return r;
> + }
> +
> + h->can_fallocate = false;
> + } else {
> + if (errno != EOPNOTSUPP) {
> + nbdkit_error ("zero: %m");
> + return r;
> + }
> +
> + h->can_punch_hole = false;
> + }
> + }
> +#endif
> +
> + /* For block devices, we can use BLKZEROOUT.
> + NOTE: count and offset must be aligned to logical block size. */
> + if (h->is_block_device) {
> + uint64_t range[2] = {offset, count};
> +
> + r = ioctl(h->fd, BLKZEROOUT, &range);
> + if (r == 0)
> + return 0;
> +
> + nbdkit_error("zero: %m");
> + return r;
> + }
> +
> /* Trigger a fall back to writing */
> errno = EOPNOTSUPP;
> -#endif
>
> return r;
> }
> --
> 2.17.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20180802/52095a36/attachment.htm>
More information about the Libguestfs
mailing list