<div dir="ltr">Oops, please ignore this, this was already sent and reviewed here:<div><a href="https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html">https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html</a></div><div><br></div><div>The patch was hiding in my tree and selected by a careless glob :-)<br><br><div class="gmail_quote"><div dir="ltr">On Thu, Aug 2, 2018 at 10:06 PM Nir Soffer <<a href="mailto:nirsof@gmail.com">nirsof@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">If we may not trim, we tried ZERO_RANGE, but this is not well supported<br>
yet, for example it is not available on NFS 4.2. ZERO_RANGE and<br>
PUNCH_HOLE are supported now on block devices, but not on RHRL 7, so we<br>
fallback to slow manual zeroing there.<br>
<br>
Change the logic to support block devices on RHEL 7, and file systems<br>
that do not support ZERO_RANGE.<br>
<br>
The new logic:<br>
- If we may trim, try PUNCH_HOLE<br>
- If we can zero range, Try ZERO_RANGE<br>
- If we can punch hole and fallocate, try fallocate(PUNCH_HOLE) followed<br>
by fallocate(0).<br>
- If underlying file is a block device, try ioctl(BLKZEROOUT)<br>
- Otherwise fallback to manual zeroing<br>
<br>
The handle keeps now the underlying file capabilities, so once we<br>
discover that an operation is not supported, we never try it again.<br>
<br>
Here are examples runs on a server based on Intel(R) Xeon(R) CPU E5-2630<br>
v4 @ 2.20GHz, using XtremIO storage via 4G FC HBA and 4 paths to<br>
storage.<br>
<br>
$ export SOCK=/tmp/nbd.sock<br>
$ export BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15<br>
<br>
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U $SOCK<br>
<br>
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK<br>
<br>
real 0m2.741s<br>
user 0m0.224s<br>
sys 0m0.634s<br>
<br>
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img nbd:unix:$SOCK<br>
<br>
real 0m1.920s<br>
user 0m0.163s<br>
sys 0m0.735s<br>
<br>
Issues:<br>
- ioctl(BLKZEROOUT) will fail if offset or count are not aligned to<br>
logical sector size. I'm not sure if nbdkit or qemu-img ensure this.<br>
- Need testing with NFS<br>
---<br>
plugins/file/file.c | 126 ++++++++++++++++++++++++++++++++++++--------<br>
1 file changed, 103 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/plugins/file/file.c b/plugins/file/file.c<br>
index fb20622..bce2ed1 100644<br>
--- a/plugins/file/file.c<br>
+++ b/plugins/file/file.c<br>
@@ -33,6 +33,7 @@<br>
<br>
#include <config.h><br>
<br>
+#include <stdbool.h><br>
#include <stdio.h><br>
#include <stdlib.h><br>
#include <string.h><br>
@@ -42,6 +43,8 @@<br>
#include <sys/stat.h><br>
#include <errno.h><br>
#include <linux/falloc.h> /* For FALLOC_FL_* on RHEL, glibc < 2.18 */<br>
+#include <sys/ioctl.h><br>
+#include <linux/fs.h><br>
<br>
#include <nbdkit-plugin.h><br>
<br>
@@ -116,6 +119,10 @@ file_config_complete (void)<br>
/* The per-connection handle. */<br>
struct handle {<br>
int fd;<br>
+ bool is_block_device;<br>
+ bool can_punch_hole;<br>
+ bool can_zero_range;<br>
+ bool can_fallocate;<br>
};<br>
<br>
/* Create the per-connection handle. */<br>
@@ -123,6 +130,7 @@ static void *<br>
file_open (int readonly)<br>
{<br>
struct handle *h;<br>
+ struct stat statbuf;<br>
int flags;<br>
<br>
h = malloc (sizeof *h);<br>
@@ -144,6 +152,23 @@ file_open (int readonly)<br>
return NULL;<br>
}<br>
<br>
+ if (fstat (h->fd, &statbuf) == -1) {<br>
+ nbdkit_error ("fstat: %s: %m", filename);<br>
+ free (h);<br>
+ return NULL;<br>
+ }<br>
+<br>
+ h->is_block_device = S_ISBLK(statbuf.st_mode);<br>
+<br>
+ /* These flags will disabled if an operation is not supported. */<br>
+#ifdef FALLOC_FL_PUNCH_HOLE<br>
+ h->can_punch_hole = true;<br>
+#endif<br>
+#ifdef FALLOC_FL_ZERO_RANGE<br>
+ h->can_zero_range = true;<br>
+#endif<br>
+ h->can_fallocate = true;<br>
+<br>
return h;<br>
}<br>
<br>
@@ -164,27 +189,29 @@ static int64_t<br>
file_get_size (void *handle)<br>
{<br>
struct handle *h = handle;<br>
- struct stat statbuf;<br>
<br>
- if (fstat (h->fd, &statbuf) == -1) {<br>
- nbdkit_error ("stat: %m");<br>
- return -1;<br>
- }<br>
-<br>
- if (S_ISBLK (statbuf.st_mode)) {<br>
+ if (h->is_block_device) {<br>
+ /* Block device, so st_size will not be the true size. */<br>
off_t size;<br>
<br>
- /* Block device, so st_size will not be the true size. */<br>
size = lseek (h->fd, 0, SEEK_END);<br>
if (size == -1) {<br>
nbdkit_error ("lseek (to find device size): %m");<br>
return -1;<br>
}<br>
+<br>
return size;<br>
- }<br>
+ } else {<br>
+ /* Regular file. */<br>
+ struct stat statbuf;<br>
+<br>
+ if (fstat (h->fd, &statbuf) == -1) {<br>
+ nbdkit_error ("fstat: %m");<br>
+ return -1;<br>
+ }<br>
<br>
- /* Else regular file. */<br>
- return statbuf.st_size;<br>
+ return statbuf.st_size;<br>
+ }<br>
}<br>
<br>
static int<br>
@@ -250,33 +277,86 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset)<br>
static int<br>
file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)<br>
{<br>
-#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)<br>
struct handle *h = handle;<br>
-#endif<br>
int r = -1;<br>
<br>
#ifdef FALLOC_FL_PUNCH_HOLE<br>
- if (may_trim) {<br>
+ /* If we can and may trim, punching hole is our best option. */<br>
+ if (h->can_punch_hole && may_trim) {<br>
r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,<br>
offset, count);<br>
- if (r == -1 && errno != EOPNOTSUPP) {<br>
+ if (r == 0)<br>
+ return 0;<br>
+<br>
+ if (errno != EOPNOTSUPP) {<br>
nbdkit_error ("zero: %m");<br>
+ return r;<br>
}<br>
- /* PUNCH_HOLE is older; if it is not supported, it is likely that<br>
- ZERO_RANGE will not work either, so fall back to write. */<br>
- return r;<br>
+<br>
+ h->can_punch_hole = false;<br>
}<br>
#endif<br>
<br>
#ifdef FALLOC_FL_ZERO_RANGE<br>
- r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);<br>
- if (r == -1 && errno != EOPNOTSUPP) {<br>
- nbdkit_error ("zero: %m");<br>
+ /* ZERO_RANGE is not well supported yet, but it the next best option. */<br>
+ if (h->can_zero_range) {<br>
+ r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);<br>
+ if (r == 0)<br>
+ return 0;<br>
+<br>
+ if (errno != EOPNOTSUPP) {<br>
+ nbdkit_error ("zero: %m");<br>
+ return r;<br>
+ }<br>
+<br>
+ h->can_zero_range = false;<br>
}<br>
-#else<br>
+#endif<br>
+<br>
+#ifdef FALLOC_FL_PUNCH_HOLE<br>
+ /* If we can punch hole but may not trim, we can combine punching hole and<br>
+ fallocate to zero a range. This is much more efficient than writing zeros<br>
+ manually. */<br>
+ if (h->can_punch_hole && h->can_fallocate) {<br>
+ r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,<br>
+ offset, count);<br>
+ if (r == 0) {<br>
+ r = do_fallocate(h->fd, 0, offset, count);<br>
+ if (r == 0)<br>
+ return 0;<br>
+<br>
+ if (errno != EOPNOTSUPP) {<br>
+ nbdkit_error ("zero: %m");<br>
+ return r;<br>
+ }<br>
+<br>
+ h->can_fallocate = false;<br>
+ } else {<br>
+ if (errno != EOPNOTSUPP) {<br>
+ nbdkit_error ("zero: %m");<br>
+ return r;<br>
+ }<br>
+<br>
+ h->can_punch_hole = false;<br>
+ }<br>
+ }<br>
+#endif<br>
+<br>
+ /* For block devices, we can use BLKZEROOUT.<br>
+ NOTE: count and offset must be aligned to logical block size. */<br>
+ if (h->is_block_device) {<br>
+ uint64_t range[2] = {offset, count};<br>
+<br>
+ r = ioctl(h->fd, BLKZEROOUT, &range);<br>
+ if (r == 0)<br>
+ return 0;<br>
+<br>
+ nbdkit_error("zero: %m");<br>
+ return r;<br>
+ }<br>
+<br>
/* Trigger a fall back to writing */<br>
errno = EOPNOTSUPP;<br>
-#endif<br>
<br>
return r;<br>
}<br>
-- <br>
2.17.1<br>
<br>
</blockquote></div></div></div>