[Libguestfs] [PATCH libnbd] copy: Set POSIX_FADV_SEQUENTIAL and POSIX_FADV_DONTNEED.

Richard W.M. Jones rjones at redhat.com
Fri Feb 19 17:56:58 UTC 2021


Set POSIX_FADV_SEQUENTIAL flag on the file when copying to or from a
local file.  This tells the operating system that we intend to read or
write sequentially and to optimize for that case.

Avoid polluting the page cache when copying to or from a local file or
block device by calling POSIX_FADV_DONTNEED.  This is similar to what
we do in nbdkit-file-plugin:
https://gitlab.com/nbdkit/nbdkit/-/commit/aa5a2183a6d16afd919174b5ba8222b2bccf4039

Before this change:

$ cachedel /var/tmp/random
$ cachestats /var/tmp/random
pages in cache: 0/8388608 (0.0%)  [filesize=33554432.0K, pagesize=4K]
$ free -m; time ./run nbdcopy /var/tmp/random null: ; free -m ; cachestats /var/tmp/random
              total        used        free      shared  buff/cache   available
Mem:          32080        1069       14432           3       16578       30550
Swap:         16135          86       16049

real	0m21.484s
user	0m2.275s
sys	0m25.837s
              total        used        free      shared  buff/cache   available
Mem:          32080        1041         297           3       30741       30581
Swap:         16135          86       16049
pages in cache: 4971577/8388608 (59.3%)  [filesize=33554432.0K, pagesize=4K]

After this change:

$ cachedel /var/tmp/random
$ free -m; time ./run nbdcopy /var/tmp/random null: ; free -m ; cachestats /var/tmp/random
              total        used        free      shared  buff/cache   available
Mem:          32080        1071       14431           3       16578       30548
Swap:         16135          86       16049

real	0m22.423s
user	0m2.180s
sys	0m31.059s
              total        used        free      shared  buff/cache   available
Mem:          32080        1053       14238           3       16789       30566
Swap:         16135          86       16049
pages in cache: 53888/8388608 (0.6%)  [filesize=33554432.0K, pagesize=4K]
---
 configure.ac    |  3 +++
 copy/file-ops.c | 17 +++++++++++++++--
 copy/main.c     | 11 +++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 58a7724..b36ae09 100644
--- a/configure.ac
+++ b/configure.ac
@@ -79,6 +79,9 @@ AC_CHECK_HEADERS([\
 
 AC_CHECK_HEADERS([linux/vm_sockets.h], [], [], [#include <sys/socket.h>])
 
+dnl posix_fadvise helps to optimise linear reads and writes (optional).
+AC_CHECK_FUNCS([posix_fadvise])
+
 dnl Check for strerrordesc_np (optional, glibc only).
 dnl Prefer this over sys_errlist.
 dnl https://lists.fedoraproject.org/archives/list/glibc@lists.fedoraproject.org/thread/WJHGG2OO7ABNAYICGA5WQZ2Q34Q2FEHU/
diff --git a/copy/file-ops.c b/copy/file-ops.c
index 2a239d0..1310f08 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -60,11 +60,12 @@ static size_t
 file_synch_read (struct rw *rw,
                  void *data, size_t len, uint64_t offset)
 {
+  int fd = rw->u.local.fd;
   size_t n = 0;
   ssize_t r;
 
   while (len > 0) {
-    r = pread (rw->u.local.fd, data, len, offset);
+    r = pread (fd, data, len, offset);
     if (r == -1) {
       perror (rw->name);
       exit (EXIT_FAILURE);
@@ -72,6 +73,11 @@ file_synch_read (struct rw *rw,
     if (r == 0)
       return n;
 
+#if defined (HAVE_POSIX_FADVISE) && defined (POSIX_FADV_DONTNEED)
+    /* On Linux this will evict the pages we just read from the page cache. */
+    posix_fadvise (fd, offset, r, POSIX_FADV_DONTNEED);
+#endif
+
     data += r;
     offset += r;
     len -= r;
@@ -85,14 +91,21 @@ static void
 file_synch_write (struct rw *rw,
                   const void *data, size_t len, uint64_t offset)
 {
+  int fd = rw->u.local.fd;
   ssize_t r;
 
   while (len > 0) {
-    r = pwrite (rw->u.local.fd, data, len, offset);
+    r = pwrite (fd, data, len, offset);
     if (r == -1) {
       perror (rw->name);
       exit (EXIT_FAILURE);
     }
+
+#if defined (HAVE_POSIX_FADVISE) && defined (POSIX_FADV_DONTNEED)
+    /* On Linux this will evict the pages we just wrote from the page cache. */
+    posix_fadvise (fd, offset, r, POSIX_FADV_DONTNEED);
+#endif
+
     data += r;
     offset += r;
     len -= r;
diff --git a/copy/main.c b/copy/main.c
index 68a6030..502ad28 100644
--- a/copy/main.c
+++ b/copy/main.c
@@ -460,6 +460,15 @@ open_null (struct rw *rw)
   rw->size = INT64_MAX;
 }
 
+/* Set the POSIX_FADV_SEQUENTIAL flag on a file descriptor, but don't fail. */
+static inline void
+fadvise_sequential (int fd)
+{
+#if defined (HAVE_POSIX_FADVISE) && defined (POSIX_FADV_SEQUENTIAL)
+  posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+}
+
 /* Open a local (non-NBD) file, ie. a file, device, or "-" for stdio.
  * Returns the open file descriptor which the caller must close.
  *
@@ -524,12 +533,14 @@ open_local (const char *prog,
     if (ioctl (fd, BLKSSZGET, &rw->u.local.sector_size))
       fprintf (stderr, "warning: cannot get sector size: %s: %m", rw->name);
 #endif
+    fadvise_sequential (fd);
   }
   else if (S_ISREG (rw->u.local.stat.st_mode)) {
     /* Regular file. */
     rw->ops = &file_ops;
     rw->size = rw->u.local.stat.st_size;
     rw->u.local.seek_hole_supported = seek_hole_supported (fd);
+    fadvise_sequential (fd);
   }
   else {
     /* Probably stdin/stdout, a pipe or a socket.  Set size == -1
-- 
2.29.0.rc2




More information about the Libguestfs mailing list