[Libguestfs] [PATCH POSSIBLE DATA CORRUPTOR] daemon: Run fsync on block devices after sync (RHBZ#836710).

Richard W.M. Jones rjones at redhat.com
Mon Jul 2 20:24:09 UTC 2012


This is by no means a complete fix for RHBZ#836710, because
I haven't completely got to the bottom of this bug yet, but
it is an improvement.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
New in Fedora 11: Fedora Windows cross-compiler. Compile Windows
programs, test, and build Windows installers. Over 70 libraries supprt'd
http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw
-------------- next part --------------
>From c0a3c9ce70b98171e737e49e6dccc4457963f2ec Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones at redhat.com>
Date: Mon, 2 Jul 2012 16:38:19 +0100
Subject: [PATCH] daemon: Run fsync on block devices after sync (RHBZ#836710).

On Linux, sync(2) does not actually issue a write barrier, thus it
doesn't force a flush of the underlying hardware write cache (or
qemu's disk cache in the virtual case).

This can be a problem, because libguestfs relies on running sync in
the appliance, followed by killing qemu (using SIGTERM).

In most cases, this is fine, because killing qemu with SIGTERM should
cause it to flush out the disk cache before it exits.  However we have
found various bugs in qemu which cause qemu to crash while doing the
flush, leaving the data unwritten (see RHBZ#836913).

The solution is to issue fsync(2) to the block devices.  This has a
write barrier, so it ensures that qemu writes out its cache long
before we get around to killing qemu.
---
 configure.ac  |    1 +
 daemon/sync.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/configure.ac b/configure.ac
index c37b7f5..e4b3e70 100644
--- a/configure.ac
+++ b/configure.ac
@@ -204,6 +204,7 @@ AC_CHECK_HEADERS([\
 
 dnl Functions.
 AC_CHECK_FUNCS([\
+        fsync \
         futimens \
         getxattr \
         htonl \
diff --git a/daemon/sync.c b/daemon/sync.c
index fcb887e..2338a3d 100644
--- a/daemon/sync.c
+++ b/daemon/sync.c
@@ -23,7 +23,11 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/types.h>
 
 #include "daemon.h"
 #include "actions.h"
@@ -32,6 +36,10 @@
 static int sync_win32 (void);
 #endif
 
+#ifdef HAVE_FSYNC
+static void fsync_devices (void);
+#endif
+
 int
 do_sync (void)
 {
@@ -52,6 +60,18 @@ sync_disks (void)
 {
 #if defined(HAVE_SYNC)
   sync ();
+
+  /* On Linux, sync(2) doesn't perform a barrier, so qemu (which may
+   * have a writeback cache, even with cache=none) will still have
+   * some unwritten data.  Force the data out of any qemu caches, by
+   * calling fsync on all block devices.  Note we still need the
+   * call to sync above in order to schedule the writes.
+   * Thanks to: Avi Kivity, Kevin Wolf.
+   */
+#ifdef HAVE_FSYNC
+  fsync_devices ();
+#endif
+
   return 0;
 #elif defined(WIN32)
   return sync_win32 ();
@@ -60,6 +80,64 @@ sync_disks (void)
 #endif
 }
 
+#ifdef HAVE_FSYNC
+static void
+fsync_devices (void)
+{
+  DIR *dir;
+  struct dirent *d;
+  char dev_path[256];
+  int fd;
+
+  dir = opendir ("/sys/block");
+  if (!dir) {
+    perror ("opendir: /sys/block");
+    return;
+  }
+
+  for (;;) {
+    errno = 0;
+    d = readdir(dir);
+    if (!d) break;
+
+    if (STREQLEN (d->d_name, "sd", 2) ||
+        STREQLEN (d->d_name, "hd", 2) ||
+        STREQLEN (d->d_name, "vd", 2) ||
+        STREQLEN (d->d_name, "sr", 2)) {
+      snprintf (dev_path, sizeof dev_path, "/dev/%s", d->d_name);
+
+      /* Ignore the root device. */
+      if (is_root_device (dev_path))
+        continue;
+
+      fd = open (dev_path, O_RDONLY|O_CLOEXEC);
+      if (fd == -1) {
+        perror (dev_path);
+        continue;
+      }
+
+      /* fsync the device. */
+      if (verbose)
+        fprintf (stderr, "fsync %s\n", dev_path);
+
+      if (fsync (fd) == -1)
+        perror ("fsync");
+
+      if (close (fd) == -1)
+        perror ("close");
+    }
+  }
+
+  /* Check readdir didn't fail */
+  if (errno != 0)
+    perror ("readdir: /sys/block");
+
+  /* Close the directory handle */
+  if (closedir (dir) == -1)
+    perror ("closedir");
+}
+#endif /* HAVE_FSYNC */
+
 #ifdef WIN32
 static int
 sync_win32 (void)
-- 
1.7.10.4



More information about the Libguestfs mailing list