[libvirt] [PATCHv2 12/16] save: add virFileDirectFd wrapper type

Eric Blake eblake at redhat.com
Wed Jul 20 04:20:35 UTC 2011


O_DIRECT has stringent requirements.  Rather than make lots of changes
at each site that wants to use O_DIRECT, it is easier to offload
the work through a helper process that mirrors the I/O between a
pipe and the actual direct fd, so that the other end of the pipe
no longer has to worry about constraints.

Plus, if the kernel ever gains better posix_fadvise support, then we
only have to touch a single file to let all callers benefit from a
more efficient way to avoid file system caching.

* src/util/virfile.h (virFileDirectFdFlag, virFileDirectFdNew)
(virFileDirectFdClose, virFileDirectFdFree): New prototypes.
* src/util/virdirect.c: Implement new wrapper object.
* src/libvirt_private.syms (virfile.h): Export new symbols.
* cfg.mk (useless_free_options): Add to list.
* po/POTFILES.in: Add new translations.
---

v2: move everything into virfile.c rather than creating a new file,
with renames everywhere.  Fail rather than silently ignore systems
where O_DIRECT is 0.  Add comments to take advantage of posix_fadvise
rather than O_DIRECT if the Linux kernel ever gets fixed.  Add
virFileDirectFdFlag as a better probe of which cache-bypass mechanism
is actually being used.  Better documentation.

 cfg.mk                   |    1 +
 po/POTFILES.in           |    1 +
 src/libvirt_private.syms |    4 +
 src/util/virfile.c       |  175 +++++++++++++++++++++++++++++++++++++++++++++-
 src/util/virfile.h       |   15 ++++
 5 files changed, 195 insertions(+), 1 deletions(-)

diff --git a/cfg.mk b/cfg.mk
index 0a624f1..773a3df 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -119,6 +119,7 @@ useless_free_options =				\
   --name=virDomainSoundDefFree			\
   --name=virDomainVideoDefFree			\
   --name=virDomainWatchdogDefFree		\
+  --name=virFileDirectFdFree			\
   --name=virHashFree				\
   --name=virInterfaceDefFree			\
   --name=virInterfaceIpDefFree			\
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 5782cbf..7bb08b0 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -122,6 +122,7 @@ src/util/storage_file.c
 src/util/sysinfo.c
 src/util/util.c
 src/util/viraudit.c
+src/util/virfile.c
 src/util/virterror.c
 src/util/xml.c
 src/vbox/vbox_MSCOMGlue.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d42e0a0..0bab64d 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1082,6 +1082,10 @@ virAuditSend;

 # virfile.h
 virFileClose;
+virFileDirectFdClose;
+virFileDirectFdFlag;
+virFileDirectFdFree;
+virFileDirectFdNew;
 virFileFclose;
 virFileFdopen;

diff --git a/src/util/virfile.c b/src/util/virfile.c
index 6576921..8b32518 100644
--- a/src/util/virfile.c
+++ b/src/util/virfile.c
@@ -23,10 +23,24 @@
  */

 #include <config.h>
+#include "internal.h"

+#include "virfile.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
 #include <unistd.h>

-#include "virfile.h"
+#include "command.h"
+#include "configmake.h"
+#include "memory.h"
+#include "virterror_internal.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+#define virFileError(code, ...)                                   \
+    virReportErrorHelper(VIR_FROM_THIS, code, __FILE__,           \
+                         __FUNCTION__, __LINE__, __VA_ARGS__)
+

 int virFileClose(int *fdptr, bool preserve_errno)
 {
@@ -78,3 +92,162 @@ FILE *virFileFdopen(int *fdptr, const char *mode)

     return file;
 }
+
+
+/* Opaque type for managing a wrapper around an O_DIRECT fd.  For now,
+ * read-write is not supported, just a single direction.  */
+struct _virFileDirectFd {
+    virCommandPtr cmd; /* Child iohelper process to do the I/O.  */
+};
+
+/**
+ * virFileDirectFdFlag:
+ *
+ * Returns 0 if the kernel can avoid file system cache pollution
+ * without any additional flags, O_DIRECT if the original fd must be
+ * opened in direct mode, or -1 if there is no support for bypassing
+ * the file system cache.
+ */
+int
+virFileDirectFdFlag(void)
+{
+    /* XXX For now, Linux posix_fadvise is not powerful enough to
+     * avoid O_DIRECT.  */
+    return O_DIRECT ? O_DIRECT : -1;
+}
+
+/**
+ * virFileDirectFdNew:
+ * @fd: pointer to fd to wrap
+ * @name: name of fd, for diagnostics
+ *
+ * Update *FD (created with virFileDirectFdFlag() among the flags to
+ * open()) to ensure that all I/O to that file will bypass the system
+ * cache.  This must be called after open() and optional fchown() or
+ * fchmod(), but before any seek or I/O, and only on seekable fd.  The
+ * file must be O_RDONLY (to read the entire existing file) or
+ * O_WRONLY (to write to an empty file).  In some cases, *FD is
+ * changed to a non-seekable pipe; in this case, the caller must not
+ * do anything further with the original fd.
+ *
+ * On success, the new wrapper object is returned, which must be later
+ * freed with virFileDirectFdFree().  On failure, *FD is unchanged, an
+ * error message is output, and NULL is returned.
+ */
+virFileDirectFdPtr
+virFileDirectFdNew(int *fd, const char *name)
+{
+    virFileDirectFdPtr ret = NULL;
+    bool output = false;
+    int pipefd[2] = { -1, -1 };
+    int mode = -1;
+
+    /* XXX support posix_fadvise rather than spawning a child, if the
+     * kernel support for that is decent enough.  */
+
+    if (!O_DIRECT) {
+        virFileError(VIR_ERR_INTERNAL_ERROR, "%s",
+                     _("O_DIRECT unsupported on this platform"));
+        return NULL;
+    }
+
+    if (VIR_ALLOC(ret) < 0) {
+        virReportOOMError();
+        return NULL;
+    }
+
+#ifdef F_GETFL
+    /* Mingw lacks F_GETFL, but it also lacks O_DIRECT so didn't get
+     * here in the first place.  All other platforms reach this
+     * line.  */
+    mode = fcntl(*fd, F_GETFL);
+#endif
+
+    if (mode < 0) {
+        virFileError(VIR_ERR_INTERNAL_ERROR, _("invalid fd %d for %s"),
+                     *fd, name);
+        goto error;
+    } else if ((mode & O_ACCMODE) == O_WRONLY) {
+        output = true;
+    } else if ((mode & O_ACCMODE) != O_RDONLY) {
+        virFileError(VIR_ERR_INTERNAL_ERROR, _("unexpected mode %x for %s"),
+                     mode & O_ACCMODE, name);
+        goto error;
+    }
+
+    if (pipe2(pipefd, O_CLOEXEC) < 0) {
+        virFileError(VIR_ERR_INTERNAL_ERROR,
+                     _("unable to create pipe for %s"), name);
+        goto error;
+    }
+
+    ret->cmd = virCommandNewArgList(LIBEXECDIR "/libvirt_iohelper",
+                                    name, "0", NULL);
+    if (output) {
+        virCommandSetInputFD(ret->cmd, pipefd[0]);
+        virCommandSetOutputFD(ret->cmd, fd);
+        virCommandAddArg(ret->cmd, "1");
+    } else {
+        virCommandSetInputFD(ret->cmd, *fd);
+        virCommandSetOutputFD(ret->cmd, &pipefd[1]);
+        virCommandAddArg(ret->cmd, "0");
+    }
+
+    if (virCommandRunAsync(ret->cmd, NULL) < 0)
+        goto error;
+
+    if (VIR_CLOSE(pipefd[!output]) < 0) {
+        virFileError(VIR_ERR_INTERNAL_ERROR, "%s", _("unable to close pipe"));
+        goto error;
+    }
+
+    VIR_FORCE_CLOSE(*fd);
+    *fd = pipefd[output];
+    return ret;
+
+error:
+    VIR_FORCE_CLOSE(pipefd[0]);
+    VIR_FORCE_CLOSE(pipefd[1]);
+    virFileDirectFdFree(ret);
+    return NULL;
+}
+
+/**
+ * virFileDirectFdClose:
+ * @dfd: direct fd wrapper, or NULL
+ *
+ * If DFD is valid, then ensure that I/O has completed, which may
+ * include reaping a child process.  Return 0 if all data for the
+ * wrapped fd is complete, or -1 on failure with an error emitted.
+ * This function intentionally returns 0 when DFD is NULL, so that
+ * callers can conditionally create a virFileDirectFd wrapper but
+ * unconditionally call the cleanup code.  To avoid deadlock, only
+ * call this after closing the fd resulting from virFileDirectFdNew().
+ */
+int
+virFileDirectFdClose(virFileDirectFdPtr dfd)
+{
+    if (!dfd)
+        return 0;
+
+    return virCommandWait(dfd->cmd, NULL);
+}
+
+/**
+ * virFileDirectFdFree:
+ * @dfd: direct fd wrapper, or NULL
+ *
+ * Free all remaining resources associated with DFD.  If
+ * virFileDirectFdClose() was not previously called, then this may
+ * discard some previous I/O.  To avoid deadlock, only call this after
+ * closing the fd resulting from virFileDirectFdNew().
+ */
+void
+virFileDirectFdFree(virFileDirectFdPtr dfd)
+{
+    if (!dfd)
+        return;
+
+    virCommandFree(dfd->cmd);
+    VIR_FREE(dfd);
+}
diff --git a/src/util/virfile.h b/src/util/virfile.h
index d11f902..0906568 100644
--- a/src/util/virfile.h
+++ b/src/util/virfile.h
@@ -50,4 +50,19 @@ FILE *virFileFdopen(int *fdptr, const char *mode) ATTRIBUTE_RETURN_CHECK;
 # define VIR_FORCE_CLOSE(FD) ignore_value(virFileClose(&(FD), true))
 # define VIR_FORCE_FCLOSE(FILE) ignore_value(virFileFclose(&(FILE), true))

+/* Opaque type for managing a wrapper around an O_DIRECT fd.  */
+struct _virFileDirectFd;
+
+typedef struct _virFileDirectFd virFileDirectFd;
+typedef virFileDirectFd *virFileDirectFdPtr;
+
+int virFileDirectFdFlag(void);
+
+virFileDirectFdPtr virFileDirectFdNew(int *fd, const char *name)
+    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+
+int virFileDirectFdClose(virFileDirectFdPtr dfd);
+
+void virFileDirectFdFree(virFileDirectFdPtr dfd);
+
 #endif /* __VIR_FILES_H */
-- 
1.7.4.4




More information about the libvir-list mailing list