[libvirt RFC 3/5] qemu: saveimage: rework image read/write to be O_DIRECT friendly

Claudio Fontana cfontana at suse.de
Tue May 24 10:49:52 UTC 2022


maintain a compatible image format (still QEMU_SAVE_VERSION 2),
but change the on-disk representation to be more O_DIRECT friendly:

1) ensure that the header struct fields are packed, so we can be sure
   no padding will ever ruin the day in the future

2) finish the libvirt header (header + xml + cookie) with zero padding,
   in order to ensure that the QEMU VM (QEVM Magic) is aligned.

Adapt the read and write of the libvirt header accordingly.

The old images should be still loadable after this change,
and the new images should be still loadable in old libvirt as well.

Signed-off-by: Claudio Fontana <cfontana at suse.de>
---
 src/qemu/qemu_saveimage.c | 267 ++++++++++++++++++++++++--------------
 src/qemu/qemu_saveimage.h |  20 +--
 2 files changed, 182 insertions(+), 105 deletions(-)

diff --git a/src/qemu/qemu_saveimage.c b/src/qemu/qemu_saveimage.c
index 4fd4c5cfcd..7262c598f8 100644
--- a/src/qemu/qemu_saveimage.c
+++ b/src/qemu/qemu_saveimage.c
@@ -139,12 +139,12 @@ virQEMUSaveDataWrite(virQEMUSaveData *data,
                      int fd,
                      const char *path)
 {
+    g_autofree void *base = NULL;
+    char *buf, *cur;
     virQEMUSaveHeader *header = &data->header;
     size_t len;
     size_t xml_len;
     size_t cookie_len = 0;
-    size_t zerosLen = 0;
-    g_autofree char *zeros = NULL;
 
     xml_len = strlen(data->xml) + 1;
     if (data->cookie)
@@ -165,42 +165,190 @@ virQEMUSaveDataWrite(virQEMUSaveData *data,
             return -1;
         }
     }
+    /* align data_len */
+    len = sizeof(*header) + header->data_len;
+    header->data_len += virFileDirectAlign(len) - len;
 
-    zerosLen = header->data_len - len;
-    zeros = g_new0(char, zerosLen);
+    buf = virFileDirectBufferNew(&base, sizeof(*header) + header->data_len);
+    cur = buf;
 
     if (data->cookie)
         header->cookieOffset = xml_len;
 
-    if (safewrite(fd, header, sizeof(*header)) != sizeof(*header)) {
-        virReportSystemError(errno,
-                             _("failed to write header to domain save file '%s'"),
-                             path);
-        return -1;
+    memcpy(cur, header, sizeof(*header));
+    cur += sizeof(*header);
+    memcpy(cur, data->xml, xml_len);
+    cur += xml_len;
+    if (data->cookie) {
+        memcpy(cur, data->cookie, cookie_len);
+        cur += cookie_len;
     }
 
-    if (safewrite(fd, data->xml, xml_len) != xml_len) {
+    if (virFileDirectWriteLim(fd, buf, sizeof(*header) + header->data_len) < 0) {
         virReportSystemError(errno,
-                             _("failed to write domain xml to '%s'"),
+                             _("failed to write libvirt header of domain save file '%s'"),
                              path);
         return -1;
     }
 
-    if (data->cookie &&
-        safewrite(fd, data->cookie, cookie_len) != cookie_len) {
+    return 0;
+}
+
+/* virQEMUSaveDataRead:
+ *
+ * Reads libvirt's header (including domain XML) from a saved image.
+ *
+ * Returns -1 on generic failure, -3 on a corrupted image, or 0 on success.
+ */
+int
+virQEMUSaveDataRead(virQEMUSaveData *data,
+                    int fd,
+                    const char *path)
+{
+    g_autofree void *base = NULL;
+    virQEMUSaveHeader *header = &data->header;
+    size_t xml_len;
+    size_t cookie_len;
+    ssize_t rv;
+    void *dst;
+    int oflags = fcntl(fd, F_GETFL);
+    bool isDirect = O_DIRECT && (oflags & O_DIRECT);
+    size_t buflen = 1024 * 1024;
+    char *buf = NULL;
+    void *src = NULL;
+
+    header = &data->header;
+    if (isDirect) {
+        buf = virFileDirectBufferNew(&base, buflen);
+        src = buf;
+        rv = virFileDirectReadCopy(fd, &src, buflen, header, sizeof(*header));
+    } else {
+        rv = saferead(fd, header, sizeof(*header));
+    }
+    if (rv < 0) {
         virReportSystemError(errno,
-                             _("failed to write cookie to '%s'"),
+                             _("failed to read libvirt header of domain save file '%s'"),
                              path);
         return -1;
     }
+    if (rv < sizeof(*header)) {
+        virReportError(VIR_ERR_OPERATION_FAILED,
+                       _("domain save file '%s' libvirt header appears truncated"),
+                       path);
+        return -3;
+    }
+    rv -= sizeof(*header);
 
-    if (safewrite(fd, zeros, zerosLen) != zerosLen) {
-        virReportSystemError(errno,
-                             _("failed to write padding to '%s'"),
-                             path);
+    if (memcmp(header->magic, QEMU_SAVE_MAGIC, sizeof(header->magic)) != 0) {
+        if (memcmp(header->magic, QEMU_SAVE_PARTIAL, sizeof(header->magic)) == 0) {
+            virReportError(VIR_ERR_OPERATION_FAILED,
+                           _("domain save file '%s' seems incomplete"),
+                           path);
+            return -3;
+        }
+        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                       _("image magic is incorrect"));
+        return -1;
+    }
+    if (header->version > QEMU_SAVE_VERSION) {
+        /* convert endianness and try again */
+        qemuSaveImageBswapHeader(header);
+    }
+    if (header->version > QEMU_SAVE_VERSION) {
+        virReportError(VIR_ERR_OPERATION_FAILED,
+                       _("image version is not supported (%d > %d)"),
+                       header->version, QEMU_SAVE_VERSION);
         return -1;
     }
+    if (header->cookieOffset)
+        xml_len = header->cookieOffset;
+    else
+        xml_len = header->data_len;
 
+    if (xml_len <= 0) {
+        virReportError(VIR_ERR_OPERATION_FAILED,
+                       _("invalid xml length: %lu"), (unsigned long)xml_len);
+        return -1;
+    }
+    if (header->data_len < xml_len) {
+        virReportError(VIR_ERR_OPERATION_FAILED,
+                       _("invalid cookieOffset: %u"), header->cookieOffset);
+        return -1;
+    }
+    cookie_len = header->data_len - xml_len;
+    data->xml = g_new0(char, xml_len);
+    dst = data->xml;
+    if (rv > 0) {
+        if (isDirect) {
+            rv -= virFileDirectCopyBuf(&src, rv, &dst, &xml_len);
+        } else {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode"));
+            return -1;
+        }
+    }
+    if (xml_len > 0) {
+        if (isDirect) {
+            rv = virFileDirectReadCopy(fd, &src, buflen, dst, xml_len);
+        } else {
+            rv = saferead(fd, dst, xml_len);
+        }
+        if (rv < 0) {
+            virReportSystemError(errno,
+                                 _("failed to read libvirt xml in domain save file '%s'"),
+                                 path);
+            return -1;
+        }
+        if (rv < xml_len) {
+            virReportError(VIR_ERR_OPERATION_FAILED,
+                           _("domain save file '%s' xml seems incomplete"),
+                           path);
+            return -3;
+        }
+        rv -= xml_len;
+    }
+    if (cookie_len > 0) {
+        data->cookie = g_new0(char, cookie_len);
+        dst = data->cookie;
+        if (rv > 0) {
+            if (isDirect) {
+                rv -= virFileDirectCopyBuf(&src, rv, &dst, &cookie_len);
+            } else {
+                virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode"));
+                return -1;
+            }
+        }
+        if (cookie_len > 0) {
+            if (isDirect) {
+                rv = virFileDirectReadCopy(fd, &src, buflen, dst, cookie_len);
+            } else {
+                rv = saferead(fd, dst, cookie_len);
+            }
+            if (rv < 0) {
+                virReportSystemError(errno,
+                                     _("failed to read libvirt cookie in domain save file '%s'"),
+                                     path);
+                return -1;
+            }
+            if (rv < cookie_len) {
+                virReportError(VIR_ERR_OPERATION_FAILED,
+                               _("domain save file '%s' cookie seems incomplete"),
+                               path);
+                return -3;
+            }
+            rv -= cookie_len;
+        }
+    }
+    /*
+     * we should now be aligned and ready to read the QEVM.
+     */
+    if (rv > 0) {
+        if (isDirect) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected unaligned data in direct mode"));
+        } else {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode"));
+            return -1;
+        }
+    }
     return 0;
 }
 
@@ -444,11 +592,8 @@ qemuSaveImageOpen(virQEMUDriver *driver,
     VIR_AUTOCLOSE fd = -1;
     int ret = -1;
     g_autoptr(virQEMUSaveData) data = NULL;
-    virQEMUSaveHeader *header;
     g_autoptr(virDomainDef) def = NULL;
     int oflags = open_write ? O_RDWR : O_RDONLY;
-    size_t xml_len;
-    size_t cookie_len;
 
     if (bypass_cache) {
         int directFlag = virFileDirectFdFlag();
@@ -469,89 +614,17 @@ qemuSaveImageOpen(virQEMUDriver *driver,
         return -1;
 
     data = g_new0(virQEMUSaveData, 1);
-
-    header = &data->header;
-    if (saferead(fd, header, sizeof(*header)) != sizeof(*header)) {
-        if (unlink_corrupt) {
+    ret = virQEMUSaveDataRead(data, fd, path);
+    if (ret < 0) {
+        if (unlink_corrupt && ret == -3) {
             if (unlink(path) < 0) {
                 virReportSystemError(errno,
                                      _("cannot remove corrupt file: %s"),
                                      path);
                 return -1;
-            } else {
-                return -3;
             }
         }
-
-        virReportError(VIR_ERR_OPERATION_FAILED,
-                       "%s", _("failed to read qemu header"));
-        return -1;
-    }
-
-    if (memcmp(header->magic, QEMU_SAVE_MAGIC, sizeof(header->magic)) != 0) {
-        if (memcmp(header->magic, QEMU_SAVE_PARTIAL, sizeof(header->magic)) == 0) {
-            if (unlink_corrupt) {
-                if (unlink(path) < 0) {
-                    virReportSystemError(errno,
-                                         _("cannot remove corrupt file: %s"),
-                                         path);
-                    return -1;
-                } else {
-                    return -3;
-                }
-            }
-
-            virReportError(VIR_ERR_OPERATION_FAILED, "%s",
-                           _("save image is incomplete"));
-            return -1;
-        }
-
-        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
-                       _("image magic is incorrect"));
-        return -1;
-    }
-
-    if (header->version > QEMU_SAVE_VERSION) {
-        /* convert endianness and try again */
-        qemuSaveImageBswapHeader(header);
-    }
-
-    if (header->version > QEMU_SAVE_VERSION) {
-        virReportError(VIR_ERR_OPERATION_FAILED,
-                       _("image version is not supported (%d > %d)"),
-                       header->version, QEMU_SAVE_VERSION);
-        return -1;
-    }
-
-    if (header->data_len <= 0) {
-        virReportError(VIR_ERR_OPERATION_FAILED,
-                       _("invalid header data length: %d"), header->data_len);
-        return -1;
-    }
-
-    if (header->cookieOffset)
-        xml_len = header->cookieOffset;
-    else
-        xml_len = header->data_len;
-
-    cookie_len = header->data_len - xml_len;
-
-    data->xml = g_new0(char, xml_len);
-
-    if (saferead(fd, data->xml, xml_len) != xml_len) {
-        virReportError(VIR_ERR_OPERATION_FAILED,
-                       "%s", _("failed to read domain XML"));
-        return -1;
-    }
-
-    if (cookie_len > 0) {
-        data->cookie = g_new0(char, cookie_len);
-
-        if (saferead(fd, data->cookie, cookie_len) != cookie_len) {
-            virReportError(VIR_ERR_OPERATION_FAILED, "%s",
-                           _("failed to read cookie"));
-            return -1;
-        }
+        return ret;
     }
 
     /* Create a domain from this XML */
diff --git a/src/qemu/qemu_saveimage.h b/src/qemu/qemu_saveimage.h
index 391cd55ed0..f12374b3d7 100644
--- a/src/qemu/qemu_saveimage.h
+++ b/src/qemu/qemu_saveimage.h
@@ -36,14 +36,14 @@ G_STATIC_ASSERT(sizeof(QEMU_SAVE_MAGIC) == sizeof(QEMU_SAVE_PARTIAL));
 
 typedef struct _virQEMUSaveHeader virQEMUSaveHeader;
 struct _virQEMUSaveHeader {
-    char magic[sizeof(QEMU_SAVE_MAGIC)-1];
-    uint32_t version;
-    uint32_t data_len;
-    uint32_t was_running;
-    uint32_t compressed;
-    uint32_t cookieOffset;
-    uint32_t unused[14];
-};
+    char magic[sizeof(QEMU_SAVE_MAGIC)-1]; /*   16 bytes */
+    uint32_t version;                      /*    4 bytes */
+    uint32_t data_len;                     /*    4 bytes */
+    uint32_t was_running;                  /*    4 bytes */
+    uint32_t compressed;                   /*    4 bytes */
+    uint32_t cookieOffset;                 /*    4 bytes */
+    uint32_t unused[14];                   /*   56 bytes */
+} ATTRIBUTE_PACKED;                        /* = 92 bytes */
 
 
 typedef struct _virQEMUSaveData virQEMUSaveData;
@@ -103,6 +103,10 @@ int
 virQEMUSaveDataWrite(virQEMUSaveData *data,
                      int fd,
                      const char *path);
+int
+virQEMUSaveDataRead(virQEMUSaveData *data,
+                    int fd,
+                    const char *path);
 
 virQEMUSaveData *
 virQEMUSaveDataNew(char *domXML,
-- 
2.26.2



More information about the libvir-list mailing list