[libvirt] [RFC v2 02/16] qemu: add memfd memory backing

marcandre.lureau at redhat.com marcandre.lureau at redhat.com
Tue Aug 28 21:39:20 UTC 2018


From: Marc-André Lureau <marcandre.lureau at redhat.com>

Add a new memoryBacking source type "memfd", supported by QEMU (when the
capability is available).

Sealing is enabled by default in QEMU, and hugepages are easier to
set up, which often makes it a better choice than memory-backend-file.

Signed-off-by: Marc-André Lureau <marcandre.lureau at redhat.com>
---
 docs/formatdomain.html.in                     |  8 ++-
 docs/schemas/domaincommon.rng                 |  1 +
 src/conf/domain_conf.c                        |  3 +-
 src/conf/domain_conf.h                        |  1 +
 src/qemu/qemu_command.c                       | 62 +++++++++++++------
 tests/qemuxml2argvdata/memfd-memory-numa.args | 27 ++++++++
 tests/qemuxml2argvdata/memfd-memory-numa.xml  | 33 ++++++++++
 tests/qemuxml2argvtest.c                      |  3 +
 8 files changed, 116 insertions(+), 22 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
 create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index eb619a1656..ca656c9f7e 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -1099,7 +1099,7 @@
     </hugepages>
     <nosharepages/>
     <locked/>
-    <source type="file|anonymous"/>
+    <source type="file|anonymous|memfd"/>
     <access mode="shared|private"/>
     <allocation mode="immediate|ondemand"/>
     <discard/>
@@ -1150,8 +1150,10 @@
         suitable for the specific environment at the same time to mitigate
         the risks described above. <span class="since">Since 1.0.6</span></dd>
        <dt><code>source</code></dt>
-       <dd>In this attribute you can switch to file memorybacking or keep
-         default anonymous.</dd>
+       <dd>In this attribute you can switch to file memorybacking or
+       keep default anonymous. <span class="since">Since 4.8.0</span>,
+       you may choose <code>memfd</code> backing. (QEMU/KVM only)
+       </dd>
        <dt><code>access</code></dt>
        <dd>Specify if memory is shared or private. This can be overridden per
          numa node by <code>memAccess</code></dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 3796eb4b5e..04d7b69dd7 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -655,6 +655,7 @@
                   <choice>
                     <value>file</value>
                     <value>anonymous</value>
+                    <value>memfd</value>
                   </choice>
                 </attribute>
               </element>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 38cac07913..b3a1158e75 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -908,7 +908,8 @@ VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST,
 VIR_ENUM_IMPL(virDomainMemorySource, VIR_DOMAIN_MEMORY_SOURCE_LAST,
               "none",
               "file",
-              "anonymous")
+              "anonymous",
+	      "memfd")
 
 VIR_ENUM_IMPL(virDomainMemoryAllocation, VIR_DOMAIN_MEMORY_ALLOCATION_LAST,
               "none",
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 8a3673361a..e72b824226 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -607,6 +607,7 @@ typedef enum {
     VIR_DOMAIN_MEMORY_SOURCE_NONE = 0,  /* No memory source defined */
     VIR_DOMAIN_MEMORY_SOURCE_FILE,      /* Memory source is set as file */
     VIR_DOMAIN_MEMORY_SOURCE_ANONYMOUS, /* Memory source is set as anonymous */
+    VIR_DOMAIN_MEMORY_SOURCE_MEMFD,     /* Memory source is set as memfd */
 
     VIR_DOMAIN_MEMORY_SOURCE_LAST,
 } virDomainMemorySource;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 8aa20496bc..830695a147 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3100,6 +3100,24 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
     return ret;
 }
 
+static int
+qemuBuildMemoryBackendPropsShare(virJSONValuePtr props,
+                                 virDomainMemoryAccess memAccess)
+{
+    switch (memAccess) {
+    case VIR_DOMAIN_MEMORY_ACCESS_SHARED:
+        return virJSONValueObjectAdd(props, "b:share", true, NULL);
+
+    case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE:
+        return virJSONValueObjectAdd(props, "b:share", false, NULL);
+
+    case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT:
+    case VIR_DOMAIN_MEMORY_ACCESS_LAST:
+        break;
+    }
+
+    return 0;
+}
 
 /**
  * qemuBuildMemoryBackendProps:
@@ -3246,7 +3264,18 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
     if (!(props = virJSONValueNewObject()))
         return -1;
 
-    if (useHugepage || mem->nvdimmPath || memAccess ||
+    if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD) {
+        backendType = "memory-backend-memfd";
+
+        if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0) {
+            goto cleanup;
+        }
+        if (useHugepage &&
+            (virJSONValueObjectAdd(props, "b:hugetlb", useHugepage, NULL) < 0 ||
+             virJSONValueObjectAdd(props, "U:hugetlbsize", pagesize << 10, NULL) < 0)) {
+            goto cleanup;
+        }
+    } else if (useHugepage || mem->nvdimmPath || memAccess ||
         def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
 
         if (mem->nvdimmPath) {
@@ -3284,20 +3313,8 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
                 goto cleanup;
         }
 
-        switch (memAccess) {
-        case VIR_DOMAIN_MEMORY_ACCESS_SHARED:
-            if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0)
-                goto cleanup;
-            break;
-
-        case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE:
-            if (virJSONValueObjectAdd(props, "b:share", false, NULL) < 0)
-                goto cleanup;
-            break;
-
-        case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT:
-        case VIR_DOMAIN_MEMORY_ACCESS_LAST:
-            break;
+        if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0) {
+            goto cleanup;
         }
     } else {
         backendType = "memory-backend-ram";
@@ -3346,6 +3363,12 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
                            _("this qemu doesn't support the "
                              "memory-backend-ram object"));
             goto cleanup;
+        } else if (STREQ(backendType, "memory-backend-memfd") &&
+                   !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("this qemu doesn't support the "
+                             "memory-backend-memfd object"));
+            goto cleanup;
         }
 
         ret = 0;
@@ -7650,7 +7673,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
 
     if (virDomainNumatuneHasPerNodeBinding(def->numa) &&
         !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
-          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE))) {
+          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
+          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD))) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                        _("Per-node memory binding is not supported "
                          "with this QEMU"));
@@ -7659,7 +7683,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
 
     if (def->mem.nhugepages &&
         def->mem.hugepages[0].size != system_page_size &&
-        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+        !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
+          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD))) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                        _("huge pages per NUMA node are not "
                          "supported with this QEMU"));
@@ -7676,7 +7701,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
      * need to check which approach to use */
     for (i = 0; i < ncells; i++) {
         if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
-            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
+            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) {
 
             if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv,
                                                     &nodeBackends[i])) < 0)
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
new file mode 100644
index 0000000000..04b28a8085
--- /dev/null
+++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
@@ -0,0 +1,27 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name instance-00000092 \
+-S \
+-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
+-m 14336 \
+-mem-prealloc \
+-smp 20,sockets=1,cores=8,threads=1 \
+-object memory-backend-memfd,id=ram-node0,share=yes,hugetlb=yes,hugetlbsize=2097152,size=15032385536 \
+-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
+-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,\
+path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-usb \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
new file mode 100644
index 0000000000..868e3c765f
--- /dev/null
+++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
@@ -0,0 +1,33 @@
+  <domain type='kvm' id='56'>
+    <name>instance-00000092</name>
+    <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
+    <memory unit='KiB'>14680064</memory>
+    <currentMemory unit='KiB'>14680064</currentMemory>
+    <memoryBacking>
+      <hugepages>
+          <page size="2" unit="M"/>
+      </hugepages>
+      <source type='memfd'/>
+      <access mode='shared'/>
+      <allocation mode='immediate'/>
+    </memoryBacking>
+    <vcpu placement='static'>20</vcpu>
+    <os>
+      <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
+      <boot dev='hd'/>
+    </os>
+    <cpu>
+      <topology sockets='1' cores='8' threads='1'/>
+      <numa>
+        <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
+      </numa>
+    </cpu>
+    <clock offset='utc'/>
+    <on_poweroff>destroy</on_poweroff>
+    <on_reboot>restart</on_reboot>
+    <on_crash>destroy</on_crash>
+    <devices>
+      <emulator>/usr/bin/qemu-system-x86_64</emulator>
+      <memballoon model='virtio'/>
+    </devices>
+  </domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 16c3540798..84edbe7230 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2921,6 +2921,9 @@ mymain(void)
     DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
             QEMU_CAPS_KVM);
 
+    DO_TEST("memfd-memory-numa", QEMU_CAPS_OBJECT_MEMORY_MEMFD,
+            QEMU_CAPS_KVM);
+
     DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
     DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
     DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
-- 
2.19.0.rc0.48.gb9dfa238d5




More information about the libvir-list mailing list