<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Jan 6, 2021 at 3:17 PM Luyao Zhong &lt;<a href="mailto:luyao.zhong@intel.com">luyao.zhong@intel.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Reviewed-by: Daniel Henrique Barboza &lt;<a href="mailto:danielhb413@gmail.com" target="_blank">danielhb413@gmail.com</a>&gt;<br>
Signed-off-by: Luyao Zhong &lt;<a href="mailto:luyao.zhong@intel.com" target="_blank">luyao.zhong@intel.com</a>&gt;<br>
---<br>
 include/libvirt/libvirt-domain.h              |  1 +<br>
 src/conf/numa_conf.c                          |  9 +++++<br>
 src/qemu/qemu_command.c                       |  6 ++-<br>
 src/qemu/qemu_process.c                       | 27 +++++++++++++<br>
 src/util/virnuma.c                            |  3 ++<br>
 .../numatune-memnode-invalid-mode.err         |  1 +<br>
 .../numatune-memnode-invalid-mode.xml         | 33 +++++++++++++++<br>
 ...emnode-restrictive-mode.x86_64-latest.args | 40 +++++++++++++++++++<br>
 .../numatune-memnode-restrictive-mode.xml     | 33 +++++++++++++++<br>
 tests/qemuxml2argvtest.c                      |  2 +<br>
 ...memnode-restrictive-mode.x86_64-latest.xml | 40 +++++++++++++++++++<br>
 tests/qemuxml2xmltest.c                       |  1 +<br>
 12 files changed, 195 insertions(+), 1 deletion(-)<br>
 create mode 100644 tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err<br>
 create mode 100644 tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml<br>
 create mode 100644 tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args<br>
 create mode 100644 tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml<br>
 create mode 100644 tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml<br>
<br>
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h<br>
index de2456812c..eabb3c091b 100644<br>
--- a/include/libvirt/libvirt-domain.h<br>
+++ b/include/libvirt/libvirt-domain.h<br>
@@ -1527,6 +1527,7 @@ typedef enum {<br>
     VIR_DOMAIN_NUMATUNE_MEM_STRICT      = 0,<br>
     VIR_DOMAIN_NUMATUNE_MEM_PREFERRED   = 1,<br>
     VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE  = 2,<br>
+    VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE = 3,<br>
<br>
 # ifdef VIR_ENUM_SENTINELS<br>
     VIR_DOMAIN_NUMATUNE_MEM_LAST /* This constant is subject to change */<br>
diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c<br>
index f8a7a01ac9..df888a8dfb 100644<br>
--- a/src/conf/numa_conf.c<br>
+++ b/src/conf/numa_conf.c<br>
@@ -43,6 +43,7 @@ VIR_ENUM_IMPL(virDomainNumatuneMemMode,<br>
               "strict",<br>
               "preferred",<br>
               "interleave",<br>
+              "restrictive",<br>
 );<br>
<br>
 VIR_ENUM_IMPL(virDomainNumatunePlacement,<br>
@@ -234,6 +235,14 @@ virDomainNumatuneNodeParseXML(virDomainNumaPtr numa,<br>
                                _("Invalid mode attribute in memnode element"));<br>
                 goto cleanup;<br>
             }<br>
+<br>
+            if (numa->memory.mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE &&<br>
+                mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {<br>
+                virReportError(VIR_ERR_XML_ERROR, "%s",<br>
+                               _("'restrictive' mode is required in memnode element "<br>
+                                 "when mode is 'restrictive' in memory element"));<br>
+                goto cleanup;<br>
+            }<br>
             VIR_FREE(tmp);<br>
             mem_node->mode = mode;<br>
         }<br>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c<br>
index b06a086e18..9bf2cc8ae8 100644<br>
--- a/src/qemu/qemu_command.c<br>
+++ b/src/qemu/qemu_command.c<br>
@@ -174,6 +174,7 @@ VIR_ENUM_IMPL(qemuNumaPolicy,<br>
               "bind",<br>
               "preferred",<br>
               "interleave",<br>
+              "restricted",<br>
 );<br>
<br>
<br>
@@ -3159,7 +3160,10 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,<br>
             return -1;<br>
     }<br>
<br>
-    if (nodemask) {<br>
+    /* If mode is "restrictive", we should only use cgroups setting allowed memory<br>
+     * nodes, and skip passing the host-nodes and policy parameters to QEMU command<br>
+     * line which means we will use system default memory policy. */<br>
+    if (nodemask && mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {<br>
         if (!virNumaNodesetIsAvailable(nodemask))<br>
             return -1;<br>
         if (virJSONValueObjectAdd(props,<br>
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c<br>
index e7421b415f..0080985dd7 100644<br>
--- a/src/qemu/qemu_process.c<br>
+++ b/src/qemu/qemu_process.c<br>
@@ -2713,6 +2713,7 @@ qemuProcessSetupPid(virDomainObjPtr vm,<br>
     g_autoptr(virBitmap) hostcpumap = NULL;<br>
     g_autofree char *mem_mask = NULL;<br>
     int ret = -1;<br>
+    size_t i;<br>
<br>
     if ((period || quota) &&<br>
         !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {<br>
@@ -2753,6 +2754,32 @@ qemuProcessSetupPid(virDomainObjPtr vm,<br>
                                                 &mem_mask, -1) < 0)<br>
             goto cleanup;<br>
<br>
+        /* For vCPU threads, mem_mask is different among cells and mem_mask<br>
+         * is used to set cgroups cpuset.mems for vcpu threads. If we specify<br>
+         * 'restrictive' mode, that means we will set system default memory<br>
+         * policy and only use cgroups to restrict allowed memory nodes. */<br>
+        if (nameval == VIR_CGROUP_THREAD_VCPU) {<br>
+            virDomainNumaPtr numatune = vm->def->numa;<br>
+            virBitmapPtr numanode_cpumask = NULL;<br>
+            for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) {<br>
+                numanode_cpumask = virDomainNumaGetNodeCpumask(numatune, i);<br>
+                /* 'i' indicates the cell id, if the vCPU id is in this cell<br>
+                 * and mode is 'restrictive', we need get the corresponding<br>
+                 * nodeset. */<br>
+                if (virBitmapIsBitSet(numanode_cpumask, id) &&<br>
+                    virDomainNumatuneGetMode(numatune, i, &mem_mode) == 0 &&<br>
+                    mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {<br>
+                    if (virDomainNumatuneMaybeFormatNodeset(numatune,<br>
+                                                            priv->autoNodeset,<br>
+                                                            &mem_mask, i) < 0) {<br>
+                        goto cleanup;<br>
+                    } else {<br>
+                        break;<br>
+                    }<br>
+                }<br>
+            }<br>
+        }<br>
+<br>
         if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)<br>
             goto cleanup;<br>
<br>
diff --git a/src/util/virnuma.c b/src/util/virnuma.c<br>
index a05e4ac72c..ef912492c6 100644<br>
--- a/src/util/virnuma.c<br>
+++ b/src/util/virnuma.c<br>
@@ -152,6 +152,9 @@ virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode,<br>
         numa_set_interleave_mask(&mask);<br>
         break;<br>
<br>
+    case VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE:<br>
+        break;<br>
+<br>
     case VIR_DOMAIN_NUMATUNE_MEM_LAST:<br>
         break;<br>
     }<br>
diff --git a/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err<br>
new file mode 100644<br>
index 0000000000..180e64d1d8<br>
--- /dev/null<br>
+++ b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err<br>
@@ -0,0 +1 @@<br>
+XML error: 'restrictive' mode is required in memnode element when mode is 'restrictive' in memory element<br>
diff --git a/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml<br>
new file mode 100644<br>
index 0000000000..a7c18d4d50<br>
--- /dev/null<br>
+++ b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml<br>
@@ -0,0 +1,33 @@<br>
+&lt;domain type='qemu'&gt;<br>
+  &lt;name&gt;QEMUGuest&lt;/name&gt;<br>
+  &lt;uuid&gt;9f4b6512-e73a-4a25-93e8-5307802821ce&lt;/uuid&gt;<br>
+  &lt;memory unit='KiB'&gt;24682468&lt;/memory&gt;<br>
+  &lt;currentMemory unit='KiB'&gt;24682468&lt;/currentMemory&gt;<br>
+  &lt;vcpu placement='static'&gt;32&lt;/vcpu&gt;<br>
+  &lt;numatune&gt;<br>
+    &lt;memory mode='restrictive' nodeset='0-7'/&gt;<br>
+    &lt;memnode cellid='0' mode='restrictive' nodeset='3'/&gt;<br>
+    &lt;memnode cellid='2' mode='strict' nodeset='1-2,5-7,^6'/&gt;<br>
+  &lt;/numatune&gt;<br>
+  &lt;os&gt;<br>
+    &lt;type arch='x86_64' machine='pc'&gt;hvm&lt;/type&gt;<br>
+    &lt;boot dev='hd'/&gt;<br>
+  &lt;/os&gt;<br>
+  &lt;cpu&gt;<br>
+    &lt;numa&gt;<br>
+      &lt;cell id='0' cpus='0' memory='20002' unit='KiB'/&gt;<br>
+      &lt;cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/&gt;<br>
+      &lt;cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/&gt;<br>
+    &lt;/numa&gt;<br>
+  &lt;/cpu&gt;<br>
+  &lt;clock offset='utc'/&gt;<br>
+  &lt;on_poweroff&gt;destroy&lt;/on_poweroff&gt;<br>
+  &lt;on_reboot&gt;restart&lt;/on_reboot&gt;<br>
+  &lt;on_crash&gt;destroy&lt;/on_crash&gt;<br>
+  &lt;devices&gt;<br>
+    &lt;emulator&gt;/usr/bin/qemu-system-x86_64&lt;/emulator&gt;<br>
+    &lt;controller type='usb' index='0'/&gt;<br>
+    &lt;controller type='pci' index='0' model='pci-root'/&gt;<br>
+    &lt;memballoon model='virtio'/&gt;<br>
+  &lt;/devices&gt;<br>
+&lt;/domain&gt;<br>
diff --git a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args<br>
new file mode 100644<br>
index 0000000000..1f15c4396e<br>
--- /dev/null<br>
+++ b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args<br>
@@ -0,0 +1,40 @@<br>
+LC_ALL=C \<br>
+PATH=/bin \<br>
+HOME=/tmp/lib/domain--1-QEMUGuest \<br>
+USER=test \<br>
+LOGNAME=test \<br>
+XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest/.local/share \<br>
+XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest/.cache \<br>
+XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \<br>
+QEMU_AUDIO_DRV=none \<br>
+/usr/bin/qemu-system-x86_64 \<br>
+-name guest=QEMUGuest,debug-threads=on \<br>
+-S \<br>
+-object secret,id=masterKey0,format=raw,\<br>
+file=/tmp/lib/domain--1-QEMUGuest/master-key.aes \<br>
+-machine pc,accel=tcg,usb=off,dump-guest-core=off \<br>
+-cpu qemu64 \<br>
+-m 24105 \<br>
+-overcommit mem-lock=off \<br>
+-smp 32,sockets=32,cores=1,threads=1 \<br>
+-object memory-backend-ram,id=ram-node0,size=20971520 \<br>
+-numa node,nodeid=0,cpus=0,memdev=ram-node0 \<br>
+-object memory-backend-ram,id=ram-node1,size=676331520 \<br>
+-numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \<br>
+-object memory-backend-ram,id=ram-node2,size=24578621440 \<br>
+-numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \<br>
+-uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \<br>
+-display none \<br>
+-no-user-config \<br>
+-nodefaults \<br>
+-chardev socket,id=charmonitor,fd=1729,server,nowait \<br>
+-mon chardev=charmonitor,id=monitor,mode=control \<br>
+-rtc base=utc \<br>
+-no-shutdown \<br>
+-no-acpi \<br>
+-boot strict=on \<br>
+-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \<br>
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \<br>
+-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\<br>
+resourcecontrol=deny \<br>
+-msg timestamp=on<br>
diff --git a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml<br>
new file mode 100644<br>
index 0000000000..72949b0657<br>
--- /dev/null<br>
+++ b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml<br>
@@ -0,0 +1,33 @@<br>
+&lt;domain type='qemu'&gt;<br>
+  &lt;name&gt;QEMUGuest&lt;/name&gt;<br>
+  &lt;uuid&gt;9f4b6512-e73a-4a25-93e8-5307802821ce&lt;/uuid&gt;<br>
+  &lt;memory unit='KiB'&gt;24682468&lt;/memory&gt;<br>
+  &lt;currentMemory unit='KiB'&gt;24682468&lt;/currentMemory&gt;<br>
+  &lt;vcpu placement='static'&gt;32&lt;/vcpu&gt;<br>
+  &lt;numatune&gt;<br>
+    &lt;memnode cellid='0' mode='restrictive' nodeset='3'/&gt;<br>
+    &lt;memory mode='restrictive' nodeset='0-7'/&gt;<br>
+    &lt;memnode cellid='2' mode='restrictive' nodeset='1-2,5-7,^6'/&gt;<br>
+  &lt;/numatune&gt;<br>
+  &lt;os&gt;<br>
+    &lt;type arch='x86_64' machine='pc'&gt;hvm&lt;/type&gt;<br>
+    &lt;boot dev='hd'/&gt;<br>
+  &lt;/os&gt;<br>
+  &lt;cpu&gt;<br>
+    &lt;numa&gt;<br>
+      &lt;cell id='0' cpus='0' memory='20002' unit='KiB'/&gt;<br>
+      &lt;cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/&gt;<br>
+      &lt;cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/&gt;<br>
+    &lt;/numa&gt;<br>
+  &lt;/cpu&gt;<br>
+  &lt;clock offset='utc'/&gt;<br>
+  &lt;on_poweroff&gt;destroy&lt;/on_poweroff&gt;<br>
+  &lt;on_reboot&gt;restart&lt;/on_reboot&gt;<br>
+  &lt;on_crash&gt;destroy&lt;/on_crash&gt;<br>
+  &lt;devices&gt;<br>
+    &lt;emulator&gt;/usr/bin/qemu-system-x86_64&lt;/emulator&gt;<br>
+    &lt;controller type='usb' index='0'/&gt;<br>
+    &lt;controller type='pci' index='0' model='pci-root'/&gt;<br>
+    &lt;memballoon model='virtio'/&gt;<br>
+  &lt;/devices&gt;<br>
+&lt;/domain&gt;<br>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c<br>
index 96a2b95331..e26197a30b 100644<br>
--- a/tests/qemuxml2argvtest.c<br>
+++ b/tests/qemuxml2argvtest.c<br>
@@ -1994,6 +1994,8 @@ mymain(void)<br>
             QEMU_CAPS_NUMA,<br>
             QEMU_CAPS_OBJECT_MEMORY_RAM);<br>
     DO_TEST_PARSE_ERROR("numatune-memnode", NONE);<br>
+    DO_TEST_CAPS_LATEST("numatune-memnode-restrictive-mode");<br>
+    DO_TEST_PARSE_ERROR("numatune-memnode-invalid-mode", NONE);<br>
<br>
     DO_TEST("numatune-memnode-no-memory",<br>
             QEMU_CAPS_NUMA,<br>
diff --git a/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml b/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml<br>
new file mode 100644<br>
index 0000000000..e54b2483b1<br>
--- /dev/null<br>
+++ b/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml<br>
@@ -0,0 +1,40 @@<br>
+&lt;domain type='qemu'&gt;<br>
+  &lt;name&gt;QEMUGuest&lt;/name&gt;<br>
+  &lt;uuid&gt;9f4b6512-e73a-4a25-93e8-5307802821ce&lt;/uuid&gt;<br>
+  &lt;memory unit='KiB'&gt;24682468&lt;/memory&gt;<br>
+  &lt;currentMemory unit='KiB'&gt;24682468&lt;/currentMemory&gt;<br>
+  &lt;vcpu placement='static'&gt;32&lt;/vcpu&gt;<br>
+  &lt;numatune&gt;<br>
+    &lt;memory mode='restrictive' nodeset='0-7'/&gt;<br>
+    &lt;memnode cellid='0' mode='restrictive' nodeset='3'/&gt;<br>
+    &lt;memnode cellid='2' mode='restrictive' nodeset='1-2,5,7'/&gt;<br>
+  &lt;/numatune&gt;<br>
+  &lt;os&gt;<br>
+    &lt;type arch='x86_64' machine='pc'&gt;hvm&lt;/type&gt;<br>
+    &lt;boot dev='hd'/&gt;<br>
+  &lt;/os&gt;<br>
+  &lt;cpu mode='custom' match='exact' check='none'&gt;<br>
+    &lt;model fallback='forbid'&gt;qemu64&lt;/model&gt;<br>
+    &lt;numa&gt;<br>
+      &lt;cell id='0' cpus='0' memory='20002' unit='KiB'/&gt;<br>
+      &lt;cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/&gt;<br>
+      &lt;cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/&gt;<br>
+    &lt;/numa&gt;<br>
+  &lt;/cpu&gt;<br>
+  &lt;clock offset='utc'/&gt;<br>
+  &lt;on_poweroff&gt;destroy&lt;/on_poweroff&gt;<br>
+  &lt;on_reboot&gt;restart&lt;/on_reboot&gt;<br>
+  &lt;on_crash&gt;destroy&lt;/on_crash&gt;<br>
+  &lt;devices&gt;<br>
+    &lt;emulator&gt;/usr/bin/qemu-system-x86_64&lt;/emulator&gt;<br>
+    &lt;controller type='usb' index='0' model='piix3-uhci'&gt;<br>
+      &lt;address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/&gt;<br>
+    &lt;/controller&gt;<br>
+    &lt;controller type='pci' index='0' model='pci-root'/&gt;<br>
+    &lt;input type='mouse' bus='ps2'/&gt;<br>
+    &lt;input type='keyboard' bus='ps2'/&gt;<br>
+    &lt;memballoon model='virtio'&gt;<br>
+      &lt;address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/&gt;<br>
+    &lt;/memballoon&gt;<br>
+  &lt;/devices&gt;<br>
+&lt;/domain&gt;<br>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c<br>
index 1968be6782..dea11c2e5f 100644<br>
--- a/tests/qemuxml2xmltest.c<br>
+++ b/tests/qemuxml2xmltest.c<br>
@@ -1098,6 +1098,7 @@ mymain(void)<br>
     DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST);<br>
     DO_TEST("numatune-no-vcpu", QEMU_CAPS_NUMA);<br>
     DO_TEST("numatune-hmat", QEMU_CAPS_NUMA_HMAT, QEMU_CAPS_OBJECT_MEMORY_RAM);<br>
+    DO_TEST_CAPS_LATEST("numatune-memnode-restrictive-mode");<br>
<br>
     DO_TEST("bios-nvram", NONE);<br>
     DO_TEST("bios-nvram-os-interleave", NONE);<br>
-- <br>
2.25.4<br>
<br></blockquote><div>Test results from Meina Li:</div><div>Tested on v6.10.0-333-gc9e55f92fd and qemu-kvm-5.2.0-0.7.rc2.fc34.x86_64<br>Basic test scenarios:<br>1) Start a guest with the numatune memory mode set to 'restrictive' and check numastat<br>2) [Negative] Define a guest with a strict/interleave/preferred memnode mode while the memory element mode is 'restrictive'<br>Test result: passed<br>Tested-by: Meina Li &lt;<a href="mailto:meili@redhat.com">meili@redhat.com</a>&gt; </div></div></div>