[libvirt] [PATCH 2/2] qemu: Enable NUMA node tag in pci-root for PPC64

Shivaprasad G Bhat sbhat at linux.vnet.ibm.com
Tue Nov 8 12:05:38 UTC 2016


This patch addresses the same aspects on PPC that bug 1103314 addressed
on x86.

A PCI expander bus creates multiple primary PCI busses, each of which
can be assigned a specific NUMA affinity; on x86, that affinity is
advertised through ACPI on a per-bus basis.

For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. It can only be exposed on a per-PHB/per-domain
basis.

So, enable the NUMA node tag in the pci-root controller on PPC.

Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
---
 docs/formatdomain.html.in                          |    5 ++
 src/qemu/qemu_command.c                            |   25 +++++++++++-
 src/qemu/qemu_domain.c                             |   15 ++++---
 ...emuxml2argv-spapr-pci-hos-bridge-numa-node.args |   26 ++++++++++++
 ...qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml |   43 ++++++++++++++++++++
 tests/qemuxml2argvtest.c                           |    2 +
 6 files changed, 109 insertions(+), 7 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 11b3330..ea45146 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3492,6 +3492,11 @@
         part of the specified NUMA node (it is up to the user of the
         libvirt API to attach host devices to the correct
         pci-expander-bus when assigning them to the domain).
+        On PPC64, the PCI devices can be specified to be part of a NUMA
+        node using only the pci-root controller with an optional
+        <code>&lt;node&gt;</code> subelement within the
+        <code>&lt;target&gt;</code> subelement. All the PCI devices of
+        the guest will be part of the specified NUMA node.
       </dd>
     </dl>
     <p>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 9adf0fe..ec794f0 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3055,6 +3055,25 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
     return NULL;
 }
 
+static int qemuBuildSPAPRGlobalPCIRootNodeCommandLine(virCommandPtr cmd,
+                                                      virDomainControllerDefPtr def,
+                                                      virQEMUCapsPtr qemuCaps)
+{
+    if (def->opts.pciopts.numaNode != -1) {
+        if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("the numa_node option with spapr-pci-host-bridge controller "
+                             "is not supported in this QEMU binary"));
+            return -1;
+        }
+        virCommandAddArg(cmd, "-global");
+        virCommandAddArgFormat(cmd, "spapr-pci-host-bridge.numa_node=%d",
+                               def->opts.pciopts.numaNode);
+    }
+
+    return 0;
+}
+
 
 static int
 qemuBuildControllerDevCommandLine(virCommandPtr cmd,
@@ -3107,8 +3126,12 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
             /* skip pci-root/pcie-root */
             if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI &&
                 (cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT ||
-                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT))
+                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) {
+                if (ARCH_IS_PPC64(def->os.arch))
+                    if (qemuBuildSPAPRGlobalPCIRootNodeCommandLine(cmd, cont, qemuCaps) < 0)
+                        return -1;
                 continue;
+            }
 
             /* first SATA controller on Q35 machines is implicit */
             if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SATA &&
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 8cba755..b5f89a6 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3058,12 +3058,14 @@ qemuDomainDeviceDefPostParse(virDomainDeviceDefPtr dev,
             /* if a PCI expander bus has a NUMA node set, make sure
              * that NUMA node is configured in the guest <cpu><numa>
              * array. NUMA cell id's in this array are numbered
-             * from 0 .. size-1.
+             * from 0 .. size-1. On PPC (pSeries), do the same if the
+             * pci-root has a NUMA node set.
              */
-            if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
-                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
-                (int) virDomainNumaGetNodeCount(def->numa)
-                <= cont->opts.pciopts.numaNode) {
+            if (((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
+                  cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) ||
+                 (qemuDomainMachineIsPSeries(def) &&
+                  cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT)) &&
+                (int) virDomainNumaGetNodeCount(def->numa) <= cont->opts.pciopts.numaNode) {
                 virReportError(VIR_ERR_XML_ERROR,
                                _("%s with index %d is "
                                  "configured for a NUMA node (%d) "
@@ -3814,7 +3816,8 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver,
         }
 
         if (pci && pci->idx == 0 &&
-            pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
+            pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT &&
+            pci->opts.pciopts.numaNode == -1) {
             VIR_DEBUG("Removing default pci-root from domain '%s'"
                       " for migration compatibility", def->name);
             toremove++;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
new file mode 100644
index 0000000..7b70cb6
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
@@ -0,0 +1,26 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=3,cores=1,threads=8 \
+-numa node,nodeid=0,cpus=0-3,mem=1024 \
+-numa node,nodeid=1,cpus=4-7,mem=1024 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \
+-no-acpi \
+-boot c \
+-global spapr-pci-host-bridge.numa_node=1 \
+-device spapr-vscsi,id=scsi0,reg=0x2000 \
+-usb \
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-scsi0-0-0-0 \
+-device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,\
+drive=drive-scsi0-0-0-0,id=scsi0-0-0-0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
new file mode 100644
index 0000000..4dcd68b
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
@@ -0,0 +1,43 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+  <memory unit='KiB'>2097152</memory>
+  <currentMemory unit='MiB'>2048</currentMemory>
+  <vcpu placement='static'>8</vcpu>
+  <numatune>
+    <memory mode='strict' nodeset='1'/>
+  </numatune>
+  <cpu>
+    <topology sockets='3' cores='1' threads='8'/>
+    <numa>
+      <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <os>
+    <type arch='ppc64' machine='pseries'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-ppc64</emulator>
+    <disk type='block' device='disk'>
+      <driver name='qemu' type='raw'/>
+      <source dev='/dev/HostVG/QEMUGuest1'/>
+      <target dev='hda' bus='scsi'/>
+      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
+    </disk>
+    <controller type='usb' index='0'/>
+    <controller type='scsi' index='0'/>
+    <controller type='pci' index='0' model='pci-root'>
+      <target>
+        <node>1</node>
+      </target>
+    </controller>
+    <memballoon model='none'/>
+    <panic model='pseries'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index d025930..8a5b96a 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2219,6 +2219,8 @@ mymain(void)
             QEMU_CAPS_DEVICE_DMI_TO_PCI_BRIDGE, QEMU_CAPS_MACHINE_IOMMU);
 
     DO_TEST("cpu-hotplug-startup", QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS);
+    DO_TEST("spapr-pci-hos-bridge-numa-node", QEMU_CAPS_NUMA,
+            QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
 
     qemuTestDriverFree(&driver);
 




More information about the libvir-list mailing list