[libvirt] [PATCH 2/6] conf, schema: add support for numatune memnode element

Martin Kletzander mkletzan at redhat.com
Wed Jun 4 14:56:28 UTC 2014


This element specifies settings similar to those of the memory element,
but memnode can be applied separately to each guest NUMA node.

Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
---
 docs/formatdomain.html.in     |  15 +++
 docs/schemas/domaincommon.rng |  17 ++++
 src/conf/domain_conf.c        | 220 +++++++++++++++++++++++++++++++++++-------
 src/qemu/qemu_domain.c        |  23 ++++-
 src/qemu/qemu_driver.c        |  11 +++
 src/util/virnuma.h            |  14 ++-
 6 files changed, 260 insertions(+), 40 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 041f70d..2d855ea 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -709,6 +709,8 @@
   ...
   <numatune>
     <memory mode="strict" nodeset="1-4,^3"/>
+    <memnode cellid="0" mode="strict" nodeset="1"/>
+    <memnode cellid="2" mode="preferred" nodeset="2"/>
   </numatune>
   ...
 </domain>
@@ -745,6 +747,19 @@

         <span class='since'>Since 0.9.3</span>
       </dd>
+      <dt><code>memnode</code></dt>
+      <dd>
+        Optional <code>memnode</code> elements can specify memory allocation
+        policies for each guest NUMA node.  For nodes that have no
+        corresponding <code>memnode</code> element, the default from the
+        <code>memory</code> element will be used.  Attribute <code>cellid</code>
+        identifies the guest NUMA node to which the settings apply.
+        Attributes <code>mode</code> and <code>nodeset</code> have the same
+        meaning and syntax as in the <code>memory</code> element.
+
+        This setting is not compatible with automatic placement.
+        <span class='since'>QEMU Since 1.2.6</span>
+      </dd>
     </dl>


diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 0787b5a..a8e3ba0 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -789,6 +789,23 @@
               </choice>
         </element>
       </optional>
+      <zeroOrMore>
+        <element name="memnode">
+          <attribute name="cellid">
+            <ref name="unsignedInt"/>
+          </attribute>
+          <attribute name="mode">
+            <choice>
+              <value>strict</value>
+              <value>preferred</value>
+              <value>interleave</value>
+            </choice>
+          </attribute>
+          <attribute name='nodeset'>
+            <ref name='cpuset'/>
+          </attribute>
+        </element>
+      </zeroOrMore>
     </element>
   </define>

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index fe06921..352ba92 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2085,6 +2085,9 @@ void virDomainDefFree(virDomainDefPtr def)
     virDomainVcpuPinDefFree(def->cputune.emulatorpin);

     virBitmapFree(def->numatune.memory.nodemask);
+    for (i = 0; i < def->numatune.nmem_nodes; i++)
+        virBitmapFree(def->numatune.mem_nodes[i].nodemask);
+    VIR_FREE(def->numatune.mem_nodes);

     virSysinfoDefFree(def->sysinfo);

@@ -11232,6 +11235,8 @@ virDomainDefParseXML(xmlDocPtr xml,
     bool usb_other = false;
     bool usb_master = false;
     bool primaryVideo = false;
+    bool mem_nodes = false;
+

     if (VIR_ALLOC(def) < 0)
         return NULL;
@@ -11666,6 +11671,33 @@ virDomainDefParseXML(xmlDocPtr xml,
     }
     VIR_FREE(nodes);

+
+    /* analysis of cpu handling */
+    if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
+        xmlNodePtr oldnode = ctxt->node;
+        ctxt->node = node;
+        def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
+        ctxt->node = oldnode;
+
+        if (def->cpu == NULL)
+            goto error;
+
+        if (def->cpu->sockets &&
+            def->maxvcpus >
+            def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
+            virReportError(VIR_ERR_XML_DETAIL, "%s",
+                           _("Maximum CPUs greater than topology limit"));
+            goto error;
+        }
+
+        if (def->cpu->cells_cpus > def->maxvcpus) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("Number of CPUs in <numa> exceeds the"
+                             " <vcpu> count"));
+            goto error;
+        }
+    }
+
     /* Extract numatune if exists. */
     if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -11682,6 +11714,12 @@ virDomainDefParseXML(xmlDocPtr xml,

     if (n) {
         cur = nodes[0]->children;
+        if (def->cpu) {
+            if (VIR_ALLOC_N(def->numatune.mem_nodes, def->cpu->ncells) < 0)
+                goto error;
+            def->numatune.nmem_nodes = def->cpu->ncells;
+        }
+
         while (cur != NULL) {
             if (cur->type == XML_ELEMENT_NODE) {
                 if (xmlStrEqual(cur->name, BAD_CAST "memory")) {
@@ -11764,6 +11802,80 @@ virDomainDefParseXML(xmlDocPtr xml,
                         def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO;

                     def->numatune.memory.placement_mode = placement_mode;
+
+                } else if (xmlStrEqual(cur->name, BAD_CAST "memnode")) {
+                    unsigned int cellid;
+                    struct mem_node *mem_node = NULL;
+
+                    if (!def->numatune.nmem_nodes) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Element 'memnode' is invalid without "
+                                         "any guest NUMA cells"));
+                        goto error;
+                    }
+                    tmp = virXMLPropString(cur, "cellid");
+                    if (!tmp) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Missing required cellid attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    if (virStrToLong_ui(tmp, NULL, 10, &cellid) < 0) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Invalid cellid attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    VIR_FREE(tmp);
+
+                    if (cellid >= def->numatune.nmem_nodes) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Argument 'cellid' in numatune "
+                                         "memnode element must correspond to "
+                                         "existing guest's NUMA cell"));
+                        goto error;
+                    }
+
+                    mem_node = &def->numatune.mem_nodes[cellid];
+
+                    if (mem_node->specified) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Multiple numatune memnode elements "
+                                         "with duplicate 'cellid'"));
+                        goto error;
+                    }
+
+                    mem_node->specified = true;
+                    mem_nodes = true;
+
+                    tmp = virXMLPropString(cur, "mode");
+                    if (!tmp) {
+                        mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
+                    } else {
+                        if ((mem_node->mode =
+                             virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
+                            virReportError(VIR_ERR_XML_ERROR, "%s",
+                                           _("Invalid mode attribute "
+                                             "in numatune memnode element"));
+                            goto error;
+                        }
+                        VIR_FREE(tmp);
+                    }
+
+                    tmp = virXMLPropString(cur, "nodeset");
+                    if (!tmp) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Missing required nodeset attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    if (virBitmapParse(tmp, 0,
+                                       &mem_node->nodemask,
+                                       VIR_DOMAIN_CPUMASK_LEN) < 0) {
+                        goto error;
+                    }
+                    VIR_FREE(tmp);
+
                 } else {
                     virReportError(VIR_ERR_XML_ERROR,
                                    _("unsupported XML element %s"),
@@ -11784,6 +11896,42 @@ virDomainDefParseXML(xmlDocPtr xml,
     }
     VIR_FREE(nodes);

+    if (def->numatune.nmem_nodes &&
+        def->numatune.memory.placement_mode ==
+        VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
+            virReportError(VIR_ERR_XML_DETAIL, "%s",
+                           _("Per-node binding is not compatible with "
+                             "automatic NUMA placement."));
+            goto error;
+    }
+
+    if (!mem_nodes) {
+        /* If there are no <memnode> settings, free all of this data.
+         * If any driver wants to use it in the future, this cleanup
+         * can be removed.  Until then it's easier to keep it this
+         * way. */
+        for (i = 0; i < def->numatune.nmem_nodes; i++)
+            virBitmapFree(def->numatune.mem_nodes[i].nodemask);
+        VIR_FREE(def->numatune.mem_nodes);
+        def->numatune.nmem_nodes = 0;
+    } else {
+        /* Copy the numatune/memory settings into each unspecified node,
+         * but leave specified == false.  This makes it easier to
+         * determine each node's nodemask. */
+        for (i = 0; i < def->numatune.nmem_nodes; i++) {
+            struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+
+            if (mem_node->specified)
+                continue;
+
+            mem_node->mode = def->numatune.memory.mode;
+            mem_node->nodemask = virBitmapNewCopy(def->numatune.memory.nodemask);
+
+            if (!mem_node->nodemask)
+                goto error;
+        }
+    }
+
     if ((n = virXPathNodeSet("./resource", ctxt, &nodes)) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        "%s", _("cannot extract resource nodes"));
@@ -12863,32 +13011,6 @@ virDomainDefParseXML(xmlDocPtr xml,
             goto error;
     }

-    /* analysis of cpu handling */
-    if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
-        xmlNodePtr oldnode = ctxt->node;
-        ctxt->node = node;
-        def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
-        ctxt->node = oldnode;
-
-        if (def->cpu == NULL)
-            goto error;
-
-        if (def->cpu->sockets &&
-            def->maxvcpus >
-            def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
-            virReportError(VIR_ERR_XML_DETAIL, "%s",
-                           _("Maximum CPUs greater than topology limit"));
-            goto error;
-        }
-
-        if (def->cpu->cells_cpus > def->maxvcpus) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Number of CPUs in <numa> exceeds the"
-                             " <vcpu> count"));
-            goto error;
-        }
-    }
-
     if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) {
         xmlNodePtr oldnode = ctxt->node;
         ctxt->node = node;
@@ -17395,31 +17517,57 @@ virDomainDefFormatInternal(virDomainDefPtr def,
         virBufferAddLit(buf, "</cputune>\n");

     if (def->numatune.memory.nodemask ||
-        def->numatune.memory.placement_mode) {
+        def->numatune.memory.placement_mode ||
+        def->numatune.nmem_nodes) {
         const char *mode;
         char *nodemask = NULL;
         const char *placement;

         virBufferAddLit(buf, "<numatune>\n");
         virBufferAdjustIndent(buf, 2);
-        mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
-        virBufferAsprintf(buf, "<memory mode='%s' ", mode);

-        if (def->numatune.memory.placement_mode ==
-            VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
-            nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+        if (def->numatune.memory.nodemask ||
+            def->numatune.memory.placement_mode) {
+
+            mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
+            virBufferAsprintf(buf, "<memory mode='%s' ", mode);
+
+            if (def->numatune.memory.placement_mode ==
+                VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
+                nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+                if (nodemask == NULL) {
+                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                   _("failed to format nodeset for "
+                                     "NUMA memory tuning"));
+                    goto error;
+                }
+                virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask);
+                VIR_FREE(nodemask);
+            } else if (def->numatune.memory.placement_mode) {
+                placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
+                virBufferAsprintf(buf, "placement='%s'/>\n", placement);
+            }
+        }
+
+        for (i = 0; i < def->numatune.nmem_nodes; i++) {
+            struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+            if (!mem_node->specified)
+                continue;
+
+            nodemask = virBitmapFormat(mem_node->nodemask);
+            mode = virDomainNumatuneMemModeTypeToString(mem_node->mode);
             if (nodemask == NULL) {
                 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                _("failed to format nodeset for "
                                  "NUMA memory tuning"));
                 goto error;
             }
-            virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask);
+            virBufferAsprintf(buf,
+                              "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n",
+                              i, mode, nodemask);
             VIR_FREE(nodemask);
-        } else if (def->numatune.memory.placement_mode) {
-            placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
-            virBufferAsprintf(buf, "placement='%s'/>\n", placement);
         }
+
         virBufferAdjustIndent(buf, -2);
         virBufferAddLit(buf, "</numatune>\n");
     }
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index bbe32a0..99f9c48 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -712,7 +712,28 @@ qemuDomainDefPostParse(virDomainDefPtr def,
     bool addDefaultMemballoon = true;
     bool addDefaultUSBKBD = false;
     bool addDefaultUSBMouse = false;
-
+    size_t i = 0;
+
+    if (def->numatune.memory.nodemask) {
+        for (i = 0; i < def->numatune.nmem_nodes; i++) {
+            struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+            ssize_t pos = -1;
+            bool bit = false;
+
+            if (!mem_node->specified)
+                continue;
+
+            while ((pos = virBitmapNextSetBit(mem_node->nodemask, pos)) >= 0) {
+                if (virBitmapGetBit(def->numatune.memory.nodemask, pos, &bit) < 0 ||
+                    !bit) {
+                    virReportError(VIR_ERR_XML_DETAIL, "%s",
+                                   _("memnode nodeset must be subset of the "
+                                     "global memory nodeset"));
+                    return -1;
+                }
+            }
+        }
+    }
     /* check for emulator and create a default one if needed */
     if (!def->emulator &&
         !(def->emulator = virDomainDefGetDefaultEmulator(def, caps)))
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 3a7622a..545516e 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -8646,6 +8646,7 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
     virCgroupPtr cgroup_temp = NULL;
     virBitmapPtr temp_nodeset = NULL;
     qemuDomainObjPrivatePtr priv = vm->privateData;
+    virDomainDefPtr def = vm->def;
     char *nodeset_str = NULL;
     size_t i = 0;
     int ret = -1;
@@ -8657,6 +8658,16 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
         goto cleanup;
     }

+    for (i = 0; i < def->numatune.nmem_nodes; i++) {
+        if (def->numatune.mem_nodes[i].specified) {
+            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                           _("change of nodeset for running domain "
+                             "with per guest NUMA node numatune settings "
+                             "is not supported"));
+            goto cleanup;
+        }
+    }
+
     /* Get existing nodeset values */
     if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
         virBitmapParse(nodeset_str, 0, &temp_nodeset,
diff --git a/src/util/virnuma.h b/src/util/virnuma.h
index fe1e966..50fa3f8 100644
--- a/src/util/virnuma.h
+++ b/src/util/virnuma.h
@@ -1,7 +1,7 @@
 /*
  * virnuma.h: helper APIs for managing numa
  *
- * Copyright (C) 2011-2013 Red Hat, Inc.
+ * Copyright (C) 2011-2014 Red Hat, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,17 @@ typedef virNumaTuneDef *virNumaTuneDefPtr;
 struct _virNumaTuneDef {
     struct {
         virBitmapPtr nodemask;
-        int mode;
+        int mode;           /* enum virDomainNumatuneMemMode */
         int placement_mode; /* enum virNumaTuneMemPlacementMode */
-    } memory;
+    } memory;               /* pinning for all the memory */
+
+    struct mem_node {
+        bool specified;
+        unsigned int nodeid;
+        virBitmapPtr nodemask;
+        int mode;
+    } *mem_nodes;          /* pinning per guest's NUMA node */
+    size_t nmem_nodes;

     /* Future NUMA tuning related stuff should go here. */
 };
-- 
2.0.0




More information about the libvir-list mailing list