[libvirt] [PATCH v2 4/6] domain: Introduce ./hugepages/page/[@size, @unit, @nodeset]

Michal Privoznik mprivozn at redhat.com
Wed Jul 23 15:37:20 UTC 2014


  <memoryBacking>
    <hugepages>
      <page size="1" unit="G" nodeset="0-3,5"/>
      <page size="2" unit="M" nodeset="4"/>
    </hugepages>
  </memoryBacking>

Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
---
 docs/formatdomain.html.in                          |  18 +-
 docs/schemas/domaincommon.rng                      |  19 +-
 src/conf/domain_conf.c                             | 197 +++++++++++++++++++--
 src/conf/domain_conf.h                             |  13 +-
 src/parallels/parallels_driver.c                   |   2 +-
 src/qemu/qemu_command.c                            |   2 +-
 src/qemu/qemu_process.c                            |   2 +-
 .../qemuxml2argv-hugepages-pages.xml               |  45 +++++
 tests/qemuxml2xmltest.c                            |   1 +
 9 files changed, 277 insertions(+), 22 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 8950959..bce5885 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -617,7 +617,10 @@
 <domain>
   ...
   <memoryBacking>
-    <hugepages/>
+    <hugepages>
+      <page size="1" unit="G" nodeset="0-3,5"/>
+      <page size="2" unit="M" nodeset="4"/>
+    </hugepages>
     <nosharepages/>
     <locked/>
   </memoryBacking>
@@ -632,7 +635,19 @@
     <dl>
       <dt><code>hugepages</code></dt>
       <dd>This tells the hypervisor that the guest should have its memory
-        allocated using hugepages instead of the normal native page size.</dd>
+      allocated using hugepages instead of the normal native page size.
+      <span class='since'>Since 1.2.5</span> it's possible to set hugepages
+      more specifically per NUMA node, using the <code>page</code>
+      element. It has one compulsory attribute <code>size</code> which
+      specifies which hugepages should be used (especially useful on systems
+      supporting hugepages of different sizes). The default unit for the
+      <code>size</code> attribute is kilobytes (multiplier of 1024). If you
+      want to use a different unit, use the optional <code>unit</code> attribute.
+      For systems with NUMA, the optional <code>nodeset</code> attribute may
+      come in handy as it ties the given guest's NUMA nodes to certain hugepage
+      sizes. In the example snippet, one gigabyte hugepages are used for
+      every NUMA node except node number four. For the correct
+      <code>nodeset</code> syntax see <a href="#elementsNUMATuning">this</a>.</dd>
       <dt><code>nosharepages</code></dt>
       <dd>Instructs hypervisor to disable shared pages (memory merge, KSM) for
         this domain. <span class="since">Since 1.0.6</span></dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index f6f697c..cf4cda8 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -567,7 +567,24 @@
           <interleave>
             <optional>
               <element name="hugepages">
-                <empty/>
+                <zeroOrMore>
+                  <element name="page">
+                    <attribute name="size">
+                      <ref name="unsignedLong"/>
+                    </attribute>
+                    <optional>
+                      <attribute name='unit'>
+                        <ref name='unit'/>
+                      </attribute>
+                    </optional>
+                    <optional>
+                      <attribute name="nodeset">
+                        <ref name='cpuset'/>
+                      </attribute>
+                    </optional>
+                    <empty/>
+                  </element>
+                </zeroOrMore>
               </element>
             </optional>
             <optional>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index d8d1fe7..ff50252 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -11260,6 +11260,57 @@ virDomainParseMemory(const char *xpath, xmlXPathContextPtr ctxt,
 }
 
 
+static int                                   /* 0 on success, -1 on failure */
+virDomainHugepagesParseXML(xmlNodePtr node,  /* the <page> element to parse */
+                           xmlXPathContextPtr ctxt, /* ctxt->node is pointed at node, then restored */
+                           virDomainHugePagePtr hugepage) /* out: size, plus nodemask if nodeset is given */
+{
+    int ret = -1;
+    xmlNodePtr oldnode = ctxt->node;
+    unsigned long long bytes, max;
+    char *unit = NULL, *nodeset = NULL;
+
+    ctxt->node = node; /* so the relative "./@..." lookups below hit this element */
+
+    /* Upper bound passed to virScaleInteger(): 1024 * ULONG_MAX when unsigned
+     * long is narrower than long long, LLONG_MAX otherwise.  */
+    if (sizeof(unsigned long) < sizeof(long long))
+        max = 1024ull * ULONG_MAX;
+    else
+        max = LLONG_MAX;
+
+    if (virXPathULongLong("string(./@size)", ctxt, &bytes) < 0) {
+        virReportError(VIR_ERR_XML_DETAIL, "%s",
+                       _("unable to parse size attribute"));
+        goto cleanup;
+    }
+
+    unit = virXPathString("string(./@unit)", ctxt); /* result not checked: unit is optional */
+
+    if (virScaleInteger(&bytes, unit, 1024, max) < 0) /* presumably 1024 is the scale for a unit-less size -- TODO confirm */
+        goto cleanup;
+
+    if (!(hugepage->size = VIR_DIV_UP(bytes, 1024))) { /* stored in KiB; a zero result is rejected */
+        virReportError(VIR_ERR_XML_DETAIL, "%s",
+                       _("hugepage size can't be zero"));
+        goto cleanup;
+    }
+
+    if ((nodeset = virXMLPropString(node, "nodeset"))) { /* without a nodeset, nodemask is left untouched */
+        if (virBitmapParse(nodeset, 0, &hugepage->nodemask,
+                           VIR_DOMAIN_CPUMASK_LEN) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+ cleanup: /* reached on success and failure alike */
+    VIR_FREE(unit);
+    VIR_FREE(nodeset);
+    ctxt->node = oldnode; /* restore the caller's XPath context node */
+    return ret;
+}
+
+
 static virDomainResourceDefPtr
 virDomainResourceDefParse(xmlNodePtr node,
                           xmlXPathContextPtr ctxt)
@@ -11327,7 +11378,7 @@ virDomainDefParseXML(xmlDocPtr xml,
 {
     xmlNodePtr *nodes = NULL, node = NULL;
     char *tmp = NULL;
-    size_t i;
+    size_t i, j;
     int n;
     long id = -1;
     virDomainDefPtr def;
@@ -11477,8 +11528,55 @@ virDomainDefParseXML(xmlDocPtr xml,
         def->mem.cur_balloon = def->mem.max_balloon;
     }
 
-    if ((node = virXPathNode("./memoryBacking/hugepages", ctxt)))
-        def->mem.hugepage_backed = true;
+
+    if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("cannot extract hugepages nodes"));
+        goto error;
+    }
+
+    if (n) {
+        if (VIR_ALLOC_N(def->mem.hugepages, n) < 0)
+            goto error;
+
+        for (i = 0; i < n; i++) {
+            if (virDomainHugepagesParseXML(nodes[i], ctxt,
+                                           &def->mem.hugepages[i]) < 0)
+                goto error;
+            def->mem.nhugepages++;
+
+            for (j = 0; j < i; j++) {
+                if (def->mem.hugepages[i].nodemask &&
+                    def->mem.hugepages[j].nodemask &&
+                    virBitmapOverlaps(def->mem.hugepages[i].nodemask,
+                                      def->mem.hugepages[j].nodemask)) {
+                    virReportError(VIR_ERR_XML_DETAIL,
+                                   _("nodeset attribute of hugepages "
+                                     "of sizes %llu and %llu intersect"),
+                                   def->mem.hugepages[i].size,
+                                   def->mem.hugepages[j].size);
+                    goto error;
+                } else if (!def->mem.hugepages[i].nodemask &&
+                           !def->mem.hugepages[j].nodemask) {
+                    virReportError(VIR_ERR_XML_DETAIL,
+                                   _("two master hugepages detected: "
+                                     "%llu and %llu"),
+                                   def->mem.hugepages[i].size,
+                                   def->mem.hugepages[j].size);
+                    goto error;
+                }
+            }
+        }
+
+        VIR_FREE(nodes);
+    } else {
+        if ((node = virXPathNode("./memoryBacking/hugepages", ctxt))) {
+            if (VIR_ALLOC(def->mem.hugepages) < 0)
+                goto error;
+
+            def->mem.nhugepages = 1;
+        }
+    }
 
     if ((node = virXPathNode("./memoryBacking/nosharepages", ctxt)))
         def->mem.nosharepages = true;
@@ -11500,7 +11598,6 @@ virDomainDefParseXML(xmlDocPtr xml,
         goto error;
 
     for (i = 0; i < n; i++) {
-        size_t j;
         if (virDomainBlkioDeviceParseXML(nodes[i],
                                          &def->blkio.devices[i]) < 0)
             goto error;
@@ -12432,7 +12529,6 @@ virDomainDefParseXML(xmlDocPtr xml,
 
         if (chr->target.port == -1) {
             int maxport = -1;
-            size_t j;
             for (j = 0; j < i; j++) {
                 if (def->parallels[j]->target.port > maxport)
                     maxport = def->parallels[j]->target.port;
@@ -12460,7 +12556,6 @@ virDomainDefParseXML(xmlDocPtr xml,
 
         if (chr->target.port == -1) {
             int maxport = -1;
-            size_t j;
             for (j = 0; j < i; j++) {
                 if (def->serials[j]->target.port > maxport)
                     maxport = def->serials[j]->target.port;
@@ -12518,7 +12613,6 @@ virDomainDefParseXML(xmlDocPtr xml,
         if (chr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL &&
             chr->info.addr.vioserial.port == 0) {
             int maxport = 0;
-            size_t j;
             for (j = 0; j < i; j++) {
                 virDomainChrDefPtr thischr = def->channels[j];
                 if (thischr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL &&
@@ -12635,7 +12729,7 @@ virDomainDefParseXML(xmlDocPtr xml,
     if (n && VIR_ALLOC_N(def->videos, n) < 0)
         goto error;
     for (i = 0; i < n; i++) {
-        size_t j = def->nvideos;
+        j = def->nvideos;
         virDomainVideoDefPtr video = virDomainVideoDefParseXML(nodes[j],
                                                                def,
                                                                flags);
@@ -14073,13 +14167,38 @@ virDomainDefCheckABIStability(virDomainDefPtr src,
                        dst->mem.cur_balloon, src->mem.cur_balloon);
         goto error;
     }
-    if (src->mem.hugepage_backed != dst->mem.hugepage_backed) {
+    if (src->mem.nhugepages != dst->mem.nhugepages) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-                       _("Target domain huge page backing %d does not match source %d"),
-                       dst->mem.hugepage_backed,
-                       src->mem.hugepage_backed);
+                       _("Target domain huge pages count %zu does not match source %zu"),
+                       dst->mem.nhugepages, src->mem.nhugepages);
         goto error;
     }
+    for (i = 0; i < src->mem.nhugepages; i++) {
+        virDomainHugePagePtr src_huge = &src->mem.hugepages[i];
+        virDomainHugePagePtr dst_huge = &dst->mem.hugepages[i];
+
+        if (src_huge->size != dst_huge->size) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Target domain huge page size %llu "
+                             "does not match source %llu"),
+                           dst_huge->size, src_huge->size);
+            goto error;
+        }
+
+        if (src_huge->nodemask && dst_huge->nodemask) {
+            if (!virBitmapEqual(src_huge->nodemask, dst_huge->nodemask)) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                               _("Target huge page nodemask does not match source"));
+                goto error;
+            }
+        } else {
+            if (src_huge->nodemask || dst_huge->nodemask) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                               _("Target huge page nodemask does not match source"));
+                goto error;
+            }
+        }
+    }
 
     if (src->vcpus != dst->vcpus) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
@@ -17196,6 +17315,54 @@ virDomainResourceDefFormat(virBufferPtr buf,
 }
 
 
+static int                                     /* 0 on success, -1 if the nodemask cannot be formatted */
+virDomainHugepagesFormatBuf(virBufferPtr buf,  /* buffer the <page .../> element is appended to */
+                            virDomainHugePagePtr hugepage) /* entry to format; size is always printed as KiB */
+{
+    int ret = -1;
+
+    virBufferAsprintf(buf, "<page size='%llu' unit='KiB'",
+                      hugepage->size);
+
+    if (hugepage->nodemask) { /* nodeset attribute is only emitted when a mask is set */
+        char *nodeset = NULL;
+        if (!(nodeset = virBitmapFormat(hugepage->nodemask)))
+            goto cleanup; /* NOTE(review): the unterminated "<page ..." is already in buf here */
+        virBufferAsprintf(buf, " nodeset='%s'", nodeset);
+        VIR_FREE(nodeset);
+    }
+
+    virBufferAddLit(buf, "/>\n");
+
+    ret = 0;
+ cleanup:
+    return ret;
+}
+
+static void                                 /* appends the whole <hugepages> element to buf */
+virDomainHugepagesFormat(virBufferPtr buf,  /* output buffer */
+                         virDomainHugePagePtr hugepages, /* array of entries to format */
+                         size_t nhugepages) /* number of entries in hugepages */
+{
+    size_t i;
+
+    if (nhugepages == 1 &&
+        hugepages[0].size == 0) { /* lone entry with size 0: written as the bare element */
+        virBufferAddLit(buf, "<hugepages/>\n");
+        return;
+    }
+
+    virBufferAddLit(buf, "<hugepages>\n");
+    virBufferAdjustIndent(buf, 2);
+
+    for (i = 0; i < nhugepages; i++)
+        virDomainHugepagesFormatBuf(buf, &hugepages[i]); /* NOTE(review): return value is ignored */
+
+    virBufferAdjustIndent(buf, -2);
+    virBufferAddLit(buf, "</hugepages>\n");
+}
+
+
 #define DUMPXML_FLAGS                           \
     (VIR_DOMAIN_XML_SECURE |                    \
      VIR_DOMAIN_XML_INACTIVE |                  \
@@ -17392,11 +17559,11 @@ virDomainDefFormatInternal(virDomainDefPtr def,
         virBufferAddLit(buf, "</memtune>\n");
     }
 
-    if (def->mem.hugepage_backed || def->mem.nosharepages || def->mem.locked) {
+    if (def->mem.nhugepages || def->mem.nosharepages || def->mem.locked) {
         virBufferAddLit(buf, "<memoryBacking>\n");
         virBufferAdjustIndent(buf, 2);
-        if (def->mem.hugepage_backed)
-            virBufferAddLit(buf, "<hugepages/>\n");
+        if (def->mem.nhugepages)
+            virDomainHugepagesFormat(buf, def->mem.hugepages, def->mem.nhugepages);
         if (def->mem.nosharepages)
             virBufferAddLit(buf, "<nosharepages/>\n");
         if (def->mem.locked)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 623ca80..f0e00f5 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1803,6 +1803,14 @@ struct _virDomainResourceDef {
     char *partition;
 };
 
+typedef struct _virDomainHugePage virDomainHugePage;
+typedef virDomainHugePage *virDomainHugePagePtr;
+
+struct _virDomainHugePage { /* one <page> entry under <memoryBacking>/<hugepages> */
+    virBitmapPtr nodemask;      /* guest's NUMA node mask; only set when a nodeset is given */
+    unsigned long long size;    /* hugepage size in KiB; 0 is formatted as bare <hugepages/> */
+};
+
 /*
  * Guest VM main configuration
  *
@@ -1827,7 +1835,10 @@ struct _virDomainDef {
     struct {
         unsigned long long max_balloon; /* in kibibytes */
         unsigned long long cur_balloon; /* in kibibytes */
-        bool hugepage_backed;
+
+        virDomainHugePagePtr hugepages;
+        size_t nhugepages;
+
         bool nosharepages;
         bool locked;
         int dump_core; /* enum virTristateSwitch */
diff --git a/src/parallels/parallels_driver.c b/src/parallels/parallels_driver.c
index a503dea..bb9538f 100644
--- a/src/parallels/parallels_driver.c
+++ b/src/parallels/parallels_driver.c
@@ -2023,7 +2023,7 @@ parallelsApplyChanges(virDomainObjPtr dom, virDomainDefPtr new)
             return -1;
     }
 
-    if (old->mem.hugepage_backed != new->mem.hugepage_backed ||
+    if (old->mem.nhugepages != new->mem.nhugepages ||
         old->mem.hard_limit != new->mem.hard_limit ||
         old->mem.soft_limit != new->mem.soft_limit ||
         old->mem.min_guarantee != new->mem.min_guarantee ||
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 091447a..d5f5f02 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7332,7 +7332,7 @@ qemuBuildCommandLine(virConnectPtr conn,
     virCommandAddArg(cmd, "-m");
     def->mem.max_balloon = VIR_DIV_UP(def->mem.max_balloon, 1024) * 1024;
     virCommandAddArgFormat(cmd, "%llu", def->mem.max_balloon / 1024);
-    if (def->mem.hugepage_backed) {
+    if (def->mem.nhugepages) {
         char *mem_path;
 
         if (!cfg->nhugetlbfs) {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 7626cef..36922cb 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3791,7 +3791,7 @@ int qemuProcessStart(virConnectPtr conn,
     }
     virDomainAuditSecurityLabel(vm, true);
 
-    if (vm->def->mem.hugepage_backed) {
+    if (vm->def->mem.nhugepages) {
         for (i = 0; i < cfg->nhugetlbfs; i++) {
             char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]);
 
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml
new file mode 100644
index 0000000..5ad0695
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml
@@ -0,0 +1,45 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+  <memory unit='KiB'>4194304</memory>
+  <currentMemory unit='KiB'>4194304</currentMemory>
+  <memoryBacking>
+    <hugepages>
+      <page size='2048' unit='KiB' nodeset='1'/>
+      <page size='1048576' unit='KiB' nodeset='0,2-3'/>
+    </hugepages>
+  </memoryBacking>
+  <vcpu placement='static'>4</vcpu>
+  <numatune>
+    <memory mode='strict' nodeset='0-3'/>
+    <memnode cellid='3' mode='strict' nodeset='3'/>
+  </numatune>
+  <os>
+    <type arch='i686' machine='pc'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0' memory='1048576'/>
+      <cell id='1' cpus='1' memory='1048576'/>
+      <cell id='2' cpus='2' memory='1048576'/>
+      <cell id='3' cpus='3' memory='1048576'/>
+    </numa>
+  </cpu>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu</emulator>
+    <disk type='block' device='disk'>
+      <source dev='/dev/HostVG/QEMUGuest1'/>
+      <target dev='hda' bus='ide'/>
+      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
+    </disk>
+    <controller type='usb' index='0'/>
+    <controller type='ide' index='0'/>
+    <controller type='pci' index='0' model='pci-root'/>
+    <memballoon model='virtio'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index cefe05b..09cb228 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -197,6 +197,7 @@ mymain(void)
     DO_TEST("hyperv-off");
 
     DO_TEST("hugepages");
+    DO_TEST("hugepages-pages");
     DO_TEST("nosharepages");
     DO_TEST("disk-aio");
     DO_TEST("disk-cdrom");
-- 
1.8.5.5




More information about the libvir-list mailing list