[libvirt] [RFC PATCH v1 1/4] numa: describe siblings distances within cells
Jim Fehlig
jfehlig at suse.com
Mon Jun 19 18:26:20 UTC 2017
On 06/12/2017 12:54 PM, Wim Ten Have wrote:
> From: Wim ten Have <wim.ten.have at oracle.com>
>
> Add libvirtd NUMA cell domain administration functionality to
> describe underlying cell id sibling distances in full fashion
> when configuring HVM guests.
Thanks for the detailed cover letter :-). Some of that information (e.g. ACPI
spec, SLIT table, qemu support) would be useful in this commit message.
> [below is an example of a 4 node setup]
>
> <cpu>
> <numa>
> <cell id='0' cpus='0' memory='2097152' unit='KiB'>
> <distances>
> <sibling id='0' value='10'/>
> <sibling id='1' value='21'/>
> <sibling id='2' value='31'/>
> <sibling id='3' value='41'/>
> </distances>
> </cell>
Others on the list are better XML designers, but the following seems to achieve
the same and is a bit more compact
<cell id='0' cpus='0' memory='2097152' unit='KiB'>
<sibling id='0' distance='10'/>
<sibling id='1' distance='21'/>
<sibling id='2' distance='31'/>
<sibling id='3' distance='41'/>
</cell>
CC danpb, who often has good ideas on schema design.
> <cell id='1' cpus='1' memory='2097152' unit='KiB'>
> <distances>
> <sibling id='0' value='21'/>
> <sibling id='1' value='10'/>
> <sibling id='2' value='31'/>
> <sibling id='3' value='41'/>
> </distances>
> </cell>
> <cell id='2' cpus='2' memory='2097152' unit='KiB'>
> <distances>
> <sibling id='0' value='31'/>
> <sibling id='1' value='21'/>
> <sibling id='2' value='10'/>
> <sibling id='3' value='21'/>
> </distances>
> <cell id='3' cpus='3' memory='2097152' unit='KiB'>
> <distances>
> <sibling id='0' value='41'/>
> <sibling id='1' value='31'/>
> <sibling id='2' value='21'/>
> <sibling id='3' value='10'/>
> </distances>
> </cell>
> </numa>
> </cpu>
>
> Changes under this commit concern all those that require adding
> the valid data structures, virDomainNuma* functional routines and the
> XML doc schema enhancements to enforce appropriate administration.
One item you forgot was docs/formatdomain.html.in. Changes in schema should
always be described by accompanying changes in documentation.
Regards,
Jim
>
> These changes alter the docs/schemas/cputypes.rng enforcing
> domain administration to follow the syntax below per numa cell id.
>
> These changes also alter docs/schemas/basictypes.rng to add
> "numaDistanceValue" which is an "unsignedInt" with a minimum value
> of 10 as 0-9 are reserved values and can not be used as System
> Locality Distance Information Table data.
>
> Signed-off-by: Wim ten Have <wim.ten.have at oracle.com>
> ---
> docs/schemas/basictypes.rng | 8 ++
> docs/schemas/cputypes.rng | 18 +++
> src/conf/cpu_conf.c | 2 +-
> src/conf/numa_conf.c | 260 +++++++++++++++++++++++++++++++++++++++++++-
> src/conf/numa_conf.h | 25 ++++-
> src/libvirt_private.syms | 6 +
> 6 files changed, 313 insertions(+), 6 deletions(-)
>
> diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng
> index 1ea667c..a335b5d 100644
> --- a/docs/schemas/basictypes.rng
> +++ b/docs/schemas/basictypes.rng
> @@ -77,6 +77,14 @@
> </choice>
> </define>
>
> + <define name="numaDistanceValue">
> + <choice>
> + <data type="unsignedInt">
> + <param name="minInclusive">10</param>
> + </data>
> + </choice>
> + </define>
> +
> <define name="pciaddress">
> <optional>
> <attribute name="domain">
> diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng
> index 3eef16a..c45b6df 100644
> --- a/docs/schemas/cputypes.rng
> +++ b/docs/schemas/cputypes.rng
> @@ -129,6 +129,24 @@
> </choice>
> </attribute>
> </optional>
> + <optional>
> + <element name="distances">
> + <oneOrMore>
> + <ref name="numaDistance"/>
> + </oneOrMore>
> + </element>
> + </optional>
> + </element>
> + </define>
> +
> + <define name="numaDistance">
> + <element name="sibling">
> + <attribute name="id">
> + <ref name="unsignedInt"/>
> + </attribute>
> + <attribute name="value">
> + <ref name="numaDistanceValue"/>
> + </attribute>
> </element>
> </define>
>
> diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c
> index da40e9b..5d8f7be3 100644
> --- a/src/conf/cpu_conf.c
> +++ b/src/conf/cpu_conf.c
> @@ -643,7 +643,7 @@ virCPUDefFormatBufFull(virBufferPtr buf,
> if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0)
> goto cleanup;
>
> - if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0)
> + if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0)
> goto cleanup;
>
> /* Put it all together */
> diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
> index bfd3703..1914810 100644
> --- a/src/conf/numa_conf.c
> +++ b/src/conf/numa_conf.c
> @@ -48,6 +48,8 @@ VIR_ENUM_IMPL(virDomainMemoryAccess, VIR_DOMAIN_MEMORY_ACCESS_LAST,
> "shared",
> "private")
>
> +typedef struct _virDomainNumaDistance virDomainNumaDistance;
> +typedef virDomainNumaDistance *virDomainNumaDistancePtr;
>
> typedef struct _virDomainNumaNode virDomainNumaNode;
> typedef virDomainNumaNode *virDomainNumaNodePtr;
> @@ -66,6 +68,12 @@ struct _virDomainNuma {
> virBitmapPtr nodeset; /* host memory nodes where this guest node resides */
> virDomainNumatuneMemMode mode; /* memory mode selection */
> virDomainMemoryAccess memAccess; /* shared memory access configuration */
> +
> + struct _virDomainNumaDistance {
> + unsigned int value; /* locality value for node i*j */
> + unsigned int cellid;
> + } *distances; /* remote node distances */
> + size_t ndistances;
> } *mem_nodes; /* guest node configuration */
> size_t nmem_nodes;
>
> @@ -686,6 +694,95 @@ virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune,
> }
>
>
> +static int
> +virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def,
> + xmlXPathContextPtr ctxt,
> + unsigned int cur_cell)
> +{
> + int ret = -1;
> + char *tmp = NULL;
> + size_t i;
> + xmlNodePtr *nodes = NULL;
> + int ndistances;
> + virDomainNumaDistancePtr distances = NULL;
> +
> +
> + if (!virXPathNode("./distances[1]/sibling", ctxt))
> + return 0;
> +
> + if ((ndistances = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
> + virReportError(VIR_ERR_XML_ERROR, "%s",
> + _("NUMA distances defined without siblings"));
> + goto cleanup;
> + }
> +
> + if (ndistances < def->nmem_nodes) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("NUMA distances defined with fewer siblings than nodes for cell id: '%d'"),
> + cur_cell);
> + goto cleanup;
> + }
> +
> + if (VIR_ALLOC_N(distances, ndistances) < 0)
> + goto cleanup;
> +
> + for (i = 0; i < ndistances; i++) {
> + unsigned int sibling_id = i, sibling_value;
> +
> + /* siblings are in order of parsing or explicitly numbered */
> + if ((tmp = virXMLPropString(nodes[i], "id"))) {
> + if (virStrToLong_uip(tmp, NULL, 10, &sibling_id) < 0) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Invalid 'id' attribute in NUMA distances for sibling: '%s'"),
> + tmp);
> + goto cleanup;
> + }
> +
> + if (sibling_id >= ndistances) {
> + virReportError(VIR_ERR_XML_ERROR, "%s",
> + _("Exactly one 'sibling' element per NUMA distance "
> + "is allowed, non-contiguous ranges or ranges not "
> + "starting from 0 are not allowed"));
> + goto cleanup;
> + }
> + }
> + VIR_FREE(tmp);
> +
> + /* We need a locality value */
> + if (!(tmp = virXMLPropString(nodes[i], "value"))) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Missing 'value' attribute in NUMA distances for sibling id: '%d'"),
> + sibling_id);
> + goto cleanup;
> + }
> +
> + /* It needs to be applicable */
> + if (virStrToLong_uip(tmp, NULL, 10, &sibling_value) < 0) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Invalid 'value' attribute in NUMA distances for sibling id: '%d'"),
> + sibling_id);
> + goto cleanup;
> + }
> + VIR_FREE(tmp);
> +
> + distances[sibling_id].cellid = sibling_id;
> + distances[sibling_id].value = sibling_value;
> + }
> +
> + def->mem_nodes[cur_cell].distances = distances;
> + def->mem_nodes[cur_cell].ndistances = ndistances;
> +
> + ret = 0;
> +
> + cleanup:
> + if (ret)
> + VIR_FREE(distances);
> + VIR_FREE(nodes);
> + VIR_FREE(tmp);
> +
> + return ret;
> +}
> +
> int
> virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
> xmlXPathContextPtr ctxt)
> @@ -788,6 +885,14 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
> def->mem_nodes[cur_cell].memAccess = rc;
> VIR_FREE(tmp);
> }
> +
> + /* Parse NUMA distances info */
> + if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("NUMA cell %d has incorrect 'distances' configured"),
> + cur_cell);
> + goto cleanup;
> + }
> }
>
> ret = 0;
> @@ -801,8 +906,8 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
>
>
> int
> -virDomainNumaDefCPUFormat(virBufferPtr buf,
> - virDomainNumaPtr def)
> +virDomainNumaDefCPUFormatXML(virBufferPtr buf,
> + virDomainNumaPtr def)
> {
> virDomainMemoryAccess memAccess;
> char *cpustr;
> @@ -815,6 +920,8 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
> virBufferAddLit(buf, "<numa>\n");
> virBufferAdjustIndent(buf, 2);
> for (i = 0; i < ncells; i++) {
> + int ndistances;
> +
> memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
>
> if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
> @@ -829,7 +936,30 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
> if (memAccess)
> virBufferAsprintf(buf, " memAccess='%s'",
> virDomainMemoryAccessTypeToString(memAccess));
> - virBufferAddLit(buf, "/>\n");
> +
> + ndistances = def->mem_nodes[i].ndistances;
> + if (!ndistances) {
> + virBufferAddLit(buf, "/>\n");
> + } else {
> + size_t j;
> + virDomainNumaDistancePtr distances = def->mem_nodes[i].distances;
> +
> + virBufferAddLit(buf, ">\n");
> + virBufferAdjustIndent(buf, 2);
> + virBufferAddLit(buf, "<distances>\n");
> + virBufferAdjustIndent(buf, 2);
> + for (j = 0; j < ndistances; j++) {
> + virBufferAddLit(buf, "<sibling");
> + virBufferAsprintf(buf, " id='%d'", distances[j].cellid);
> + virBufferAsprintf(buf, " value='%d'", distances[j].value);
> + virBufferAddLit(buf, "/>\n");
> + }
> + virBufferAdjustIndent(buf, -2);
> + virBufferAddLit(buf, "</distances>\n");
> + virBufferAdjustIndent(buf, -2);
> + virBufferAddLit(buf, "</cell>\n");
> + }
> +
> VIR_FREE(cpustr);
> }
> virBufferAdjustIndent(buf, -2);
> @@ -922,13 +1052,121 @@ virDomainNumaCheckABIStability(virDomainNumaPtr src,
> size_t
> virDomainNumaGetNodeCount(virDomainNumaPtr numa)
> {
> - if (!numa)
> + if (!numa || !numa->mem_nodes)
> return 0;
>
> return numa->nmem_nodes;
> }
>
>
> +size_t
> +virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes)
> +{
> + if (!numa || !nmem_nodes)
> + return 0;
> +
> + if (numa->mem_nodes) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Cannot alter an existing nmem_nodes set"));
> + return 0;
> + }
> +
> + if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0)
> + return 0;
> +
> + numa->nmem_nodes = nmem_nodes;
> +
> + return numa->nmem_nodes;
> +}
> +
> +
> +size_t
> +virDomainNumaGetNodeDistance(virDomainNumaPtr numa,
> + size_t node,
> + size_t cellid)
> +{
> + virDomainNumaDistancePtr distances;
> +
> + if (!numa)
> + return 0;
> +
> + distances = numa->mem_nodes[node].distances;
> + if (!numa->mem_nodes[node].ndistances || !distances)
> + return 0;
> +
> + return distances[cellid].value;
> +}
> +
> +
> +size_t
> +virDomainNumaSetNodeDistance(virDomainNumaPtr numa,
> + size_t node,
> + size_t cellid,
> + unsigned int value)
> +{
> + virDomainNumaDistancePtr distances;
> +
> + /*
> + * Advanced Configuration and Power Interface
> + * Specification version 6.1. Chapter 5.2.17
> + * System Locality Distance Information Table
> + * ... Distance values of 0-9 are reserved.
> + */
> + if (!numa || value < 10)
> + return 0;
> +
> + distances = numa->mem_nodes[node].distances;
> +
> + if (numa->mem_nodes[node].ndistances > 0 &&
> + distances[cellid].value)
> + return 0;
> +
> + distances[cellid].cellid = cellid;
> + distances[cellid].value = value;
> +
> + return distances[cellid].value;
> +}
> +
> +
> +size_t
> +virDomainNumaGetNodeDistanceCount(virDomainNumaPtr numa,
> + size_t node)
> +{
> + if (!numa)
> + return 0;
> +
> + return numa->mem_nodes[node].ndistances;
> +}
> +
> +
> +size_t
> +virDomainNumaSetNodeDistanceCount(virDomainNumaPtr numa,
> + size_t node,
> + size_t ndistances)
> +{
> + virDomainNumaDistancePtr distances;
> +
> + if (!numa || !ndistances)
> + return 0;
> +
> + distances = numa->mem_nodes[node].distances;
> + if (distances) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Cannot alter an existing nmem_nodes distances set for node: %zu"),
> + node);
> + return 0;
> + }
> +
> + if (VIR_ALLOC_N(distances, ndistances) < 0)
> + return 0;
> +
> + numa->mem_nodes[node].distances = distances;
> + numa->mem_nodes[node].ndistances = ndistances;
> +
> + return numa->mem_nodes[node].ndistances;
> +}
> +
> +
> virBitmapPtr
> virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
> size_t node)
> @@ -937,6 +1175,20 @@ virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
> }
>
>
> +virBitmapPtr
> +virDomainNumaSetNodeCpumask(virDomainNumaPtr numa,
> + size_t node,
> + virBitmapPtr cpumask)
> +{
> + if (!numa || !cpumask)
> + return NULL;
> +
> + numa->mem_nodes[node].cpumask = cpumask;
> +
> + return numa->mem_nodes[node].cpumask;
> +}
> +
> +
> virDomainMemoryAccess
> virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa,
> size_t node)
> diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h
> index b6a5354..96dda90 100644
> --- a/src/conf/numa_conf.h
> +++ b/src/conf/numa_conf.h
> @@ -87,12 +87,35 @@ int virDomainNumatuneMaybeGetNodeset(virDomainNumaPtr numatune,
>
> size_t virDomainNumaGetNodeCount(virDomainNumaPtr numa);
>
> +size_t virDomainNumaSetNodeCount(virDomainNumaPtr numa,
> + size_t nmem_nodes);
> +
> +size_t virDomainNumaGetNodeDistance(virDomainNumaPtr numa,
> + size_t node,
> + size_t sibling)
> + ATTRIBUTE_NONNULL(1);
> +size_t virDomainNumaSetNodeDistance(virDomainNumaPtr numa,
> + size_t node,
> + size_t sibling,
> + unsigned int value)
> + ATTRIBUTE_NONNULL(1);
> +size_t virDomainNumaGetNodeDistanceCount(virDomainNumaPtr numa,
> + size_t node)
> + ATTRIBUTE_NONNULL(1);
> +size_t virDomainNumaSetNodeDistanceCount(virDomainNumaPtr numa,
> + size_t node,
> + size_t ndistances)
> + ATTRIBUTE_NONNULL(1);
> virBitmapPtr virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
> size_t node)
> ATTRIBUTE_NONNULL(1);
> virDomainMemoryAccess virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa,
> size_t node)
> ATTRIBUTE_NONNULL(1);
> +virBitmapPtr virDomainNumaSetNodeCpumask(virDomainNumaPtr numa,
> + size_t node,
> + virBitmapPtr cpumask)
> + ATTRIBUTE_NONNULL(1);
> unsigned long long virDomainNumaGetNodeMemorySize(virDomainNumaPtr numa,
> size_t node)
> ATTRIBUTE_NONNULL(1);
> @@ -151,7 +174,7 @@ bool virDomainNumatuneNodeSpecified(virDomainNumaPtr numatune,
> int cellid);
>
> int virDomainNumaDefCPUParseXML(virDomainNumaPtr def, xmlXPathContextPtr ctxt);
> -int virDomainNumaDefCPUFormat(virBufferPtr buf, virDomainNumaPtr def);
> +int virDomainNumaDefCPUFormatXML(virBufferPtr buf, virDomainNumaPtr def);
>
> unsigned int virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa);
>
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 044510f..e7fb9c0 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -703,9 +703,15 @@ virDomainNumaGetMaxCPUID;
> virDomainNumaGetMemorySize;
> virDomainNumaGetNodeCount;
> virDomainNumaGetNodeCpumask;
> +virDomainNumaGetNodeDistance;
> +virDomainNumaGetNodeDistanceCount;
> virDomainNumaGetNodeMemoryAccessMode;
> virDomainNumaGetNodeMemorySize;
> virDomainNumaNew;
> +virDomainNumaSetNodeCount;
> +virDomainNumaSetNodeCpumask;
> +virDomainNumaSetNodeDistance;
> +virDomainNumaSetNodeDistanceCount;
> virDomainNumaSetNodeMemorySize;
> virDomainNumatuneFormatNodeset;
> virDomainNumatuneFormatXML;
>
More information about the libvir-list
mailing list