[libvirt] [PATCH] Support CPU less NUMA node.
Feng, Shaohe
shaohe.feng at intel.com
Thu Apr 20 11:22:49 UTC 2017
Thanks Peter for the comments.
Will update it in the next version.
On 2017年04月20日 19:17, Peter Krempa wrote:
> On Thu, Apr 20, 2017 at 03:49:29 +0000, Shaohe Feng wrote:
>> Some platform NUMA node does not include cpu, such as Phi.
>>
>> This patch support CPU less NUMA node.
>> But there must be one CPU cell node for a guest.
>> Also must assign the host nodeset for this guest cell node.
> Additionally to the review below:
>
> Did qemu always support this? If not, you need to add code which will
> allow such configuration only if qemu supports this (adding a capability
> bit etc ...).
need to check it.
> Also please elaborat how is this better than adding a regular numa node
> whith cpus which are disabled. In such case you can still add more cpus
> via the hotplug API in case you desire so.
do you means add an extra attribute "disable" for cpus?
such as
<cell id='1' cpus="" cpu_disable="true" "memory='512000' unit='KiB'/>
?
>
>
>> please enable numactl with "--with-numactl" for libvirt config.
>>
>> Test this patch:
>> 1. define a numa cell without "cpus", such as cell 1.
>> <numatune>
>> <memnode cellid='1' mode='strict' nodeset='0'/>
>> </numatune>
>> <cpu>
>> <numa>
>> <cell id='0' cpus='0-1' memory='512000' unit='KiB'/>
>> <cell id='1' memory='512000' unit='KiB'/>
>> </numa>
>> </cpu>
>>
>> libvirt can edit and start the VM successfully.
>>
>> 2. define a numa cell without "cpus", without nodeset.
>> <cpu>
>> <numa>
>> <cell id='0' cpus='0-1' memory='512000' unit='KiB'/>
>> <cell id='1' memory='512000' unit='KiB'/>
> The cpus attribute is not optional in the schema, nor does your patch
> make it optional. This will make suxh XML invalid.
Yes, forget this change.
--- a/docs/schemas/cputypes.rng
+++ b/docs/schemas/cputypes.rng
@@ -110,9 +110,11 @@
<ref name="unsignedInt"/>
</attribute>
</optional>
- <attribute name="cpus">
- <ref name="cpuset"/>
- </attribute>
+ <optional>
+ <attribute name="cpus">
+ <ref name="cpuset"/>
+ </attribute>
+ </optional>
>
>> </numa>
>> </cpu>
>>
>> This case, libvirt will failed to edit the CPUS.
>> ---
>> src/conf/domain_conf.c | 4 +++
>> src/conf/numa_conf.c | 89 ++++++++++++++++++++++++++++++++++---------------
>> src/conf/numa_conf.h | 2 ++
>> src/qemu/qemu_command.c | 12 ++++---
>> src/util/virbitmap.c | 10 ++++--
>> 5 files changed, 83 insertions(+), 34 deletions(-)
> Chhange such as this require test case addition for the XML parser and
> also command line generator.
>
>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>> index 705deb3..d6e5489 100644
>> --- a/src/conf/domain_conf.c
>> +++ b/src/conf/domain_conf.c
>> @@ -17330,6 +17330,10 @@ virDomainDefParseXML(xmlDocPtr xml,
>> ctxt) < 0)
>> goto error;
>>
>> + if (virDomainNumaCPULessCheck(def->numa) < 0){
>> + goto error;
>> + }
> You did not run make syntax-check prior to posting this patch. Note that
> all patches must pass this check prior to be pushed. Make sure your next
> posting fixes ALL issues pointed out by the syntax-check tool. I did not
> point them out in this review.
Thanks for reminder.
>
>> +
>> if (virDomainNumatuneHasPlacementAuto(def->numa) &&
>> !def->cpumask && !virDomainDefHasVcpuPin(def) &&
>> !def->cputune.emulatorpin &&
>> diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
>> index bfd3703..1b6f7ff 100644
>> --- a/src/conf/numa_conf.c
>> +++ b/src/conf/numa_conf.c
>> @@ -742,34 +742,31 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
>> goto cleanup;
>> }
>>
>> - if (!(tmp = virXMLPropString(nodes[i], "cpus"))) {
>> - virReportError(VIR_ERR_XML_ERROR, "%s",
>> - _("Missing 'cpus' attribute in NUMA cell"));
>> - goto cleanup;
>> - }
>> -
>> - if (virBitmapParse(tmp, &def->mem_nodes[cur_cell].cpumask,
>> - VIR_DOMAIN_CPUMASK_LEN) < 0)
>> - goto cleanup;
>> -
>> - if (virBitmapIsAllClear(def->mem_nodes[cur_cell].cpumask)) {
>> - virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> - _("NUMA cell %d has no vCPUs assigned"), cur_cell);
>> - goto cleanup;
>> - }
>> - VIR_FREE(tmp);
>> -
>> - for (j = 0; j < n; j++) {
>> - if (j == cur_cell || !def->mem_nodes[j].cpumask)
>> - continue;
>> + tmp = virXMLPropString(nodes[i], "cpus");
>> + if (tmp) {
>> + if (virBitmapParse(tmp, &def->mem_nodes[cur_cell].cpumask,
>> + VIR_DOMAIN_CPUMASK_LEN) < 0)
>> + goto cleanup;
>>
>> - if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
>> - def->mem_nodes[cur_cell].cpumask)) {
>> + if (virBitmapIsAllClear(def->mem_nodes[cur_cell].cpumask)) {
>> virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> - _("NUMA cells %u and %zu have overlapping vCPU ids"),
>> - cur_cell, j);
>> + _("NUMA cell %d has no vCPUs assigned"), cur_cell);
>> goto cleanup;
>> }
>> + VIR_FREE(tmp);
>> +
>> + for (j = 0; j < n; j++) {
>> + if (j == cur_cell || !def->mem_nodes[j].cpumask)
>> + continue;
>> +
>> + if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
>> + def->mem_nodes[cur_cell].cpumask)) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("NUMA cells %u and %zu have overlapping vCPU ids"),
>> + cur_cell, j);
>> + goto cleanup;
>> + }
>> + }
>> }
>>
>> ctxt->node = nodes[i];
>> @@ -808,6 +805,7 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
>> char *cpustr;
>> size_t ncells = virDomainNumaGetNodeCount(def);
>> size_t i;
>> + size_t ncpuless = 0;
>>
>> if (ncells == 0)
>> return 0;
>> @@ -817,12 +815,24 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
>> for (i = 0; i < ncells; i++) {
>> memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
>>
>> - if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
>> - return -1;
>> + cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i));
>>
>> virBufferAddLit(buf, "<cell");
>> virBufferAsprintf(buf, " id='%zu'", i);
>> - virBufferAsprintf(buf, " cpus='%s'", cpustr);
>> + if (cpustr && (*cpustr != 0))
>> + virBufferAsprintf(buf, " cpus='%s'", cpustr);
>> + else {
>> + /* Note, at present we only allow cpuless numa node with */
>> + /* nodeset assign. */
>> + ncpuless++;
>> + if (!virDomainNumatuneNodeSpecified(def, i)) {
>> + VIR_FREE(cpustr);
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("CPUless NUMA node cellid %zu must be "
>> + "specified node set."), i);
>> + return -1;
> See below.
>
>> + }
>> + }
>> virBufferAsprintf(buf, " memory='%llu'",
>> virDomainNumaGetNodeMemorySize(def, i));
>> virBufferAddLit(buf, " unit='KiB'");
>> @@ -835,6 +845,12 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
>> virBufferAdjustIndent(buf, -2);
>> virBufferAddLit(buf, "</numa>\n");
>>
>> + if (ncpuless == ncells) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("At lease one NUMA node should "
>> + "include CPU element."));
>> + return -1;
> This is the wrong place for such check. This formats the XML. At this
> point the definition needs to be valid.
yes. so we should check it in parser xml, instead of formats XML.
Thanks.
>
>> + }
>> return 0;
>> }
>>
>> @@ -861,6 +877,7 @@ virDomainNumaGetMaxCPUID(virDomainNumaPtr numa)
>> int bit;
>>
>> bit = virBitmapLastSetBit(virDomainNumaGetNodeCpumask(numa, i));
>> + bit = (bit > 0) ? bit : 0;
> Please use a if-else construct rather than a ternary operator.
OK.
>
>> if (bit > ret)
>> ret = bit;
>> }
>> @@ -973,3 +990,21 @@ virDomainNumaGetMemorySize(virDomainNumaPtr numa)
>>
>> return ret;
>> }
>> +
>> +// NOTE, For some plateforms, there are some node without cpus, such as Phi.
>> +// We just need do some check for these plateforms, we must assigned nodeset
>> +// for CPU less node.
> Single line comments are not allowed in libvirt. Please use /* */
>
>> +int
>> +virDomainNumaCPULessCheck(virDomainNumaPtr numa)
>> +{
>> + int ret = 0;
>> + int i = 0;
>> + for (i = 0; i < numa->nmem_nodes; i++){
>> + if (!numa->mem_nodes[i].cpumask && !numa->mem_nodes[i].nodeset) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("Cell: %d is a cpuless numa, must specfiy the nodeset."), i);
>> + return -1;
>> + }
>> + }
>> + return ret;
> ret is pointless.
>
>> +}
>> diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h
>> index b6a5354..1f711e6 100644
>> --- a/src/conf/numa_conf.h
>> +++ b/src/conf/numa_conf.h
>> @@ -155,5 +155,7 @@ int virDomainNumaDefCPUFormat(virBufferPtr buf, virDomainNumaPtr def);
>>
>> unsigned int virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa);
>>
>> +int virDomainNumaCPULessCheck(virDomainNumaPtr numa);
>> +
>>
>> #endif /* __NUMA_CONF_H__ */
>> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>> index b2e76ca..b07ca66 100644
>> --- a/src/qemu/qemu_command.c
>> +++ b/src/qemu/qemu_command.c
>> @@ -7665,11 +7665,13 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>> virCommandAddArg(cmd, "-numa");
>> virBufferAsprintf(&buf, "node,nodeid=%zu", i);
>>
>> - for (tmpmask = cpumask; tmpmask; tmpmask = next) {
>> - if ((next = strchr(tmpmask, ',')))
>> - *(next++) = '\0';
>> - virBufferAddLit(&buf, ",cpus=");
>> - virBufferAdd(&buf, tmpmask, -1);
>> + if (*cpumask != 0) {
> Use the char representation of the nul byte instead of 0.
>
>> + for (tmpmask = cpumask; tmpmask; tmpmask = next) {
>> + if ((next = strchr(tmpmask, ',')))
>> + *(next++) = '\0';
>> + virBufferAddLit(&buf, ",cpus=");
>> + virBufferAdd(&buf, tmpmask, -1);
>> + }
>> }
>>
>> if (needBackend)
>> diff --git a/src/util/virbitmap.c b/src/util/virbitmap.c
>> index eac63d9..b2803ce 100644
>> --- a/src/util/virbitmap.c
>> +++ b/src/util/virbitmap.c
>> @@ -953,6 +953,9 @@ virBitmapLastSetBit(virBitmapPtr bitmap)
>> unsigned long bits;
>>
>> /* If bitmap is empty then there is no set bit */
>> + if (!bitmap)
>> + return -1;
> This change is not justified and also should be in a separate patch.
>
>> +
>> if (bitmap->map_len == 0)
>> return -1;
>>
>> @@ -1039,9 +1042,12 @@ virBitmapCountBits(virBitmapPtr bitmap)
>> size_t i;
>> size_t ret = 0;
>>
>> - for (i = 0; i < bitmap->map_len; i++)
>> - ret += count_one_bits_l(bitmap->map[i]);
>> + if (bitmap == NULL)
>> + return ret;
>>
>> + for (i = 0; i < bitmap->map_len; i++) {
>> + ret += count_one_bits_l(bitmap->map[i]);
>> + }
> Same here. This is for separate patch and also breaks the coding style.
>
> Additionally I disagree with adding NULL checks here. The calling code
> should make sure not to call these.
OK, I will separate and fix them.
>
>> return ret;
>> }
>>
>> --
>> 2.7.4
>>
>> --
>> libvir-list mailing list
>> libvir-list at redhat.com
>> https://www.redhat.com/mailman/listinfo/libvir-list
More information about the libvir-list
mailing list