[libvirt] [PATCH v3] Pin guest to memory node on NUMA system

Shivaprasad G Bhat shivaprasadbhat at gmail.com
Tue Nov 26 14:29:31 UTC 2013


Version 3:
Addressed comments on V2.

Version 2:
Fixed the string formatting errors in v1.

The patch contains the fix for defect 1009880 reported at redhat bugzilla.
The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the
parent cpuset cannot be modified to remove the nodes that are in use by the
subcpusets.
The fix is to break the memory node modification into three steps as to assign
new nodes into the parent first. Change the nodes in the child nodes. Then
remove the old nodes on the parent node.

Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
---
 src/qemu/qemu_driver.c |  115 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 110 insertions(+), 5 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 8a1eefd..4bc9d1d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -8132,6 +8132,47 @@ cleanup:
 }
 
 static int
+qemuSetVcpuCpusetMems(virDomainObjPtr vm,
+                      char *nodeset_str)
+{
+    size_t j = 0;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virCgroupPtr cgroup_vcpu = NULL;
+
+    for (j = 0; j < priv->nvcpupids; j++) {
+        if (virCgroupNewVcpu(priv->cgroup, j, false, &cgroup_vcpu) < 0) {
+            return -1;
+        }
+        if (virCgroupSetCpusetMems(cgroup_vcpu, nodeset_str) < 0) {
+            virCgroupFree(&cgroup_vcpu);
+            return -1;
+        }
+        virCgroupFree(&cgroup_vcpu);
+    }
+
+    return 0;
+}
+
+static int
+qemuSetEmulatorCpusetMems(virDomainObjPtr vm,
+                          char *nodeset_str)
+{
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virCgroupPtr cgroup_emulator = NULL;
+
+    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_emulator) < 0) {
+        return -1;
+    }
+    if (virCgroupSetCpusetMems(cgroup_emulator, nodeset_str) < 0) {
+        virCgroupFree(&cgroup_emulator);
+        return -1;
+    }
+    virCgroupFree(&cgroup_emulator);
+
+    return 0;
+}
+
+static int
 qemuDomainSetNumaParameters(virDomainPtr dom,
                             virTypedParameterPtr params,
                             int nparams,
@@ -8198,7 +8239,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
             }
         } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) {
             virBitmapPtr nodeset = NULL;
+            virBitmapPtr old_nodeset = NULL;
+            virBitmapPtr temp_nodeset = NULL;
             char *nodeset_str = NULL;
+            char *old_nodeset_str = NULL;
+            char *temp_nodeset_str = NULL;
 
             if (virBitmapParse(params[i].value.s,
                                0, &nodeset,
@@ -8208,32 +8253,92 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
             }
 
             if (flags & VIR_DOMAIN_AFFECT_LIVE) {
+                size_t j;
+
                 if (vm->def->numatune.memory.mode !=
                     VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
                     virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                                    _("change of nodeset for running domain "
                                      "requires strict numa mode"));
-                    virBitmapFree(nodeset);
                     ret = -1;
-                    continue;
+                    goto next;
                 }
 
                 /* Ensure the cpuset string is formated before passing to cgroup */
                 if (!(nodeset_str = virBitmapFormat(nodeset))) {
                     virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                    _("Failed to format nodeset"));
-                    virBitmapFree(nodeset);
                     ret = -1;
-                    continue;
+                    goto next;
+                }
+
+                /*Get Exisitng nodeset values */
+                if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) {
+                    ret = -1;
+                    goto next;
+                }
+                if (virBitmapParse(old_nodeset_str, 0, &old_nodeset,
+                                   VIR_DOMAIN_CPUMASK_LEN) < 0){
+                    ret = -1;
+                    goto next;
+                }
+
+                /* Merge the existing and new nodeset values */
+                if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) {
+                    ret = -1;
+                    goto next;
+                }
+
+                for (j = 0; j < caps->host.nnumaCell; j++) {
+                    bool result;
+                    if (virBitmapGetBit(nodeset, j, &result) < 0) {
+                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                       _("Failed to get cpuset bit values"));
+                        ret = -1;
+                        goto next;
+                    }
+                    if (result && (virBitmapSetBit(temp_nodeset, j) < 0)) {
+                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                       _("Failed to set temporary cpuset bit values"));
+                        ret = -1;
+                        goto next;
+                    }
+                }
+
+                if (!(temp_nodeset_str = virBitmapFormat(temp_nodeset))) {
+                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                   _("Failed to format nodeset"));
+                    ret = -1;
+                    goto next;
+                }
+
+                if (virCgroupSetCpusetMems(priv->cgroup, temp_nodeset_str) < 0) {
+                    ret = -1;
+                    goto next;
+                }
+
+                if (qemuSetVcpuCpusetMems(vm, nodeset_str) ||
+                    qemuSetEmulatorCpusetMems(vm, nodeset_str)) {
+                    ret = -1;
+                    goto next;
                 }
 
                 if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) {
                     virBitmapFree(nodeset);
                     VIR_FREE(nodeset_str);
                     ret = -1;
-                    continue;
+                    goto next;
                 }
+next :
                 VIR_FREE(nodeset_str);
+                VIR_FREE(old_nodeset_str);
+                virBitmapFree(old_nodeset);
+                VIR_FREE(temp_nodeset_str);
+                virBitmapFree(temp_nodeset);
+                if (ret) {
+                    virBitmapFree(nodeset);
+                    continue;
+                }
 
                 /* update vm->def here so that dumpxml can read the new
                  * values from vm->def. */




More information about the libvir-list mailing list