[libvirt] [PATCH v3] Pin guest to memory node on NUMA system
Shivaprasad bhat
shivaprasadbhat at gmail.com
Tue Dec 3 12:36:12 UTC 2013
Thanks a lot Martin. I have sent v4 for review.
Regards,
Shiva
On Mon, Dec 2, 2013 at 7:55 PM, Martin Kletzander <mkletzan at redhat.com> wrote:
> On Tue, Nov 26, 2013 at 07:59:31PM +0530, Shivaprasad G Bhat wrote:
>> Version 3:
>> Addressed comments on V2.
>>
>> Version 2:
>> Fixed the string formatting errors in v1.
>>
>> The patch contains the fix for defect 1009880 reported at redhat bugzilla.
>> The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the
>> parent cpuset cannot be modified to remove the nodes that are in use by the
>> subcpusets.
>> The fix is to break the memory node modification into three steps as to assign
>> new nodes into the parent first. Change the nodes in the child nodes. Then
>> remove the old nodes on the parent node.
>>
>> Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
>> ---
>> src/qemu/qemu_driver.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++--
>> 1 file changed, 110 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 8a1eefd..4bc9d1d 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -8132,6 +8132,47 @@ cleanup:
>> }
>>
>> static int
>> +qemuSetVcpuCpusetMems(virDomainObjPtr vm,
>> + char *nodeset_str)
>> +{
>> + size_t j = 0;
>> + qemuDomainObjPrivatePtr priv = vm->privateData;
>> + virCgroupPtr cgroup_vcpu = NULL;
>> +
>> + for (j = 0; j < priv->nvcpupids; j++) {
>> + if (virCgroupNewVcpu(priv->cgroup, j, false, &cgroup_vcpu) < 0) {
>> + return -1;
>> + }
>> + if (virCgroupSetCpusetMems(cgroup_vcpu, nodeset_str) < 0) {
>> + virCgroupFree(&cgroup_vcpu);
>> + return -1;
>> + }
>> + virCgroupFree(&cgroup_vcpu);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> +qemuSetEmulatorCpusetMems(virDomainObjPtr vm,
>> + char *nodeset_str)
>> +{
>> + qemuDomainObjPrivatePtr priv = vm->privateData;
>> + virCgroupPtr cgroup_emulator = NULL;
>> +
>> + if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_emulator) < 0) {
>> + return -1;
>> + }
>> + if (virCgroupSetCpusetMems(cgroup_emulator, nodeset_str) < 0) {
>> + virCgroupFree(&cgroup_emulator);
>> + return -1;
>> + }
>> + virCgroupFree(&cgroup_emulator);
>> +
>> + return 0;
>> +}
>> +
>
> I suggested to offload this to a different function just in case it is
> used in more places than this one. If it is not then it just adds
> more code.
>
>> +static int
>> qemuDomainSetNumaParameters(virDomainPtr dom,
>> virTypedParameterPtr params,
>> int nparams,
>> @@ -8198,7 +8239,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>> }
>> } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) {
>> virBitmapPtr nodeset = NULL;
>> + virBitmapPtr old_nodeset = NULL;
>> + virBitmapPtr temp_nodeset = NULL;
>> char *nodeset_str = NULL;
>> + char *old_nodeset_str = NULL;
>> + char *temp_nodeset_str = NULL;
>>
>> if (virBitmapParse(params[i].value.s,
>> 0, &nodeset,
>> @@ -8208,32 +8253,92 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>> }
>>
>> if (flags & VIR_DOMAIN_AFFECT_LIVE) {
>> + size_t j;
>> +
>> if (vm->def->numatune.memory.mode !=
>> VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
>> virReportError(VIR_ERR_OPERATION_INVALID, "%s",
>> _("change of nodeset for running domain "
>> "requires strict numa mode"));
>> - virBitmapFree(nodeset);
>> ret = -1;
>> - continue;
>> + goto next;
>> }
>>
>> /* Ensure the cpuset string is formated before passing to cgroup */
>> if (!(nodeset_str = virBitmapFormat(nodeset))) {
>> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> _("Failed to format nodeset"));
>> - virBitmapFree(nodeset);
>> ret = -1;
>> - continue;
>> + goto next;
>> + }
>> +
>> + /*Get Exisitng nodeset values */
>> + if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) {
>> + ret = -1;
>> + goto next;
>> + }
>> + if (virBitmapParse(old_nodeset_str, 0, &old_nodeset,
>> + VIR_DOMAIN_CPUMASK_LEN) < 0){
>> + ret = -1;
>> + goto next;
>> + }
>> +
>> + /* Merge the existing and new nodeset values */
>> + if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) {
>> + ret = -1;
>> + goto next;
>> + }
>> +
>> + for (j = 0; j < caps->host.nnumaCell; j++) {
>> + bool result;
>> + if (virBitmapGetBit(nodeset, j, &result) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("Failed to get cpuset bit values"));
>> + ret = -1;
>> + goto next;
>> + }
>> + if (result && (virBitmapSetBit(temp_nodeset, j) < 0)) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("Failed to set temporary cpuset bit values"));
>> + ret = -1;
>> + goto next;
>> + }
>> + }
>> +
>> + if (!(temp_nodeset_str = virBitmapFormat(temp_nodeset))) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("Failed to format nodeset"));
>> + ret = -1;
>> + goto next;
>> + }
>> +
>> + if (virCgroupSetCpusetMems(priv->cgroup, temp_nodeset_str) < 0) {
>> + ret = -1;
>> + goto next;
>> + }
>> +
>> + if (qemuSetVcpuCpusetMems(vm, nodeset_str) ||
>> + qemuSetEmulatorCpusetMems(vm, nodeset_str)) {
>> + ret = -1;
>> + goto next;
>> }
>>
>> if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) {
>> virBitmapFree(nodeset);
>> VIR_FREE(nodeset_str);
>> ret = -1;
>> - continue;
>> + goto next;
>> }
>> +next :
>> VIR_FREE(nodeset_str);
>> + VIR_FREE(old_nodeset_str);
>> + virBitmapFree(old_nodeset);
>> + VIR_FREE(temp_nodeset_str);
>> + virBitmapFree(temp_nodeset);
>> + if (ret) {
>> + virBitmapFree(nodeset);
>> + continue;
>> + }
>>
>
> This label makes the code unclean, but when I tried "refactoring" it
> in the way I had on my mind (Free-ing the pointers on start of the
> cycle, changing it to continue, etc.), it looked also ugly. This is
> however perfect piece of code to make it into a function. You can
> then do a 'cleanup:' label there, clean all the memory after it, set
> 'ret = -1' on start and reset it before the label; basically the same
> way we do it everywhere else. The problem is that this for() cycle is
> special in the way that it continues working with other options when a
> problem appears. Sorry I haven't mentioned it at first, but when
> looking at the code today it seems like it should be done that way.
>
> You'll get rid of those "ret = -1; goto next;" thanks to that.
>
> Thanks,
> Martin
More information about the libvir-list
mailing list