[libvirt] [PATCH v3] Pin guest to memory node on NUMA system

Shivaprasad bhat shivaprasadbhat at gmail.com
Tue Dec 3 12:36:12 UTC 2013


Thanks a lot Martin. I have sent v4 for review.

Regards,
Shiva

On Mon, Dec 2, 2013 at 7:55 PM, Martin Kletzander <mkletzan at redhat.com> wrote:
> On Tue, Nov 26, 2013 at 07:59:31PM +0530, Shivaprasad G Bhat wrote:
>> Version 3:
>> Addressed comments on V2.
>>
>> Version 2:
>> Fixed the string formatting errors in v1.
>>
>> This patch contains the fix for defect 1009880 reported in the Red Hat Bugzilla.
>> The root cause is that, ever since the sub-cpusets (vcpu, emulator) were
>> introduced, the parent cpuset cannot be modified to remove nodes that are
>> still in use by the sub-cpusets.
>> The fix is to break the memory node modification into three steps: first
>> assign the new nodes to the parent cpuset, then change the nodes in the
>> child cpusets, and finally remove the old nodes from the parent cpuset.
>>
>> Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
>> ---
>>  src/qemu/qemu_driver.c |  115 ++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 110 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 8a1eefd..4bc9d1d 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -8132,6 +8132,47 @@ cleanup:
>>  }
>>
>>  static int
>> +qemuSetVcpuCpusetMems(virDomainObjPtr vm,
>> +                      char *nodeset_str)
>> +{
>> +    size_t j = 0;
>> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>> +    virCgroupPtr cgroup_vcpu = NULL;
>> +
>> +    for (j = 0; j < priv->nvcpupids; j++) {
>> +        if (virCgroupNewVcpu(priv->cgroup, j, false, &cgroup_vcpu) < 0) {
>> +            return -1;
>> +        }
>> +        if (virCgroupSetCpusetMems(cgroup_vcpu, nodeset_str) < 0) {
>> +            virCgroupFree(&cgroup_vcpu);
>> +            return -1;
>> +        }
>> +        virCgroupFree(&cgroup_vcpu);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>> +qemuSetEmulatorCpusetMems(virDomainObjPtr vm,
>> +                          char *nodeset_str)
>> +{
>> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>> +    virCgroupPtr cgroup_emulator = NULL;
>> +
>> +    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_emulator) < 0) {
>> +        return -1;
>> +    }
>> +    if (virCgroupSetCpusetMems(cgroup_emulator, nodeset_str) < 0) {
>> +        virCgroupFree(&cgroup_emulator);
>> +        return -1;
>> +    }
>> +    virCgroupFree(&cgroup_emulator);
>> +
>> +    return 0;
>> +}
>> +
>
> I suggested offloading this to a separate function only in case it is
> used in more places than this one.  If it is not, then it just adds
> more code.
>
>> +static int
>>  qemuDomainSetNumaParameters(virDomainPtr dom,
>>                              virTypedParameterPtr params,
>>                              int nparams,
>> @@ -8198,7 +8239,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>>              }
>>          } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) {
>>              virBitmapPtr nodeset = NULL;
>> +            virBitmapPtr old_nodeset = NULL;
>> +            virBitmapPtr temp_nodeset = NULL;
>>              char *nodeset_str = NULL;
>> +            char *old_nodeset_str = NULL;
>> +            char *temp_nodeset_str = NULL;
>>
>>              if (virBitmapParse(params[i].value.s,
>>                                 0, &nodeset,
>> @@ -8208,32 +8253,92 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>>              }
>>
>>              if (flags & VIR_DOMAIN_AFFECT_LIVE) {
>> +                size_t j;
>> +
>>                  if (vm->def->numatune.memory.mode !=
>>                      VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
>>                      virReportError(VIR_ERR_OPERATION_INVALID, "%s",
>>                                     _("change of nodeset for running domain "
>>                                       "requires strict numa mode"));
>> -                    virBitmapFree(nodeset);
>>                      ret = -1;
>> -                    continue;
>> +                    goto next;
>>                  }
>>
>>                  /* Ensure the cpuset string is formated before passing to cgroup */
>>                  if (!(nodeset_str = virBitmapFormat(nodeset))) {
>>                      virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>>                                     _("Failed to format nodeset"));
>> -                    virBitmapFree(nodeset);
>>                      ret = -1;
>> -                    continue;
>> +                    goto next;
>> +                }
>> +
>> +                /*Get Exisitng nodeset values */
>> +                if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) {
>> +                    ret = -1;
>> +                    goto next;
>> +                }
>> +                if (virBitmapParse(old_nodeset_str, 0, &old_nodeset,
>> +                                   VIR_DOMAIN_CPUMASK_LEN) < 0){
>> +                    ret = -1;
>> +                    goto next;
>> +                }
>> +
>> +                /* Merge the existing and new nodeset values */
>> +                if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) {
>> +                    ret = -1;
>> +                    goto next;
>> +                }
>> +
>> +                for (j = 0; j < caps->host.nnumaCell; j++) {
>> +                    bool result;
>> +                    if (virBitmapGetBit(nodeset, j, &result) < 0) {
>> +                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                                       _("Failed to get cpuset bit values"));
>> +                        ret = -1;
>> +                        goto next;
>> +                    }
>> +                    if (result && (virBitmapSetBit(temp_nodeset, j) < 0)) {
>> +                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                                       _("Failed to set temporary cpuset bit values"));
>> +                        ret = -1;
>> +                        goto next;
>> +                    }
>> +                }
>> +
>> +                if (!(temp_nodeset_str = virBitmapFormat(temp_nodeset))) {
>> +                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                                   _("Failed to format nodeset"));
>> +                    ret = -1;
>> +                    goto next;
>> +                }
>> +
>> +                if (virCgroupSetCpusetMems(priv->cgroup, temp_nodeset_str) < 0) {
>> +                    ret = -1;
>> +                    goto next;
>> +                }
>> +
>> +                if (qemuSetVcpuCpusetMems(vm, nodeset_str) ||
>> +                    qemuSetEmulatorCpusetMems(vm, nodeset_str)) {
>> +                    ret = -1;
>> +                    goto next;
>>                  }
>>
>>                  if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) {
>>                      virBitmapFree(nodeset);
>>                      VIR_FREE(nodeset_str);
>>                      ret = -1;
>> -                    continue;
>> +                    goto next;
>>                  }
>> +next :
>>                  VIR_FREE(nodeset_str);
>> +                VIR_FREE(old_nodeset_str);
>> +                virBitmapFree(old_nodeset);
>> +                VIR_FREE(temp_nodeset_str);
>> +                virBitmapFree(temp_nodeset);
>> +                if (ret) {
>> +                    virBitmapFree(nodeset);
>> +                    continue;
>> +                }
>>
>
> This label makes the code unclean, but when I tried "refactoring" it
> the way I had in mind (freeing the pointers at the start of the
> cycle, changing it to continue, etc.), it also looked ugly.  This is,
> however, a perfect piece of code to turn into a function.  You can
> then have a 'cleanup:' label there, free all the memory after it, set
> 'ret = -1' at the start and reset it just before the label; basically
> the same way we do it everywhere else.  The problem is that this for()
> cycle is special in that it continues processing the other options when
> a problem appears.  Sorry I didn't mention it at first, but when
> looking at the code today it seems like it should be done that way.
>
> That way you'll also get rid of all those "ret = -1; goto next;" lines.
>
> Thanks,
> Martin




More information about the libvir-list mailing list