[libvirt] [PATCH v3] Pin guest to memory node on NUMA system

Martin Kletzander mkletzan at redhat.com
Mon Dec 2 14:25:56 UTC 2013


On Tue, Nov 26, 2013 at 07:59:31PM +0530, Shivaprasad G Bhat wrote:
> Version 3:
> Addressed comments on V2.
> 
> Version 2:
> Fixed the string formatting errors in v1.
> 
> The patch contains the fix for defect 1009880 reported at redhat bugzilla.
> The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the
> parent cpuset cannot be modified to remove the nodes that are in use by the
> subcpusets.
> The fix is to break the memory node modification into three steps as to assign
> new nodes into the parent first. Change the nodes in the child nodes. Then
> remove the old nodes on the parent node.
> 
> Signed-off-by: Shivaprasad G Bhat <sbhat at linux.vnet.ibm.com>
> ---
>  src/qemu/qemu_driver.c |  115 ++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 110 insertions(+), 5 deletions(-)
> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 8a1eefd..4bc9d1d 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -8132,6 +8132,47 @@ cleanup:
>  }
>  
>  static int
> +qemuSetVcpuCpusetMems(virDomainObjPtr vm,
> +                      char *nodeset_str)
> +{
> +    size_t j = 0;
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    virCgroupPtr cgroup_vcpu = NULL;
> +
> +    for (j = 0; j < priv->nvcpupids; j++) {
> +        if (virCgroupNewVcpu(priv->cgroup, j, false, &cgroup_vcpu) < 0) {
> +            return -1;
> +        }
> +        if (virCgroupSetCpusetMems(cgroup_vcpu, nodeset_str) < 0) {
> +            virCgroupFree(&cgroup_vcpu);
> +            return -1;
> +        }
> +        virCgroupFree(&cgroup_vcpu);
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +qemuSetEmulatorCpusetMems(virDomainObjPtr vm,
> +                          char *nodeset_str)
> +{
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    virCgroupPtr cgroup_emulator = NULL;
> +
> +    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_emulator) < 0) {
> +        return -1;
> +    }
> +    if (virCgroupSetCpusetMems(cgroup_emulator, nodeset_str) < 0) {
> +        virCgroupFree(&cgroup_emulator);
> +        return -1;
> +    }
> +    virCgroupFree(&cgroup_emulator);
> +
> +    return 0;
> +}
> +

I suggested to offload this to a different function just in case it is
used in more places than this one.  If it is not then it just adds
more code.

> +static int
>  qemuDomainSetNumaParameters(virDomainPtr dom,
>                              virTypedParameterPtr params,
>                              int nparams,
> @@ -8198,7 +8239,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>              }
>          } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) {
>              virBitmapPtr nodeset = NULL;
> +            virBitmapPtr old_nodeset = NULL;
> +            virBitmapPtr temp_nodeset = NULL;
>              char *nodeset_str = NULL;
> +            char *old_nodeset_str = NULL;
> +            char *temp_nodeset_str = NULL;
>  
>              if (virBitmapParse(params[i].value.s,
>                                 0, &nodeset,
> @@ -8208,32 +8253,92 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
>              }
>  
>              if (flags & VIR_DOMAIN_AFFECT_LIVE) {
> +                size_t j;
> +
>                  if (vm->def->numatune.memory.mode !=
>                      VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
>                      virReportError(VIR_ERR_OPERATION_INVALID, "%s",
>                                     _("change of nodeset for running domain "
>                                       "requires strict numa mode"));
> -                    virBitmapFree(nodeset);
>                      ret = -1;
> -                    continue;
> +                    goto next;
>                  }
>  
>                  /* Ensure the cpuset string is formated before passing to cgroup */
>                  if (!(nodeset_str = virBitmapFormat(nodeset))) {
>                      virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                                     _("Failed to format nodeset"));
> -                    virBitmapFree(nodeset);
>                      ret = -1;
> -                    continue;
> +                    goto next;
> +                }
> +
> +                /*Get Exisitng nodeset values */
> +                if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) {
> +                    ret = -1;
> +                    goto next;
> +                }
> +                if (virBitmapParse(old_nodeset_str, 0, &old_nodeset,
> +                                   VIR_DOMAIN_CPUMASK_LEN) < 0){
> +                    ret = -1;
> +                    goto next;
> +                }
> +
> +                /* Merge the existing and new nodeset values */
> +                if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) {
> +                    ret = -1;
> +                    goto next;
> +                }
> +
> +                for (j = 0; j < caps->host.nnumaCell; j++) {
> +                    bool result;
> +                    if (virBitmapGetBit(nodeset, j, &result) < 0) {
> +                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                       _("Failed to get cpuset bit values"));
> +                        ret = -1;
> +                        goto next;
> +                    }
> +                    if (result && (virBitmapSetBit(temp_nodeset, j) < 0)) {
> +                        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                       _("Failed to set temporary cpuset bit values"));
> +                        ret = -1;
> +                        goto next;
> +                    }
> +                }
> +
> +                if (!(temp_nodeset_str = virBitmapFormat(temp_nodeset))) {
> +                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                   _("Failed to format nodeset"));
> +                    ret = -1;
> +                    goto next;
> +                }
> +
> +                if (virCgroupSetCpusetMems(priv->cgroup, temp_nodeset_str) < 0) {
> +                    ret = -1;
> +                    goto next;
> +                }
> +
> +                if (qemuSetVcpuCpusetMems(vm, nodeset_str) ||
> +                    qemuSetEmulatorCpusetMems(vm, nodeset_str)) {
> +                    ret = -1;
> +                    goto next;
>                  }
>  
>                  if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) {
>                      virBitmapFree(nodeset);
>                      VIR_FREE(nodeset_str);
>                      ret = -1;
> -                    continue;
> +                    goto next;
>                  }
> +next :
>                  VIR_FREE(nodeset_str);
> +                VIR_FREE(old_nodeset_str);
> +                virBitmapFree(old_nodeset);
> +                VIR_FREE(temp_nodeset_str);
> +                virBitmapFree(temp_nodeset);
> +                if (ret) {
> +                    virBitmapFree(nodeset);
> +                    continue;
> +                }
>  

This label makes the code unclean, but when I tried "refactoring" it
in the way I had on my mind (Free-ing the pointers on start of the
cycle, changing it to continue, etc.), it looked also ugly.  This is
however perfect piece of code to make it into a function.  You can
then do a 'cleanup:' label there, clean all the memory after it, set
'ret = -1' on start and reset it before the label; basically the same
way we do it everywhere else.  The problem is that this for() cycle is
special in the way that it continues working with other options when a
problem appears.  Sorry I haven't mentioned it at first, but when
looking at the code today it seems like it should be done that way.

You'll get rid of those "ret = -1; goto next;" thanks to that.

Thanks,
Martin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20131202/2e6dc4cb/attachment-0001.sig>


More information about the libvir-list mailing list