[libvirt] [PATCH 2/2] qemu: Migrate memory on numatune change

Thu Mar 19 23:30:26 UTC 2015

On 03/11/2015 08:39 AM, Martin Kletzander wrote:
> We've never set the cpuset.memory_migrate value to anything, keeping it
> on default.  However, we allow changing cpuset.mems on live domain.
> That setting, however, don't have any consequence on a domain unless
> it's going to allocate new memory.
> 
> I managed to make 'virsh numatune' move all the memory to any node I
> wanted even without disabling libnuma's numa_set_membind(), so this
> should be safe to use with it as well.
> 
> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1198497
> 
> Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
> ---
>  src/qemu/qemu_cgroup.c | 36 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index 2b5a182..976a8c4 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -1,7 +1,7 @@
>  /*
>   * qemu_cgroup.c: QEMU cgroup management
>   *
> - * Copyright (C) 2006-2014 Red Hat, Inc.
> + * Copyright (C) 2006-2015 Red Hat, Inc.
>   * Copyright (C) 2006 Daniel P. Berrange
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -652,6 +652,9 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm,
>      if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
>          return 0;
> 
> +    if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
> +        return -1;
> +

Is this OK to set even if NUMA isn't enabled?  I didn't follow the
callers all the way back and this isn't something I know a lot about, so
I'm curious. I do see from the man page it's available since Linux
2.6.16, so one would hope/think that we're OK in assuming we can access
it...

Is it possible that someone wouldn't want to migrate the memory?  That
is should this be a configurable attribute?

>      if (vm->def->cpumask ||
>          (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) {
> 
> @@ -792,9 +795,12 @@ static void
>  qemuRestoreCgroupState(virDomainObjPtr vm)
>  {
>      char *mem_mask = NULL;
> +    char *nodeset = NULL;
>      int empty = -1;
>      qemuDomainObjPrivatePtr priv = vm->privateData;
> +    size_t i = 0;
>      virBitmapPtr all_nodes;
> +    virCgroupPtr cgroup_temp = NULL;
> 
>      if (!(all_nodes = virNumaGetHostNodeset()))
>          goto error;
> @@ -809,9 +815,37 @@ qemuRestoreCgroupState(virDomainObjPtr vm)

So this is the path for currently running guests to be adjusted on
livirtd restart after install, right?

>      if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
>          goto error;
> 
> +    for (i = 0; i < priv->nvcpupids; i++) {
> +        if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 ||
> +            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
> +            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
> +            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
> +            goto cleanup;
> +
> +        virCgroupFree(&cgroup_temp);
> +    }
> +
> +    for (i = 0; i < priv->niothreadpids; i++) {
> +        if (virCgroupNewIOThread(priv->cgroup, i, false, &cgroup_temp) < 0 ||

cgroup iothread id's are 1..n, so I believe this should be 'i + 1'

simple enough to test by adding <iothreads>2</iothreads> to your domain
definition and making sure this works...

Beyond that - seems reasonable to me

John
> +            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
> +            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
> +            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
> +            goto cleanup;
> +
> +        virCgroupFree(&cgroup_temp);
> +    }
> +
> +    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
> +        virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
> +        virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
> +        virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
> +        goto cleanup;
> +
>   cleanup:
>      VIR_FREE(mem_mask);
> +    VIR_FREE(nodeset);
>      virBitmapFree(all_nodes);
> +    virCgroupFree(&cgroup_temp);
>      return;
> 
>   error:
>