[libvirt] [PATCH 04/10] qemu: Implement period and quota tunable XML configuration and parsing.

Wen Congyang wency at cn.fujitsu.com
Thu Jun 30 07:54:53 UTC 2011


At 06/30/2011 11:09 AM, Wen Congyang wrote:
> ---
>  src/conf/domain_conf.c                          |  272 ++++++++++++++++++++++-
>  src/conf/domain_conf.h                          |   25 ++
>  src/libvirt_private.syms                        |    4 +
>  src/qemu/qemu_cgroup.c                          |  131 +++++++++++
>  src/qemu/qemu_cgroup.h                          |    2 +
>  src/qemu/qemu_process.c                         |    4 +
>  tests/qemuxml2argvdata/qemuxml2argv-cputune.xml |    2 +
>  7 files changed, 438 insertions(+), 2 deletions(-)
> 
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 60e0318..0a1f973 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -997,6 +997,21 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def,
>      VIR_FREE(def);
>  }
>  
> +static void
> +virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def,
> +                       int nvcpubw)
> +{
> +    int i;
> +
> +    if (!def || !nvcpubw)
> +        return;
> +
> +    for(i = 0; i < nvcpubw; i++)
> +        VIR_FREE(def[i]);
> +
> +    VIR_FREE(def);
> +}
> +
>  void virDomainDefFree(virDomainDefPtr def)
>  {
>      unsigned int i;
> @@ -1089,6 +1104,9 @@ void virDomainDefFree(virDomainDefPtr def)
>  
>      virCPUDefFree(def->cpu);
>  
> +    virDomainVcpuBWDefFree(def->cputune.vcpubw,
> +                           def->cputune.nvcpubw);
> +
>      virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin);
>  
>      VIR_FREE(def->numatune.memory.nodemask);
> @@ -5715,6 +5733,62 @@ error:
>      goto cleanup;
>  }
>  
> +/* Parse the XML definition for a vcpubandwidth */
> +static virDomainVcpuBWDefPtr
> +virDomainVcpuBWDefParseXML(const xmlNodePtr node,
> +                           xmlXPathContextPtr ctxt,
> +                           int maxvcpus)
> +{
> +    virDomainVcpuBWDefPtr def;
> +    xmlNodePtr oldnode = ctxt->node;
> +    unsigned int vcpuid;
> +    unsigned long long period;
> +    long long quota;
> +    int ret;
> +
> +    if (VIR_ALLOC(def) < 0) {
> +        virReportOOMError();
> +        return NULL;
> +    }
> +
> +    ctxt->node = node;
> +
> +    ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid);
> +    if (ret == -2) {
> +        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> +                             "%s", _("vcpu id must be an unsigned integer"));
> +        goto error;
> +    } else if (ret == -1) {
> +        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> +                             "%s", _("can't parse vcpupin node"));
> +        goto error;
> +    }
> +
> +    if (vcpuid >= maxvcpus) {
> +        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> +                             "%s", _("vcpu id must be less than maxvcpus"));
> +        goto error;
> +    }
> +
> +    if (virXPathULongLong("string(./@period)", ctxt, &period) < 0)
> +        period = 0;
> +
> +    if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0)
> +        quota = 0;
> +
> +    def->vcpuid = vcpuid;
> +    def->period = period;
> +    def->quota = quota;
> +
> +cleanup:
> +    ctxt->node = oldnode;
> +    return def;
> +
> +error:
> +    VIR_FREE(def);
> +    goto cleanup;
> +}
> +
>  
>  static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>                                              xmlDocPtr xml,
> @@ -5881,6 +5955,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>                        &def->cputune.shares) < 0)
>          def->cputune.shares = 0;
>  
> +    if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0)
> +        goto error;
> +
> +    if (n > def->maxvcpus) {
> +        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> +                             "%s", _("bandwith nodes must be less than"
> +                                     " maxvcpus"));
> +        goto error;
> +    }
> +
> +    if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0)
> +        goto no_memory;
> +
> +    for (i = 0; i < n; i++) {
> +        virDomainVcpuBWDefPtr vcpubw = NULL;
> +        vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus);
> +
> +        if (!vcpubw)
> +            goto error;
> +
> +        if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw,
> +                                       def->cputune.nvcpubw,
> +                                       vcpubw->vcpuid)) {
> +            virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> +                                 "%s", _("duplicate vcpubandwidth for same"
> +                                         " vcpu"));
> +            VIR_FREE(vcpubw);
> +            goto error;
> +        }
> +
> +        if (vcpubw->period || vcpubw->quota)
> +            def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
> +        else
> +            VIR_FREE(vcpubw);
> +    }
> +    if (def->cputune.nvcpubw)
> +        ignore_value(VIR_REALLOC_N(def->cputune.vcpubw,
> +                                   def->cputune.nvcpubw));
> +    else
> +        VIR_FREE(def->cputune.vcpubw);
> +
> +    VIR_FREE(nodes);
> +
>      if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>          goto error;
>      }
> @@ -8274,6 +8391,144 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu)
>      return 0;
>  }
>  
> +/* Check if vcpubw with same vcpuid already exists.
> + * Return 1 if exists, 0 if not. */
> +int
> +virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
> +                           int nvcpubw,
> +                           int vcpu)
> +{
> +    int i;
> +
> +    if (!def || !nvcpubw)
> +        return 0;
> +
> +    for (i = 0; i < nvcpubw; i++) {
> +        if (def[i]->vcpuid == vcpu)
> +            return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +virDomainVcpuBWDefPtr
> +virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
> +                          int nvcpubw,
> +                          int vcpu)
> +{
> +    int i;
> +
> +    if (!def || !nvcpubw)
> +        return NULL;
> +
> +    for (i = 0; i < nvcpubw; i++) {
> +        if (def[i]->vcpuid == vcpu)
> +            return def[i];
> +    }
> +
> +    return NULL;
> +}
> +
> +int
> +virDomainVcpuBWAdd(virDomainDefPtr def,
> +                   unsigned long long period,
> +                   long long quota,
> +                   int vcpu)
> +{
> +    virDomainVcpuBWDefPtr *vcpubw_list = NULL;
> +    virDomainVcpuBWDefPtr vcpubw = NULL;
> +
> +    /* No vcpubw exists yet. */
> +    if (!def->cputune.nvcpubw) {
> +        if (period == 0 && quota == 0)
> +            return 0;
> +
> +        if (VIR_ALLOC(vcpubw) < 0)
> +            goto no_memory;
> +
> +        if (VIR_ALLOC(vcpubw_list) < 0)
> +            goto no_memory;
> +
> +        vcpubw->vcpuid = vcpu;
> +        vcpubw->period = period;
> +        vcpubw->quota = quota;
> +        vcpubw_list[def->cputune.nvcpubw++] = vcpubw;
> +
> +        def->cputune.vcpubw = vcpubw_list;
> +    } else {
> +        int nvcpubw = def->cputune.nvcpubw;
> +        vcpubw_list = def->cputune.vcpubw;
> +        if (virDomainVcpuBWIsDuplicate(vcpubw_list, nvcpubw, vcpu)) {
> +            vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, vcpu);
> +            if (period == 0 && quota == 0) {
> +                return virDomainVcpuBWDel(def, vcpu);
> +            } else {
> +                vcpubw->vcpuid = vcpu;
> +                vcpubw->period = period;
> +                vcpubw->quota = quota;
> +            }
> +        } else {
> +            if (period == 0 && quota == 0)
> +                return 0;
> +
> +            if (VIR_ALLOC(vcpubw) < 0)
> +                goto no_memory;
> +
> +            if (VIR_REALLOC_N(vcpubw_list, nvcpubw + 1) < 0)
> +                goto no_memory;
> +
> +            vcpubw->vcpuid = vcpu;
> +            vcpubw->period = period;
> +            vcpubw->quota = quota;
> +            vcpubw_list[def->cputune.nvcpubw++] = vcpubw;
> +       }
> +    }
> +
> +    return 0;
> +
> +no_memory:
> +    virReportOOMError();
> +    VIR_FREE(vcpubw);
> +    return -1;
> +}
> +
> +int
> +virDomainVcpuBWDel(virDomainDefPtr def, int vcpu)
> +{
> +    int n;
> +    bool deleted = false;
> +    virDomainVcpuBWDefPtr *vcpubw_list = def->cputune.vcpubw;
> +
> +    /* No vcpubw exists yet */
> +    if (!def->cputune.nvcpubw)
> +        return 0;
> +
> +    for (n = 0; n < def->cputune.nvcpubw; n++) {
> +        if (vcpubw_list[n]->vcpuid == vcpu) {
> +            VIR_FREE(vcpubw_list[n]);
> +            memmove(&vcpubw_list[n], &vcpubw_list[n+1],
> +                    (def->cputune.nvcpubw - n - 1) *
> +                    sizeof(virDomainVcpuBWDefPtr));
> +            deleted = true;
> +            break;
> +        }
> +    }
> +
> +    if (!deleted)
> +        return 0;
> +
> +    if (--def->cputune.nvcpubw == 0) {
> +        VIR_FREE(def->cputune.vcpubw);
> +    } else {
> +        if (VIR_REALLOC_N(def->cputune.vcpubw,
> +                          def->cputune.nvcpubw) < 0) {
> +            /* ignore, harmless */
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int
>  virDomainLifecycleDefFormat(virBufferPtr buf,
>                              int type,
> @@ -9553,12 +9808,24 @@ char *virDomainDefFormat(virDomainDefPtr def,
>          virBufferAsprintf(&buf, " current='%u'", def->vcpus);
>      virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus);
>  
> -    if (def->cputune.shares || def->cputune.vcpupin)
> +    if (def->cputune.shares || def->cputune.vcpupin ||
> +        def->cputune.vcpubw)
>          virBufferAddLit(&buf, "  <cputune>\n");
>  
>      if (def->cputune.shares)
>          virBufferAsprintf(&buf, "    <shares>%lu</shares>\n",
>                            def->cputune.shares);
> +    if (def->cputune.vcpubw) {
> +        int i;
> +        for (i = 0; i < def->cputune.nvcpubw; i++) {
> +            virBufferAsprintf(&buf, "    <bandwidth vcpu='%u' ",
> +                              def->cputune.vcpubw[i]->vcpuid);
> +            virBufferAsprintf(&buf, "period='%llu' ",
> +                              def->cputune.vcpubw[i]->period);
> +            virBufferAsprintf(&buf, "quota='%lld'/>\n",
> +                              def->cputune.vcpubw[i]->quota);
> +        }
> +    }
>      if (def->cputune.vcpupin) {
>          int i;
>          for (i = 0; i < def->cputune.nvcpupin; i++) {
> @@ -9580,7 +9847,8 @@ char *virDomainDefFormat(virDomainDefPtr def,
>          }
>      }
>  
> -    if (def->cputune.shares || def->cputune.vcpupin)
> +    if (def->cputune.shares || def->cputune.vcpupin ||
> +        def->cputune.vcpubw)
>          virBufferAddLit(&buf, "  </cputune>\n");
>  
>      if (def->numatune.memory.nodemask)
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index e81977c..a2929b5 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -1108,6 +1108,14 @@ struct _virDomainVcpuPinDef {
>      char *cpumask;
>  };
>  
> +typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef;
> +typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr;
> +struct _virDomainVcpuBWDef {
> +    int vcpuid;
> +    unsigned long long period;
> +    long long quota;
> +};
> +
>  int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def,
>                                  int nvcpupin,
>                                  int vcpu);
> @@ -1116,6 +1124,14 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
>                                                    int nvcpupin,
>                                                    int vcpu);
>  
> +int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
> +                               int nvcpubw,
> +                               int vcpu);
> +
> +virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
> +                                                int nvcpubw,
> +                                                int vcpu);
> +
>  enum virDomainNumatuneMemMode {
>      VIR_DOMAIN_NUMATUNE_MEM_STRICT,
>      VIR_DOMAIN_NUMATUNE_MEM_PREFERRED,
> @@ -1170,6 +1186,8 @@ struct _virDomainDef {
>  
>      struct {
>          unsigned long shares;
> +        int nvcpubw;
> +        virDomainVcpuBWDefPtr *vcpubw;
>          int nvcpupin;
>          virDomainVcpuPinDefPtr *vcpupin;
>      } cputune;
> @@ -1413,6 +1431,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def,
>  
>  int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu);
>  
> +int virDomainVcpuBWAdd(virDomainDefPtr def,
> +                       unsigned long long period,
> +                       long long quota,
> +                       int vcpu);
> +
> +int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu);
> +
>  int virDomainDiskIndexByName(virDomainDefPtr def, const char *name);
>  int virDomainDiskInsert(virDomainDefPtr def,
>                          virDomainDiskDefPtr disk);
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 9b9b6ce..aad0c3a 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString;
>  virDomainTimerTickpolicyTypeToString;
>  virDomainTimerTrackTypeFromString;
>  virDomainTimerTrackTypeToString;
> +virDomainVcpuBWAdd;
> +virDomainVcpuBWDel;
> +virDomainVcpuBWFindByVcpu;
> +virDomainVcpuBWIsDuplicate;
>  virDomainVcpuPinAdd;
>  virDomainVcpuPinDel;
>  virDomainVcpuPinFindByVcpu;
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index 1298924..201c0b8 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -24,6 +24,7 @@
>  #include <config.h>
>  
>  #include "qemu_cgroup.h"
> +#include "qemu_domain.h"
>  #include "cgroup.h"
>  #include "logging.h"
>  #include "memory.h"
> @@ -376,6 +377,136 @@ cleanup:
>      return -1;
>  }
>  
> +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw)
> +{
> +    int rc;
> +    unsigned long long old_period;
> +
> +    if (!vcpubw)
> +        return 0;
> +
> +    if (vcpubw->period == 0 && vcpubw->quota == 0)
> +        return 0;
> +
> +    if (vcpubw->period) {
> +        /* get old period, and we can rollback if set quota failed */
> +        rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("%s"), "Unable to get cpu bandwidth period");
> +            return -1;
> +        }
> +
> +        rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("%s"), "Unable to set cpu bandwidth period");
> +            return -1;
> +        }
> +    }
> +
> +    if (vcpubw->quota) {
> +        rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("%s"), "Unable to set cpu bandwidth quota");
> +            goto cleanup;
> +        }
> +    }
> +
> +    return 0;
> +
> +cleanup:
> +    if (vcpubw->period) {
> +        rc = virCgroupSetCpuCfsPeriod(cgroup, old_period);
> +        if (rc < 0)
> +            virReportSystemError(-rc,
> +                                 _("%s"),
> +                                 "Unable to rollback cpu bandwidth period");
> +    }
> +
> +    return -1;
> +}
> +
> +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
> +{
> +    virCgroupPtr cgroup = NULL;
> +    virCgroupPtr cgroup_vcpu = NULL;
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    int rc;
> +    unsigned int i;
> +    virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw;
> +    virDomainVcpuBWDefPtr vcpubw = NULL;
> +    int nvcpubw = vm->def->cputune.nvcpubw;
> +
> +    if (driver->cgroup == NULL)
> +        return 0; /* Not supported, so claim success */
> +
> +    rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
> +    if (rc != 0) {
> +        virReportSystemError(-rc,
> +                             _("Unable to find cgroup for %s"),
> +                             vm->def->name);
> +        goto cleanup;
> +    }
> +
> +    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
> +        /* If we do not know VCPU<->PID mapping or all vcpus run in the same
> +         * thread, we cannot control each vcpu. So just use the last config.
> +         */
> +        if (vcpubw_list) {
> +            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
> +                if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0)
> +                    goto cleanup;
> +            }
> +        }
> +        return 0;
> +    }
> +
> +    for (i = 0; i < priv->nvcpupids; i++) {
> +        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("Unable to create vcpu cgroup for %s(vcpu:"
> +                                   " %d)"),
> +                                 vm->def->name, i);
> +            goto cleanup;
> +        }
> +
> +        /* move the thread for vcpu to sub dir */
> +        rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("unable to add vcpu %d task %d to cgroup"),
> +                                 i, priv->vcpupids[i]);
> +            goto cleanup;
> +        }
> +
> +        if (vcpubw_list) {
> +            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
> +                vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i);
> +                if (qemuSetupCgroupVcpuBW(cgroup, vcpubw) < 0)

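s/cgroup/cgroup_vcpu/ -- the call to qemuSetupCgroupVcpuBW() above should pass the per-vcpu cgroup (cgroup_vcpu) created by virCgroupForVcpu(), not the domain's cgroup.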

> +                    goto cleanup;
> +            }
> +        }
> +
> +        virCgroupFree(&cgroup_vcpu);
> +    }
> +
> +    virCgroupFree(&cgroup_vcpu);
> +    virCgroupFree(&cgroup);
> +    return 0;
> +
> +cleanup:
> +    virCgroupFree(&cgroup_vcpu);
> +    if (cgroup) {
> +        virCgroupRemove(cgroup);
> +        virCgroupFree(&cgroup);
> +    }
> +
> +    return -1;
> +}
> +
>  
>  int qemuRemoveCgroup(struct qemud_driver *driver,
>                       virDomainObjPtr vm,
> diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
> index e8abfb4..f0a5cee 100644
> --- a/src/qemu/qemu_cgroup.h
> +++ b/src/qemu/qemu_cgroup.h
> @@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev,
>                                   void *opaque);
>  int qemuSetupCgroup(struct qemud_driver *driver,
>                      virDomainObjPtr vm);
> +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw);
> +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
>  int qemuRemoveCgroup(struct qemud_driver *driver,
>                       virDomainObjPtr vm,
>                       int quiet);
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 88a31a3..ce3a4bb 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn,
>      if (qemuProcessDetectVcpuPIDs(driver, vm) < 0)
>          goto cleanup;
>  
> +    VIR_DEBUG("Setting cgroup for each VCPU(if required)");
> +    if (qemuSetupCgroupForVcpu(driver, vm) < 0)
> +        goto cleanup;
> +
>      VIR_DEBUG("Setting VCPU affinities");
>      if (qemuProcessSetVcpuAffinites(conn, vm) < 0)
>          goto cleanup;
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> index 0afbadb..0a67e40 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> @@ -6,6 +6,8 @@
>    <vcpu>2</vcpu>
>    <cputune>
>      <shares>2048</shares>
> +    <bandwidth vcpu='0' period='1000000' quota='-1'/>
> +    <bandwidth vcpu='1' period='1000' quota='1000'/>
>      <vcpupin vcpu='0' cpuset='0'/>
>      <vcpupin vcpu='1' cpuset='1'/>
>    </cputune>




More information about the libvir-list mailing list