[libvirt] [PATCH 04/10] qemu: Implement period and quota tunable XML configuration and parsing.
Wen Congyang
wency at cn.fujitsu.com
Thu Jun 30 07:54:53 UTC 2011
At 06/30/2011 11:09 AM, Wen Congyang wrote:
> ---
> src/conf/domain_conf.c | 272 ++++++++++++++++++++++-
> src/conf/domain_conf.h | 25 ++
> src/libvirt_private.syms | 4 +
> src/qemu/qemu_cgroup.c | 131 +++++++++++
> src/qemu/qemu_cgroup.h | 2 +
> src/qemu/qemu_process.c | 4 +
> tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 2 +
> 7 files changed, 438 insertions(+), 2 deletions(-)
>
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 60e0318..0a1f973 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -997,6 +997,21 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def,
> VIR_FREE(def);
> }
>
> +static void
> +virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def,
> + int nvcpubw)
> +{
> + int i;
> +
> + if (!def || !nvcpubw)
> + return;
> +
> + for(i = 0; i < nvcpubw; i++)
> + VIR_FREE(def[i]);
> +
> + VIR_FREE(def);
> +}
> +
> void virDomainDefFree(virDomainDefPtr def)
> {
> unsigned int i;
> @@ -1089,6 +1104,9 @@ void virDomainDefFree(virDomainDefPtr def)
>
> virCPUDefFree(def->cpu);
>
> + virDomainVcpuBWDefFree(def->cputune.vcpubw,
> + def->cputune.nvcpubw);
> +
> virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin);
>
> VIR_FREE(def->numatune.memory.nodemask);
> @@ -5715,6 +5733,62 @@ error:
> goto cleanup;
> }
>
> +/* Parse the XML definition for a vcpubandwidth */
> +static virDomainVcpuBWDefPtr
> +virDomainVcpuBWDefParseXML(const xmlNodePtr node,
> + xmlXPathContextPtr ctxt,
> + int maxvcpus)
> +{
> + virDomainVcpuBWDefPtr def;
> + xmlNodePtr oldnode = ctxt->node;
> + unsigned int vcpuid;
> + unsigned long long period;
> + long long quota;
> + int ret;
> +
> + if (VIR_ALLOC(def) < 0) {
> + virReportOOMError();
> + return NULL;
> + }
> +
> + ctxt->node = node;
> +
> + ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid);
> + if (ret == -2) {
> + virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("vcpu id must be an unsigned integer"));
> + goto error;
> + } else if (ret == -1) {
> + virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("can't parse bandwidth node"));
> + goto error;
> + }
> +
> + if (vcpuid >= maxvcpus) {
> + virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("vcpu id must be less than maxvcpus"));
> + goto error;
> + }
> +
> + if (virXPathULongLong("string(./@period)", ctxt, &period) < 0)
> + period = 0;
> +
> + if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0)
> + quota = 0;
> +
> + def->vcpuid = vcpuid;
> + def->period = period;
> + def->quota = quota;
> +
> +cleanup:
> + ctxt->node = oldnode;
> + return def;
> +
> +error:
> + VIR_FREE(def);
> + goto cleanup;
> +}
> +
>
> static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
> xmlDocPtr xml,
> @@ -5881,6 +5955,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
> &def->cputune.shares) < 0)
> def->cputune.shares = 0;
>
> + if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0)
> + goto error;
> +
> + if (n > def->maxvcpus) {
> + virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("bandwidth nodes must be less than"
> + " maxvcpus"));
> + goto error;
> + }
> +
> + if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0)
> + goto no_memory;
> +
> + for (i = 0; i < n; i++) {
> + virDomainVcpuBWDefPtr vcpubw = NULL;
> + vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus);
> +
> + if (!vcpubw)
> + goto error;
> +
> + if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw,
> + def->cputune.nvcpubw,
> + vcpubw->vcpuid)) {
> + virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("duplicate vcpubandwidth for same"
> + " vcpu"));
> + VIR_FREE(vcpubw);
> + goto error;
> + }
> +
> + if (vcpubw->period || vcpubw->quota)
> + def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
> + else
> + VIR_FREE(vcpubw);
> + }
> + if (def->cputune.nvcpubw)
> + ignore_value(VIR_REALLOC_N(def->cputune.vcpubw,
> + def->cputune.nvcpubw));
> + else
> + VIR_FREE(def->cputune.vcpubw);
> +
> + VIR_FREE(nodes);
> +
> if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
> goto error;
> }
> @@ -8274,6 +8391,144 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu)
> return 0;
> }
>
> +/* Check if a vcpu bandwidth entry with the same vcpuid already exists.
> + * Return 1 if it exists, 0 if not. */
> +int
> +virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
> + int nvcpubw,
> + int vcpu)
> +{
> + int i;
> +
> + if (!def || !nvcpubw)
> + return 0;
> +
> + for (i = 0; i < nvcpubw; i++) {
> + if (def[i]->vcpuid == vcpu)
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +virDomainVcpuBWDefPtr
> +virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
> + int nvcpubw,
> + int vcpu)
> +{
> + int i;
> +
> + if (!def || !nvcpubw)
> + return NULL;
> +
> + for (i = 0; i < nvcpubw; i++) {
> + if (def[i]->vcpuid == vcpu)
> + return def[i];
> + }
> +
> + return NULL;
> +}
> +
> +int
> +virDomainVcpuBWAdd(virDomainDefPtr def,
> + unsigned long long period,
> + long long quota,
> + int vcpu)
> +{
> + virDomainVcpuBWDefPtr *vcpubw_list = NULL;
> + virDomainVcpuBWDefPtr vcpubw = NULL;
> +
> + /* No vcpubw exists yet. */
> + if (!def->cputune.nvcpubw) {
> + if (period == 0 && quota == 0)
> + return 0;
> +
> + if (VIR_ALLOC(vcpubw) < 0)
> + goto no_memory;
> +
> + if (VIR_ALLOC(vcpubw_list) < 0)
> + goto no_memory;
> +
> + vcpubw->vcpuid = vcpu;
> + vcpubw->period = period;
> + vcpubw->quota = quota;
> + vcpubw_list[def->cputune.nvcpubw++] = vcpubw;
> +
> + def->cputune.vcpubw = vcpubw_list;
> + } else {
> + int nvcpubw = def->cputune.nvcpubw;
> + vcpubw_list = def->cputune.vcpubw;
> + if (virDomainVcpuBWIsDuplicate(vcpubw_list, nvcpubw, vcpu)) {
> + vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, vcpu);
> + if (period == 0 && quota == 0) {
> + return virDomainVcpuBWDel(def, vcpu);
> + } else {
> + vcpubw->vcpuid = vcpu;
> + vcpubw->period = period;
> + vcpubw->quota = quota;
> + }
> + } else {
> + if (period == 0 && quota == 0)
> + return 0;
> +
> + if (VIR_ALLOC(vcpubw) < 0)
> + goto no_memory;
> +
> + if (VIR_REALLOC_N(vcpubw_list, nvcpubw + 1) < 0)
> + goto no_memory;
> +
> + vcpubw->vcpuid = vcpu;
> + vcpubw->period = period;
> + vcpubw->quota = quota;
> + vcpubw_list[def->cputune.nvcpubw++] = vcpubw;
> + }
> + }
> +
> + return 0;
> +
> +no_memory:
> + virReportOOMError();
> + VIR_FREE(vcpubw);
> + return -1;
> +}
> +
> +int
> +virDomainVcpuBWDel(virDomainDefPtr def, int vcpu)
> +{
> + int n;
> + bool deleted = false;
> + virDomainVcpuBWDefPtr *vcpubw_list = def->cputune.vcpubw;
> +
> + /* No vcpubw exists yet */
> + if (!def->cputune.nvcpubw)
> + return 0;
> +
> + for (n = 0; n < def->cputune.nvcpubw; n++) {
> + if (vcpubw_list[n]->vcpuid == vcpu) {
> + VIR_FREE(vcpubw_list[n]);
> + memmove(&vcpubw_list[n], &vcpubw_list[n+1],
> + (def->cputune.nvcpubw - n - 1) *
> + sizeof(virDomainVcpuBWDefPtr));
> + deleted = true;
> + break;
> + }
> + }
> +
> + if (!deleted)
> + return 0;
> +
> + if (--def->cputune.nvcpubw == 0) {
> + VIR_FREE(def->cputune.vcpubw);
> + } else {
> + if (VIR_REALLOC_N(def->cputune.vcpubw,
> + def->cputune.nvcpubw) < 0) {
> + /* ignore, harmless */
> + }
> + }
> +
> + return 0;
> +}
> +
> static int
> virDomainLifecycleDefFormat(virBufferPtr buf,
> int type,
> @@ -9553,12 +9808,24 @@ char *virDomainDefFormat(virDomainDefPtr def,
> virBufferAsprintf(&buf, " current='%u'", def->vcpus);
> virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus);
>
> - if (def->cputune.shares || def->cputune.vcpupin)
> + if (def->cputune.shares || def->cputune.vcpupin ||
> + def->cputune.vcpubw)
> virBufferAddLit(&buf, " <cputune>\n");
>
> if (def->cputune.shares)
> virBufferAsprintf(&buf, " <shares>%lu</shares>\n",
> def->cputune.shares);
> + if (def->cputune.vcpubw) {
> + int i;
> + for (i = 0; i < def->cputune.nvcpubw; i++) {
> + virBufferAsprintf(&buf, " <bandwidth vcpu='%u' ",
> + def->cputune.vcpubw[i]->vcpuid);
> + virBufferAsprintf(&buf, "period='%llu' ",
> + def->cputune.vcpubw[i]->period);
> + virBufferAsprintf(&buf, "quota='%lld'/>\n",
> + def->cputune.vcpubw[i]->quota);
> + }
> + }
> if (def->cputune.vcpupin) {
> int i;
> for (i = 0; i < def->cputune.nvcpupin; i++) {
> @@ -9580,7 +9847,8 @@ char *virDomainDefFormat(virDomainDefPtr def,
> }
> }
>
> - if (def->cputune.shares || def->cputune.vcpupin)
> + if (def->cputune.shares || def->cputune.vcpupin ||
> + def->cputune.vcpubw)
> virBufferAddLit(&buf, " </cputune>\n");
>
> if (def->numatune.memory.nodemask)
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index e81977c..a2929b5 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -1108,6 +1108,14 @@ struct _virDomainVcpuPinDef {
> char *cpumask;
> };
>
> +typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef;
> +typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr;
> +struct _virDomainVcpuBWDef {
> + int vcpuid;
> + unsigned long long period;
> + long long quota;
> +};
> +
> int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def,
> int nvcpupin,
> int vcpu);
> @@ -1116,6 +1124,14 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
> int nvcpupin,
> int vcpu);
>
> +int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
> + int nvcpubw,
> + int vcpu);
> +
> +virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
> + int nvcpubw,
> + int vcpu);
> +
> enum virDomainNumatuneMemMode {
> VIR_DOMAIN_NUMATUNE_MEM_STRICT,
> VIR_DOMAIN_NUMATUNE_MEM_PREFERRED,
> @@ -1170,6 +1186,8 @@ struct _virDomainDef {
>
> struct {
> unsigned long shares;
> + int nvcpubw;
> + virDomainVcpuBWDefPtr *vcpubw;
> int nvcpupin;
> virDomainVcpuPinDefPtr *vcpupin;
> } cputune;
> @@ -1413,6 +1431,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def,
>
> int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu);
>
> +int virDomainVcpuBWAdd(virDomainDefPtr def,
> + unsigned long long period,
> + long long quota,
> + int vcpu);
> +
> +int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu);
> +
> int virDomainDiskIndexByName(virDomainDefPtr def, const char *name);
> int virDomainDiskInsert(virDomainDefPtr def,
> virDomainDiskDefPtr disk);
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 9b9b6ce..aad0c3a 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString;
> virDomainTimerTickpolicyTypeToString;
> virDomainTimerTrackTypeFromString;
> virDomainTimerTrackTypeToString;
> +virDomainVcpuBWAdd;
> +virDomainVcpuBWDel;
> +virDomainVcpuBWFindByVcpu;
> +virDomainVcpuBWIsDuplicate;
> virDomainVcpuPinAdd;
> virDomainVcpuPinDel;
> virDomainVcpuPinFindByVcpu;
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index 1298924..201c0b8 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -24,6 +24,7 @@
> #include <config.h>
>
> #include "qemu_cgroup.h"
> +#include "qemu_domain.h"
> #include "cgroup.h"
> #include "logging.h"
> #include "memory.h"
> @@ -376,6 +377,136 @@ cleanup:
> return -1;
> }
>
> +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw)
> +{
> + int rc;
> + unsigned long long old_period;
> +
> + if (!vcpubw)
> + return 0;
> +
> + if (vcpubw->period == 0 && vcpubw->quota == 0)
> + return 0;
> +
> + if (vcpubw->period) {
> + /* get old period, and we can rollback if set quota failed */
> + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period);
> + if (rc < 0) {
> + virReportSystemError(-rc,
> + "%s", _("Unable to get cpu bandwidth period"));
> + return -1;
> + }
> +
> + rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period);
> + if (rc < 0) {
> + virReportSystemError(-rc,
> + "%s", _("Unable to set cpu bandwidth period"));
> + return -1;
> + }
> + }
> +
> + if (vcpubw->quota) {
> + rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota);
> + if (rc < 0) {
> + virReportSystemError(-rc,
> + "%s", _("Unable to set cpu bandwidth quota"));
> + goto cleanup;
> + }
> + }
> +
> + return 0;
> +
> +cleanup:
> + if (vcpubw->period) {
> + rc = virCgroupSetCpuCfsPeriod(cgroup, old_period);
> + if (rc < 0)
> + virReportSystemError(-rc,
> + "%s",
> + _("Unable to rollback cpu bandwidth period"));
> + }
> +
> + return -1;
> +}
> +
> +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
> +{
> + virCgroupPtr cgroup = NULL;
> + virCgroupPtr cgroup_vcpu = NULL;
> + qemuDomainObjPrivatePtr priv = vm->privateData;
> + int rc;
> + unsigned int i;
> + virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw;
> + virDomainVcpuBWDefPtr vcpubw = NULL;
> + int nvcpubw = vm->def->cputune.nvcpubw;
> +
> + if (driver->cgroup == NULL)
> + return 0; /* Not supported, so claim success */
> +
> + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
> + if (rc != 0) {
> + virReportSystemError(-rc,
> + _("Unable to find cgroup for %s"),
> + vm->def->name);
> + goto cleanup;
> + }
> +
> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
> + /* If we do not know the VCPU<->PID mapping, or all vcpus run in the
> + * same thread, we cannot control each vcpu individually. So just use
> + * the last config.
> + */
> + if (vcpubw_list) {
> + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
> + if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0)
> + goto cleanup;
> + }
> + }
> + return 0;
> + }
> +
> + for (i = 0; i < priv->nvcpupids; i++) {
> + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1);
> + if (rc < 0) {
> + virReportSystemError(-rc,
> + _("Unable to create vcpu cgroup for %s(vcpu:"
> + " %d)"),
> + vm->def->name, i);
> + goto cleanup;
> + }
> +
> + /* move the thread for vcpu to sub dir */
> + rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]);
> + if (rc < 0) {
> + virReportSystemError(-rc,
> + _("unable to add vcpu %d task %d to cgroup"),
> + i, priv->vcpupids[i]);
> + goto cleanup;
> + }
> +
> + if (vcpubw_list) {
> + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
> + vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i);
> + if (qemuSetupCgroupVcpuBW(cgroup, vcpubw) < 0)
s/cgroup/cgroup_vcpu/ — the per-vcpu bandwidth must be applied to the vcpu's sub-cgroup (cgroup_vcpu), not the domain-level cgroup, otherwise each iteration re-applies the limit to the whole domain.
> + goto cleanup;
> + }
> + }
> +
> + virCgroupFree(&cgroup_vcpu);
> + }
> +
> + virCgroupFree(&cgroup_vcpu);
> + virCgroupFree(&cgroup);
> + return 0;
> +
> +cleanup:
> + virCgroupFree(&cgroup_vcpu);
> + if (cgroup) {
> + virCgroupRemove(cgroup);
> + virCgroupFree(&cgroup);
> + }
> +
> + return -1;
> +}
> +
>
> int qemuRemoveCgroup(struct qemud_driver *driver,
> virDomainObjPtr vm,
> diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
> index e8abfb4..f0a5cee 100644
> --- a/src/qemu/qemu_cgroup.h
> +++ b/src/qemu/qemu_cgroup.h
> @@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev,
> void *opaque);
> int qemuSetupCgroup(struct qemud_driver *driver,
> virDomainObjPtr vm);
> +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw);
> +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
> int qemuRemoveCgroup(struct qemud_driver *driver,
> virDomainObjPtr vm,
> int quiet);
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 88a31a3..ce3a4bb 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn,
> if (qemuProcessDetectVcpuPIDs(driver, vm) < 0)
> goto cleanup;
>
> + VIR_DEBUG("Setting cgroup for each VCPU(if required)");
> + if (qemuSetupCgroupForVcpu(driver, vm) < 0)
> + goto cleanup;
> +
> VIR_DEBUG("Setting VCPU affinities");
> if (qemuProcessSetVcpuAffinites(conn, vm) < 0)
> goto cleanup;
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> index 0afbadb..0a67e40 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
> @@ -6,6 +6,8 @@
> <vcpu>2</vcpu>
> <cputune>
> <shares>2048</shares>
> + <bandwidth vcpu='0' period='1000000' quota='-1'/>
> + <bandwidth vcpu='1' period='1000' quota='1000'/>
> <vcpupin vcpu='0' cpuset='0'/>
> <vcpupin vcpu='1' cpuset='1'/>
> </cputune>
More information about the libvir-list
mailing list