[libvirt] [PATCH 5/6] qemu: Implement hypervisor's period and quota tunable XML configuration and parsing
Wen Congyang
wency at cn.fujitsu.com
Thu May 17 05:07:04 UTC 2012
At 05/17/2012 12:46 PM, KAMEZAWA Hiroyuki Wrote:
> (2012/05/17 12:54), Wen Congyang wrote:
>
>> At 05/17/2012 11:13 AM, Hu Tao Wrote:
>>> On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
>>>> set hypervisor's period and quota when the vm starts. It will affect how we set the
>>>> vm's period and quota: do not set the vm's period and quota if we limit the hypervisor
>>>> thread's bandwidth (hypervisor_quota > 0).
>>>> ---
>>>> src/conf/domain_conf.c | 25 ++++++++++++++-
>>>> src/conf/domain_conf.h | 2 +
>>>> src/qemu/qemu_cgroup.c | 37 ++++++++++++++++-------
>>>> src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------
>>>> 4 files changed, 98 insertions(+), 41 deletions(-)
>>>>
>>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>>> index 5ab052a..28a6436 100644
>>>> --- a/src/conf/domain_conf.c
>>>> +++ b/src/conf/domain_conf.c
>>>> @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>>>> &def->cputune.quota) < 0)
>>>> def->cputune.quota = 0;
>>>>
>>>> + if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt,
>>>> + &def->cputune.hypervisor_period) < 0)
>>>> + def->cputune.hypervisor_period = 0;
>>>> +
>>>> + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt,
>>>> + &def->cputune.hypervisor_quota) < 0)
>>>> + def->cputune.hypervisor_quota = 0;
>>>> +
>>>> if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>>>> goto error;
>>>> }
>>>> @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>> virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
>>>>
>>>> if (def->cputune.shares || def->cputune.vcpupin ||
>>>> - def->cputune.period || def->cputune.quota)
>>>> + def->cputune.period || def->cputune.quota ||
>>>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>> virBufferAddLit(buf, " <cputune>\n");
>>>>
>>>> if (def->cputune.shares)
>>>> @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>> if (def->cputune.quota)
>>>> virBufferAsprintf(buf, " <quota>%lld</quota>\n",
>>>> def->cputune.quota);
>>>> +
>>>> + if (def->cputune.hypervisor_period)
>>>> + virBufferAsprintf(buf, " <hypervisor_period>%llu"
>>>> + "</hypervisor_period>\n",
>>>> + def->cputune.hypervisor_period);
>>>> +
>>>> + if (def->cputune.hypervisor_period)
>>>> + virBufferAsprintf(buf, " <hypervisor_quota>%lld"
>>>> + "</hypervisor_quota>\n",
>>>> + def->cputune.hypervisor_quota);
>>>> +
>>>> if (def->cputune.vcpupin) {
>>>> for (i = 0; i < def->cputune.nvcpupin; i++) {
>>>> virBufferAsprintf(buf, " <vcpupin vcpu='%u' ",
>>>> @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>> }
>>>>
>>>> if (def->cputune.shares || def->cputune.vcpupin ||
>>>> - def->cputune.period || def->cputune.quota)
>>>> + def->cputune.period || def->cputune.quota ||
>>>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>> virBufferAddLit(buf, " </cputune>\n");
>>>>
>>>> if (def->numatune.memory.nodemask) {
>>>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>>>> index 0eed60e..2a37e26 100644
>>>> --- a/src/conf/domain_conf.h
>>>> +++ b/src/conf/domain_conf.h
>>>> @@ -1558,6 +1558,8 @@ struct _virDomainDef {
>>>> unsigned long shares;
>>>> unsigned long long period;
>>>> long long quota;
>>>> + unsigned long long hypervisor_period;
>>>> + long long hypervisor_quota;
>>>> int nvcpupin;
>>>> virDomainVcpuPinDefPtr *vcpupin;
>>>> } cputune;
>>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>>> index 727c0d4..7c6ef33 100644
>>>> --- a/src/qemu/qemu_cgroup.c
>>>> +++ b/src/qemu/qemu_cgroup.c
>>>> @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>> goto cleanup;
>>>> }
>>>
>>> Check if cgroup CPU is active here:
>>>
>>> if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> virReportSystemError(EINVAL,
>>> _("%s"),
>>> "Cgroup CPU is not active.");
>>> goto cleanup;
>>> }
>>>
>>> and remove all following checks in this function.
>>>
>>>>
>>>> - /* Set cpu bandwidth for the vm */
>>>> - if (period || quota) {
>>>> - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> - /* Ensure that we can multiply by vcpus without overflowing. */
>>>> - if (quota > LLONG_MAX / vm->def->vcpus) {
>>>> - virReportSystemError(EINVAL,
>>>> - _("%s"),
>>>> - "Unable to set cpu bandwidth quota");
>>>> - goto cleanup;
>>>> - }
>>>> + /*
>>>> + * Set cpu bandwidth for the vm if any of the following is true:
>>>> + * 1. we donot know VCPU<->PID mapping or all vcpus run in the same thread
>>>> + * 2. the hypervisor threads are not limited(quota <= 0)
>>>> + */
>>>> + if ((period || quota) &&
>>>> + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> + /* Ensure that we can multiply by vcpus without overflowing. */
>>>> + if (quota > LLONG_MAX / vm->def->vcpus) {
>>>> + virReportSystemError(EINVAL,
>>>> + _("%s"),
>>>> + "Unable to set cpu bandwidth quota");
>>>> + goto cleanup;
>>>> + }
>>>>
>>>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid ||
>>>> + vm->def->cputune.hypervisor_quota <= 0) {
>>>> if (quota > 0)
>>>> vm_quota = quota * vm->def->vcpus;
>>>> else
>>>> @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>> }
>>>>
>>>> if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>>> + /* If we donot know VCPU<->PID mapping or all vcpus run in the same
>>>> * thread, we cannot control each vcpu.
>>>> */
>>>> virCgroupFree(&cgroup);
>>>> @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>> virCgroupPtr cgroup = NULL;
>>>> virCgroupPtr cgroup_hypervisor = NULL;
>>>> qemuDomainObjPrivatePtr priv = vm->privateData;
>>>> + unsigned long long period = vm->def->cputune.hypervisor_period;
>>>> + long long quota = vm->def->cputune.hypervisor_quota;
>>>> int rc;
>>>>
>>>> if (driver->cgroup == NULL)
>>>> @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>> goto cleanup;
>>>> }
>>>>
>>>> + if (period || quota) {
>>>> + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
>>>
>>> Actually, qemuSetupCgroupVcpuBW just changes the cgroup's
>>> settings(period, quota), it doesn't care about what the
>>> cgroup is associated with. So can we change the name to
>>> qemuSetupCgroupBW?
>>>
>>>> + goto cleanup;
>>>> + }
>>>> + }
>>>> +
>>>> virCgroupFree(&cgroup_hypervisor);
>>>> virCgroupFree(&cgroup);
>>>> return 0;
>>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>>> index c3555ca..2e40aee 100644
>>>> --- a/src/qemu/qemu_driver.c
>>>> +++ b/src/qemu/qemu_driver.c
>>>> @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>> int rc;
>>>> long long vm_quota = 0;
>>>> long long old_quota = 0;
>>>> + long long hypervisor_quota = vm->def->cputune.hypervisor_quota;
>>>> unsigned long long old_period = 0;
>>>>
>>>> if (period == 0 && quota == 0)
>>>> @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>> goto cleanup;
>>>> }
>>>>
>>>> + /* If we donot know VCPU<->PID mapping or all vcpu runs in the same
>>>> + * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>>> + * for the vm.
>>>> + */
>>>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> + goto cleanup;
>>>> + return 0;
>>>> + }
>>>> +
>>>> /*
>>>> * If quota will be changed to a small value, we should modify vcpu's quota
>>>> * first. Otherwise, we should modify vm's quota first.
>>>> @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>> *
>>>> * If both quota and period will be changed to a big/small value, we cannot
>>>> * modify period and quota together.
>>>> + *
>>>> + * If the hypervisor threads are limited, we can update period for vm first,
>>>> + * and then we can modify period and quota for the vcpu together even if
>>>> + * they will be changed to a big/small value.
>>>> */
>>>> - if ((quota != 0) && (period != 0)) {
>>>> + if (hypervisor_quota <= 0 && quota != 0 && period != 0) {
>>>> if (((quota > old_quota) && (period > old_period)) ||
>>>> ((quota < old_quota) && (period < old_period))) {
>>>> /* modify period */
>>>> @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>> }
>>>> }
>>>>
>>>> - if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>>> - ((period != 0) && (period < old_period)))
>>>> - /* Set cpu bandwidth for the vm */
>>>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> - goto cleanup;
>>>> -
>>>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>>> - * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>>> - * when each vcpu has a separated thread.
>>>> + /*
>>>> + * If the hypervisor threads are not limited, set cpu bandwidth for the
>>>> + * vm.
>>>> */
>>>> - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) {
>>>> - for (i = 0; i < priv->nvcpupids; i++) {
>>>> - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>>> - if (rc < 0) {
>>>> - virReportSystemError(-rc,
>>>> - _("Unable to find vcpu cgroup for %s(vcpu:"
>>>> - " %d)"),
>>>> - vm->def->name, i);
>>>> - goto cleanup;
>>>> - }
>>>> -
>>>> - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>>> + if (vm->def->cputune.hypervisor_quota <= 0) {
>>>> + if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>>> + ((period != 0) && (period < old_period)))
>>>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> goto cleanup;
>>>> + }
>>>>
>>>> - virCgroupFree(&cgroup_vcpu);
>>>> + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */
>>>> + for (i = 0; i < priv->nvcpupids; i++) {
>>>> + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>>> + if (rc < 0) {
>>>> + virReportSystemError(-rc,
>>>> + _("Unable to find vcpu cgroup for %s(vcpu:"
>>>> + " %d)"),
>>>> + vm->def->name, i);
>>>> + goto cleanup;
>>>> }
>>>> - }
>>>>
>>>> - if (((vm_quota != 0) && (vm_quota <= old_quota)) ||
>>>> - ((period != 0) && (period >= old_period)))
>>>> - /* Set cpu bandwidth for the vm */
>>>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>>> goto cleanup;
>>>>
>>>> + virCgroupFree(&cgroup_vcpu);
>>>> + }
>>>> +
>>>> + /*
>>>> + * If the hypervisor threads are not limited, set cpu bandwidth for the vm.
>>>> + */
>>>
>>> This makes me confused. Why do we have to limit the whole vm when the
>>> hypervisor threads are not limited? Users may want to limit the vcpus
>>> only.
>>>
>>> We have 3 situations now:
>>>
>>> 1. limit the vcpus only.
>>> 2. limit the hypervisor but not vcpus.
>>> 3. limit the whole vm.
>>
>> Before this patch, we limit the whole vm when we limit the vcpus
>> because we do not want the hypervisor threads to eat too much cpu
>> time.
>>
>> So, if we do not limit the hypervisor, the behavior should not be
>> changed. So we should limit the whole vm. If the hypervisor threads
>> are limited, there is no need to limit the whole vm.
>>
>
>
> need to tune hypervisor quota to limit vcpu-only quota ?
> Sounds strange to me.
No, the current implementation is:
1. limit vcpu and hypervisor: vm is not limited
2. limit vcpu only: vm is limited
3. limit hypervisor: vm is not limited
4. vcpu and hypervisor are not limited: vm is not limited.
Thanks
Wen Congyang
>
> Thanks,
> -Kame
>
>
>
>
>
>
More information about the libvir-list
mailing list