[libvirt] [PATCH 5/6] qemu: Implement hypervisor's period and quota tunable XML configuration and parsing

Wen Congyang wency at cn.fujitsu.com
Thu May 17 05:07:04 UTC 2012


At 05/17/2012 12:46 PM, KAMEZAWA Hiroyuki Wrote:
> (2012/05/17 12:54), Wen Congyang wrote:
> 
>> At 05/17/2012 11:13 AM, Hu Tao Wrote:
>>> On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
>>>> Set the hypervisor's period and quota when the vm starts. This affects how
>>>> the vm's period and quota are set: do not set the vm's period and quota if
>>>> we limit the hypervisor threads' bandwidth (hypervisor_quota > 0).
>>>> ---
>>>>  src/conf/domain_conf.c |   25 ++++++++++++++-
>>>>  src/conf/domain_conf.h |    2 +
>>>>  src/qemu/qemu_cgroup.c |   37 ++++++++++++++++-------
>>>>  src/qemu/qemu_driver.c |   75 ++++++++++++++++++++++++++++++------------------
>>>>  4 files changed, 98 insertions(+), 41 deletions(-)
>>>>
>>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>>> index 5ab052a..28a6436 100644
>>>> --- a/src/conf/domain_conf.c
>>>> +++ b/src/conf/domain_conf.c
>>>> @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>>>>                           &def->cputune.quota) < 0)
>>>>          def->cputune.quota = 0;
>>>>  
>>>> +    if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt,
>>>> +                          &def->cputune.hypervisor_period) < 0)
>>>> +        def->cputune.hypervisor_period = 0;
>>>> +
>>>> +    if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt,
>>>> +                         &def->cputune.hypervisor_quota) < 0)
>>>> +        def->cputune.hypervisor_quota = 0;
>>>> +
>>>>      if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>>>>          goto error;
>>>>      }
>>>> @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>>      virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
>>>>  
>>>>      if (def->cputune.shares || def->cputune.vcpupin ||
>>>> -        def->cputune.period || def->cputune.quota)
>>>> +        def->cputune.period || def->cputune.quota ||
>>>> +        def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>>          virBufferAddLit(buf, "  <cputune>\n");
>>>>  
>>>>      if (def->cputune.shares)
>>>> @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>>      if (def->cputune.quota)
>>>>          virBufferAsprintf(buf, "    <quota>%lld</quota>\n",
>>>>                            def->cputune.quota);
>>>> +
>>>> +    if (def->cputune.hypervisor_period)
>>>> +        virBufferAsprintf(buf, "    <hypervisor_period>%llu"
>>>> +                          "</hypervisor_period>\n",
>>>> +                          def->cputune.hypervisor_period);
>>>> +
>>>> +    if (def->cputune.hypervisor_quota)
>>>> +        virBufferAsprintf(buf, "    <hypervisor_quota>%lld"
>>>> +                          "</hypervisor_quota>\n",
>>>> +                          def->cputune.hypervisor_quota);
>>>> +
>>>>      if (def->cputune.vcpupin) {
>>>>          for (i = 0; i < def->cputune.nvcpupin; i++) {
>>>>              virBufferAsprintf(buf, "    <vcpupin vcpu='%u' ",
>>>> @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>>      }
>>>>  
>>>>      if (def->cputune.shares || def->cputune.vcpupin ||
>>>> -        def->cputune.period || def->cputune.quota)
>>>> +        def->cputune.period || def->cputune.quota ||
>>>> +        def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>>          virBufferAddLit(buf, "  </cputune>\n");
>>>>  
>>>>      if (def->numatune.memory.nodemask) {
>>>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>>>> index 0eed60e..2a37e26 100644
>>>> --- a/src/conf/domain_conf.h
>>>> +++ b/src/conf/domain_conf.h
>>>> @@ -1558,6 +1558,8 @@ struct _virDomainDef {
>>>>          unsigned long shares;
>>>>          unsigned long long period;
>>>>          long long quota;
>>>> +        unsigned long long hypervisor_period;
>>>> +        long long hypervisor_quota;
>>>>          int nvcpupin;
>>>>          virDomainVcpuPinDefPtr *vcpupin;
>>>>      } cputune;
>>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>>> index 727c0d4..7c6ef33 100644
>>>> --- a/src/qemu/qemu_cgroup.c
>>>> +++ b/src/qemu/qemu_cgroup.c
>>>> @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>>          goto cleanup;
>>>>      }
>>>
>>> Check if cgroup CPU is active here:
>>>
>>>        if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>            virReportSystemError(EINVAL,
>>>                                 _("%s"),
>>>                                 "Cgroup CPU is not active.");
>>>            goto cleanup;
>>>        }
>>>
>>> and remove all following checks in this function.
>>>
>>>>  
>>>> -    /* Set cpu bandwidth for the vm */
>>>> -    if (period || quota) {
>>>> -        if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> -            /* Ensure that we can multiply by vcpus without overflowing. */
>>>> -            if (quota > LLONG_MAX / vm->def->vcpus) {
>>>> -                virReportSystemError(EINVAL,
>>>> -                                     _("%s"),
>>>> -                                     "Unable to set cpu bandwidth quota");
>>>> -                goto cleanup;
>>>> -            }
>>>> +    /*
>>>> +     * Set cpu bandwidth for the vm if any of the following is true:
>>>> +     * 1. we do not know the VCPU<->PID mapping, or all vcpus run in the same thread
>>>> +     * 2. the hypervisor threads are not limited (hypervisor_quota <= 0)
>>>> +     */
>>>> +    if ((period || quota) &&
>>>> +        qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> +        /* Ensure that we can multiply by vcpus without overflowing. */
>>>> +        if (quota > LLONG_MAX / vm->def->vcpus) {
>>>> +            virReportSystemError(EINVAL,
>>>> +                                 _("%s"),
>>>> +                                 "Unable to set cpu bandwidth quota");
>>>> +            goto cleanup;
>>>> +        }
>>>>  
>>>> +        if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid ||
>>>> +            vm->def->cputune.hypervisor_quota <= 0) {
>>>>              if (quota > 0)
>>>>                  vm_quota = quota * vm->def->vcpus;
>>>>              else
>>>> @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>>      }
>>>>  
>>>>      if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>>> -        /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>>> +        /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>>>>           * thread, we cannot control each vcpu.
>>>>           */
>>>>          virCgroupFree(&cgroup);
>>>> @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>>      virCgroupPtr cgroup = NULL;
>>>>      virCgroupPtr cgroup_hypervisor = NULL;
>>>>      qemuDomainObjPrivatePtr priv = vm->privateData;
>>>> +    unsigned long long period = vm->def->cputune.hypervisor_period;
>>>> +    long long quota = vm->def->cputune.hypervisor_quota;
>>>>      int rc;
>>>>  
>>>>      if (driver->cgroup == NULL)
>>>> @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>>          goto cleanup;
>>>>      }
>>>>  
>>>> +    if (period || quota) {
>>>> +        if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>>> +            if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
>>>
>>> Actually, qemuSetupCgroupVcpuBW just changes the cgroup's
>>> settings (period, quota); it doesn't care what the
>>> cgroup is associated with. So can we rename it to
>>> qemuSetupCgroupBW?
>>>
>>>> +                goto cleanup;
>>>> +        }
>>>> +    }
>>>> +
>>>>      virCgroupFree(&cgroup_hypervisor);
>>>>      virCgroupFree(&cgroup);
>>>>      return 0;
>>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>>> index c3555ca..2e40aee 100644
>>>> --- a/src/qemu/qemu_driver.c
>>>> +++ b/src/qemu/qemu_driver.c
>>>> @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>>      int rc;
>>>>      long long vm_quota = 0;
>>>>      long long old_quota = 0;
>>>> +    long long hypervisor_quota = vm->def->cputune.hypervisor_quota;
>>>>      unsigned long long old_period = 0;
>>>>  
>>>>      if (period == 0 && quota == 0)
>>>> @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>>          goto cleanup;
>>>>      }
>>>>  
>>>> +    /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>>>> +     * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>>> +     * for the vm.
>>>> +     */
>>>> +    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>>> +        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> +            goto cleanup;
>>>> +        return 0;
>>>> +    }
>>>> +
>>>>      /*
>>>>       * If quota will be changed to a small value, we should modify vcpu's quota
>>>>       * first. Otherwise, we should modify vm's quota first.
>>>> @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>>       *
>>>>       * If both quota and period will be changed to a big/small value, we cannot
>>>>       * modify period and quota together.
>>>> +     *
>>>> +     * If the hypervisor threads are limited, we can update the vm's period
>>>> +     * first, and then modify the period and quota for the vcpus together
>>>> +     * even if they will be changed to a big/small value.
>>>>       */
>>>> -    if ((quota != 0) && (period != 0)) {
>>>> +    if (hypervisor_quota <= 0 && quota != 0 && period != 0) {
>>>>          if (((quota > old_quota) && (period > old_period)) ||
>>>>              ((quota < old_quota) && (period < old_period))) {
>>>>              /* modify period */
>>>> @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>>          }
>>>>      }
>>>>  
>>>> -    if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>>> -        ((period != 0) && (period < old_period)))
>>>> -        /* Set cpu bandwidth for the vm */
>>>> -        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> -            goto cleanup;
>>>> -
>>>> -    /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>>> -     * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>>> -     * when each vcpu has a separated thread.
>>>> +    /*
>>>> +     * If the hypervisor threads are not limited, set cpu bandwidth for the
>>>> +     * vm.
>>>>       */
>>>> -    if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) {
>>>> -        for (i = 0; i < priv->nvcpupids; i++) {
>>>> -            rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>>> -            if (rc < 0) {
>>>> -                virReportSystemError(-rc,
>>>> -                                     _("Unable to find vcpu cgroup for %s(vcpu:"
>>>> -                                       " %d)"),
>>>> -                                     vm->def->name, i);
>>>> -                goto cleanup;
>>>> -            }
>>>> -
>>>> -            if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>>> +    if (vm->def->cputune.hypervisor_quota <= 0) {
>>>> +        if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>>> +            ((period != 0) && (period < old_period)))
>>>> +            if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>>                  goto cleanup;
>>>> +    }
>>>>  
>>>> -            virCgroupFree(&cgroup_vcpu);
>>>> +    /* Each vcpu has a separate thread, so we modify cpu bandwidth per vcpu. */
>>>> +    for (i = 0; i < priv->nvcpupids; i++) {
>>>> +        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>>> +        if (rc < 0) {
>>>> +            virReportSystemError(-rc,
>>>> +                                 _("Unable to find vcpu cgroup for %s(vcpu:"
>>>> +                                   " %d)"),
>>>> +                                 vm->def->name, i);
>>>> +            goto cleanup;
>>>>          }
>>>> -    }
>>>>  
>>>> -    if (((vm_quota != 0) && (vm_quota <= old_quota)) ||
>>>> -        ((period != 0) && (period >= old_period)))
>>>> -        /* Set cpu bandwidth for the vm */
>>>> -        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>> +        if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>>>              goto cleanup;
>>>>  
>>>> +        virCgroupFree(&cgroup_vcpu);
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * If the hypervisor threads are not limited, set cpu bandwidth for the vm.
>>>> +     */
>>>
>>> This confuses me. Why do we have to limit the whole vm when the
>>> hypervisor threads are not limited? Users may want to limit only the
>>> vcpus.
>>>
>>> We have 3 situations now:
>>>
>>> 1. limit the vcpus only.
>>> 2. limit the hypervisor but not vcpus.
>>> 3. limit the whole vm.
>>
>> Before this patch, we limit the whole vm when we limit the vcpus,
>> because we do not want the hypervisor threads to eat too much cpu
>> time.
>>
>> So, if we do not limit the hypervisor, the behavior should not
>> change: we should still limit the whole vm. If the hypervisor threads
>> are limited, there is no need to limit the whole vm.
>>
> 
> 
> So we need to tune the hypervisor quota in order to limit only the vcpus?
> Sounds strange to me.

No, the current implementation is:
1. limit vcpu and hypervisor: vm is not limited
2. limit vcpu only: vm is limited
3. limit hypervisor: vm is not limited
4. vcpu and hypervisor are not limited: vm is not limited.
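
For reference, the new tunables from this patch show up in the domain XML
like this (values are illustrative only; the element names match the
parsing code above):

    <cputune>
      <period>100000</period>
      <quota>50000</quota>
      <hypervisor_period>100000</hypervisor_period>
      <hypervisor_quota>10000</hypervisor_quota>
    </cputune>

And the four cases reduce to one check. As a condensed sketch (a
hypothetical helper, not the verbatim patch; it only matters inside the
existing "if (period || quota)" branch of qemuSetupCgroupForVcpu()):

    /* Return true when the vm-wide cpu bandwidth limit should be applied:
     * either we cannot control each vcpu thread individually, or the
     * hypervisor threads are not limited. */
    static bool qemuNeedVmBandwidth(qemuDomainObjPrivatePtr priv,
                                    virDomainObjPtr vm)
    {
        return priv->nvcpupids == 0 ||                  /* no VCPU<->PID mapping */
               priv->vcpupids[0] == vm->pid ||          /* vcpus share one thread */
               vm->def->cputune.hypervisor_quota <= 0;  /* hypervisor unlimited */
    }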

Thanks
Wen Congyang

> 
> Thanks,
> -Kame
> 



