[libvirt] [PATCH 5/6] qemu: Implement hypervisor's period and quota tunable XML configuration and parsing

KAMEZAWA Hiroyuki kamezawa.hiroyu at jp.fujitsu.com
Thu May 17 04:46:59 UTC 2012


(2012/05/17 12:54), Wen Congyang wrote:

> At 05/17/2012 11:13 AM, Hu Tao Wrote:
>> On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
>>> Set the hypervisor's period and quota when the vm starts. This affects how
>>> the vm's period and quota are set: do not set the vm's period and quota if we
>>> limit the hypervisor thread's bandwidth (hypervisor_quota > 0).
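>>>
>>> For illustration, the new elements sit next to the existing period/quota
>>> settings in the domain XML; the element names below come from the patch,
>>> the values are examples only:
>>>
>>>   <cputune>
>>>     <period>100000</period>
>>>     <quota>50000</quota>
>>>     <hypervisor_period>100000</hypervisor_period>
>>>     <hypervisor_quota>20000</hypervisor_quota>
>>>   </cputune>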
>>> ---
>>>  src/conf/domain_conf.c |   25 ++++++++++++++-
>>>  src/conf/domain_conf.h |    2 +
>>>  src/qemu/qemu_cgroup.c |   37 ++++++++++++++++-------
>>>  src/qemu/qemu_driver.c |   75 ++++++++++++++++++++++++++++++------------------
>>>  4 files changed, 98 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>> index 5ab052a..28a6436 100644
>>> --- a/src/conf/domain_conf.c
>>> +++ b/src/conf/domain_conf.c
>>> @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>>>                           &def->cputune.quota) < 0)
>>>          def->cputune.quota = 0;
>>>  
>>> +    if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt,
>>> +                          &def->cputune.hypervisor_period) < 0)
>>> +        def->cputune.hypervisor_period = 0;
>>> +
>>> +    if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt,
>>> +                         &def->cputune.hypervisor_quota) < 0)
>>> +        def->cputune.hypervisor_quota = 0;
>>> +
>>>      if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>>>          goto error;
>>>      }
>>> @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>      virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
>>>  
>>>      if (def->cputune.shares || def->cputune.vcpupin ||
>>> -        def->cputune.period || def->cputune.quota)
>>> +        def->cputune.period || def->cputune.quota ||
>>> +        def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>          virBufferAddLit(buf, "  <cputune>\n");
>>>  
>>>      if (def->cputune.shares)
>>> @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>      if (def->cputune.quota)
>>>          virBufferAsprintf(buf, "    <quota>%lld</quota>\n",
>>>                            def->cputune.quota);
>>> +
>>> +    if (def->cputune.hypervisor_period)
>>> +        virBufferAsprintf(buf, "    <hypervisor_period>%llu"
>>> +                          "</hypervisor_period>\n",
>>> +                          def->cputune.hypervisor_period);
>>> +
>>> +    if (def->cputune.hypervisor_quota)
>>> +        virBufferAsprintf(buf, "    <hypervisor_quota>%lld"
>>> +                          "</hypervisor_quota>\n",
>>> +                          def->cputune.hypervisor_quota);
>>> +
>>>      if (def->cputune.vcpupin) {
>>>          for (i = 0; i < def->cputune.nvcpupin; i++) {
>>>              virBufferAsprintf(buf, "    <vcpupin vcpu='%u' ",
>>> @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>>      }
>>>  
>>>      if (def->cputune.shares || def->cputune.vcpupin ||
>>> -        def->cputune.period || def->cputune.quota)
>>> +        def->cputune.period || def->cputune.quota ||
>>> +        def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>>          virBufferAddLit(buf, "  </cputune>\n");
>>>  
>>>      if (def->numatune.memory.nodemask) {
>>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>>> index 0eed60e..2a37e26 100644
>>> --- a/src/conf/domain_conf.h
>>> +++ b/src/conf/domain_conf.h
>>> @@ -1558,6 +1558,8 @@ struct _virDomainDef {
>>>          unsigned long shares;
>>>          unsigned long long period;
>>>          long long quota;
>>> +        unsigned long long hypervisor_period;
>>> +        long long hypervisor_quota;
>>>          int nvcpupin;
>>>          virDomainVcpuPinDefPtr *vcpupin;
>>>      } cputune;
>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>> index 727c0d4..7c6ef33 100644
>>> --- a/src/qemu/qemu_cgroup.c
>>> +++ b/src/qemu/qemu_cgroup.c
>>> @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>          goto cleanup;
>>>      }
>>
>> Check if cgroup CPU is active here:
>>
>>        if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>            virReportSystemError(EINVAL,
>>                                 _("%s"),
>>                                 "Cgroup CPU is not active.");
>>            goto cleanup;
>>        }
>>
>> and remove all following checks in this function.
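>>
>> With the controller check hoisted to the top, the bandwidth branch below
>> no longer needs its own test and reduces to (sketch, reusing the patch's
>> variables):
>>
>>     if (period || quota) {
>>         /* Ensure that we can multiply by vcpus without overflowing. */
>>         if (quota > LLONG_MAX / vm->def->vcpus) {
>>             virReportSystemError(EINVAL,
>>                                  _("%s"),
>>                                  "Unable to set cpu bandwidth quota");
>>             goto cleanup;
>>         }
>>         ...
>>     }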
>>
>>>  
>>> -    /* Set cpu bandwidth for the vm */
>>> -    if (period || quota) {
>>> -        if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> -            /* Ensure that we can multiply by vcpus without overflowing. */
>>> -            if (quota > LLONG_MAX / vm->def->vcpus) {
>>> -                virReportSystemError(EINVAL,
>>> -                                     _("%s"),
>>> -                                     "Unable to set cpu bandwidth quota");
>>> -                goto cleanup;
>>> -            }
>>> +    /*
>>> +     * Set cpu bandwidth for the vm if any of the following is true:
>>> +     * 1. we do not know the VCPU<->PID mapping, or all vcpus run in the same thread
>>> +     * 2. the hypervisor threads are not limited (quota <= 0)
>>> +     */
>>> +    if ((period || quota) &&
>>> +        qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> +        /* Ensure that we can multiply by vcpus without overflowing. */
>>> +        if (quota > LLONG_MAX / vm->def->vcpus) {
>>> +            virReportSystemError(EINVAL,
>>> +                                 _("%s"),
>>> +                                 "Unable to set cpu bandwidth quota");
>>> +            goto cleanup;
>>> +        }
>>>  
>>> +        if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid ||
>>> +            vm->def->cputune.hypervisor_quota <= 0) {
>>>              if (quota > 0)
>>>                  vm_quota = quota * vm->def->vcpus;
>>>              else
>>> @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>>      }
>>>  
>>>      if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>> -        /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>> +        /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>>>           * thread, we cannot control each vcpu.
>>>           */
>>>          virCgroupFree(&cgroup);
>>> @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>      virCgroupPtr cgroup = NULL;
>>>      virCgroupPtr cgroup_hypervisor = NULL;
>>>      qemuDomainObjPrivatePtr priv = vm->privateData;
>>> +    unsigned long long period = vm->def->cputune.hypervisor_period;
>>> +    long long quota = vm->def->cputune.hypervisor_quota;
>>>      int rc;
>>>  
>>>      if (driver->cgroup == NULL)
>>> @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>>          goto cleanup;
>>>      }
>>>  
>>> +    if (period || quota) {
>>> +        if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> +            if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
>>
>> Actually, qemuSetupCgroupVcpuBW just changes the cgroup's
>> settings(period, quota), it doesn't care about what the
>> cgroup is associated with. So can we change the name to
>> qemuSetupCgroupBW?
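>>
>> For reference, a rough sketch of the renamed helper, assuming the existing
>> virCgroupSetCpuCfsPeriod()/virCgroupSetCpuCfsQuota() setters (the real
>> qemuSetupCgroupVcpuBW also rolls the period back if setting the quota
>> fails, which this sketch omits):
>>
>>     /* Set the CFS period and/or quota of an arbitrary cgroup;
>>      * a value of 0 leaves the corresponding setting untouched. */
>>     int qemuSetupCgroupBW(virCgroupPtr cgroup,
>>                           unsigned long long period,
>>                           long long quota)
>>     {
>>         int rc;
>>
>>         if (period) {
>>             rc = virCgroupSetCpuCfsPeriod(cgroup, period);
>>             if (rc < 0) {
>>                 virReportSystemError(-rc, _("%s"),
>>                                      "Unable to set cpu bandwidth period");
>>                 return -1;
>>             }
>>         }
>>
>>         if (quota) {
>>             rc = virCgroupSetCpuCfsQuota(cgroup, quota);
>>             if (rc < 0) {
>>                 virReportSystemError(-rc, _("%s"),
>>                                      "Unable to set cpu bandwidth quota");
>>                 return -1;
>>             }
>>         }
>>
>>         return 0;
>>     }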
>>
>>> +                goto cleanup;
>>> +        }
>>> +    }
>>> +
>>>      virCgroupFree(&cgroup_hypervisor);
>>>      virCgroupFree(&cgroup);
>>>      return 0;
>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>> index c3555ca..2e40aee 100644
>>> --- a/src/qemu/qemu_driver.c
>>> +++ b/src/qemu/qemu_driver.c
>>> @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>      int rc;
>>>      long long vm_quota = 0;
>>>      long long old_quota = 0;
>>> +    long long hypervisor_quota = vm->def->cputune.hypervisor_quota;
>>>      unsigned long long old_period = 0;
>>>  
>>>      if (period == 0 && quota == 0)
>>> @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>          goto cleanup;
>>>      }
>>>  
>>> +    /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>>> +     * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>> +     * for the vm.
>>> +     */
>>> +    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>> +        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> +            goto cleanup;
>>> +        return 0;
>>> +    }
>>> +
>>>      /*
>>>       * If quota will be changed to a small value, we should modify vcpu's quota
>>>       * first. Otherwise, we should modify vm's quota first.
>>> @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>       *
>>>       * If both quota and period will be changed to a big/small value, we cannot
>>>       * modify period and quota together.
>>> +     *
>>> +     * If the hypervisor threads are limited, we can update the vm's period
>>> +     * first, and then modify period and quota for the vcpus together, even
>>> +     * if both are changed to bigger/smaller values.
>>>       */
>>> -    if ((quota != 0) && (period != 0)) {
>>> +    if (hypervisor_quota <= 0 && quota != 0 && period != 0) {
>>>          if (((quota > old_quota) && (period > old_period)) ||
>>>              ((quota < old_quota) && (period < old_period))) {
>>>              /* modify period */
>>> @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>>          }
>>>      }
>>>  
>>> -    if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>> -        ((period != 0) && (period < old_period)))
>>> -        /* Set cpu bandwidth for the vm */
>>> -        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> -            goto cleanup;
>>> -
>>> -    /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>> -     * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>> -     * when each vcpu has a separated thread.
>>> +    /*
>>> +     * If the hypervisor threads are not limited, set cpu bandwidth for the
>>> +     * vm.
>>>       */
>>> -    if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) {
>>> -        for (i = 0; i < priv->nvcpupids; i++) {
>>> -            rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>> -            if (rc < 0) {
>>> -                virReportSystemError(-rc,
>>> -                                     _("Unable to find vcpu cgroup for %s(vcpu:"
>>> -                                       " %d)"),
>>> -                                     vm->def->name, i);
>>> -                goto cleanup;
>>> -            }
>>> -
>>> -            if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>> +    if (vm->def->cputune.hypervisor_quota <= 0) {
>>> +        if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>> +            ((period != 0) && (period < old_period)))
>>> +            if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>>                  goto cleanup;
>>> +    }
>>>  
>>> -            virCgroupFree(&cgroup_vcpu);
>>> +    /* Each vcpu has a separate thread, so we modify cpu bandwidth per vcpu. */
>>> +    for (i = 0; i < priv->nvcpupids; i++) {
>>> +        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>> +        if (rc < 0) {
>>> +            virReportSystemError(-rc,
>>> +                                 _("Unable to find vcpu cgroup for %s(vcpu:"
>>> +                                   " %d)"),
>>> +                                 vm->def->name, i);
>>> +            goto cleanup;
>>>          }
>>> -    }
>>>  
>>> -    if (((vm_quota != 0) && (vm_quota <= old_quota)) ||
>>> -        ((period != 0) && (period >= old_period)))
>>> -        /* Set cpu bandwidth for the vm */
>>> -        if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> +        if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>>              goto cleanup;
>>>  
>>> +        virCgroupFree(&cgroup_vcpu);
>>> +    }
>>> +
>>> +    /*
>>> +     * If the hypervisor threads are not limited, set cpu bandwidth for the vm.
>>> +     */
>>
>> This confuses me. Why do we have to limit the whole vm when the
>> hypervisor threads are not limited? Users may want to limit the vcpus
>> only.
>>
>> We have 3 situations now:
>>
>> 1. limit the vcpus only.
>> 2. limit the hypervisor but not vcpus.
>> 3. limit the whole vm.
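>>
>> In terms of which cgroup ends up with cpu.cfs_quota_us set, the mapping
>> would roughly be (my illustration, not from the patch):
>>
>> 1. the per-vcpu sub-cgroups only, with the vm-level cgroup unlimited
>> 2. the hypervisor sub-cgroup only
>> 3. the vm-level cgroup (the pre-patch behaviour)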
> 
> Before this patch, we limited the whole vm when we limited the vcpus,
> because we do not want the hypervisor threads to eat too much cpu
> time.
> 
> So, if we do not limit the hypervisor, the behavior should not be
> changed, and we should still limit the whole vm. If the hypervisor
> threads are limited, there is no need to limit the whole vm.
> 
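
If I read the patch right, the intended decision is roughly the following
(a sketch using the patch's field names; the comments are my reading, not
taken from the patch):

    /* hypervisor_quota > 0: the hypervisor threads have their own cap,
     * so cap only the per-vcpu cgroups and leave the vm-level cgroup
     * unlimited.  Otherwise keep the old behaviour and cap the whole vm
     * so the hypervisor threads cannot eat the cpu time the vcpus leave
     * unused. */
    if (vm->def->cputune.hypervisor_quota <= 0) {
        if (quota > 0)
            vm_quota = quota * vm->def->vcpus;
        else
            vm_quota = quota;
    }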


So users need to tune the hypervisor quota just to get a vcpu-only quota?
Sounds strange to me.

Thanks,
-Kame