[libvirt] [PATCHv8 16/17] qemu: Report cache occupancy (CMT) with domstats

John Ferlan jferlan at redhat.com
Wed Nov 14 16:18:26 UTC 2018



On 11/12/18 8:31 AM, Wang Huaqiang wrote:
> Adding the interface in qemu to report CMT statistic information
> through command 'virsh domstats --cpu-total'.
> 
> Below is a typical output:
> 
>          # virsh domstats 1 --cpu-total
>          Domain: 'ubuntu16.04-base'
>            ...
>            cpu.cache.monitor.count=2
>            cpu.cache.monitor.0.name=vcpus_1
>            cpu.cache.monitor.0.vcpus=1
>            cpu.cache.monitor.0.bank.count=2
>            cpu.cache.monitor.0.bank.0.id=0
>            cpu.cache.monitor.0.bank.0.bytes=4505600
>            cpu.cache.monitor.0.bank.1.id=1
>            cpu.cache.monitor.0.bank.1.bytes=5586944
>            cpu.cache.monitor.1.name=vcpus_4-6
>            cpu.cache.monitor.1.vcpus=4,5,6
>            cpu.cache.monitor.1.bank.count=2
>            cpu.cache.monitor.1.bank.0.id=0
>            cpu.cache.monitor.1.bank.0.bytes=17571840
>            cpu.cache.monitor.1.bank.1.id=1
>            cpu.cache.monitor.1.bank.1.bytes=29106176
> 
> Signed-off-by: Wang Huaqiang <huaqiang.wang at intel.com>
> ---
>  src/libvirt-domain.c   |   9 +++
>  src/qemu/qemu_driver.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 207 insertions(+)
> 
> diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
> index 7690339..4895f9f 100644
> --- a/src/libvirt-domain.c
> +++ b/src/libvirt-domain.c
> @@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
>   *     "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
>   *     "cpu.system" - system cpu time spent in nanoseconds as unsigned long
>   *                    long.
> + *     "cpu.cache.monitor.count" - tocal cache monitoring groups
> + *     "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
> + *     "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
> + *     "cpu.cache.monitor.M.bank.count" - total bank number of cache monitoring
> + *                    group 'M'
> + *     "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
> + *                    'N' in cache monitoring group 'M'
> + *     "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of cache
> + *                    bank 'N' in cache monitoring group 'M'

I'll comment on these in your update...

>   *
>   * VIR_DOMAIN_STATS_BALLOON:
>   *     Return memory balloon device information.
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 89d46ee..d41ae66 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -19698,6 +19698,199 @@ typedef enum {
>  #define HAVE_JOB(flags) ((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
>  
>  
> +typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
> +typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
> +struct _virQEMUCpuResMonitorData{

Data {

> +    const char *name;
> +    char *vcpus;
> +    virResctrlMonitorType tag;
> +    virResctrlMonitorStatsPtr stats;
> +    size_t nstats;
> +};
> +
> +
> +static int
> +qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
> +                               virQEMUCpuResMonitorDataPtr mondata)
> +{
> +    virDomainResctrlDefPtr resctrl = NULL;
> +    size_t i = 0;
> +    size_t j = 0;
> +    size_t l = 0;
> +
> +    for (i = 0; i < dom->def->nresctrls; i++) {
> +        resctrl = dom->def->resctrls[i];
> +
> +        for (j = 0; j < resctrl->nmonitors; j++) {
> +            virDomainResctrlMonDefPtr domresmon = NULL;
> +            virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;
> +
> +            domresmon = resctrl->monitors[j];
> +            mondata[l].tag = domresmon->tag;

Why "l" (BTW: 'k' would be preferred because '1' and 'l' are very
difficult to delineate).

> +
> +            /* If virBitmapFormat successfully returns an vcpu string, then
> +             * mondata[l].vcpus is assigned with an memory space holding it,
> +             * let this newly allocated memory buffer to be freed along with
> +             * the free of 'mondata' */
> +            if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
> +                return -1;
> +
> +            if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
> +                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                               _("Could not get monitor ID"));
> +                return -1;
> +            }
> +
> +            if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {

Something doesn't quite add up with this... Since we're only filling in
types with 'cache' types and erroring out otherwise ... see [1] data
points below...

> +                if (virResctrlMonitorGetCacheOccupancy(monitor,
> +                                                       &mondata[l].stats,
> +                                                       &mondata[l].nstats) < 0)
> +                    return -1;
> +            } else {
> +                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                               _("Invalid CPU resource type"));
> +                return -1;

[1] Perhaps this should be done up front and "avoided" since this helper
should only care above getting cache stats...

IOW:

       for (j = 0; j < resctrl->nmonitors; j++) {
           virDomainResctrlMonDefPtr domresmon = NULL;
           virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;

           domresmon = resctrl->monitors[j];

           if (domresmon->tag != VIR_RESCTRL_MONITOR_TYPE_CACHE)
               continue;

This this loop only fetches cache information and then the 'l' (or
preferably 'k') makes more sense

> +            }
> +
> +            l++;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +

Might be nice to add comments...

> +static int
> +qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr mondata,
> +                                      size_t nmondata,
> +                                      virResctrlMonitorType tag,
> +                                      virDomainStatsRecordPtr record,
> +                                      int *maxparams)
> +{
> +    char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
> +    unsigned int nmonitors = 0;
> +    const char *resname = NULL;
> +    const char *resnodename = NULL;
> +    size_t i = 0;
> +
> +    for (i = 0; i < nmondata; i++) {
> +        if (mondata[i].tag == tag)
> +            nmonitors++;
> +    }
> +
> +    if (!nmonitors)
> +        return 0;

I'd place the above two below the following hunk - perf wise and
logically since @tag is the important piece here.  However, it may not
be important to compare the [i].tag == tag as long as you've done your
collection of *only* one type.  Hope this makes sense!

Thus your code is just:

    if (nmondata == 0)
        return 0;

> +
> +    if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
> +        resname = "cache";
> +        resnodename = "bank";
> +    } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
> +        resname = "memBW";
> +        resnodename = "node";

[1] In the current scheme of qemuDomainGetStatsCpuResMonitor, how could
we get this tag?

If your goal was to make a "generic" printing API to be used by both
cpustats and memstats (eventually), then perhaps the name of the helper
should just be qemuDomainGetStatsResMonitor (or *MonitorParams). Since
@tag is a parameter and it's fairly easy to see it's use and the
formatting of the params is based purely on the tag in the generically
output data.

The helper should also be appropriately placed in qemu_driver.c such
that when memBW stats support is added it can just be used and doesn't
need to be moved.

> +    } else {
> +        return 0;
> +    }
> +
> +    snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +             "cpu.%s.monitor.count", resname);
> +    if (virTypedParamsAddUInt(&record->params, &record->nparams,
> +                              maxparams, param_name, nmonitors) < 0)
> +        return -1;
> +
> +    for (i = 0; i < nmonitors; i++) {
> +        size_t l = 0;

Similar 'l' concerns here use 'j' instead

> +
> +        snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +                 "cpu.%s.monitor.%zd.name", resname, i);
> +        if (virTypedParamsAddString(&record->params,
> +                                    &record->nparams,
> +                                    maxparams,
> +                                    param_name,
> +                                    mondata[i].name) < 0)
> +            return -1;
> +
> +        snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +                 "cpu.%s.monitor.%zd.vcpus", resname, i);
> +        if (virTypedParamsAddString(&record->params, &record->nparams,
> +                                    maxparams, param_name,
> +                                    mondata[i].vcpus) < 0)
> +            return -1;
> +
> +        snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +                 "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
> +        if (virTypedParamsAddUInt(&record->params, &record->nparams,
> +                                  maxparams, param_name,
> +                                  mondata[i].nstats) < 0)
> +            return -1;
> +
> +        for (l = 0; l < mondata[i].nstats; l++) {
> +            snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +                     "cpu.%s.monitor.%zd.%s.%zd.id",
> +                     resname, i, resnodename, l);
> +            if (virTypedParamsAddUInt(&record->params, &record->nparams,
> +                                      maxparams, param_name,
> +                                      mondata[i].stats[l].id) < 0)
> +                return -1;
> +
> +            snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
> +                     "cpu.%s.monitor.%zd.%s.%zd.bytes",
> +                     resname, i, resnodename, l);
> +            if (virTypedParamsAddUInt(&record->params, &record->nparams,
> +                                      maxparams, param_name,
> +                                      mondata[i].stats[l].val) < 0)
> +                return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static int
> +qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
> +                                virDomainStatsRecordPtr record,
> +                                int *maxparams)
> +{
> +    virDomainResctrlDefPtr resctrl = NULL;
> +    virQEMUCpuResMonitorDataPtr mondata = NULL;
> +    unsigned int nmonitors = 0;
> +    size_t i = 0;
> +    int ret = -1;
> +
> +    if (!virDomainObjIsActive(dom))
> +        return 0;
> +
> +    for (i = 0; i < dom->def->nresctrls; i++) {
> +        resctrl = dom->def->resctrls[i];
> +        nmonitors += resctrl->nmonitors;

[1] To further the above conversation, @nmonitors won't be limited by
cache stats only, but the collection of the @mondata is. So I think here
nmonitors should only include tags w/ *TYPE_CACHE.

> +    }
> +
> +    if (!nmonitors)
> +        return 0;
> +
> +    if (VIR_ALLOC_N(mondata, nmonitors) < 0)
> +        return -1;
> +
> +    if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)

You could pass @nmonitors here and use as 'nmondata' in the above
function to "ensure" that the nmonitors looping and filling of mondata
doesn't go beyond bounds - although that shouldn't happen, so it's not a
requirement as long as the algorithm to calculate @nmonitors and
allocate @mondata is the same algorithm used to fill in @mondata.

> +        goto cleanup;
> +
> +    for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
> +         i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
> +        if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
> +                                                  record, maxparams) < 0)
> +            goto cleanup;
> +    }

Similar comment here - this is only being called from
qemuDomainGetStatsCpu thus it should only pass
VIR_RESCTRL_MONITOR_TYPE_CACHE and not be run in a loop. If eventually
memBW data was filled in - it would be printed next to cpu-stats data
and that doesn't necessarily make sense, does it?


> +
> +    ret = 0;
> + cleanup:
> +    for (i = 0; i < nmonitors; i++)
> +        VIR_FREE(mondata[i].vcpus);
> +    VIR_FREE(mondata);
> +
> +    return ret;
> +}
> +
> +
>  static int
>  qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
>                              virDomainStatsRecordPtr record,
> @@ -19747,6 +19940,11 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
>  {
>      if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
>          return -1;
> +
> +    if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
> +        return -1;
> +
> +    return 0;

This obviously would have some differences based on my comments from
patch15.

Rather than have you post patches 1-15 again just to fix 16, I'll push
1-15 and let you work on 16 and 17.  We still have time before the next
release.

Besides once I push, we'll find out fairly quickly whether some other
arch has build/compile problems!

John

>  }
>  
>  
> 




More information about the libvir-list mailing list