[libvirt] [PATCH 2/3] Qemu: add CMT support

Ren, Qiaowei qiaowei.ren at intel.com
Sat Oct 31 13:08:03 UTC 2015


> -----Original Message-----
> From: Jiri Denemark [mailto:jdenemar at redhat.com]
> Sent: Thursday, October 29, 2015 5:52 PM
> To: Ren, Qiaowei
> Cc: libvir-list at redhat.com; Feng, Shaohe
> Subject: Re: [libvirt] [PATCH 2/3] Qemu: add CMT support
> 
> On Thu, Oct 29, 2015 at 14:02:29 +0800, Qiaowei Ren wrote:
> > One RFC in
> > https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >
> > CMT (Cache Monitoring Technology) can be used to measure the usage of
> > cache by VM running on the host. This patch will extend the bulk stats
> > API (virDomainListGetStats) to add this field. Applications based on
> > libvirt can use this API to achieve cache usage of VM. Because CMT
> > implementation in Linux kernel is based on perf mechanism, this patch
> > will enable perf event for CMT when VM is created and disable it when
> > VM is destroyed.
> >
> > Signed-off-by: Qiaowei Ren <qiaowei.ren at intel.com>
> > ---
> >  include/libvirt/libvirt-domain.h |  1 +
> >  src/qemu/qemu_domain.h           |  3 ++
> >  src/qemu/qemu_driver.c           | 48 ++++++++++++++++++++++
> >  src/qemu/qemu_process.c          | 86
> ++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 138 insertions(+)
> >
> > diff --git a/include/libvirt/libvirt-domain.h
> > b/include/libvirt/libvirt-domain.h
> > index e8202cf..fb5e1f4 100644
> > --- a/include/libvirt/libvirt-domain.h
> > +++ b/include/libvirt/libvirt-domain.h
> > @@ -1764,6 +1764,7 @@ typedef enum {
> >      VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
> >      VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces
> info */
> >      VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
> > +    VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */
> 
> This flag is not documented anywhere and the comment is completely wrong.
> 
> >  } virDomainStatsTypes;
> >
> >  typedef enum {
> > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index
> > 54e1e7b..31bce33 100644
> > --- a/src/qemu/qemu_domain.h
> > +++ b/src/qemu/qemu_domain.h
> > @@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
> >
> >      bool hookRun;  /* true if there was a hook run over this domain
> > */
> >
> > +    int cmt_fd;  /* perf handler for CMT */
> 
> So you declare cmt_fd as int...
> 
> > +
> > +
> 
> Why two empty lines?
> 
> >      /* Bitmaps below hold data from the auto NUMA feature */
> >      virBitmapPtr autoNodeset;
> >      virBitmapPtr autoCpuset;
> > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index
> > 4cfae03..8c678c9 100644
> > --- a/src/qemu/qemu_driver.c
> > +++ b/src/qemu/qemu_driver.c
> > @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr
> > driver,
> >
> >  #undef QEMU_ADD_COUNT_PARAM
> >
> > +static int
> > +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> > +                        virDomainObjPtr dom,
> > +                        virDomainStatsRecordPtr record,
> > +                        int *maxparams,
> > +                        unsigned int privflags ATTRIBUTE_UNUSED) {
> > +    qemuDomainObjPrivatePtr priv = dom->privateData;
> > +    FILE *fd;
> > +    unsigned long long cache = 0;
> > +    int scaling_factor = 0;
> > +
> > +    if (priv->cmt_fd <= 0)
> 
> Shouldn't this only check for cmt_fd < 0?
> 
> > +        return -1;
> > +
> > +    if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> 
> Shouldn't cache be declared as uint64_t then?
> 
> > +        virReportSystemError(errno, "%s",
> > +                             _("Unable to read cache data"));
> > +        return -1;
> > +    }
> > +
> > +    fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> > +    if (!fd) {
> > +        virReportSystemError(errno, "%s",
> > +                             _("Unable to open CMT scale file"));
> > +        return -1;
> > +    }
> > +    if (fscanf(fd, "%d", &scaling_factor) != 1) {
> > +        virReportSystemError(errno, "%s",
> > +                             _("Unable to read CMT scale file"));
> > +        VIR_FORCE_FCLOSE(fd);
> > +        return -1;
> > +    }
> > +    VIR_FORCE_FCLOSE(fd);
> > +
> > +    cache *= scaling_factor;
> > +
> > +    if (virTypedParamsAddULLong(&record->params,
> > +                                &record->nparams,
> > +                                maxparams,
> > +                                "cache.current",
> > +                                cache) < 0)
> 
> Any documentation of this new statistics is missing. The commit message
> doesn't really help understanding it either.
> 
> > +        return -1;
> > +
> > +    return 0;
> > +}
> > +
> >  typedef int
> >  (*qemuDomainGetStatsFunc)(virQEMUDriverPtr driver,
> >                            virDomainObjPtr dom, @@ -19340,6 +19387,7
> > @@ static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[]
> = {
> >      { qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, false },
> >      { qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false },
> >      { qemuDomainGetStatsBlock, VIR_DOMAIN_STATS_BLOCK, true },
> > +    { qemuDomainGetStatsCache, VIR_DOMAIN_STATS_CACHE, false },
> >      { NULL, 0, false }
> >  };
> >
> > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index
> > ba84182..00b889d 100644
> > --- a/src/qemu/qemu_process.c
> > +++ b/src/qemu/qemu_process.c
> > @@ -25,8 +25,11 @@
> >  #include <unistd.h>
> >  #include <signal.h>
> >  #include <sys/stat.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/syscall.h>
> >  #if defined(__linux__)
> >  # include <linux/capability.h>
> > +# include <linux/perf_event.h>
> 
> What if there's no perf_event.h header file?
> 
> >  #elif defined(__FreeBSD__)
> >  # include <sys/param.h>
> >  # include <sys/cpuset.h>
> > @@ -4274,6 +4277,75 @@ qemuLogOperation(virDomainObjPtr vm,
> >      goto cleanup;
> >  }
> >
> > +/*
> > + * Enable CMT(Cache Monitoring Technology) to measure the usage of
> > + * cache by VM running on the node.
> > + *
> > + * Because the hypervisor implement CMT support basedon perf
> > +mechanism,
> > + * we should enable perf event for CMT. The function 'sys_erf_event_open'
> > + * is perf syscall wrapper.
> > + */
> > +#ifdef __linux__
> > +static long sys_perf_event_open(struct perf_event_attr *hw_event,
> > +                                pid_t pid, int cpu, int group_fd,
> 
> One parameter per line is easier to read.
> 
> > +                                unsigned long flags) {
> > +    return syscall(__NR_perf_event_open, hw_event, pid, cpu,
> > +                        group_fd, flags);
> 
> Wrong indentation.
> 
> > +}
> > +static int qemuCmtEnable(virDomainObjPtr vm) {
> > +    qemuDomainObjPrivatePtr priv = vm->privateData;
> > +    struct perf_event_attr cmt_attr;
> > +    int event_type;
> > +    FILE *fp;
> > +
> > +    fp = fopen("/sys/devices/intel_cqm/type", "r");
> > +    if (!fp) {
> > +        virReportSystemError(errno, "%s",
> > +                             _("CMT is not available on this host"));
> > +        return -1;
> > +    }
> > +    if (fscanf(fp, "%d", &event_type) != 1) {
> > +        virReportSystemError(errno, "%s",
> > +                             _("Unable to read event type file."));
> > +        VIR_FORCE_FCLOSE(fp);
> > +        return -1;
> > +    }
> > +    VIR_FORCE_FCLOSE(fp);
> > +
> > +    memset(&cmt_attr, 0, sizeof(struct perf_event_attr));
> > +    cmt_attr.size = sizeof(struct perf_event_attr);
> > +    cmt_attr.type = event_type;
> > +    cmt_attr.config = 1;
> > +    cmt_attr.inherit = 1;
> > +    cmt_attr.disabled = 1;
> > +    cmt_attr.enable_on_exec = 0;
> > +
> > +    priv->cmt_fd = sys_perf_event_open(&cmt_attr, vm->pid, -1, -1,
> > + 0);
> 
> ...while here you assign long (from sys_perf_event_open) to cmt_fd.
> 
> > +    if (priv->cmt_fd < 0) {
> > +        virReportSystemError(errno,
> > +                             _("Unable to open perf type=%d for pid=%d"),
> > +                             event_type, vm->pid);
> > +        return -1;
> > +    }
> > +
> > +    if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_ENABLE) < 0) {
> > +        virReportSystemError(errno, "%s",
> > +                             _("Unable to enable perf event for CMT"));
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +#else
> > +static int qemuCmtEnable(virDomainObjPtr vm) {
> > +    virReportUnsupportedError();
> > +    return -1;
> > +}
> > +#endif
> > +
> >  int qemuProcessStart(virConnectPtr conn,
> >                       virQEMUDriverPtr driver,
> >                       virDomainObjPtr vm, @@ -4954,6 +5026,11 @@ int
> > qemuProcessStart(virConnectPtr conn,
> >      if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
> >          goto cleanup;
> >
> > +    VIR_DEBUG("Setting CMT perf counter");
> > +    if (qemuCmtEnable(vm) < 0)
> > +        virReportSystemError(errno, "%s",
> > +                             _("CMT is not available on this host"));
> > +
> 
> By setting en error here, you overwrite any useful error set inside
> qemuCmtEnable. Anyway since you apparently want to make the error non fatal,
> you should not even set it. The messages should be logged as warnings or even
> lower.
> 
> If there is no performance or other impact of having cmt enabled, we could
> enable it on hosts that support it, but we either need to log any errors with info
> level or first check whether the host supports cmt and only try to enable it if it
> does, in which case we could report the errors as warnings.
> 
> However, if a negative impact of enabling cmt is possible, we need an explicit
> configuration option (turned off by default) that would ask libvirt to enable cmt.
> Any errors could be logged as warnings in this case.
> 
> And what if libvirtd is restarted? Shouldn't we call sys_perf_event_open again?
> 

Jirka, thanks for your nice feedback, and I will fix these problems in next patch series.

In addition, performance events won't slow down kernel and applications and so we could enable it on hosts that support it. Certainly this patch only check if kernel support cmt feature and does not check if host cpu support it through /proc/cpuinfo ...

If libvirtd is restarted, perf handler need to be got again. I will think it over next version.

> >      /* finally we can call the 'started' hook script if any */
> >      if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
> >          char *xml = qemuDomainDefFormatXML(driver, vm->def, 0); @@
> > -5122,6 +5199,15 @@ void qemuProcessStop(virQEMUDriverPtr driver,
> >      virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
> >      priv->nbdPort = 0;
> >
> > +    /* Disable CMT */
> > +    if (priv->cmt_fd > 0) {
> 
> cmt_fd >= 0 ?
> 
> > +        if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_DISABLE) < 0) {
> > +            virReportSystemError(errno, "%s",
> > +                                 _("Unable to disable perf event for
> > + CMT"));
> 
> A warning should be good enough.
> 
> > +        }
> > +        VIR_FORCE_CLOSE(priv->cmt_fd);
> > +    }
> > +
> 
> This should go into a dedicated function.
> 
> >      if (priv->agent) {
> >          qemuAgentClose(priv->agent);
> >          priv->agent = NULL;
> 
> Jirka




More information about the libvir-list mailing list