[libvirt] PATCH: Support vCPU pinning in QEMU driver

Daniel P. Berrange berrange at redhat.com
Fri May 16 21:47:40 UTC 2008


KVM added the ability to get the thread ID for each vCPU via the monitor:

(qemu) info cpus
* CPU #0: pc=0x00000000000ffff0 thread_id=11463
  CPU #1: pc=0x00000000fffffff0 thread_id=11464
  CPU #2: pc=0x00000000fffffff0 thread_id=11465

With this we have enough information to support vCPU pinning in the QEMU
driver for KVM. For plain QEMU and KQEMU it is trivial, since they run all
vCPUs in a single thread.
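
Just as an illustration (not part of the patch): with the thread IDs shown
above a vCPU can already be pinned by hand from the shell, e.g. restricting
CPU #1's thread to host CPU 0 with util-linux's taskset:

  taskset -p -c 0 11464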

The following patch implements vCPU pinning and fetching of CPU affinity
information. In this example I pin one of the two vCPUs in a guest; each
'y' in the 'CPU Affinity' string below marks a host CPU that the vCPU is
allowed to run on:

[berrange at t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system start VirtTest
Domain VirtTest started

[berrange at t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU:           0
CPU:            0
State:          running
CPU Affinity:   yy

VCPU:           1
CPU:            0
State:          running
CPU Affinity:   yy

[berrange at t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpupin VirtTest 1 0

[berrange at t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU:           0
CPU:            0
State:          running
CPU Affinity:   yy

VCPU:           1
CPU:            0
State:          running
CPU Affinity:   y-



This is implemented using sched_setaffinity()/sched_getaffinity(), which
are Linux specific. There doesn't appear to be a portable process affinity
API in POSIX.
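
For reference, here is a minimal standalone sketch (not part of the patch)
of the calls involved; it pins the calling thread, whereas the driver
passes the cached vCPU thread ID instead of 0:

  #define _GNU_SOURCE
  #include <sched.h>
  #include <stdio.h>
  #include <sys/types.h>

  int main(void) {
      pid_t tid = 0;          /* 0 == calling thread; the driver would pass
                                 the thread_id reported by 'info cpus' */
      cpu_set_t mask;

      CPU_ZERO(&mask);
      CPU_SET(0, &mask);      /* allow host CPU 0 only */
      if (sched_setaffinity(tid, sizeof(mask), &mask) < 0) {
          perror("sched_setaffinity");
          return 1;
      }

      CPU_ZERO(&mask);        /* read the mask back to verify */
      if (sched_getaffinity(tid, sizeof(mask), &mask) < 0) {
          perror("sched_getaffinity");
          return 1;
      }
      printf("pinned to CPU 0: %s\n", CPU_ISSET(0, &mask) ? "yes" : "no");
      return 0;
  }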

If the KVM instance does not support the 'thread_id' data in 'info cpus',
pinning is unavailable and we simply report a suitable error message when
it is attempted. We detect the vCPU-to-thread mapping once at guest
startup and cache it thereafter.
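
For completeness, the equivalent of the 'virsh vcpupin' call above through
the public libvirt API, as a minimal sketch assuming the same guest name
and a single-byte cpumap (bit 0 == host CPU 0):

  #include <stdio.h>
  #include <libvirt/libvirt.h>

  int main(void) {
      virConnectPtr conn = virConnectOpen("qemu:///system");
      virDomainPtr dom = NULL;
      unsigned char cpumap[1] = { 0x01 };  /* vCPU may only use host CPU 0 */
      int ret = 1;

      if (conn == NULL)
          return 1;
      if ((dom = virDomainLookupByName(conn, "VirtTest")) != NULL) {
          if (virDomainPinVcpu(dom, 1, cpumap, sizeof(cpumap)) == 0)
              ret = 0;                    /* vCPU 1 now pinned to host CPU 0 */
          virDomainFree(dom);
      }
      virConnectClose(conn);
      return ret;
  }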

Dan.

diff -r 0f537442ce97 src/qemu_conf.h
--- a/src/qemu_conf.h	Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_conf.h	Fri May 16 17:39:29 2008 -0400
@@ -328,6 +328,9 @@
     int *tapfds;
     int ntapfds;
 
+    int nvcpupids;
+    int *vcpupids;
+
     int qemuVersion;
     int qemuCmdFlags; /* values from enum qemud_cmd_flags */
 
diff -r 0f537442ce97 src/qemu_driver.c
--- a/src/qemu_driver.c	Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_driver.c	Fri May 16 17:39:29 2008 -0400
@@ -61,6 +61,7 @@
 #include "nodeinfo.h"
 #include "stats_linux.h"
 #include "capabilities.h"
+#include "memory.h"
 
 static int qemudShutdown(void);
 
@@ -118,6 +119,10 @@
                                       struct qemud_network *network);
 
 static int qemudDomainGetMaxVcpus(virDomainPtr dom);
+static int qemudMonitorCommand (const struct qemud_driver *driver,
+                                const struct qemud_vm *vm,
+                                const char *cmd,
+                                char **reply);
 
 static struct qemud_driver *qemu_driver = NULL;
 
@@ -608,6 +613,106 @@
     return ret;
 }
 
+static int
+qemudDetectVcpuPIDs(virConnectPtr conn,
+                    struct qemud_driver *driver,
+                    struct qemud_vm *vm) {
+    char *qemucpus = NULL;
+    char *line;
+    int lastVcpu = -1;
+
+    /* Only KVM has separate threads for CPUs,
+       others just use main QEMU process for CPU */
+    if (vm->def->virtType != QEMUD_VIRT_KVM)
+        vm->nvcpupids = 1;
+    else
+        vm->nvcpupids = vm->def->vcpus;
+
+    if (VIR_ALLOC_N(vm->vcpupids, vm->nvcpupids) < 0) {
+        qemudReportError(conn, NULL, NULL, VIR_ERR_NO_MEMORY,
+                         "%s", _("allocate cpumap"));
+        return -1;
+    }
+
+    if (vm->def->virtType != QEMUD_VIRT_KVM) {
+        vm->vcpupids[0] = vm->pid;
+        return 0;
+    }
+
+    if (qemudMonitorCommand(driver, vm, "info cpus", &qemucpus) < 0) {
+        qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                         "%s", _("cannot run monitor command to fetch CPU thread info"));
+        VIR_FREE(vm->vcpupids);
+        vm->nvcpupids = 0;
+        return -1;
+    }
+
+    /*
+     * This is the gross format we're about to parse :-{
+     *
+     * (qemu) info cpus
+     * * CPU #0: pc=0x00000000000f0c4a thread_id=30019
+     *   CPU #1: pc=0x00000000fffffff0 thread_id=30020
+     *   CPU #2: pc=0x00000000fffffff0 thread_id=30021
+     *
+     */
+    line = qemucpus;
+    do {
+        char *offset = strchr(line, '#');
+        char *end = NULL;
+        int vcpu = 0, tid = 0;
+
+        /* See if we're all done */
+        if (offset == NULL)
+            break;
+
+        /* Extract VCPU number */
+        if (virStrToLong_i(offset + 1, &end, 10, &vcpu) < 0)
+            goto error;
+        if (end == NULL || *end != ':')
+            goto error;
+
+        /* Extract host Thread ID */
+        if ((offset = strstr(line, "thread_id=")) == NULL)
+            goto error;
+        if (virStrToLong_i(offset + strlen("thread_id="), &end, 10, &tid) < 0)
+            goto error;
+        if (end == NULL || !c_isspace(*end))
+            goto error;
+
+        /* Validate the VCPU is in expected range & order */
+        if (vcpu >= vm->nvcpupids ||
+            vcpu != (lastVcpu + 1))
+            goto error;
+
+        lastVcpu = vcpu;
+        vm->vcpupids[vcpu] = tid;
+
+        /* Skip to next data line */
+        line = strchr(offset, '\r');
+        if (line == NULL)
+            line = strchr(offset, '\n');
+    } while (line != NULL);
+
+    /* Validate we got data for all VCPUs we expected */
+    if (lastVcpu != (vm->def->vcpus - 1))
+        goto error;
+
+    free(qemucpus);
+    return 0;
+
+error:
+    VIR_FREE(vm->vcpupids);
+    vm->vcpupids = 0;
+    free(qemucpus);
+
+    /* Explicitly return success, not error. Older KVM does
+       not have vCPU -> Thread mapping info and we don't
+       want to break its use. This merely disables the
+       ability to pin vCPUs with libvirt */
+    return 0;
+}
+
 static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) {
     int i;
 
@@ -785,6 +890,11 @@
             qemudShutdownVMDaemon(conn, driver, vm);
             return -1;
         }
+
+        if (qemudDetectVcpuPIDs(conn, driver, vm) < 0) {
+            qemudShutdownVMDaemon(conn, driver, vm);
+            return -1;
+        }
     }
 
     return ret;
@@ -857,6 +967,9 @@
     vm->pid = -1;
     vm->id = -1;
     vm->state = VIR_DOMAIN_SHUTOFF;
+    free(vm->vcpupids);
+    vm->vcpupids = NULL;
+    vm->nvcpupids = 0;
 
     if (vm->newDef) {
         qemudFreeVMDef(vm->def);
@@ -2271,6 +2384,127 @@
 
     vm->def->vcpus = nvcpus;
     return 0;
+}
+
+
+static int
+qemudDomainPinVcpu(virDomainPtr dom,
+                   unsigned int vcpu,
+                   unsigned char *cpumap,
+                   int maplen) {
+    struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+    struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+    cpu_set_t mask;
+    int i, maxcpu;
+    virNodeInfo nodeinfo;
+
+    if (!qemudIsActiveVM(vm)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         "%s",_("cannot pin vcpus on an inactive domain"));
+        return -1;
+    }
+
+    if (vcpu > (vm->nvcpupids-1)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         _("vcpu number out of range %d > %d"),
+                         vcpu, vm->nvcpupids);
+        return -1;
+    }
+
+    if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+        return -1;
+
+    maxcpu = maplen * 8;
+    if (maxcpu > nodeinfo.cpus)
+        maxcpu = nodeinfo.cpus;
+
+    CPU_ZERO(&mask);
+    for (i = 0 ; i < maxcpu ; i++) {
+        if ((cpumap[i/8] >> (i % 8)) & 1)
+            CPU_SET(i, &mask);
+    }
+
+    if (vm->vcpupids != NULL) {
+        if (sched_setaffinity(vm->vcpupids[vcpu], sizeof(mask), &mask) < 0) {
+            qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                             _("cannot set affinity: %s"), strerror(errno));
+            return -1;
+        }
+    } else {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+                         "%s", _("cpu affinity is not supported"));
+        return -1;
+    }
+
+    return 0;
+}
+
+static int
+qemudDomainGetVcpus(virDomainPtr dom,
+                    virVcpuInfoPtr info,
+                    int maxinfo,
+                    unsigned char *cpumaps,
+                    int maplen) {
+    struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+    struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+    virNodeInfo nodeinfo;
+    int i, v, maxcpu;
+
+    if (!qemudIsActiveVM(vm)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         "%s",_("cannot pin vcpus on an inactive domain"));
+        return -1;
+    }
+
+    if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+        return -1;
+
+    maxcpu = maplen * 8;
+    if (maxcpu > nodeinfo.cpus)
+        maxcpu = nodeinfo.cpus;
+
+    /* Clamp to actual number of vcpus */
+    if (maxinfo > vm->nvcpupids)
+        maxinfo = vm->nvcpupids;
+
+    if (maxinfo < 1)
+        return 0;
+
+    if (info != NULL) {
+        memset(info, 0, sizeof(*info) * maxinfo);
+        for (i = 0 ; i < maxinfo ; i++) {
+            info[i].number = i;
+            info[i].state = VIR_VCPU_RUNNING;
+            /* XXX cpu time, current pCPU mapping */
+        }
+    }
+
+    if (cpumaps != NULL) {
+        memset(cpumaps, 0, maplen * maxinfo);
+        if (vm->vcpupids != NULL) {
+            for (v = 0 ; v < maxinfo ; v++) {
+                cpu_set_t mask;
+                unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, v);
+                CPU_ZERO(&mask);
+
+                if (sched_getaffinity(vm->vcpupids[v], sizeof(mask), &mask) < 0) {
+                    qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                                     _("cannot get affinity: %s"), strerror(errno));
+                    return -1;
+                }
+
+                for (i = 0 ; i < maxcpu ; i++)
+                    if (CPU_ISSET(i, &mask))
+                        VIR_USE_CPU(cpumap, i);
+            }
+        } else {
+            qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+                             "%s", _("cpu affinity is not available"));
+            return -1;
+        }
+    }
+
+    return maxinfo;
 }
 
 static int qemudDomainGetMaxVcpus(virDomainPtr dom) {
@@ -3221,8 +3455,8 @@
     qemudDomainRestore, /* domainRestore */
     NULL, /* domainCoreDump */
     qemudDomainSetVcpus, /* domainSetVcpus */
-    NULL, /* domainPinVcpu */
-    NULL, /* domainGetVcpus */
+    qemudDomainPinVcpu, /* domainPinVcpu */
+    qemudDomainGetVcpus, /* domainGetVcpus */
     qemudDomainGetMaxVcpus, /* domainGetMaxVcpus */
     qemudDomainDumpXML, /* domainDumpXML */
     qemudListDefinedDomains, /* listDomains */


-- 
|: Red Hat, Engineering, Boston   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



