[libvirt] [PATCH v1 1/1] qemu_process.c: add CAP_IPC_LOCK when using libcap-ng

Daniel Henrique Barboza danielhb413 at gmail.com
Wed Feb 6 15:44:53 UTC 2019


QEMU virtual machines with PCI passthrough of certain devices,
such as the NVIDIA Tesla V100 GPU, requires allocation of an
amount of memory pages that can break KVM limits. When that
happens, the KVM module checks if the process is IPC_LOCK capable
and, in case it doesn't, it refuses to allocate the mem pages,
causing the guest to misbehave.

When Libvirt is compiled to use libcap-ng support, the resulting
QEMU guest Libvirt creates does not have CAP_IPC_LOCK, hurting
guests that are doing PCI passthrough and that requires extra
memory to work.

This patch addresses this issue by checking whether we're
running with libcap-ng support (WITH_CAPNG) and if the
guest is using PCI passthrough with the VFIO driver. If those
conditions are met, allow CAP_IPC_LOCK capability to the
QEMU process.

Signed-off-by: Daniel Henrique Barboza <danielhb413 at gmail.com>
---
 src/libvirt_private.syms |  1 +
 src/qemu/qemu_process.c  | 34 ++++++++++++++++++++++++++++++++++
 src/util/virhostdev.c    |  2 +-
 src/util/virhostdev.h    |  4 ++++
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 6b401aa5e0..b7384a67a8 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1992,6 +1992,7 @@ virHostCPUStatsAssign;
 
 # util/virhostdev.h
 virHostdevFindUSBDevice;
+virHostdevGetPCIHostDeviceList;
 virHostdevIsMdevDevice;
 virHostdevIsSCSIDevice;
 virHostdevManagerGetDefault;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 0583eb03f2..24aef5904f 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -4997,6 +4997,38 @@ qemuProcessSetupRawIO(virQEMUDriverPtr driver,
 }
 
 
+static void
+qemuProcessSetupVFIOCaps(virDomainObjPtr vm ATTRIBUTE_UNUSED,
+                         virCommandPtr cmd ATTRIBUTE_UNUSED)
+{
+#if WITH_CAPNG
+    virPCIDeviceListPtr pcidevs = NULL;
+    bool has_vfio = false;
+    size_t i;
+
+    pcidevs = virHostdevGetPCIHostDeviceList(vm->def->hostdevs,
+		                             vm->def->nhostdevs);
+    if (!pcidevs)
+        return;
+
+    for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
+        virPCIDevicePtr pci = virPCIDeviceListGet(pcidevs, i);
+        if (virPCIDeviceGetStubDriver(pci) == VIR_PCI_STUB_DRIVER_VFIO) {
+	    has_vfio = true;
+	    break;
+	}
+    }
+
+    if (has_vfio) {
+        VIR_DEBUG("Adding CAP_IPC_LOCK to QEMU process");
+        virCommandAllowCap(cmd, CAP_IPC_LOCK);
+    }
+
+    virObjectUnref(pcidevs);
+#endif
+}
+
+
 static int
 qemuProcessSetupBalloon(virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
@@ -6569,6 +6601,8 @@ qemuProcessLaunch(virConnectPtr conn,
     if (qemuProcessSetupRawIO(driver, vm, cmd) < 0)
         goto cleanup;
 
+    qemuProcessSetupVFIOCaps(vm, cmd);
+
     virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
     virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
     virCommandSetMaxFiles(cmd, cfg->maxFiles);
diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c
index 6be395cdda..ee389a1729 100644
--- a/src/util/virhostdev.c
+++ b/src/util/virhostdev.c
@@ -218,7 +218,7 @@ virHostdevManagerGetDefault(void)
     return virObjectRef(manager);
 }
 
-static virPCIDeviceListPtr
+virPCIDeviceListPtr
 virHostdevGetPCIHostDeviceList(virDomainHostdevDefPtr *hostdevs, int nhostdevs)
 {
     virPCIDeviceListPtr pcidevs;
diff --git a/src/util/virhostdev.h b/src/util/virhostdev.h
index 7263f320a2..9235581242 100644
--- a/src/util/virhostdev.h
+++ b/src/util/virhostdev.h
@@ -57,6 +57,10 @@ struct _virHostdevManager {
 };
 
 virHostdevManagerPtr virHostdevManagerGetDefault(void);
+virPCIDeviceListPtr
+virHostdevGetPCIHostDeviceList(virDomainHostdevDefPtr *hostdevs,
+		               int nhostdevs)
+    ATTRIBUTE_NONNULL(1);
 int
 virHostdevPreparePCIDevices(virHostdevManagerPtr hostdev_mgr,
                             const char *drv_name,
-- 
2.20.1




More information about the libvir-list mailing list