[libvirt] [PATCH 5/5] qemu: Prefer VFIO for PCI device passthrough

Peter Krempa pkrempa at redhat.com
Fri Sep 20 09:07:00 UTC 2013


Prefer using VFIO (if available) to the legacy KVM device passthrough.

With this patch a PCI passthrough device without the driver configured
will be started with VFIO if it's available on the host. If not, legacy
KVM passthrough is tried, and an error is reported if that is not
available either.
---
 docs/formatdomain.html.in                          |  9 ++++----
 src/conf/domain_conf.h                             |  2 +-
 src/qemu/qemu_command.c                            |  2 +-
 src/qemu/qemu_hotplug.c                            | 26 +++++++++++++++++++++-
 src/qemu/qemu_process.c                            | 18 ++++++++++++++-
 .../qemuxml2argv-hostdev-pci-address-device.args   |  2 +-
 .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args |  2 +-
 tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args   |  4 ++--
 8 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 3689399..6f3f7cf 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -2755,11 +2755,10 @@
         backend, which is compatible with UEFI SecureBoot) or "kvm"
         (for the legacy device assignment handled directly by the KVM
         kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
-        only, requires kernel 3.6 or newer)</span>. Currently, "kvm"
-        is the default used by libvirt when not explicitly provided,
-        but since the two are functionally equivalent, this default
-        could be changed in the future with no impact to domains that
-        don't specify anything.
+        only, requires kernel 3.6 or newer)</span>. The default, when
+        the driver name is not explicitly specified, is to check whether
+        VFIO is available and use it if it's the case. If VFIO is not
+        available, the legacy "kvm" assignment is attempted.
       </dd>
       <dt><code>readonly</code></dt>
       <dd>Indicates that the device is readonly, only supported by SCSI host
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 9414ebf..43d8746 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {

 /* the backend driver used for PCI hostdev devices */
 typedef enum {
-    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
+    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automatically, prefer VFIO */
     VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM,    /* force legacy kvm style */
     VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO,   /* force vfio */

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 1411533..0d7c02e 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,

     switch ((virDomainHostdevSubsysPciBackendType)
             dev->source.subsys.u.pci.backend) {
-    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
     case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
         virBufferAddLit(&buf, "pci-assign");
         if (configfd && *configfd)
             virBufferAsprintf(&buf, ",configfd=%s", configfd);
         break;

+    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
     case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
         virBufferAddLit(&buf, "vfio-pci");
         break;
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 9320ab9..5a8fa44 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
         return -1;

     switch ((virDomainHostdevSubsysPciBackendType) *backend) {
+    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
+        if (qemuHostdevHostSupportsPassthroughVFIO() &&
+            virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
+            *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
+            /* VFIO requires all of the guest's memory to be locked resident.
+             * In this case, the guest's memory may already be locked, but it
+             * doesn't hurt to "change" the limit to the same value.
+             */
+            if (vm->def->mem.hard_limit)
+                virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit);
+            else
+                virProcessSetMaxMemLock(vm->pid,
+                                        vm->def->mem.max_balloon + (1024 * 1024));
+        } else if (qemuHostdevHostSupportsPassthroughLegacy() &&
+                   (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
+                    virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
+            *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
+        } else {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("host doesn't support passthrough of "
+                             "host PCI devices"));
+        }
+
+        break;
+
     case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
         if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
@@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,

         break;

-    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
     case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
         if (!qemuHostdevHostSupportsPassthroughLegacy()) {
             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index e36ab99..9648aa0 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn,
             }

             switch ((virDomainHostdevSubsysPciBackendType) *backend) {
+            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
+                if (supportsPassthroughVFIO &&
+                    virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
+                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
+                } else if (supportsPassthroughKVM &&
+                           (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
+                            virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
+                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
+                } else {
+                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                                   _("host doesn't support passthrough of "
+                                     "host PCI devices"));
+                    goto cleanup;
+                }
+
+                break;
+
             case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
                 if (!supportsPassthroughVFIO) {
                     virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
@@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn,
                 }
                 break;

-            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
             case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
                 if (!supportsPassthroughKVM) {
                     virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
index 214b246..557b733 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
@@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
 /usr/bin/qemu -S -M \
 pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
 unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
-/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\
+/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\
 bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
index 184811b..3a3963c 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
+++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
@@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
 -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
 unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \
 -usb -hda /dev/HostVG/QEMUGuest1 \
--device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
+-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
index c850613..615047a 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
@@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
 -net user,vlan=0,name=hostnet0 \
 -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\
 romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \
--device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
--device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
+-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
+-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
 romfile=/etc/fake/bootrom.bin \
 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
-- 
1.8.3.2




More information about the libvir-list mailing list