[libvirt] [PATCHv3 3/3] qemu: Prefer VFIO for PCI device passthrough
Laine Stump
laine at laine.org
Mon Oct 7 14:19:15 UTC 2013
On 10/04/2013 08:55 AM, Peter Krempa wrote:
> Prefer using VFIO (if available) to the legacy KVM device passthrough.
>
> With this patch a PCI passthrough device without the driver configured
> will be started with VFIO if it's available on the host. If not legacy
> KVM passthrough is checked and error is reported if it's not available.
> ---
> docs/formatdomain.html.in | 9 ++++-----
> src/conf/domain_conf.h | 2 +-
> src/qemu/qemu_command.c | 3 ++-
> src/qemu/qemu_hostdev.c | 21 +++++++++++++++++++--
> src/qemu/qemu_hostdev.h | 3 ++-
> src/qemu/qemu_hotplug.c | 2 +-
> src/qemu/qemu_process.c | 15 ++++++++-------
> tests/qemuxml2argvtest.c | 11 +++++++++++
> 8 files changed, 48 insertions(+), 18 deletions(-)
>
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index 3689399..6f3f7cf 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -2755,11 +2755,10 @@
> backend, which is compatible with UEFI SecureBoot) or "kvm"
> (for the legacy device assignment handled directly by the KVM
> kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
> - only, requires kernel 3.6 or newer)</span>. Currently, "kvm"
> - is the default used by libvirt when not explicitly provided,
> - but since the two are functionally equivalent, this default
> - could be changed in the future with no impact to domains that
> - don't specify anything.
> + only, requires kernel 3.6 or newer)</span>. The default, when
> + the driver name is not explicitly specified, is to check wether
> + VFIO is available and use it if it's the case. If VFIO is not
> + available, the legacy "kvm" assignment is attempted.
> </dd>
> <dt><code>readonly</code></dt>
> <dd>Indicates that the device is readonly, only supported by SCSI host
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index f20a916..6b825d8 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
>
> /* the backend driver used for PCI hostdev devices */
> typedef enum {
> - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
> + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index ecf26cc..a4742fa 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -5487,7 +5487,6 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
>
> switch ((virDomainHostdevSubsysPciBackendType)
> dev->source.subsys.u.pci.backend) {
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> virBufferAddLit(&buf, "pci-assign");
> if (configfd && *configfd)
> @@ -5498,6 +5497,8 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
> virBufferAddLit(&buf, "vfio-pci");
> break;
>
> +
extra blank line.
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST:
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> _("PCI passhthrough type needs to be specified"));
> diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c
> index dbbc2b4..ad408d8 100644
> --- a/src/qemu/qemu_hostdev.c
> +++ b/src/qemu/qemu_hostdev.c
> @@ -1366,7 +1366,8 @@ qemuHostdevHostSupportsPassthroughLegacy(void)
>
> bool
> qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> - size_t nhostdevs)
> + size_t nhostdevs,
> + virQEMUCapsPtr qemuCaps)
Aha. I guess if you follow my recommendation in 2/3, you'll need to pass
qemuCaps down through the qemuPrepareHostDevices() call chain (and I
don't think that's a bad thing).
> {
> int supportsPassthroughKVM = -1;
> int supportsPassthroughVFIO = -1;
> @@ -1387,6 +1388,23 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> }
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> + if (supportsPassthroughVFIO &&
> + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> + } else if (supportsPassthroughKVM &&
> + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + } else {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("host doesn't support passthrough of "
> + "host PCI devices"));
> + return false;
> + }
> +
> + break;
> +
Ah! And there is the separate case I was asking for in 2/3!
But you're changing the "backend" in the data itself - that will lead to
incorrect display when someone does a dumpxml (will it cause problems if
someone loads the vfio driver? I guess not, since this should only be
the "live" data, not the persistent data)
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> if (!supportsPassthroughVFIO) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -1395,7 +1413,6 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> }
> break;
>
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> if (!supportsPassthroughKVM) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h
> index 6d88830..afe67a5 100644
> --- a/src/qemu/qemu_hostdev.h
> +++ b/src/qemu/qemu_hostdev.h
> @@ -70,7 +70,8 @@ int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev,
> char *stateDir);
>
> bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> - size_t nhostdevs);
> + size_t nhostdevs,
> + virQEMUCapsPtr qemuCaps);
>
>
> #endif /* __QEMU_HOSTDEV_H__ */
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index c72fdc3..0bbbf6e 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1144,7 +1144,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
> return -1;
>
> /* verify the availability of passthrough support */
> - if (!qemuHostdevHostVerifySupport(&hostdev, 1))
> + if (!qemuHostdevHostVerifySupport(&hostdev, 1, priv->qemuCaps))
> goto error;
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 4aa9582..64d9326 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3567,9 +3567,16 @@ int qemuProcessStart(virConnectPtr conn,
> goto cleanup;
> }
>
> + VIR_DEBUG("Determining emulator version");
> + virObjectUnref(priv->qemuCaps);
> + if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
> + vm->def->emulator)))
> + goto cleanup;
> +
> /* check and assign device assignment settings */
> if (!qemuHostdevHostVerifySupport(vm->def->hostdevs,
> - vm->def->nhostdevs))
> + vm->def->nhostdevs,
> + priv->qemuCaps))
> goto cleanup;
>
> /* network devices must be "prepared" before hostdevs, because
> @@ -3664,12 +3671,6 @@ int qemuProcessStart(virConnectPtr conn,
> }
> }
>
> - VIR_DEBUG("Determining emulator version");
> - virObjectUnref(priv->qemuCaps);
> - if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
> - vm->def->emulator)))
> - goto cleanup;
> -
> if (!qemuValidateCpuMax(vm->def, priv->qemuCaps))
> goto cleanup;
>
> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
> index 38319e5..6fe8c6a 100644
> --- a/tests/qemuxml2argvtest.c
> +++ b/tests/qemuxml2argvtest.c
> @@ -98,6 +98,7 @@ static int testCompareXMLToArgvFiles(const char *xml,
> virConnectPtr conn;
> char *log = NULL;
> virCommandPtr cmd = NULL;
> + size_t i;
>
> if (!(conn = virGetConnect()))
> goto out;
> @@ -154,6 +155,16 @@ static int testCompareXMLToArgvFiles(const char *xml,
> if (qemuAssignDeviceAliases(vmdef, extraFlags) < 0)
> goto out;
>
> + for (i = 0; i < vmdef->nhostdevs; i++) {
> + virDomainHostdevDefPtr hostdev = vmdef->hostdevs[i];
> +
> + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
> + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
> + hostdev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT) {
> + hostdev->source.subsys.u.pci.backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + }
> + }
> +
I'm in too much of a rush to read the code and understand why you're
changing this unconditionally for the test. Can you just comment on what
you're doing to save me the time?
> if (!(cmd = qemuBuildCommandLine(conn, &driver, vmdef, &monitor_chr,
> (flags & FLAG_JSON), extraFlags,
> migrateFrom, migrateFd, NULL,
More information about the libvir-list
mailing list