[libvirt] [PATCHv3 3/3] qemu: Prefer VFIO for PCI device passthrough

Laine Stump laine at laine.org
Mon Oct 7 14:19:15 UTC 2013


On 10/04/2013 08:55 AM, Peter Krempa wrote:
> Prefer using VFIO (if available) to the legacy KVM device passthrough.
>
> With this patch a PCI passthrough device without the driver configured
> will be started with VFIO if it's available on the host. If not legacy
> KVM passthrough is checked and error is reported if it's not available.
> ---
>  docs/formatdomain.html.in |  9 ++++-----
>  src/conf/domain_conf.h    |  2 +-
>  src/qemu/qemu_command.c   |  3 ++-
>  src/qemu/qemu_hostdev.c   | 21 +++++++++++++++++++--
>  src/qemu/qemu_hostdev.h   |  3 ++-
>  src/qemu/qemu_hotplug.c   |  2 +-
>  src/qemu/qemu_process.c   | 15 ++++++++-------
>  tests/qemuxml2argvtest.c  | 11 +++++++++++
>  8 files changed, 48 insertions(+), 18 deletions(-)
>
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index 3689399..6f3f7cf 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -2755,11 +2755,10 @@
>          backend, which is compatible with UEFI SecureBoot) or "kvm"
>          (for the legacy device assignment handled directly by the KVM
>          kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
> -        only, requires kernel 3.6 or newer)</span>. Currently, "kvm"
> -        is the default used by libvirt when not explicitly provided,
> -        but since the two are functionally equivalent, this default
> -        could be changed in the future with no impact to domains that
> -        don't specify anything.
> +        only, requires kernel 3.6 or newer)</span>. The default, when
> +        the driver name is not explicitly specified, is to check wether
> +        VFIO is available and use it if it's the case. If VFIO is not
> +        available, the legacy "kvm" assignment is attempted.
>        </dd>
>        <dt><code>readonly</code></dt>
>        <dd>Indicates that the device is readonly, only supported by SCSI host
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index f20a916..6b825d8 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
>
>  /* the backend driver used for PCI hostdev devices */
>  typedef enum {
> -    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
> +    VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */
>      VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM,    /* force legacy kvm style */
>      VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO,   /* force vfio */
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index ecf26cc..a4742fa 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -5487,7 +5487,6 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
>
>      switch ((virDomainHostdevSubsysPciBackendType)
>              dev->source.subsys.u.pci.backend) {
> -    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
>          virBufferAddLit(&buf, "pci-assign");
>          if (configfd && *configfd)
> @@ -5498,6 +5497,8 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
>          virBufferAddLit(&buf, "vfio-pci");
>          break;
>
> +

extra blank line.

> +    case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>      case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST:
>          virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                         _("PCI passhthrough type needs to be specified"));
> diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c
> index dbbc2b4..ad408d8 100644
> --- a/src/qemu/qemu_hostdev.c
> +++ b/src/qemu/qemu_hostdev.c
> @@ -1366,7 +1366,8 @@ qemuHostdevHostSupportsPassthroughLegacy(void)
>
>  bool
>  qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> -                             size_t nhostdevs)
> +                             size_t nhostdevs,
> +                             virQEMUCapsPtr qemuCaps)

Aha. I guess if you follow my recommendation in 2/3, you'll need to pass
qemuCaps down through the qemuPrepareHostDevices() call chain (and I
don't think that's a bad thing).

>  {
>      int supportsPassthroughKVM = -1;
>      int supportsPassthroughVFIO = -1;
> @@ -1387,6 +1388,23 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
>              }
>
>              switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> +            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> +                if (supportsPassthroughVFIO &&
> +                    virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> +                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> +                } else if (supportsPassthroughKVM &&
> +                           (virQEMUCapsGet(qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> +                            virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))) {
> +                    *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> +                } else {
> +                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                                   _("host doesn't support passthrough of "
> +                                     "host PCI devices"));
> +                    return false;
> +                }
> +
> +                break;
> +

Ah! And there is the separate case I was asking for in 2/3!

But you're changing the "backend" in the data itself - that will lead to
incorrect display when someone does a dumpxml (will it cause problems if
someone loads the vfio driver? I guess not, since this should only be
the "live" data, not the persistent data)


>              case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
>                  if (!supportsPassthroughVFIO) {
>                      virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -1395,7 +1413,6 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
>                  }
>                  break;
>
> -            case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
>              case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
>                  if (!supportsPassthroughKVM) {
>                      virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h
> index 6d88830..afe67a5 100644
> --- a/src/qemu/qemu_hostdev.h
> +++ b/src/qemu/qemu_hostdev.h
> @@ -70,7 +70,8 @@ int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev,
>                                        char *stateDir);
>
>  bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
> -                                  size_t nhostdevs);
> +                                  size_t nhostdevs,
> +                                  virQEMUCapsPtr qemuCaps);
>
>
>  #endif /* __QEMU_HOSTDEV_H__ */
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index c72fdc3..0bbbf6e 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1144,7 +1144,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
>          return -1;
>
>      /* verify the availability of passthrough support */
> -    if (!qemuHostdevHostVerifySupport(&hostdev, 1))
> +    if (!qemuHostdevHostVerifySupport(&hostdev, 1, priv->qemuCaps))
>          goto error;
>
>      switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 4aa9582..64d9326 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3567,9 +3567,16 @@ int qemuProcessStart(virConnectPtr conn,
>              goto cleanup;
>      }
>
> +    VIR_DEBUG("Determining emulator version");
> +    virObjectUnref(priv->qemuCaps);
> +    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
> +                                                      vm->def->emulator)))
> +        goto cleanup;
> +
>      /* check and assign device assignment settings */
>      if (!qemuHostdevHostVerifySupport(vm->def->hostdevs,
> -                                      vm->def->nhostdevs))
> +                                      vm->def->nhostdevs,
> +                                      priv->qemuCaps))
>          goto cleanup;
>
>      /* network devices must be "prepared" before hostdevs, because
> @@ -3664,12 +3671,6 @@ int qemuProcessStart(virConnectPtr conn,
>          }
>      }
>
> -    VIR_DEBUG("Determining emulator version");
> -    virObjectUnref(priv->qemuCaps);
> -    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
> -                                                      vm->def->emulator)))
> -        goto cleanup;
> -
>      if (!qemuValidateCpuMax(vm->def, priv->qemuCaps))
>          goto cleanup;
>
> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
> index 38319e5..6fe8c6a 100644
> --- a/tests/qemuxml2argvtest.c
> +++ b/tests/qemuxml2argvtest.c
> @@ -98,6 +98,7 @@ static int testCompareXMLToArgvFiles(const char *xml,
>      virConnectPtr conn;
>      char *log = NULL;
>      virCommandPtr cmd = NULL;
> +    size_t i;
>
>      if (!(conn = virGetConnect()))
>          goto out;
> @@ -154,6 +155,16 @@ static int testCompareXMLToArgvFiles(const char *xml,
>      if (qemuAssignDeviceAliases(vmdef, extraFlags) < 0)
>          goto out;
>
> +    for (i = 0; i < vmdef->nhostdevs; i++) {
> +        virDomainHostdevDefPtr hostdev = vmdef->hostdevs[i];
> +
> +        if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
> +            hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
> +            hostdev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT) {
> +            hostdev->source.subsys.u.pci.backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> +        }
> +    }
> +


I'm in too much of a rush to read the code and understand why you're
changing this unconditionally for the test. Can you just comment on what
you're doing to save me the time?


>      if (!(cmd = qemuBuildCommandLine(conn, &driver, vmdef, &monitor_chr,
>                                       (flags & FLAG_JSON), extraFlags,
>                                       migrateFrom, migrateFd, NULL,




More information about the libvir-list mailing list