[libvirt] [PATCH 1/2] add hostdev passthrough common library
Laine Stump
laine at laine.org
Tue May 28 17:41:49 UTC 2013
On 05/27/2013 12:52 AM, Marek Marczykowski wrote:
> On 16.05.2013 08:07, Chunyan Liu wrote:
>> Write separate module for hostdev passthrough so that it could be used by all
>> hypervisor drivers and maintain a global hostdev state.
>>
>> Signed-off-by: Chunyan Liu <cyliu at suse.com>
> Some comments inline. Besides that seems to be working (with your next patch
> for libxl) :)
>
>> ---
>> po/POTFILES.in | 1 +
>> src/Makefile.am | 1 +
>> src/libvirt.c | 5 +
>> src/libvirt_private.syms | 15 +
>> src/util/virhostdevmanager.c | 1218 ++++++++++++++++++++++++++++++++++++++++++
>> src/util/virhostdevmanager.h | 91 ++++
>> src/util/virpci.c | 17 +-
>> src/util/virpci.h | 7 +-
>> src/util/virusb.c | 19 +-
>> src/util/virusb.h | 4 +-
>> 10 files changed, 1362 insertions(+), 16 deletions(-)
>> create mode 100644 src/util/virhostdevmanager.c
>> create mode 100644 src/util/virhostdevmanager.h
>>
>> diff --git a/po/POTFILES.in b/po/POTFILES.in
>> index f3ea4da..a7c21bf 100644
>> --- a/po/POTFILES.in
>> +++ b/po/POTFILES.in
>> @@ -188,6 +188,7 @@ src/util/viruri.c
>> src/util/virusb.c
>> src/util/virutil.c
>> src/util/virxml.c
>> +src/util/virhostdevmanager.c
>> src/vbox/vbox_MSCOMGlue.c
>> src/vbox/vbox_XPCOMCGlue.c
>> src/vbox/vbox_driver.c
>> diff --git a/src/Makefile.am b/src/Makefile.am
>> index 4312c3c..a197c2b 100644
>> --- a/src/Makefile.am
>> +++ b/src/Makefile.am
>> @@ -130,6 +130,7 @@ UTIL_SOURCES = \
>> util/virutil.c util/virutil.h \
>> util/viruuid.c util/viruuid.h \
>> util/virxml.c util/virxml.h \
>> + util/virhostdevmanager.c util/virhostdevmanager.h \
>> $(NULL)
>>
>>
>> diff --git a/src/libvirt.c b/src/libvirt.c
>> index 2b3515e..d9af5a6 100644
>> --- a/src/libvirt.c
>> +++ b/src/libvirt.c
>> @@ -65,6 +65,7 @@
>> #include "virthread.h"
>> #include "virstring.h"
>> #include "virutil.h"
>> +#include "virhostdevmanager.h"
>>
>> #ifdef WITH_TEST
>> # include "test/test_driver.h"
>> @@ -827,6 +828,7 @@ int virStateInitialize(bool privileged,
>> if (virInitialize() < 0)
>> return -1;
>>
>> + virHostdevManagerInit();
>> for (i = 0 ; i < virStateDriverTabCount ; i++) {
>> if (virStateDriverTab[i]->stateInitialize) {
>> VIR_DEBUG("Running global init for %s state driver",
>> @@ -858,6 +860,9 @@ int virStateCleanup(void) {
>> virStateDriverTab[i]->stateCleanup() < 0)
>> ret = -1;
>> }
>> +
>> + virHostdevManagerCleanup();
>> +
>> return ret;
>> }
>>
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index cdd0b41..824de4e 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -1991,6 +1991,21 @@ virXPathULong;
>> virXPathULongHex;
>> virXPathULongLong;
>>
>> +#util/virhostdevmanager.h
>> +virHostdevManagerGetDefault;
>> +virHostdevManagerInit;
>> +virHostdevManagerCleanup;
>> +virHostdevManagerPrepareHostdevs;
>> +virHostdevManagerReAttachHostdevs;
>> +virHostdevManagerPreparePciHostdevs;
>> +virHostdevManagerPrepareUsbHostdevs;
>> +virHostdevManagerReAttachPciHostdevs;
>> +virHostdevManagerReAttachUsbHostdevs;
>> +virGetActivePciHostdevs;
>> +virGetActiveUsbHostdevs;
>> +virGetDomainActivePciHostdevs;
>> +virGetDomainActiveUsbHostdevs;
>> +virFreeHostdevNameList;
> "make check" reports wrong symbol order here.
>
>>
>> # Let emacs know we want case-insensitive sorting
>> # Local Variables:
>> diff --git a/src/util/virhostdevmanager.c b/src/util/virhostdevmanager.c
>> new file mode 100644
>> index 0000000..9034212
>> --- /dev/null
>> +++ b/src/util/virhostdevmanager.c
>> @@ -0,0 +1,1218 @@
>> +/*
>> + * Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library. If not, see
>> + * <http://www.gnu.org/licenses/>.
>> + *
>> + * Author: Chunyan Liu <cyliu at suse.com>
>> + */
>> +#include <config.h>
>> +
>> +#include "virhostdevmanager.h"
>> +
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +
>> +#include "viralloc.h"
>> +#include "virstring.h"
>> +#include "virfile.h"
>> +#include "virerror.h"
>> +#include "virlog.h"
>> +#include "virpci.h"
>> +#include "virusb.h"
>> +#include "virnetdev.h"
>> +
>> +/* For virReportOOMError() and virReportSystemError() */
>> +#define VIR_FROM_THIS VIR_FROM_NONE
>> +
>> +struct _virHostdevManager{
>> + char *stateDir;
>> +
>> + virPCIDeviceListPtr activePciHostdevs;
>> + virPCIDeviceListPtr inactivePciHostdevs;
>> + virUSBDeviceListPtr activeUsbHostdevs;
>> +};
>> +
>> +static virHostdevManagerPtr hostdevMgr;
>> +
>> +int virHostdevManagerInit(void)
>> +{
>> + char ebuf[1024];
>> + if (VIR_ALLOC(hostdevMgr) < 0)
>> + return -1;
>> +
>> + if ((hostdevMgr->activePciHostdevs = virPCIDeviceListNew()) == NULL)
>> + return -1;
> goto out_of_memory ?
>
>> +
>> + if ((hostdevMgr->activeUsbHostdevs = virUSBDeviceListNew()) == NULL)
>> + return -1;
> goto out_of_memory ?
>
>> +
>> + if ((hostdevMgr->inactivePciHostdevs = virPCIDeviceListNew()) == NULL)
>> + return -1;
> goto out_of_memory ?
>
>> +
>> + if (virAsprintf(&hostdevMgr->stateDir,
>> + "%s",HOSTDEV_STATE_DIR) == -1)
>> + goto out_of_memory;
>> +
>> + if (virFileMakePath(hostdevMgr->stateDir) < 0) {
>> + VIR_ERROR(_("Failed to create state dir '%s': %s"),
>> + hostdevMgr->stateDir, virStrerror(errno, ebuf, sizeof(ebuf)));
>> + goto error;
>> + }
>> +
>> + return 0;
>> +
>> +out_of_memory:
>> + virReportOOMError();
>> +error:
>> + return -1;
>> +}
>> +
>> +void virHostdevManagerCleanup(void)
>> +{
>> + if(!hostdevMgr)
>> + return;
>> +
>> + virObjectUnref(hostdevMgr->activePciHostdevs);
>> + virObjectUnref(hostdevMgr->inactivePciHostdevs);
>> + virObjectUnref(hostdevMgr->activeUsbHostdevs);
>> +
>> + VIR_FREE(hostdevMgr->stateDir);
>> +}
>> +
>> +virHostdevManagerPtr virHostdevManagerGetDefault(void)
>> +{
>> + return hostdevMgr;
>> +}
>> +
>> +static int
>> +virDomainHostdevPciSysfsPath(virDomainHostdevDefPtr hostdev, char **sysfs_path)
>> +{
>> + virPCIDeviceAddress config_address;
>> +
>> + config_address.domain = hostdev->source.subsys.u.pci.addr.domain;
>> + config_address.bus = hostdev->source.subsys.u.pci.addr.bus;
>> + config_address.slot = hostdev->source.subsys.u.pci.addr.slot;
>> + config_address.function = hostdev->source.subsys.u.pci.addr.function;
>> +
>> + return virPCIDeviceAddressGetSysfsFile(&config_address, sysfs_path);
>> +}
>> +
>> +static int
>> +virDomainHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev)
>> +{
>> + char *sysfs_path = NULL;
>> + int ret = -1;
>> +
>> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0)
>> + return ret;
>> +
>> + ret = virPCIIsVirtualFunction(sysfs_path);
>> +
>> + VIR_FREE(sysfs_path);
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +virDomainHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
>> + int *vf)
>> +{
>> + int ret = -1;
>> + char *sysfs_path = NULL;
>> +
>> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0)
>> + return ret;
>> +
>> + if (virPCIIsVirtualFunction(sysfs_path) == 1) {
>> + if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev,
>> + vf) < 0)
>> + goto cleanup;
>> + } else {
>> + if (virPCIGetNetName(sysfs_path, linkdev) < 0)
>> + goto cleanup;
>> + *vf = -1;
>> + }
>> +
>> + ret = 0;
>> +
>> +cleanup:
>> + VIR_FREE(sysfs_path);
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +virDomainHostdevNetConfigVirtPortProfile(const char *linkdev, int vf,
>> + virNetDevVPortProfilePtr virtPort,
>> + const virMacAddrPtr macaddr,
>> + const unsigned char *uuid,
>> + int associate)
>> +{
>> + int ret = -1;
>> +
>> + if (!virtPort)
>> + return ret;
>> +
>> + switch (virtPort->virtPortType) {
>> + case VIR_NETDEV_VPORT_PROFILE_NONE:
>> + case VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH:
>> + case VIR_NETDEV_VPORT_PROFILE_8021QBG:
>> + case VIR_NETDEV_VPORT_PROFILE_LAST:
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("virtualport type %s is "
>> + "currently not supported on interfaces of type "
>> + "hostdev"),
>> + virNetDevVPortTypeToString(virtPort->virtPortType));
>> + break;
>> +
>> + case VIR_NETDEV_VPORT_PROFILE_8021QBH:
>> + if (associate)
>> + ret = virNetDevVPortProfileAssociate(NULL, virtPort, macaddr,
>> + linkdev, vf, uuid,
>> + VIR_NETDEV_VPORT_PROFILE_OP_CREATE, false);
>> + else
>> + ret = virNetDevVPortProfileDisassociate(NULL, virtPort,
>> + macaddr, linkdev, vf,
>> + VIR_NETDEV_VPORT_PROFILE_OP_DESTROY);
>> + break;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +virDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev,
>> + const unsigned char *uuid,
>> + char *stateDir)
>> +{
>> + char *linkdev = NULL;
>> + virNetDevVlanPtr vlan;
>> + virNetDevVPortProfilePtr virtPort;
>> + int ret = -1;
>> + int vf = -1;
>> + int vlanid = -1;
>> + int port_profile_associate = 1;
>> + int isvf;
>> +
>> + isvf = virDomainHostdevIsVirtualFunction(hostdev);
>> + if (isvf <= 0) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> + _("Interface type hostdev is currently supported on"
>> + " SR-IOV Virtual Functions only"));
>> + return ret;
>> + }
>> +
>> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
>> + return ret;
>> +
>> + vlan = virDomainNetGetActualVlan(hostdev->parent.data.net);
>> + virtPort = virDomainNetGetActualVirtPortProfile(
>> + hostdev->parent.data.net);
>> + if (virtPort) {
>> + if (vlan) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("direct setting of the vlan tag is not allowed "
>> + "for hostdev devices using %s mode"),
>> + virNetDevVPortTypeToString(virtPort->virtPortType));
>> + goto cleanup;
>> + }
>> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf,
>> + virtPort, &hostdev->parent.data.net->mac, uuid,
>> + port_profile_associate);
>> + } else {
>> + /* Set only mac and vlan */
>> + if (vlan) {
>> + if (vlan->nTags != 1 || vlan->trunk) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> + _("vlan trunking is not supported "
>> + "by SR-IOV network devices"));
>> + goto cleanup;
>> + }
>> + if (vf == -1) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("vlan can only be set for SR-IOV VFs, but "
>> + "%s is not a VF"), linkdev);
>> + goto cleanup;
>> + }
>> + vlanid = vlan->tag[0];
>> + } else if (vf >= 0) {
>> + vlanid = 0; /* assure any current vlan tag is reset */
>> + }
>> +
>> + ret = virNetDevReplaceNetConfig(linkdev, vf,
>> + &hostdev->parent.data.net->mac,
>> + vlanid, stateDir);
>> + }
>> +cleanup:
>> + VIR_FREE(linkdev);
>> + return ret;
>> +}
>> +
>> +static int
>> +virDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev,
>> + char *stateDir)
>> +{
>> + char *linkdev = NULL;
>> + virNetDevVPortProfilePtr virtPort;
>> + int ret = -1;
>> + int vf = -1;
>> + int port_profile_associate = 0;
>> + int isvf;
>> +
>> + isvf = virDomainHostdevIsVirtualFunction(hostdev);
>> + if (isvf <= 0) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> + _("Interface type hostdev is currently supported on"
>> + " SR-IOV Virtual Functions only"));
>> + return ret;
>> + }
>> +
>> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
>> + return ret;
>> +
>> + virtPort = virDomainNetGetActualVirtPortProfile(
>> + hostdev->parent.data.net);
>> + if (virtPort)
>> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf, virtPort,
>> + &hostdev->parent.data.net->mac, NULL,
>> + port_profile_associate);
>> + else
>> + ret = virNetDevRestoreNetConfig(linkdev, vf, stateDir);
>> +
>> + VIR_FREE(linkdev);
>> +
>> + return ret;
>> +}
>> +
>> +static virPCIDeviceListPtr
>> +virHostdevManagerGetPciHostDeviceList(
>> + virDomainHostdevDefPtr *hostdevs,
>> + int nhostdevs)
>> +{
>> + virPCIDeviceListPtr list;
>> + int i;
>> +
>> + if (!(list = virPCIDeviceListNew()))
>> + return NULL;
>> +
>> + for (i = 0 ; i < nhostdevs ; i++) {
>> + virDomainHostdevDefPtr hostdev = hostdevs[i];
>> + virPCIDevicePtr dev;
>> +
>> + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
>> + continue;
>> + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI)
>> + continue;
>> +
>> + dev = virPCIDeviceNew(hostdev->source.subsys.u.pci.addr.domain,
>> + hostdev->source.subsys.u.pci.addr.bus,
>> + hostdev->source.subsys.u.pci.addr.slot,
>> + hostdev->source.subsys.u.pci.addr.function);
>> + if (!dev) {
>> + virObjectUnref(list);
>> + return NULL;
>> + }
>> +
>> + if (virPCIDeviceListAdd(list, dev) < 0) {
>> + virPCIDeviceFree(dev);
>> + virObjectUnref(list);
>> + return NULL;
>> + }
>> +
>> + virPCIDeviceSetManaged(dev, hostdev->managed);
>> + }
>> +
>> + return list;
>> +}
>> +
>> +int
>> +virHostdevManagerPreparePciHostdevs(virHostdevManagerPtr mgr,
>> + const char *drv_name,
>> + const char *dom_name,
>> + const unsigned char *uuid,
>> + virDomainHostdevDefPtr *hostdevs,
>> + int nhostdevs,
>> + unsigned int flags)
>> +{
>> + virPCIDeviceListPtr pcidevs;
>> + int last_processed_hostdev_vf = -1;
>> + int i;
>> + int ret = -1;
>> +
>> + virObjectLock(mgr->activePciHostdevs);
>> + virObjectLock(mgr->inactivePciHostdevs);
>> +
>> + if (!(pcidevs = virHostdevManagerGetPciHostDeviceList(hostdevs, nhostdevs)))
>> + goto cleanup;
>> +
>> + /* We have to use 9 loops here. *All* devices must
>> + * be detached before we reset any of them, because
>> + * in some cases you have to reset the whole PCI,
>> + * which impacts all devices on it. Also, all devices
>> + * must be reset before being marked as active.
>> + */
>> +
>> + /* Loop 1: validate that non-managed device isn't in use, eg
>> + * by checking that device is either un-bound, or bound
>> + * to pci-stub.ko
>> + */
>> +
>> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
>> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
>> + virPCIDevicePtr other;
>> + bool strict_acs_check = (flags & VIR_STRICT_ACS_CHECK)?true:false;
>> +
>> + if (!virPCIDeviceIsAssignable(dev, strict_acs_check)) {
>> + virReportError(VIR_ERR_OPERATION_INVALID,
>> + _("PCI device %s is not assignable"),
>> + virPCIDeviceGetName(dev));
>> + goto cleanup;
>> + }
>> + /* The device is in use by other active domain if
>> + * the dev is in list activePciHostdevs.
>> + */
>> + if ((other = virPCIDeviceListFind(mgr->activePciHostdevs, dev))) {
>> + char *other_drvname = NULL;
>> + char *other_domname = NULL;
>> + virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname);
>> +
>> + if (other_drvname && other_domname)
>> + virReportError(VIR_ERR_OPERATION_INVALID,
>> + _("PCI device %s is in use by driver %s,domain %s"),
>> + virPCIDeviceGetName(dev), other_drvname,
>> + other_domname);
>> + else
>> + virReportError(VIR_ERR_OPERATION_INVALID,
>> + _("PCI device %s is already in use"),
>> + virPCIDeviceGetName(dev));
>> + VIR_FREE(other_drvname);
>> + VIR_FREE(other_domname);
>> + goto cleanup;
>> + }
>> + }
>> +
>> + /* Loop 2: detach managed devices */
>> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
>> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
>> + const char *stub_driver;
>> + if (STREQ_NULLABLE(drv_name, "xenlight"))
>> + stub_driver = "pciback";
>> + else
>> + stub_driver = "pci_stub";
> Really need hardcode driver name here?
Actually, no, it's not needed, and the code this is replacing has
changed, so this all needs a rebase.
The way that it works now (with the code that's in qemu) is that
qemuGetPciHostDeviceList() (which builds a list of virPCIDevice from a
list of virDomainHostDevDef) checks for a <driver name='xxx'/> setting,
and if driver name is set to "vfio", it calls
virPCIDeviceSetStubDriver(dev, "vfio-pci"), otherwise it calls
virPCIDeviceSetStubDriver(dev, "pci-stub"). I guess your new
virHostdevManagerGetPciHostDeviceList() needs to be made aware of which
hypervisor is being used, and set the stub driver accordingly.
Everything else in the device detach/reattach should be made to either
use the stub driver name stored in the virPCIDevice, or to auto-detect
it (virPCIDeviceUnbindFromStub() shows how to do that).
> BTW "xen" driver missing here.
> Perhaps this can be passes as parameter (with default to "pci_stub" if NULL)?
> Or set by driver before calling this function (virPCIDeviceSetSubDriver?).
Right, that function is newer than the code this patch is based from.
[...]
>> +
>> +/*
>> + * Pre-condition: mgr->inactivePciHostdevs & mgr->activePciHostdevs
>> + * are locked
>> + */
>> +static void
>> +virReattachPciDevice(virHostdevManagerPtr mgr, virPCIDevicePtr dev, const char *driver)
>> +{
>> + /* If the device is not managed and was attached to guest
>> + * successfully, it must have been inactive.
>> + */
>> + if (!virPCIDeviceGetManaged(dev)) {
>> + if (virPCIDeviceListAdd(mgr->inactivePciHostdevs, dev) < 0)
>> + virPCIDeviceFree(dev);
>> + return;
>> + }
>> +
>> + if (STREQ_NULLABLE(driver, "QEMU")) {
> Same as earlier, perhaps the better approach will be checking for current stub
> driver here?
Hmm, yes. I wonder what this does for vfio. Is this needed for something
in the pci-stub driver? Or is it common to any device assignment in
qemu? (Fortunately it's harmless (although a bit wasteful of CPU time)
when the device hasn't been used by qemu/kvm.
>> + int retries = 100;
>> +
>> + while (virPCIDeviceWaitForCleanup(dev, "kvm_assigned_device")
>> + && retries) {
>> + usleep(100*1000);
>> + retries--;
>> + }
>> + }
>> +
>> + if (virPCIDeviceReattach(dev, mgr->activePciHostdevs,
>> + mgr->inactivePciHostdevs) < 0) {
>> + virErrorPtr err = virGetLastError();
>> + VIR_ERROR(_("Failed to re-attach PCI device: %s"),
>> + err ? err->message : _("unknown error"));
>> + virResetError(err);
>> + }
>> + virPCIDeviceFree(dev);
>> +}
>> +
>> +/*
>>
Generally, please check for recent changes to the functions you're
basing this on. We don't want any of that to mysteriously disappear when
we switch from the qemu-specific functions to these general functions.
More information about the libvir-list
mailing list