[libvirt] [PATCH] expose SR IOV physical/virtual function relationships
Dave Allan
dallan at redhat.com
Thu Dec 10 21:51:12 UTC 2009
On 12/10/2009 03:16 PM, Daniel P. Berrange wrote:
> On Fri, Dec 04, 2009 at 10:30:46AM -0500, Dave Allan wrote:
>> Daniel P. Berrange wrote:
>>>
>>> I don't much like including the raw BDF format, because it is effectively
>>> adding a 3rd way of specifying PCI addresses in libvirt XML. We already
>>> have 2 different ways, which is 1 too many
>>>
>>> Node dev XML
>>>
>>> <capability type='pci'>
>>> <domain>0</domain>
>>> <bus>0</bus>
>>> <slot>31</slot>
>>> <function>1</function>
>>> </capability>
>>>
>>>
>>> Domain XML for host devices& guest addresses
>>>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x5'/>
>>>
>>> Yes, my bad for screwing up the nodedev XML format when I designed
>>> it, stupidly choosing decimal instead of hex :-(
>>>
>>> I think we should make the virtual/physical function follow the domain
>>> XML style, with an<address/> element. Perhaps also use nested capabilities
>>> in the way we did for NPIV host/vport stuff
>>>
>>> <capability type='physical_function'>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1'/>
>>> </capability>
>>>
>>> <capability type='virtual_functions'>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x3'/>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x4'/>
>>> </capability>
>>>
>>>
>>> I'd even be inclined to retro-fit the existing<capability type='pci'>
>>> XML to duplicate the address info in this saner format eg
>>>
>>> <capability type='pci'>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1' />
>>> <domain>0</domain>
>>> <bus>0</bus>
>>> <slot>31</slot>
>>> <function>1</function>
>>> </capability>
>>>
>>>
>>> So, a PCI card with SR-IOV would end up looking like
>>>
>>> <capability type='pci'>
>>> <domain>0</domain>
>>> <bus>0</bus>
>>> <slot>31</slot>
>>> <function>1</function>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1' />
>>>
>>> <capability type='virtual_functions'>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x3'/>
>>> <address domain='0x0000' bus='0x00' slot='0x1f' function='0x4'/>
>>> </capability>
>>> </capability>
>>>
>>> The nice thing about this kind of nesting is that it would let us show
>>> that a card is capable of exposing virtual functions, even if it does
>>> not currently have any enabled
>>>
>>>
>>> Regards,
>>> Daniel
>>
>> Here's an updated patch reflecting the above suggestions.
>>
>> Dave
>>
>
>> From d11d00d93c4bfbfd1125560ce80abfdbf88de94b Mon Sep 17 00:00:00 2001
>> From: David Allan<dallan at redhat.com>
>> Date: Mon, 30 Nov 2009 15:58:47 -0500
>> Subject: [PATCH 1/1] Add SR IOV physical and virtual function relationships
>>
>> ---
>> src/conf/node_device_conf.c | 30 +++++
>> src/conf/node_device_conf.h | 16 +++
>> src/node_device/node_device_driver.h | 14 ++
>> src/node_device/node_device_hal.c | 6 +
>> src/node_device/node_device_linux_sysfs.c | 200 ++++++++++++++++++++++++++++-
>> 5 files changed, 265 insertions(+), 1 deletions(-)
>>
>> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
>> index 6003ab1..a8ecfb0 100644
>> --- a/src/conf/node_device_conf.c
>> +++ b/src/conf/node_device_conf.c
>> @@ -246,6 +246,7 @@ char *virNodeDeviceDefFormat(virConnectPtr conn,
>> {
>> virBuffer buf = VIR_BUFFER_INITIALIZER;
>> virNodeDevCapsDefPtr caps;
>> + unsigned int i = 0;
>> char *tmp;
>>
>> virBufferAddLit(&buf, "<device>\n");
>> @@ -318,6 +319,30 @@ char *virNodeDeviceDefFormat(virConnectPtr conn,
>> data->pci_dev.vendor_name);
>> else
>> virBufferAddLit(&buf, " />\n");
>> + if (data->pci_dev.flags& VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION) {
>> + virBufferAddLit(&buf, "<capability type='physical function'>\n");
>
> Can we change that to 'phys_function'
>
>> + virBufferVSprintf(&buf,
>> + "<address domain='0x%.4x' bus='0x%.2x' "
>> + "slot='0x%.2x' function='0x%.1x'/>\n",
>> + data->pci_dev.physical_function->domain,
>> + data->pci_dev.physical_function->bus,
>> + data->pci_dev.physical_function->slot,
>> + data->pci_dev.physical_function->function);
>> + virBufferAddLit(&buf, "</capability>\n");
>> + }
>> + if (data->pci_dev.flags& VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION) {
>> + virBufferAddLit(&buf, "<capability type='virtual functions'>\n");
>
> And 'virt_function', because its nice to not have whitespace in the values
>
>> + for (i = 0 ; i< data->pci_dev.num_virtual_functions ; i++) {
>> + virBufferVSprintf(&buf,
>> + "<address domain='0x%.4x' bus='0x%.2x' "
>> + "slot='0x%.2x' function='0x%.1x'/>\n",
>> + data->pci_dev.virtual_functions[i]->domain,
>> + data->pci_dev.virtual_functions[i]->bus,
>> + data->pci_dev.virtual_functions[i]->slot,
>> + data->pci_dev.virtual_functions[i]->function);
>> + }
>> + virBufferAddLit(&buf, "</capability>\n");
>> + }
>> break;
>> case VIR_NODE_DEV_CAP_USB_DEV:
>> virBufferVSprintf(&buf, "<bus>%d</bus>\n", data->usb_dev.bus);
>> @@ -1387,6 +1412,7 @@ out:
>>
>> void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
>> {
>> + int i = 0;
>> union _virNodeDevCapData *data =&caps->data;
>>
>> switch (caps->type) {
>> @@ -1402,6 +1428,10 @@ void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
>> case VIR_NODE_DEV_CAP_PCI_DEV:
>> VIR_FREE(data->pci_dev.product_name);
>> VIR_FREE(data->pci_dev.vendor_name);
>> + VIR_FREE(data->pci_dev.physical_function);
>> + for (i = 0 ; i< data->pci_dev.num_virtual_functions ; i++) {
>> + VIR_FREE(data->pci_dev.virtual_functions[i]);
>> + }
>> break;
>> case VIR_NODE_DEV_CAP_USB_DEV:
>> VIR_FREE(data->usb_dev.product_name);
>> diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
>> index 7a20bd6..4bfac90 100644
>> --- a/src/conf/node_device_conf.h
>> +++ b/src/conf/node_device_conf.h
>> @@ -76,6 +76,18 @@ enum virNodeDevScsiHostCapFlags {
>> VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS = (1<< 1),
>> };
>>
>> +enum virNodeDevPCICapFlags {
>> + VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1<< 0),
>> + VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1<< 1),
>> +};
>> +
>> +struct pci_config_address {
>> + unsigned domain;
>> + unsigned bus;
>> + unsigned slot;
>> + unsigned function;
>> +};
>> +
>> typedef struct _virNodeDevCapsDef virNodeDevCapsDef;
>> typedef virNodeDevCapsDef *virNodeDevCapsDefPtr;
>> struct _virNodeDevCapsDef {
>> @@ -105,6 +117,10 @@ struct _virNodeDevCapsDef {
>> unsigned class;
>> char *product_name;
>> char *vendor_name;
>> + struct pci_config_address *physical_function;
>> + struct pci_config_address **virtual_functions;
>> + unsigned num_virtual_functions;
>> + unsigned flags;
>> } pci_dev;
>> struct {
>> unsigned bus;
>> diff --git a/src/node_device/node_device_driver.h b/src/node_device/node_device_driver.h
>> index 4f0822c..7b68f41 100644
>> --- a/src/node_device/node_device_driver.h
>> +++ b/src/node_device/node_device_driver.h
>> @@ -37,6 +37,10 @@
>> #define LINUX_SYSFS_VPORT_CREATE_POSTFIX "/vport_create"
>> #define LINUX_SYSFS_VPORT_DELETE_POSTFIX "/vport_delete"
>>
>> +#define SRIOV_FOUND 0
>> +#define SRIOV_NOT_FOUND 1
>> +#define SRIOV_ERROR -1
>> +
>> #define LINUX_NEW_DEVICE_WAIT_TIME 60
>>
>> #ifdef HAVE_HAL
>> @@ -61,6 +65,14 @@ int check_fc_host_linux(union _virNodeDevCapData *d);
>> #define check_vport_capable(d) check_vport_capable_linux(d)
>> int check_vport_capable_linux(union _virNodeDevCapData *d);
>>
>> +#define get_physical_function(s,d) get_physical_function_linux(s,d)
>> +int get_physical_function_linux(const char *sysfs_path,
>> + union _virNodeDevCapData *d);
>> +
>> +#define get_virtual_functions(s,d) get_virtual_functions_linux(s,d)
>> +int get_virtual_functions_linux(const char *sysfs_path,
>> + union _virNodeDevCapData *d);
>> +
>> #define read_wwn(host, file, wwn) read_wwn_linux(host, file, wwn)
>> int read_wwn_linux(int host, const char *file, char **wwn);
>>
>> @@ -68,6 +80,8 @@ int read_wwn_linux(int host, const char *file, char **wwn);
>>
>> #define check_fc_host(d)
>> #define check_vport_capable(d)
>> +#define get_physical_function(sysfs_path, d)
>> +#define get_virtual_functions(sysfs_path, d)
>> #define read_wwn(host, file, wwn)
>>
>> #endif /* __linux__ */
>> diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c
>> index 6de7e3d..4496301 100644
>> --- a/src/node_device/node_device_hal.c
>> +++ b/src/node_device/node_device_hal.c
>> @@ -145,14 +145,20 @@ static int gather_pci_cap(LibHalContext *ctx, const char *udi,
>> (void)virStrToLong_ui(p+1,&p, 16,&d->pci_dev.slot);
>> (void)virStrToLong_ui(p+1,&p, 16,&d->pci_dev.function);
>> }
>> +
>> + get_physical_function(sysfs_path, d);
>> + get_virtual_functions(sysfs_path, d);
>> +
>> VIR_FREE(sysfs_path);
>> }
>> +
>> (void)get_int_prop(ctx, udi, "pci.vendor_id", (int *)&d->pci_dev.vendor);
>> if (get_str_prop(ctx, udi, "pci.vendor",&d->pci_dev.vendor_name) != 0)
>> (void)get_str_prop(ctx, udi, "info.vendor",&d->pci_dev.vendor_name);
>> (void)get_int_prop(ctx, udi, "pci.product_id", (int *)&d->pci_dev.product);
>> if (get_str_prop(ctx, udi, "pci.product",&d->pci_dev.product_name) != 0)
>> (void)get_str_prop(ctx, udi, "info.product",&d->pci_dev.product_name);
>> +
>> return 0;
>> }
>>
>> diff --git a/src/node_device/node_device_linux_sysfs.c b/src/node_device/node_device_linux_sysfs.c
>> index b7cf782..d2b10dd 100644
>> --- a/src/node_device/node_device_linux_sysfs.c
>> +++ b/src/node_device/node_device_linux_sysfs.c
>> @@ -29,6 +29,7 @@
>> #include "virterror_internal.h"
>> #include "memory.h"
>> #include "logging.h"
>> +#include<dirent.h>
>>
>> #define VIR_FROM_THIS VIR_FROM_NODEDEV
>>
>> @@ -70,7 +71,7 @@ int read_wwn_linux(int host, const char *file, char **wwn)
>> char buf[64];
>>
>> if (open_wwn_file(LINUX_SYSFS_FC_HOST_PREFIX, host, file,&fd)< 0) {
>> - goto out;
>> + goto out;
>> }
>>
>> memset(buf, 0, sizeof(buf));
>> @@ -184,4 +185,201 @@ out:
>> return retval;
>> }
>>
>> +
>> +static int logStrToLong_ui(char const *s,
>> + char **end_ptr,
>> + int base,
>> + unsigned int *result)
>> +{
>> + int ret = 0;
>> +
>> + ret = virStrToLong_ui(s, end_ptr, base, result);
>> + if (ret != 0) {
>> + VIR_ERROR("Failed to convert '%s' to unsigned int\n", s);
>> + } else {
>> + VIR_DEBUG("Converted '%s' to unsigned int %u\n", s, *result);
>> + }
>
> There's no need for '\n' in log messages.
>
>> +
>> + return ret;
>> +}
>> +
>> +
>> +static int parse_pci_config_address(char *address, struct pci_config_address *bdf)
>> +{
>> + char *p = NULL;
>> + int ret = -1;
>> +
>> + if ((address == NULL) || (logStrToLong_ui(address,
>> +&p,
>> + 16,
>> +&bdf->domain) == -1)) {
>> + goto out;
>> + }
>> +
>> + if ((p == NULL) || (logStrToLong_ui(p+1,
>> +&p,
>> + 16,
>> +&bdf->bus) == -1)) {
>> + goto out;
>> + }
>> +
>> + if ((p == NULL) || (logStrToLong_ui(p+1,
>> +&p,
>> + 16,
>> +&bdf->slot) == -1)) {
>> + goto out;
>> + }
>> +
>> + if ((p == NULL) || (logStrToLong_ui(p+1,
>> +&p,
>> + 16,
>> +&bdf->function) == -1)) {
>> + goto out;
>> + }
>> +
>> + ret = 0;
>> +
>> +out:
>> + return ret;
>> +}
>> +
>> +
>> +
>> +
>> +static int get_sriov_function(const char *device_link,
>> + struct pci_config_address **bdf)
>> +{
>> + char *device_path = NULL, *config_address = NULL;
>> + char errbuf[64];
>> + int ret = SRIOV_ERROR;
>> +
>> + VIR_DEBUG("Attempting to resolve device path from device link '%s'\n",
>> + device_link);
>> +
>> + if (!virFileExists(device_link)) {
>> +
>> + VIR_DEBUG("SR IOV function link '%s' does not exist\n", device_link);
>> + /* Not an SR IOV device, not an error, either. */
>> + ret = SRIOV_NOT_FOUND;
>> +
>> + goto out;
>> +
>> + }
>> +
>> + device_path = realpath(device_link, device_path);
>> + if (device_path == NULL) {
>> + memset(errbuf, '\0', sizeof(errbuf));
>> + VIR_ERROR("Failed to resolve device link '%s': '%s'\n", device_link,
>> + strerror_r(errno, errbuf, sizeof(errbuf)));
>
> Can you replace that with virStrerror() because strerror_r() has an
> annoying API difference on Linux compared to POSIX (returns char *
> instead of int, or vica-verca).
>
>> + goto out;
>> + }
>> +
>> + VIR_DEBUG("SR IOV device path is '%s'\n", device_path);
>> + config_address = basename(device_path);
>> + if (VIR_ALLOC(*bdf) != 0) {
>> + VIR_ERROR0("Failed to allocate memory for PCI device name\n");
>> + goto out;
>> + }
>> +
>> + if (parse_pci_config_address(config_address, *bdf) != 0) {
>> + VIR_ERROR("Failed to parse PCI config address '%s'\n", config_address);
>> + goto out;
>> + }
>> +
>> + VIR_DEBUG("SR IOV function %.4x:%.2x:%.2x.%.1x/>\n",
>> + (*bdf)->domain,
>> + (*bdf)->bus,
>> + (*bdf)->slot,
>> + (*bdf)->function);
>> +
>> + ret = SRIOV_FOUND;
>> +
>> +out:
>> + VIR_FREE(device_path);
>> + return ret;
>> +}
>> +
>> +
>> +int get_physical_function_linux(const char *sysfs_path,
>> + union _virNodeDevCapData *d ATTRIBUTE_UNUSED)
>> +{
>> + int ret = -1;
>> + char *device_link = NULL;
>> +
>> + VIR_DEBUG("Attempting to get SR IOV physical function for device "
>> + "with sysfs path '%s'\n", sysfs_path);
>> +
>> + if (virBuildPath(&device_link, sysfs_path, "physfn") == -1) {
>> + virReportOOMError(NULL);
>> + } else {
>> + ret = get_sriov_function(device_link,&d->pci_dev.physical_function);
>> + if (ret == SRIOV_FOUND) {
>> + d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION;
>> + }
>> + }
>> +
>> + VIR_FREE(device_link);
>> + return ret;
>> +}
>> +
>> +
>> +int get_virtual_functions_linux(const char *sysfs_path,
>> + union _virNodeDevCapData *d)
>> +{
>> + int ret = -1;
>> + DIR *dir = NULL;
>> + struct dirent *entry = NULL;
>> + char *device_link = NULL;
>> +
>> + VIR_DEBUG("Attempting to get SR IOV virtual functions for device"
>> + "with sysfs path '%s'\n", sysfs_path);
>> +
>> + dir = opendir(sysfs_path);
>> + if (dir == NULL) {
>> + goto out;
>> + }
>> +
>> + while ((entry = readdir(dir))) {
>> + if (STRPREFIX(entry->d_name, "virtfn")) {
>> + /* This local is just to avoid lines of code much> 80 col. */
>> + unsigned int *num_funcs =&d->pci_dev.num_virtual_functions;
>> +
>> + if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) {
>> + virReportOOMError(NULL);
>> + goto out;
>> + }
>> +
>> + VIR_DEBUG("Number of virtual functions: %d\n", *num_funcs);
>> + if (VIR_REALLOC_N(d->pci_dev.virtual_functions, (*num_funcs) + 1) != 0) {
>> + virReportOOMError(NULL);
>> + goto out;
>> + }
>> +
>> + if (get_sriov_function(device_link,
>> +&d->pci_dev.virtual_functions[*num_funcs]) !=
>> + SRIOV_FOUND) {
>> +
>> + /* We should not get back SRIOV_NOT_FOUND in this
>> + * case, so if we do, it's an error. */
>> + VIR_ERROR("Failed to get SR IOV function from device link '%s'\n",
>> + device_link);
>> + goto out;
>> + } else {
>> + (*num_funcs)++;
>> + d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION;
>> + }
>> +
>> + VIR_FREE(device_link);
>> + }
>> + }
>> +
>> + closedir(dir);
>> +
>> + ret = 0;
>> +
>> +out:
>> + VIR_FREE(device_link);
>> + return 0;
>> +}
>> +
>> #endif /* __linux__ */
>> --
>
> ACK aside from the minor logging fixes& attribute whitespace
>
> Daniel
Ok, thanks. Final version attached.
Dave
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: 0001-Add-SR-IOV-physical-and-virtual-function-relationshi.patch
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20091210/432ed572/attachment-0001.ksh>
More information about the libvir-list
mailing list