[libvirt] [PATCH v4 4/6] nodedev: Introduce the mdev capability to a PCI parent device

John Ferlan jferlan at redhat.com
Wed May 17 21:22:45 UTC 2017



On 05/15/2017 08:10 AM, Erik Skultety wrote:
> The parent device needs to report the generic stuff about the supported
> mediated devices types, like device API, available instances, type name,
> etc. Therefore this patch introduces a new nested capability element of
> type 'mdev_types' with the resulting XML of the following format:
> 
> <device>
>   ...
>   <capability type='pci'>
>     ...
>     <capability type='mdev_types'>
>       <type id='vendor_supplied_id'>
>         <name>optional_vendor_supplied_codename</name>
>         <deviceAPI>vfio-pci</deviceAPI>
>         <availableInstances>NUM</availableInstances>
>       </type>
>         ...
>       <type>
>         ...
>       </type>
>     </capability>
>   </capability>
>   ...
> </device>

I haven't followed all the discussions that closely - so a comment from
the peanut gallery...  Since mdev_types is a sub-capability of
type='pci', does the "vfio-pci" feel redundant? Ok just the -pci part...

> 
> Signed-off-by: Erik Skultety <eskultet at redhat.com>
> ---
>  docs/schemas/nodedev.rng                           |  26 +++++
>  src/conf/node_device_conf.c                        |  97 +++++++++++++++++
>  src/conf/node_device_conf.h                        |  15 +++
>  src/conf/virnodedeviceobj.c                        |   7 ++
>  src/libvirt_private.syms                           |   1 +
>  src/node_device/node_device_udev.c                 | 119 +++++++++++++++++++++
>  .../pci_0000_02_10_7_mdev_types.xml                |  32 ++++++
>  tests/nodedevxml2xmltest.c                         |   1 +
>  8 files changed, 298 insertions(+)
>  create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
> 
> diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
> index 0f90a73c8..e0a2c5032 100644
> --- a/docs/schemas/nodedev.rng
> +++ b/docs/schemas/nodedev.rng
> @@ -205,6 +205,32 @@
>      </optional>
>  
>      <optional>
> +      <element name='capability'>
> +        <attribute name='type'>
> +          <value>mdev_types</value>
> +        </attribute>
> +        <oneOrMore>
> +          <element name='type'>
> +            <attribute name='id'>
> +              <data type='string'/>
> +            </attribute>
> +            <optional>
> +              <element name='name'><text/></element>
> +            </optional>
> +            <element name='deviceAPI'>
> +              <choice>
> +                <value>vfio-pci</value>
> +              </choice>
> +            </element>
> +            <element name='availableInstances'>
> +              <ref name='unsignedInt'/>
> +            </element>
> +          </element>
> +        </oneOrMore>
> +      </element>
> +   </optional>
> +
> +    <optional>
>        <element name='iommuGroup'>
>          <attribute name='number'>
>            <ref name='unsignedInt'/>
> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
> index 40d71f277..f26b1ffc7 100644
> --- a/src/conf/node_device_conf.c
> +++ b/src/conf/node_device_conf.c
> @@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath,
>  
>  
>  void
> +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type)
> +{
> +    if (!type)
> +        return;
> +
> +    VIR_FREE(type->id);
> +    VIR_FREE(type->name);
> +    VIR_FREE(type->device_api);
> +    VIR_FREE(type);
> +}
> +
> +
> +void
>  virNodeDeviceDefFree(virNodeDeviceDefPtr def)
>  {
>      virNodeDevCapsDefPtr caps;
> @@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
>          virBufferAsprintf(buf, "<capability type='%s'/>\n",
>                            virPCIHeaderTypeToString(data->pci_dev.hdrType));
>      }
> +    if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
> +        virBufferAddLit(buf, "<capability type='mdev_types'>\n");
> +        virBufferAdjustIndent(buf, 2);
> +        for (i = 0; i < data->pci_dev.nmdev_types; i++) {
> +            virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i];
> +            virBufferEscapeString(buf, "<type id='%s'>\n", type->id);
> +            virBufferAdjustIndent(buf, 2);
> +            if (type->name)
> +                virBufferEscapeString(buf, "<name>%s</name>\n",
> +                                      type->name);
> +            virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n",
> +                                  type->device_api);
> +            virBufferAsprintf(buf,
> +                              "<availableInstances>%u</availableInstances>\n",
> +                              type->available_instances);
> +            virBufferAdjustIndent(buf, -2);
> +            virBufferAddLit(buf, "</type>\n");
> +        }
> +        virBufferAdjustIndent(buf, -2);
> +        virBufferAddLit(buf, "</capability>\n");
> +    }
>      if (data->pci_dev.nIommuGroupDevices) {
>          virBufferAsprintf(buf, "<iommuGroup number='%d'>\n",
>                            data->pci_dev.iommuGroupNumber);
> @@ -1365,6 +1399,63 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt,
>  
>  
>  static int
> +virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt,
> +                                  virNodeDevCapPCIDevPtr pci_dev)
> +{
> +    int ret = -1;
> +    xmlNodePtr orignode = NULL;
> +    xmlNodePtr *nodes = NULL;
> +    int nmdev_types = virXPathNodeSet("./type", ctxt, &nodes);

Coverity lets me know that virXPathNodeSet allocates memory into @nodes,
but we never clean it up before leaving.

Coverity also is proud to announce that nmdev_types could be < 0 which
would really mess up the following for loop!

> +    virNodeDevCapMdevTypePtr type = NULL;
> +    size_t i;

I would just go with:

    if ((nmdev_types = virXPathNodeSet("./type", ctxt, &nodes)) < 0)
        goto cleanup;

> +
> +    orignode = ctxt->node;
> +    for (i = 0; i < nmdev_types; i++) {
> +        ctxt->node = nodes[i];
> +
> +        if (VIR_ALLOC(type) < 0)
> +            goto cleanup;
> +
> +        if (!(type->id = virXPathString("string(./@id[1])", ctxt))) {
> +            virReportError(VIR_ERR_XML_ERROR, "%s",
> +                           _("missing 'id' attribute for mediated device's "
> +                             "<type> element"));
> +            goto cleanup;
> +        }
> +
> +        if (!(type->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) {
> +            virReportError(VIR_ERR_XML_ERROR,
> +                           _("missing device API for mediated device type '%s'"),
> +                           type->id);
> +            goto cleanup;
> +        }
> +
> +        if (virXPathUInt("number(./availableInstances)", ctxt,
> +                         &type->available_instances) < 0) {
> +            virReportError(VIR_ERR_XML_ERROR,
> +                           _("missing number of available instances for "
> +                             "mediated device type '%s'"),
> +                           type->id);
> +            goto cleanup;
> +        }
> +
> +        type->name = virXPathString("string(./name)", ctxt);
> +
> +        if (VIR_APPEND_ELEMENT(pci_dev->mdev_types,
> +                               pci_dev->nmdev_types, type) < 0)
> +            goto cleanup;
> +    }
> +
> +    pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
> +    ret = 0;
> + cleanup:

       VIR_FREE(nodes);

> +    virNodeDevCapMdevTypeFree(type);
> +    ctxt->node = orignode;
> +    return ret;
> +}
> +
> +
> +static int
>  virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
>                                  xmlNodePtr node,
>                                  virNodeDevCapPCIDevPtr pci_dev)
> @@ -1386,6 +1477,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
>      } else if (STREQ(type, "virt_functions") &&
>                 virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) {
>          goto cleanup;
> +    } else if (STREQ(type, "mdev_types") &&
> +        virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) {
> +        goto cleanup;
>      } else {
>          int hdrType = virPCIHeaderTypeFromString(type);
>  
> @@ -1899,6 +1993,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
>              VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
>          VIR_FREE(data->pci_dev.iommuGroupDevices);
>          virPCIEDeviceInfoFree(data->pci_dev.pci_express);
> +        for (i = 0; i < data->pci_dev.nmdev_types; i++)
> +            virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]);
> +        VIR_FREE(data->pci_dev.mdev_types);
>          break;
>      case VIR_NODE_DEV_CAP_USB_DEV:
>          VIR_FREE(data->usb_dev.product_name);
> diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
> index 273d49f76..629f732e6 100644
> --- a/src/conf/node_device_conf.h
> +++ b/src/conf/node_device_conf.h
> @@ -95,6 +95,7 @@ typedef enum {
>      VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION     = (1 << 0),
>      VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION      = (1 << 1),
>      VIR_NODE_DEV_CAP_FLAG_PCIE                      = (1 << 2),
> +    VIR_NODE_DEV_CAP_FLAG_PCI_MDEV                  = (1 << 3),
>  } virNodeDevPCICapFlags;
>  
>  typedef enum {
> @@ -133,6 +134,15 @@ struct _virNodeDevCapSystem {
>      virNodeDevCapSystemFirmware firmware;
>  };
>  
> +typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType;
> +typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr;
> +struct _virNodeDevCapMdevType {
> +    char *id;
> +    char *name;
> +    char *device_api;
> +    unsigned int available_instances;
> +};
> +
>  typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
>  typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
>  struct _virNodeDevCapPCIDev {
> @@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev {
>      int numa_node;
>      virPCIEDeviceInfoPtr pci_express;
>      int hdrType; /* enum virPCIHeaderType or -1 */
> +    virNodeDevCapMdevTypePtr *mdev_types;
> +    size_t nmdev_types;
>  };
>  
>  typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
> @@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
>  void
>  virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
>  
> +void
> +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type);
> +
>  # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
>                  (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM        | \
>                   VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV       | \
> diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c
> index 181d2efe1..fc5f3708f 100644
> --- a/src/conf/virnodedeviceobj.c
> +++ b/src/conf/virnodedeviceobj.c
> @@ -468,6 +468,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj,
>                   VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS))
>                  return true;
>          }
> +
> +        if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) {
> +            if (type == VIR_NODE_DEV_CAP_MDEV_TYPES &&
> +                (cap->data.pci_dev.flags &
> +                 VIR_NODE_DEV_CAP_FLAG_PCI_MDEV))
> +                return true;
> +        }
>      }
>  
>      return false;

Something that I had to do with VPORTS in the past tells me you may need
to also modify virNodeDeviceObjHasCap with similar code.

This only matches for the virNodeDeviceObjListExport API...

Aha! found it, see commit id 'e8fcac8ec'


> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index bbe283529..a2a30bd58 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -668,6 +668,7 @@ virNetDevIPRouteParseXML;
>  
>  
>  # conf/node_device_conf.h
> +virNodeDevCapMdevTypeFree;
>  virNodeDevCapsDefFree;
>  virNodeDevCapTypeFromString;
>  virNodeDevCapTypeToString;
> diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
> index 1ddb55c80..4c75dec2b 100644
> --- a/src/node_device/node_device_udev.c
> +++ b/src/node_device/node_device_udev.c
> @@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor,
>  }
>  
>  
> +static int
> +udevFillMdevType(struct udev_device *device,
> +                 const char *dir,
> +                 virNodeDevCapMdevTypePtr type)
> +{
> +    int ret = -1;
> +    char *attrpath = NULL;
> +
> +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...)                             \
> +    do {                                                                    \
> +        if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0)           \
> +            goto cleanup;                                                   \
> +                                                                            \
> +        if (cb(device, attrpath, __VA_ARGS__) < 0)                          \
> +            goto cleanup;                                                   \
> +                                                                            \
> +        VIR_FREE(attrpath);                                                 \
> +    } while (0)                                                             \
> +
> +    if (VIR_STRDUP(type->id, last_component(dir)) < 0)
> +        goto cleanup;
> +
> +    /* query udev for the attributes under subdirectories using the relative
> +     * path stored in @dir, i.e. 'mdev_supported_types/<type_id>'
> +     */
> +    MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);

Does this imply that name isn't necessarily optional as defined in RNG?
Can '@name' not exist and if it is optional how does that adjust the macro?

> +    MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api);
> +    MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr,
> +                        &type->available_instances, 10);
> +
> +#undef MDEV_GET_SYSFS_ATTR
> +
> +    ret = 0;
> + cleanup:
> +    VIR_FREE(attrpath);
> +    return ret;
> +}
> +


With changes to

 1. fix the Coverity issues
 2. determine whether the virNodeDeviceObjHasCap change is needed
 3. address the optional name

You have my:

Reviewed-by: John Ferlan <jferlan at redhat.com>

While I'm not a fan of 'deviceAPI'

John
> +
> +static int
> +udevPCIGetMdevTypesCap(struct udev_device *device,
> +                       virNodeDevCapPCIDevPtr pcidata)
> +{
> +    int ret = -1;
> +    int dirret = -1;
> +    DIR *dir = NULL;
> +    struct dirent *entry;
> +    char *path = NULL;
> +    char *tmppath = NULL;
> +    virNodeDevCapMdevTypePtr type = NULL;
> +    virNodeDevCapMdevTypePtr *types = NULL;
> +    size_t ntypes = 0;
> +    size_t i;
> +
> +    if (virAsprintf(&path, "%s/mdev_supported_types",
> +                    udev_device_get_syspath(device)) < 0)
> +        return -1;
> +
> +    if ((dirret = virDirOpenIfExists(&dir, path)) < 0)
> +        goto cleanup;
> +
> +    if (dirret == 0) {
> +        ret = 0;
> +        goto cleanup;
> +    }
> +
> +    if (VIR_ALLOC(types) < 0)
> +        goto cleanup;
> +
> +    /* UDEV doesn't report attributes under subdirectories by default but is
> +     * able to query them if the path to the attribute is relative to the
> +     * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's
> +     * base path as udev reports it, but we're interested in attributes under
> +     * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need to
> +     * scan the subdirectories ourselves.
> +     */
> +    while ((dirret = virDirRead(dir, &entry, path)) > 0) {
> +        if (VIR_ALLOC(type) < 0)
> +            goto cleanup;
> +
> +        /* construct the relative mdev type path bit for udev */
> +        if (virAsprintf(&tmppath, "mdev_supported_types/%s", entry->d_name) < 0)
> +            goto cleanup;
> +
> +        if (udevFillMdevType(device, tmppath, type) < 0)
> +            goto cleanup;
> +
> +        if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0)
> +            goto cleanup;
> +
> +        VIR_FREE(tmppath);
> +    }
> +
> +    if (dirret < 0)
> +        goto cleanup;
> +
> +    VIR_STEAL_PTR(pcidata->mdev_types, types);
> +    pcidata->nmdev_types = ntypes;
> +    pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
> +    ntypes = 0;
> +    ret = 0;
> + cleanup:
> +    virNodeDevCapMdevTypeFree(type);
> +    for (i = 0; i < ntypes; i++)
> +        virNodeDevCapMdevTypeFree(types[i]);
> +    VIR_FREE(types);
> +    VIR_FREE(path);
> +    VIR_FREE(tmppath);
> +    VIR_DIR_CLOSE(dir);
> +    return ret;
> +}
> +
> +
>  static int udevProcessPCI(struct udev_device *device,
>                            virNodeDeviceDefPtr def)
>  {
> @@ -400,6 +513,12 @@ static int udevProcessPCI(struct udev_device *device,
>          }
>      }
>  
> +    /* check whether the device is mediated devices framework capable, if so,
> +     * process it
> +     */
> +    if (udevPCIGetMdevTypesCap(device, pci_dev) < 0)
> +        goto cleanup;
> +
>      ret = 0;
>  
>   cleanup:
> diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
> new file mode 100644
> index 000000000..a2d57569a
> --- /dev/null
> +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
> @@ -0,0 +1,32 @@
> +<device>
> +  <name>pci_0000_02_10_7</name>
> +  <parent>pci_0000_00_04_0</parent>
> +  <capability type='pci'>
> +    <domain>0</domain>
> +    <bus>2</bus>
> +    <slot>16</slot>
> +    <function>7</function>
> +    <product id='0x10ca'>82576 Virtual Function</product>
> +    <vendor id='0x8086'>Intel Corporation</vendor>
> +    <capability type='mdev_types'>
> +      <type id='foo1'>
> +        <name>bar1</name>
> +        <deviceAPI>vfio-pci</deviceAPI>
> +        <availableInstances>1</availableInstances>
> +      </type>
> +      <type id='foo2'>
> +        <name>bar2</name>
> +        <deviceAPI>vfio-pci</deviceAPI>
> +        <availableInstances>2</availableInstances>
> +      </type>
> +    </capability>
> +    <iommuGroup number='31'>
> +      <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/>
> +    </iommuGroup>
> +    <numa node='0'/>
> +    <pci-express>
> +      <link validity='cap' port='0' speed='2.5' width='4'/>
> +      <link validity='sta' width='0'/>
> +    </pci-express>
> +  </capability>
> +</device>
> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c
> index f023d8a13..e3a77646c 100644
> --- a/tests/nodedevxml2xmltest.c
> +++ b/tests/nodedevxml2xmltest.c
> @@ -101,6 +101,7 @@ mymain(void)
>      DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all");
>      DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type");
>      DO_TEST("drm_renderD129");
> +    DO_TEST("pci_0000_02_10_7_mdev_types");
>  
>      return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
>  }
> 




More information about the libvir-list mailing list