[libvirt] [PATCH v2 07/10] nodedev: Introduce the mdev capability to a PCI parent device

Erik Skultety eskultet at redhat.com
Thu Apr 20 13:05:57 UTC 2017


The parent device needs to report the generic stuff about the supported
mediated devices types, like device API, available instances, type name,
etc. Therefore this patch introduces a new nested capability element of
type 'mdev' with the resulting XML of the following format:

<device>
...
  <capability type='pci'>
  ...
    <capability type='mdev'>
      <type id='vendor-supplied-id'>
        <description>optional, raw, unstructured resource allocation data
        </description>
        <deviceAPI>vfio-pci</deviceAPI>
        <availableInstances>NUM</availableInstances>
      </type>
      ...
      <type>
      ...
      </type>
    </capability>
  </capability>
...
</device>

Signed-off-by: Erik Skultety <eskultet at redhat.com>
---
 docs/schemas/nodedev.rng                          |  24 ++++
 src/conf/node_device_conf.c                       | 103 +++++++++++++++++
 src/conf/node_device_conf.h                       |  17 +++
 src/libvirt_private.syms                          |   1 +
 src/node_device/node_device_udev.c                | 133 ++++++++++++++++++++++
 tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml |  27 +++++
 6 files changed, 305 insertions(+)
 create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml

diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
index 0f90a73c8..4b5dca777 100644
--- a/docs/schemas/nodedev.rng
+++ b/docs/schemas/nodedev.rng
@@ -205,6 +205,30 @@
     </optional>
 
     <optional>
+      <element name='capability'>
+        <attribute name='type'>
+          <value>mdev</value>
+        </attribute>
+        <element name='type'>
+          <attribute name='id'>
+            <data type='string'/>
+          </attribute>
+          <optional>
+            <element name='name'><text/></element>
+          </optional>
+          <element name='deviceAPI'>
+            <choice>
+              <value>vfio-pci</value>
+            </choice>
+          </element>
+          <element name='availableInstances'>
+            <ref name='unsignedInt'/>
+          </element>
+        </element>
+      </element>
+   </optional>
+
+    <optional>
       <element name='iommuGroup'>
         <attribute name='number'>
           <ref name='unsignedInt'/>
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index fdddc97eb..fe4f1bc60 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -87,6 +87,26 @@ virNodeDevCapsDefParseString(const char *xpath,
 }
 
 
+static void
+virNodeDevCapMdevClear(virNodeDevCapMdevPtr mdev)
+{
+    VIR_FREE(mdev->type);
+    VIR_FREE(mdev->name);
+    VIR_FREE(mdev->device_api);
+}
+
+
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev)
+{
+    if (!mdev)
+        return;
+
+    virNodeDevCapMdevClear(mdev);
+    VIR_FREE(mdev);
+}
+
+
 void
 virNodeDeviceDefFree(virNodeDeviceDefPtr def)
 {
@@ -264,6 +284,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
         virBufferAsprintf(buf, "<capability type='%s'/>\n",
                           virPCIHeaderTypeToString(data->pci_dev.hdrType));
     }
+    if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
+        virBufferAddLit(buf, "<capability type='mdev'>\n");
+        virBufferAdjustIndent(buf, 2);
+        for (i = 0; i < data->pci_dev.nmdevs; i++) {
+            virNodeDevCapMdevPtr mdev = data->pci_dev.mdevs[i];
+            virBufferEscapeString(buf, "<type id='%s'>\n", mdev->type);
+            virBufferAdjustIndent(buf, 2);
+            if (mdev->name)
+                virBufferAsprintf(buf, "<name>%s</name>\n",
+                                  mdev->name);
+            virBufferAsprintf(buf, "<deviceAPI>%s</deviceAPI>\n",
+                              mdev->device_api);
+            virBufferAsprintf(buf,
+                              "<availableInstances>%u</availableInstances>\n",
+                              mdev->available_instances);
+            virBufferAdjustIndent(buf, -2);
+            virBufferAddLit(buf, "</type>\n");
+        }
+        virBufferAdjustIndent(buf, -2);
+        virBufferAddLit(buf, "</capability>\n");
+    }
     if (data->pci_dev.nIommuGroupDevices) {
         virBufferAsprintf(buf, "<iommuGroup number='%d'>\n",
                           data->pci_dev.iommuGroupNumber);
@@ -1358,6 +1399,62 @@ virNodeDevPCICapSRIOVParseXML(xmlXPathContextPtr ctxt,
 
 
 static int
+virNodeDevPCICapMediatedDevParseXML(xmlXPathContextPtr ctxt,
+                                    virNodeDevCapPCIDevPtr pci_dev)
+{
+    int ret = -1;
+    xmlNodePtr orignode = NULL;
+    xmlNodePtr *nodes = NULL;
+    int nmdevs = virXPathNodeSet("./type", ctxt, &nodes);
+    virNodeDevCapMdevPtr mdev = NULL;
+    size_t i;
+
+    orignode = ctxt->node;
+    for (i = 0; i < nmdevs; i++) {
+        ctxt->node = nodes[i];
+
+        if (VIR_ALLOC(mdev) < 0)
+            goto cleanup;
+
+        if (!(mdev->type = virXPathString("string(./@id[1])", ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR, "%s",
+                           _("missing 'id' attribute for mediated device's "
+                             "<type> element"));
+            goto cleanup;
+        }
+
+        if (!(mdev->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing device API for mediated device type '%s'"),
+                           mdev->type);
+            goto cleanup;
+        }
+
+        if (virXPathUInt("number(./availableInstances)", ctxt,
+                         &mdev->available_instances) < 0) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing number of available instances for "
+                             "mediated device type '%s'"),
+                           mdev->type);
+            goto cleanup;
+        }
+
+        mdev->name = virXPathString("string(./name)", ctxt);
+
+        if (VIR_APPEND_ELEMENT(pci_dev->mdevs, pci_dev->nmdevs, mdev) < 0)
+            goto cleanup;
+    }
+
+    pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+    ret = 0;
+ cleanup:
+    virNodeDevCapMdevFree(mdev);
+    ctxt->node = orignode;
+    return ret;
+}
+
+
+static int
 virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
                                 xmlNodePtr node,
                                 virNodeDevCapPCIDevPtr pci_dev)
@@ -1382,6 +1479,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
     if (sriov_cap &&
         virNodeDevPCICapSRIOVParseXML(ctxt, node, pci_dev, sriov_cap) < 0) {
         goto cleanup;
+    } if (STREQ(type, "mdev") &&
+          virNodeDevPCICapMediatedDevParseXML(ctxt, pci_dev)) {
+          goto cleanup;
     } else {
         int hdrType = virPCIHeaderTypeFromString(type);
 
@@ -1894,6 +1994,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
             VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
         VIR_FREE(data->pci_dev.iommuGroupDevices);
         virPCIEDeviceInfoFree(data->pci_dev.pci_express);
+        for (i = 0; i < data->pci_dev.nmdevs; i++)
+            virNodeDevCapMdevFree(data->pci_dev.mdevs[i]);
+        VIR_FREE(data->pci_dev.mdevs);
         break;
     case VIR_NODE_DEV_CAP_USB_DEV:
         VIR_FREE(data->usb_dev.product_name);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index 375f97256..883fa017e 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -94,6 +94,7 @@ typedef enum {
     VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION     = (1 << 0),
     VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION      = (1 << 1),
     VIR_NODE_DEV_CAP_FLAG_PCIE                      = (1 << 2),
+    VIR_NODE_DEV_CAP_FLAG_PCI_MDEV                  = (1 << 3),
 } virNodeDevPCICapFlags;
 
 typedef enum {
@@ -132,6 +133,16 @@ struct _virNodeDevCapSystem {
     virNodeDevCapSystemFirmware firmware;
 };
 
+typedef struct _virNodeDevCapMdev virNodeDevCapMdev;
+typedef virNodeDevCapMdev *virNodeDevCapMdevPtr;
+struct _virNodeDevCapMdev {
+    char *type;
+    char *name;
+    char *device_api;
+    unsigned int available_instances;
+    unsigned int iommuGroupNumber;
+};
+
 typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
 typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
 struct _virNodeDevCapPCIDev {
@@ -155,6 +166,8 @@ struct _virNodeDevCapPCIDev {
     int numa_node;
     virPCIEDeviceInfoPtr pci_express;
     int hdrType; /* enum virPCIHeaderType or -1 */
+    virNodeDevCapMdevPtr *mdevs;
+    size_t nmdevs;
 };
 
 typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
@@ -263,6 +276,7 @@ struct _virNodeDevCapData {
         virNodeDevCapStorage storage;
         virNodeDevCapSCSIGeneric sg;
         virNodeDevCapDRM drm;
+        virNodeDevCapMdev mdev;
     };
 };
 
@@ -339,6 +353,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
 void
 virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
 
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev);
+
 # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
                 (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM        | \
                  VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV       | \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 181e17875..d1e872e60 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -665,6 +665,7 @@ virNetDevIPRouteParseXML;
 
 
 # conf/node_device_conf.h
+virNodeDevCapMdevFree;
 virNodeDevCapsDefFree;
 virNodeDevCapTypeFromString;
 virNodeDevCapTypeToString;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 95c1aee29..79f1537d9 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -314,6 +314,133 @@ static int udevTranslatePCIIds(unsigned int vendor,
 }
 
 
+static int
+udevGetMdevCaps(struct udev_device *device,
+                const char *sysfspath,
+                virNodeDevCapMdevPtr mdev)
+{
+    int ret = -1;
+    char *attrpath = NULL;  /* relative path to the actual sysfs attribute */
+    const char *devpath = NULL;   /* base sysfs path as reported by udev */
+    const char *relpath = NULL;   /* diff between @sysfspath and @devpath */
+    char *tmp = NULL;
+
+#define MDEV_GET_SYSFS_ATTR(attr_name, dir, cb, ...)                        \
+    do {                                                                    \
+        if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0)           \
+            goto cleanup;                                                   \
+                                                                            \
+        if (cb(device, attrpath, __VA_ARGS__) < 0)                          \
+            goto cleanup;                                                   \
+                                                                            \
+        VIR_FREE(attrpath);                                                 \
+    } while (0)                                                             \
+
+    /* UDEV doesn't report attributes under subdirectories by default but is
+     * able to query them if the path to the attribute is relative to the
+     * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's
+     * base path as udev reports it, but we're interested in attributes under
+     * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, let's
+     * strip the common part of the path and let udev chew the relative bit.
+     */
+    devpath = udev_device_get_syspath(device);
+    relpath = sysfspath + strlen(devpath);
+
+    /* When calling from the mdev child device, @sysfspath is a symbolic link
+     * to the actual mdev type (rather than a physical path), so we need to
+     * resolve it in order to get the type's name.
+     */
+    if (virFileResolveLink(sysfspath, &tmp) < 0)
+        goto cleanup;
+
+    if (VIR_STRDUP(mdev->type, last_component(tmp)) < 0)
+        goto cleanup;
+
+    MDEV_GET_SYSFS_ATTR(name, relpath,
+                        udevGetStringSysfsAttr, &mdev->name);
+    MDEV_GET_SYSFS_ATTR(device_api, relpath,
+                        udevGetStringSysfsAttr, &mdev->device_api);
+    MDEV_GET_SYSFS_ATTR(available_instances, relpath,
+                        udevGetUintSysfsAttr, &mdev->available_instances, 10);
+
+#undef MDEV_GET_SYSFS_ATTR
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(attrpath);
+    VIR_FREE(tmp);
+    return ret;
+}
+
+
+static int
+udevPCIGetMdevCaps(struct udev_device *device,
+                   virNodeDevCapPCIDevPtr pcidata)
+{
+    int ret = -1;
+    int direrr = -1;
+    DIR *dir = NULL;
+    struct dirent *entry;
+    char *path = NULL;
+    char *tmppath = NULL;
+    virNodeDevCapMdevPtr mdev = NULL;
+    virNodeDevCapMdevPtr *mdevs = NULL;
+    size_t nmdevs = 0;
+    size_t i;
+
+    if (virAsprintf(&path, "%s/mdev_supported_types",
+                    udev_device_get_syspath(device)) < 0)
+        return -1;
+
+    if ((direrr = virDirOpenIfExists(&dir, path)) < 0)
+        goto cleanup;
+
+    if (direrr == 0) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    if (VIR_ALLOC(mdevs) < 0)
+        goto cleanup;
+
+    /* since udev doesn't provide means to list other than top-level
+     * attributes, we need to scan the subdirectories ourselves
+     */
+    while ((direrr = virDirRead(dir, &entry, path)) > 0) {
+        if (VIR_ALLOC(mdev) < 0)
+            goto cleanup;
+
+        if (virAsprintf(&tmppath, "%s/%s", path, entry->d_name) < 0)
+            goto cleanup;
+
+        if (udevGetMdevCaps(device, tmppath, mdev) < 0)
+            goto cleanup;
+
+        if (VIR_APPEND_ELEMENT(mdevs, nmdevs, mdev) < 0)
+            goto cleanup;
+
+        VIR_FREE(tmppath);
+    }
+
+    if (direrr < 0)
+        goto cleanup;
+
+    VIR_STEAL_PTR(pcidata->mdevs, mdevs);
+    pcidata->nmdevs = nmdevs;
+    nmdevs = 0;
+    ret = 0;
+ cleanup:
+    virNodeDevCapMdevFree(mdev);
+    for (i = 0; i < nmdevs; i++)
+        virNodeDevCapMdevFree(mdevs[i]);
+    VIR_FREE(mdevs);
+    VIR_FREE(path);
+    VIR_FREE(tmppath);
+    VIR_DIR_CLOSE(dir);
+    return ret;
+}
+
+
 static int udevProcessPCI(struct udev_device *device,
                           virNodeDeviceDefPtr def)
 {
@@ -400,6 +527,12 @@ static int udevProcessPCI(struct udev_device *device,
         }
     }
 
+    /* check whether the device is mediated devices framework capable, if so,
+     * process it
+     */
+    if (udevPCIGetMdevCaps(device, pci_dev) < 0)
+        goto cleanup;
+
     ret = 0;
 
  cleanup:
diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
new file mode 100644
index 000000000..b745686d3
--- /dev/null
+++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
@@ -0,0 +1,27 @@
+<device>
+  <name>pci_0000_02_10_7</name>
+  <parent>pci_0000_00_04_0</parent>
+  <capability type='pci'>
+    <domain>0</domain>
+    <bus>2</bus>
+    <slot>16</slot>
+    <function>7</function>
+    <product id='0x10ca'>82576 Virtual Function</product>
+    <vendor id='0x8086'>Intel Corporation</vendor>
+    <capability type='mdev'>
+      <type id='foo'>
+        <name>bar</name>
+        <deviceAPI>vfio-pci</deviceAPI>
+        <availableInstances>1</availableInstances>
+      </type>
+    </capability>
+    <iommuGroup number='31'>
+      <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/>
+    </iommuGroup>
+    <numa node='0'/>
+    <pci-express>
+      <link validity='cap' port='0' speed='2.5' width='4'/>
+      <link validity='sta' width='0'/>
+    </pci-express>
+  </capability>
+</device>
-- 
2.12.2




More information about the libvir-list mailing list