[libvirt] [PATCH] nodedev: add switchdev to NIC capabilities

Edan David edand at mellanox.com
Wed Aug 2 10:00:51 UTC 2017


Adding functionality to libvirt that will allow it query the interface
for the availability of switchdev Offloading NIC capabilities
---
 configure.ac                                      |  13 ++
 docs/formatnode.html.in                           |   1 +
 src/util/virnetdev.c                              | 204 +++++++++++++++++++++-
 src/util/virnetdev.h                              |   1 +
 tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml |   1 +
 tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml |   1 +
 6 files changed, 220 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index afacf40..a050b99 100644
--- a/configure.ac
+++ b/configure.ac
@@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then
     AC_CHECK_HEADERS([linux/btrfs.h])
 fi
 
+dnl
+dnl check for kernel headers required by devlink
+dnl
+if test "$with_linux" = "yes"; then
+    AC_CHECK_HEADERS([linux/devlink.h])
+    AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX, DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME, DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV],
+                   [AC_DEFINE([HAVE_DECL_DEVLINK],
+                              [1],
+                              [whether devlink declarations is available])],
+                   [],
+                   [[#include <linux/devlink.h>]])
+fi
+
 dnl Allow perl/python overrides
 AC_PATH_PROGS([PYTHON], [python2 python])
 if test -z "$PYTHON"; then
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
index 32451d5..e7b30ea 100644
--- a/docs/formatnode.html.in
+++ b/docs/formatnode.html.in
@@ -227,6 +227,7 @@
                     <dt><code>rxhash</code></dt><dd>receive-hashing</dd>
                     <dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
                     <dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
+                    <dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd>
                 </dl>
               </dd>
               <dt><code>capability</code></dt>
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 90b7bee..084fb41 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -59,6 +59,10 @@
 # include <net/if_dl.h>
 #endif
 
+#if HAVE_DECL_DEVLINK
+# include <linux/devlink.h>
+#endif
+
 #ifndef IFNAMSIZ
 # define IFNAMSIZ 16
 #endif
@@ -95,6 +99,7 @@ VIR_LOG_INIT("util.netdev");
     (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
 #endif
 
+
 typedef enum {
     VIR_MCAST_TYPE_INDEX_TOKEN,
     VIR_MCAST_TYPE_NAME_TOKEN,
@@ -2396,7 +2401,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
               "ntuple",
               "rxhash",
               "rdma",
-              "txudptnl")
+              "txudptnl",
+              "switchdev")
 
 #ifdef __linux__
 int
@@ -2851,6 +2857,199 @@ int virNetDevGetRxFilter(const char *ifname,
     return ret;
 }
 
+
+
+#if HAVE_DECL_DEVLINK
+/**
+ * virNetDevPutExtraHeader
+ * reserve and prepare room for an extra header
+ * This function sets to zero the room that is required to put the extra
+ * header after the initial Netlink header. This function also increases
+ * the nlmsg_len field. You have to invoke mnl_nlmsg_put_header() before
+ * you call this function. This function returns a pointer to the extra
+ * header.
+ *
+ * @nlh: pointer to Netlink header
+ * @size: size of the extra header that we want to put
+ *
+ * Returns pointer to the start of the extended header
+ */
+static void *
+virNetDevPutExtraHeader(struct nlmsghdr *nlh, size_t size)
+{
+    char *ptr = (char *)nlh + nlh->nlmsg_len;
+    size_t len = NLMSG_ALIGN(size);
+    nlh->nlmsg_len += len;
+    memset(ptr, 0, len);
+    return ptr;
+}
+
+
+/**
+ * virNetDevGetFamilyId:
+ * This function supplies the devlink family id
+ *
+ * @family_name: the name of the family to query
+ *
+ * Returns family id or 0 on failure.
+ */
+static int
+virNetDevGetFamilyId(const char *family_name)
+{
+    struct nl_msg *nl_msg = NULL;
+    struct nlmsghdr *resp = NULL;
+    struct genlmsghdr* gmsgh = NULL;
+    struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
+    unsigned int recvbuflen;
+    uint32_t family_id = 0;
+
+    if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
+                                      NLM_F_REQUEST | NLM_F_ACK))) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+    if (!gmsgh)
+        goto cleanup;
+
+    gmsgh->cmd = CTRL_CMD_GETFAMILY;
+    gmsgh->version = DEVLINK_GENL_VERSION;
+
+    if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0)
+        goto buffer_too_small;
+
+    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
+        goto cleanup;
+
+    if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0)
+        goto malformed_resp;
+
+    if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
+        goto cleanup;
+
+    family_id = *(int *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]);
+
+ cleanup:
+    nlmsg_free(nl_msg);
+    VIR_FREE(resp);
+    return family_id;
+
+ malformed_resp:
+    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                   _("malformed netlink response message"));
+    goto cleanup;
+
+ buffer_too_small:
+    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                   _("allocated netlink buffer is too small"));
+    goto cleanup;
+}
+
+
+/**
+ * virNetDevSwitchdevFeature
+ * This function checks for the availability of Switchdev feature
+ * and add it to bitmap
+ *
+ * @ifname: name of the interface
+ * @out: add Switchdev feature if exist to bitmap
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+                          virBitmapPtr *out)
+{
+    struct nl_msg *nl_msg = NULL;
+    struct nlmsghdr *resp = NULL;
+    unsigned int recvbuflen;
+    struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
+    virPCIDevicePtr pci_device_ptr = NULL;
+    struct genlmsghdr* gmsgh = NULL;
+    const char *pci_name;
+    char *pfname = NULL;
+    int func_ret_val = -1;
+    int ret = -1;
+
+    int family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME);
+    if (family_id == 0)
+        goto cleanup;
+
+    func_ret_val = virNetDevIsVirtualFunction(ifname);
+    if (func_ret_val == 1) {
+        if (virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
+            goto cleanup;
+    }
+    else if (func_ret_val < 0)
+        goto cleanup;
+
+    if (!(nl_msg = nlmsg_alloc_simple(family_id,
+                                      NLM_F_REQUEST | NLM_F_ACK))) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
+                              virNetDevGetPCIDevice(ifname);
+    if (!pci_device_ptr)
+        goto cleanup;
+
+    pci_name = virPCIDeviceGetName(pci_device_ptr);
+
+    gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+    if (!gmsgh)
+        goto cleanup;
+
+    gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
+    gmsgh->version = DEVLINK_GENL_VERSION;
+
+    if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1, "pci") < 0)
+        goto buffer_too_small;
+
+    if (nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0)
+        goto buffer_too_small;
+
+    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
+                          NETLINK_GENERIC, 0) < 0)
+        goto cleanup;
+
+    if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0)
+        goto malformed_resp;
+
+    if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
+        *(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+        ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
+    }
+
+    ret = 0;
+
+ cleanup:
+    nlmsg_free(nl_msg);
+    virPCIDeviceFree(pci_device_ptr);
+    VIR_FREE(resp);
+    return ret;
+
+ malformed_resp:
+    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                   _("malformed netlink response message"));
+    goto cleanup;
+
+ buffer_too_small:
+    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                   _("allocated netlink buffer is too small"));
+    goto cleanup;
+}
+# else
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+                          virBitmapPtr *out)
+{
+    return 0;
+}
+# endif
+
+
 #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
 
 /**
@@ -3230,6 +3429,9 @@ virNetDevGetFeatures(const char *ifname,
     if (virNetDevRDMAFeature(ifname, out) < 0)
         goto cleanup;
 
+    if (virNetDevSwitchdevFeature(ifname, out) < 0)
+        goto cleanup;
+
     ret = 0;
  cleanup:
     VIR_FORCE_CLOSE(fd);
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 2e9a9c4..8fd6036 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -112,6 +112,7 @@ typedef enum {
     VIR_NET_DEV_FEAT_RXHASH,
     VIR_NET_DEV_FEAT_RDMA,
     VIR_NET_DEV_FEAT_TXUDPTNL,
+    VIR_NET_DEV_FEAT_SWITCHDEV,
     VIR_NET_DEV_FEAT_LAST
 } virNetDevFeature;
 
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
index d4c96e8..88252e6 100644
--- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
+++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
@@ -15,6 +15,7 @@
     <feature name='rxhash'/>
     <feature name='rdma'/>
     <feature name='txudptnl'/>
+    <feature name='switchdev'/>
     <capability type='80211'/>
   </capability>
 </device>
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
index 71bf90e..f77dfcc 100644
--- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
+++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
@@ -15,6 +15,7 @@
     <feature name='rxhash'/>
     <feature name='rdma'/>
     <feature name='txudptnl'/>
+    <feature name='switchdev'/>
     <capability type='80203'/>
   </capability>
 </device>
-- 
2.1.4




More information about the libvir-list mailing list