[libvirt] [PATCH] nodedev: add switchdev to NIC capabilities

John Ferlan jferlan at redhat.com
Wed Aug 16 20:28:21 UTC 2017



On 08/02/2017 06:00 AM, Edan David wrote:
> Adding functionality to libvirt that will allow it query the interface
> for the availability of switchdev Offloading NIC capabilities

Not my area of expertise, but I'll point out some "basic" stuff...

You really should have considered adding some of what you added to :

https://www.redhat.com/archives/libvir-list/2017-August/msg00072.html

into your commit message to describe what you're doing and why

If only one patch, then the details/links or background can go after the
following "---" in order to help the reviewer get some extra details. If
you had multiple patches, then that'd be part of the cover letter


> ---
>  configure.ac                                      |  13 ++
>  docs/formatnode.html.in                           |   1 +
>  src/util/virnetdev.c                              | 204 +++++++++++++++++++++-
>  src/util/virnetdev.h                              |   1 +
>  tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml |   1 +
>  tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml |   1 +
>  6 files changed, 220 insertions(+), 1 deletion(-)
> 

Before sending patches, please be sure to run w/ "make check
syntax-check" - this one fails syntax-check in a few places:


preprocessor_indentation
cppi: src/util/virnetdev.c: line 3128: not properly indented
cppi: src/util/virnetdev.c: line 3135: not properly indented
maint.mk: incorrect preprocessor indentation
cfg.mk:684: recipe for target 'sc_preprocessor_indentation' failed
make: *** [sc_preprocessor_indentation] Error 1



> diff --git a/configure.ac b/configure.ac
> index afacf40..a050b99 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then
>      AC_CHECK_HEADERS([linux/btrfs.h])
>  fi
>  
> +dnl
> +dnl check for kernel headers required by devlink
> +dnl
> +if test "$with_linux" = "yes"; then
> +    AC_CHECK_HEADERS([linux/devlink.h])
> +    AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX, DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME, DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV],
> +                   [AC_DEFINE([HAVE_DECL_DEVLINK],
> +                              [1],
> +                              [whether devlink declarations is available])],
> +                   [],
> +                   [[#include <linux/devlink.h>]])
> +fi
> +
>  dnl Allow perl/python overrides
>  AC_PATH_PROGS([PYTHON], [python2 python])
>  if test -z "$PYTHON"; then
> diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
> index 32451d5..e7b30ea 100644
> --- a/docs/formatnode.html.in
> +++ b/docs/formatnode.html.in
> @@ -227,6 +227,7 @@
>                      <dt><code>rxhash</code></dt><dd>receive-hashing</dd>
>                      <dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
>                      <dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
> +                    <dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd>
>                  </dl>
>                </dd>
>                <dt><code>capability</code></dt>
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index 90b7bee..084fb41 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -59,6 +59,10 @@
>  # include <net/if_dl.h>
>  #endif
>  
> +#if HAVE_DECL_DEVLINK
> +# include <linux/devlink.h>
> +#endif
> +
>  #ifndef IFNAMSIZ
>  # define IFNAMSIZ 16
>  #endif
> @@ -95,6 +99,7 @@ VIR_LOG_INIT("util.netdev");
>      (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
>  #endif
>  
> +

Spurious whitespace

>  typedef enum {
>      VIR_MCAST_TYPE_INDEX_TOKEN,
>      VIR_MCAST_TYPE_NAME_TOKEN,
> @@ -2396,7 +2401,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
>                "ntuple",
>                "rxhash",
>                "rdma",
> -              "txudptnl")
> +              "txudptnl",
> +              "switchdev")
>  
>  #ifdef __linux__
>  int
> @@ -2851,6 +2857,199 @@ int virNetDevGetRxFilter(const char *ifname,
>      return ret;
>  }
>  
> +
> +

Only 2 lines between functions, not 3

> +#if HAVE_DECL_DEVLINK
> +/**
> + * virNetDevPutExtraHeader
> + * reserve and prepare room for an extra header
> + * This function sets to zero the room that is required to put the extra
> + * header after the initial Netlink header. This function also increases
> + * the nlmsg_len field. You have to invoke mnl_nlmsg_put_header() before
> + * you call this function. This function returns a pointer to the extra
> + * header.

This seems like a cut-n-paste from somewhere - it's not fully formed and
certainly what it says to invoke isn't invoked prior to this call!

> + *
> + * @nlh: pointer to Netlink header
> + * @size: size of the extra header that we want to put
> + *
> + * Returns pointer to the start of the extended header
> + */
> +static void *
> +virNetDevPutExtraHeader(struct nlmsghdr *nlh, size_t size)

use multiple lines for multiple args:

e.g.

    virNetDevPutExtraHeader(struct nlmsghdr *nlh,
                            size_t size)

> +{
> +    char *ptr = (char *)nlh + nlh->nlmsg_len;
> +    size_t len = NLMSG_ALIGN(size);

None of those NLMSG_* macros do the trick?

> +    nlh->nlmsg_len += len;
> +    memset(ptr, 0, len);
> +    return ptr;
> +}
> +
> +
> +/**
> + * virNetDevGetFamilyId:
> + * This function supplies the devlink family id
> + *
> + * @family_name: the name of the family to query
> + *
> + * Returns family id or 0 on failure.
> + */
> +static int
> +virNetDevGetFamilyId(const char *family_name)
> +{
> +    struct nl_msg *nl_msg = NULL;
> +    struct nlmsghdr *resp = NULL;
> +    struct genlmsghdr* gmsgh = NULL;
> +    struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
> +    unsigned int recvbuflen;
> +    uint32_t family_id = 0;

uint32_t being returned to an int function.

> +
> +    if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
> +                                      NLM_F_REQUEST | NLM_F_ACK))) {
> +        virReportOOMError();
> +        goto cleanup;
> +    }
> +
> +    gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
> +    if (!gmsgh)
> +        goto cleanup;

if (!(gmsgh = virNet*()))
    goto cleanup;

> +
> +    gmsgh->cmd = CTRL_CMD_GETFAMILY;
> +    gmsgh->version = DEVLINK_GENL_VERSION;
> +
> +    if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0)
> +        goto buffer_too_small;

Only one place to have the goto - just put the error message inline and
goto cleanup;

> +
> +    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
> +        goto cleanup;
> +
> +    if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0)
> +        goto malformed_resp;

same here

> +
> +    if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
> +        goto cleanup;

Is this the case where this isn't really supported?

I wonder if perhaps this function should return -1 on failures, 0 on
unsupported, and some value on supported.  After Moshe Levi added the
rdma and txudptnl code (commit id 'ac3ed2085') there was a bug that was
reported by testing that I fixed via commit id '136f17efd'. There may
have been something else too, but it's been too long to remember!

I guess it comes down to the reason why this would be NULL when
HAVE_DECL_DEVLINK is set and whether you really want things like "virsh
nodedev-list --cap net" to fail in that case.

> +
> +    family_id = *(int *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]);
> +
> + cleanup:
> +    nlmsg_free(nl_msg);
> +    VIR_FREE(resp);
> +    return family_id;
> +
> + malformed_resp:
> +    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                   _("malformed netlink response message"));
> +    goto cleanup;
> +
> + buffer_too_small:
> +    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                   _("allocated netlink buffer is too small"));
> +    goto cleanup;
> +}
> +
> +
> +/**
> + * virNetDevSwitchdevFeature
> + * This function checks for the availability of Switchdev feature
> + * and add it to bitmap
> + *
> + * @ifname: name of the interface
> + * @out: add Switchdev feature if exist to bitmap
> + *
> + * Returns 0 on success, -1 on failure.
> + */
> +static int
> +virNetDevSwitchdevFeature(const char *ifname,
> +                          virBitmapPtr *out)
> +{
> +    struct nl_msg *nl_msg = NULL;
> +    struct nlmsghdr *resp = NULL;
> +    unsigned int recvbuflen;
> +    struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
> +    virPCIDevicePtr pci_device_ptr = NULL;
> +    struct genlmsghdr* gmsgh = NULL;
> +    const char *pci_name;
> +    char *pfname = NULL;
> +    int func_ret_val = -1;
> +    int ret = -1;
> +
> +    int family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME);
> +    if (family_id == 0)
> +        goto cleanup;

    uint32_t family_id;

    if ((family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME)) == 0)
        return -1

of course depends on whether you decide to use -1, 0, & value type
returns, but you get the general gist.

> +
> +    func_ret_val = virNetDevIsVirtualFunction(ifname);
> +    if (func_ret_val == 1) {
> +        if (virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
> +            goto cleanup;
> +    }
> +    else if (func_ret_val < 0)
> +        goto cleanup;

    if ((is_vf = virNetDevIsVirtualFunction(ifname)) < 0)
        return -1

    if (is_vf == 1 && virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
        return -1;

> +
> +    if (!(nl_msg = nlmsg_alloc_simple(family_id,
> +                                      NLM_F_REQUEST | NLM_F_ACK))) {
> +        virReportOOMError();
> +        goto cleanup;

           return -1;

> +    }
> +
> +    pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
> +                              virNetDevGetPCIDevice(ifname);
> +    if (!pci_device_ptr)
> +        goto cleanup;
> +
> +    pci_name = virPCIDeviceGetName(pci_device_ptr);
> +
> +    gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
> +    if (!gmsgh)
> +        goto cleanup;

could just be "if (!(gsmsg = virNetDev*()))"

> +
> +    gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
> +    gmsgh->version = DEVLINK_GENL_VERSION;
> +
> +    if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1, "pci") < 0)
> +        goto buffer_too_small;
> +
> +    if (nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0)
> +        goto buffer_too_small;

Not my preferred style to have multiple goto's like this, but I see it's
generally used in this manner for the various virnetdev*.c

BTW: I think it looks better as something like:

    if ((nla_put() < 0) ||
        (nla_put() < 0)) {
        virReportError();
        goto cleanup;

> +
> +    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
> +                          NETLINK_GENERIC, 0) < 0)
> +        goto cleanup;
> +
> +    if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0)
> +        goto malformed_resp;

For this one, there's no need to have the additional goto as there's
only one place that
> +
> +    if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
> +        *(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
> +        ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
> +    }
> +
> +    ret = 0;
> +
> + cleanup:
> +    nlmsg_free(nl_msg);
> +    virPCIDeviceFree(pci_device_ptr);
> +    VIR_FREE(resp);
> +    return ret;
> +
> + malformed_resp:
> +    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                   _("malformed netlink response message"));
> +    goto cleanup;
> +
> + buffer_too_small:
> +    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                   _("allocated netlink buffer is too small"));
> +    goto cleanup;
> +}
> +# else
> +static int
> +virNetDevSwitchdevFeature(const char *ifname,
> +                          virBitmapPtr *out)

You need to add ATTRIBUTE_UNUSED after each argument like
virNetDevGetEthtoolGFeatures

> +{
> +    return 0;
> +}
> +# endif
> +
> +

Since this is called after virNetDevRDMAFeature it could have gone after
the hunk that supports that - IOW: this next hunk. Not a requirement -
it's one of those type A code flow things ;-)

John

>  #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
>  
>  /**
> @@ -3230,6 +3429,9 @@ virNetDevGetFeatures(const char *ifname,
>      if (virNetDevRDMAFeature(ifname, out) < 0)
>          goto cleanup;
>  
> +    if (virNetDevSwitchdevFeature(ifname, out) < 0)
> +        goto cleanup;
> +
>      ret = 0;
>   cleanup:
>      VIR_FORCE_CLOSE(fd);
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 2e9a9c4..8fd6036 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -112,6 +112,7 @@ typedef enum {
>      VIR_NET_DEV_FEAT_RXHASH,
>      VIR_NET_DEV_FEAT_RDMA,
>      VIR_NET_DEV_FEAT_TXUDPTNL,
> +    VIR_NET_DEV_FEAT_SWITCHDEV,
>      VIR_NET_DEV_FEAT_LAST
>  } virNetDevFeature;
>  
> diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
> index d4c96e8..88252e6 100644
> --- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
> +++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
> @@ -15,6 +15,7 @@
>      <feature name='rxhash'/>
>      <feature name='rdma'/>
>      <feature name='txudptnl'/>
> +    <feature name='switchdev'/>
>      <capability type='80211'/>
>    </capability>
>  </device>
> diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
> index 71bf90e..f77dfcc 100644
> --- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
> +++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
> @@ -15,6 +15,7 @@
>      <feature name='rxhash'/>
>      <feature name='rdma'/>
>      <feature name='txudptnl'/>
> +    <feature name='switchdev'/>
>      <capability type='80203'/>
>    </capability>
>  </device>
> 




More information about the libvir-list mailing list