[libvirt] [PATCH] nodedev: add RDMA and tx-udp_tnl-segmentation NIC capabilities

Moshe Levi moshele at mellanox.com
Sun Jul 19 10:19:49 UTC 2015


Hi John,
I responded to some of your comments inline.
Thank you  for reviewing  my patch.  
I just posted V2 of it, hopefully if will be better :) 

Thanks,
	Moshe Levi.


> -----Original Message-----
> From: John Ferlan [mailto:jferlan at redhat.com]
> Sent: Tuesday, July 14, 2015 5:36 PM
> To: Moshe Levi; libvir-list at redhat.com
> Subject: Re: [libvirt] [PATCH] nodedev: add RDMA and tx-udp_tnl-
> segmentation NIC capabilities
> 
> 
> 
> On 06/18/2015 04:45 AM, Moshe Levi wrote:
> > Adding functionality to libvirt that will allow it query the interface
> > for the availability of RDMA and tx-udp_tnl-segmentation Offloading
> > NIC capabilities
> 
> Any chance for a shorter name?  Eg txudptnl  (
Yes will do
> 
> >
> > Here is an example of the feature XML definition:
> >
> > <device>
> > <name>net_eth4_90_e2_ba_5e_a5_45</name>
> >
> <path>/sys/devices/pci0000:00/0000:00:03.0/0000:08:00.1/net/eth4</path>
> >   <parent>pci_0000_08_00_1</parent>
> >   <capability type='net'>
> >     <interface>eth4</interface>
> >     <address>90:e2:ba:5e:a5:45</address>
> >     <link speed='10000' state='up'/>
> >     <feature name='rx'/>
> >     <feature name='tx'/>
> >     <feature name='sg'/>
> >     <feature name='tso'/>
> >     <feature name='gso'/>
> >     <feature name='gro'/>
> >     <feature name='rxvlan'/>
> >     <feature name='txvlan'/>
> >     <feature name='rxhash'/>
> >     <feature name='rdma'/>
> >     <feature name='tx-udp_tnl-segmentation'/>
> >     <capability type='80203'/>
> >   </capability>
> > </device>
> > ---
> >  docs/formatnode.html.in |    2 +
> >  src/conf/device_conf.c  |    4 +-
> >  src/conf/device_conf.h  |    2 +
> >  src/util/virnetdev.c    |   97
> ++++++++++++++++++++++++++++++++++++++--------
> >  src/util/virnetdev.h    |    1 +
> >  5 files changed, 88 insertions(+), 18 deletions(-)
> >
> 
> Any chance to have some sort of test?  Check out commit id 'c9027d8f4'
> for an example when other features were added
Yes will do
> 
> > diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index
> > 3ff1bef..9b32dd1 100644
> > --- a/docs/formatnode.html.in
> > +++ b/docs/formatnode.html.in
> > @@ -199,6 +199,8 @@
> >                      <dt><code>txvlan</code></dt><dd>tx-vlan-offload</dd>
> >                      <dt><code>ntuple</code></dt><dd>ntuple-filters</dd>
> >
> > <dt><code>rxhash</code></dt><dd>receive-hashing</dd>
> > +                    <dt><code>rdma</code></dt><dd>remote-direct-memory-
> access</dd>
> > +
> > + <dt><code>tx-udp_tnl-segmentation</code></dt><dd>tx-udp-tunnel-
> segme
> > + ntation</dd>
> 
> Here's where the shorter name would be used inside the <code>, but keep
> the longer name inside the <dd>
> 
> >                  </dl>
> >                </dd>
> >                <dt><code>capability</code></dt> diff --git
> > a/src/conf/device_conf.c b/src/conf/device_conf.c index
> > 98808e2..8e8d557 100644
> > --- a/src/conf/device_conf.c
> > +++ b/src/conf/device_conf.c
> > @@ -51,7 +51,9 @@ VIR_ENUM_IMPL(virNetDevFeature,
> >                "rxvlan",
> >                "txvlan",
> >                "ntuple",
> > -              "rxhash")
> > +              "rxhash",
> > +              "rdma",
> > +              "tx-udp_tnl-segmentation")
> 
> shorter name
> 
> >
> >  int virDevicePCIAddressIsValid(virDevicePCIAddressPtr addr)  { diff
> > --git a/src/conf/device_conf.h b/src/conf/device_conf.h index
> > 7ea90f6..07298c9 100644
> > --- a/src/conf/device_conf.h
> > +++ b/src/conf/device_conf.h
> > @@ -74,6 +74,8 @@ typedef enum {
> >      VIR_NET_DEV_FEAT_TXVLAN,
> >      VIR_NET_DEV_FEAT_NTUPLE,
> >      VIR_NET_DEV_FEAT_RXHASH,
> > +    VIR_NET_DEV_FEAT_RDMA,
> > +    VIR_NET_DEV_FEAT_TX_UDP_TNL_SEGMENTATION,
> 
> shorter name
> 
> >      VIR_NET_DEV_FEAT_LAST
> >  } virNetDevFeature;
> >
> > diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index
> > e4fcd81..3086616 100644
> > --- a/src/util/virnetdev.c
> > +++ b/src/util/virnetdev.c
> > @@ -87,6 +87,14 @@ VIR_LOG_INIT("util.netdev");  # define
> > VIR_IFF_ALLMULTI 0  #endif
> >
> > +#define RESOURCE_FILE_LEN 4096
> > +#define TX_UDP_TNL 25
> > +#define GFEATURES_SIZE 2
> > +#define FEATURE_WORD(blocks, index, field)  ((blocks)[(index) /
> 32U].field)
> > +#define FEATURE_FIELD_FLAG(index)      (1U << (index) % 32U)
> > +#define FEATURE_BIT_IS_SET(blocks, index, field)        \
> > +    (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
> > +
> >  typedef enum {
> >      VIR_MCAST_TYPE_INDEX_TOKEN,
> >      VIR_MCAST_TYPE_NAME_TOKEN,
> > @@ -2943,6 +2951,58 @@ int virNetDevGetRxFilter(const char *ifname,
> >      return ret;
> >  }
> >
> > +
> > +/**
> > + * virNetDevRDMAFeature
> > + * This function checks for the availability of RDMA feature
> > + * and add it to bitmap
> > + *
> > + * @ifname: name of the interface
> > + * @out: add RDMA feature if exist to bitmap
> > + *
> > + * Returns 0 on success, -1 on failure.
> > + */
> > +static int
> > +virNetDevRDMAFeature(const char *ifname,
> > +                     virBitmapPtr *out) {
> > +    char *eth_devpath = NULL;
> > +    char *ib_devpath = NULL;
> > +    char *eth_res_buf = NULL;
> > +    char *ib_res_buf = NULL;
> > +    struct dirent *dp;
> > +
> > +    DIR *dirp = opendir(SYSFS_INFINIBAND_DIR);
> 
> The extra space should be between dirp and if (!dirp) not between dp and
> dirp...
> 
> Also leak dirp.
> 
> Add an "int ret = -1" and use standard "goto cleanup;" type logic on failures in
> order to clean up anything allocated.
> 
Yes will do
> So the construct/code would be:
> 
>     DIR *dirp = NULL;
> 
>     if (!(dirp = opendir(SYSFS_INFINIBAND_DIR))) {
> 
> > +    if (dirp == NULL) {
> > +        virReportSystemError(errno,
> > +                             _("Failed to opendir path '%s'"),
> > +                             SYSFS_INFINIBAND_DIR);
> > +        return -1;
>            goto cleanup;
> > +    }
> > +
> > +    if (virAsprintf(&eth_devpath, SYSFS_NET_DIR "%s/device/resource",
> > + ifname) < 0)
> 
> eth_devpath would be leaked
> 
> > +        return -1;
> > +    if (!virFileExists(eth_devpath))
> > +        return 0;
> 
>      ret = 0 ???
>      goto cleanup;
> 
> It's a success to not have the file?  Wouldn't that make it difficult for the caller
> to determine if something existed and/or whether the bitmap was updated?
> 
> If that doesn't matter then fine, but still have proper exit syntax
Yes you are right 

> 
> 
> > +    if (virFileReadAll(eth_devpath, RESOURCE_FILE_LEN, &eth_res_buf)
> > + < 0)
> 
> eth_res_buf would be leaked
> 
> > +        return -1;
> 
> goto cleanup;
> 
> > +    while (virDirRead(dirp, &dp, SYSFS_INFINIBAND_DIR) > 0) {
> > +        if (STREQ(dp->d_name, ".") ||
> > +            STREQ(dp->d_name, ".."))
> > +            continue;
> 
> Consider instead:
> 
>     if (dp->d_name[0] == '.')
>         continue;
> 
> > +
> > +        if (virAsprintf(&ib_devpath, SYSFS_INFINIBAND_DIR
> "%s/device/resource", dp->d_name) < 0)
> > +            continue;
> 
> ib_devpath is leaked
> 
> > +        if (virFileReadAll(ib_devpath, RESOURCE_FILE_LEN,
> > + &ib_res_buf) < 0)
> 
> ib_res_buf is leaked multiple times
> 
>         VIR_FREE(ib_devpath);
> 
> > +            continue;
> > +        if  (STREQ(eth_res_buf, ib_res_buf)) {
> 
>               ^
> Extra space
> 
> > +            ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_RDMA));
> > +            break;
> > +        }
>            VIR_FREE(ib_res_buf);
> > +    }
> > +    return 0;
> 
> here you would have:
>     ret = 0;
> 
>  cleanup:
>     closedir(dirp);
>     VIR_FREE(eth_devpath);
>     VIR_FREE(ib_devpath);
>     VIR_FREE(eth_res_buf);
>     VIR_FREE(ib_res_buf);
>     return ret;
> 
> > +}
> > +
> >  #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
> >
> >  /**
> > @@ -2952,12 +3012,10 @@ int virNetDevGetRxFilter(const char *ifname,
> >   * @ifname: name of the interface
> >   * @cmd: reference to an ethtool command structure
> 
> Would this parameter need an adjustment to indicate either command or
> feature structure?
Looking on the code this the correct comment for virNetDevGetRxFilter
* virNetDevGetRxFilter:
 * This function supplies the RX filter list for a given device interface
 *
 * @ifname: Name of the interface
 * @filter: The RX filter list
So there is no change to do here, unless I am missing something.

> 
> >   *
> > - * Returns 0 on success, -1 on failure.
> 
> hrmph - looks like this was wrong ... 0 if not found, 1 on success, and
> -1 on failure...
Updated 
> 
> I'm 50/50 on the switch from int to void... Of course the name of the function
> to me implies a value return
Ok I split it to 3 functions 
virNetDevSendEthtoolIoctl - just send the ioctl command
virNetDevFeatureAvailable - will be same as before
virNetDevGFeatureAvailable - check if GFeature is Available

> 
> >   */
> > -static int
> > -virNetDevFeatureAvailable(const char *ifname, struct ethtool_value
> > *cmd)
> > +static void
> > +virNetDevFeatureAvailable(const char *ifname, void *cmd)
> >  {
> > -    int ret = -1;
> >      int sock = -1;
> >      virIfreq ifr;
> >
> > @@ -2969,8 +3027,7 @@ virNetDevFeatureAvailable(const char *ifname,
> > struct ethtool_value *cmd)
> >
> >      memset(&ifr, 0, sizeof(ifr));
> >      strcpy(ifr.ifr_name, ifname);
> > -    ifr.ifr_data = (void*) cmd;
> > -
> > +    ifr.ifr_data = cmd;
> >      if (ioctl(sock, SIOCETHTOOL, &ifr) != 0) {
> >          switch (errno) {
> >              case EPERM:
> > @@ -2988,12 +3045,9 @@ virNetDevFeatureAvailable(const char *ifname,
> struct ethtool_value *cmd)
> >          }
> >      }
> >
> > -    ret = cmd->data > 0 ? 1: 0;
> >   cleanup:
> >      if (sock)
> >          VIR_FORCE_CLOSE(sock);
> > -
> > -    return ret;
> >  }
> >
> >
> > @@ -3013,7 +3067,7 @@ virNetDevGetFeatures(const char *ifname,  {
> >      size_t i = -1;
> >      struct ethtool_value cmd = { 0 };
> > -
> > +    struct ethtool_gfeatures g_cmd = { 0 };
> >      struct elem{
> >          const int cmd;
> >          const virNetDevFeature feat;
> > @@ -3037,7 +3091,8 @@ virNetDevGetFeatures(const char *ifname,
> >
> >      for (i = 0; i < ARRAY_CARDINALITY(cmds); i++) {
> >          cmd.cmd = cmds[i].cmd;
> > -        if (virNetDevFeatureAvailable(ifname, &cmd))
> > +        virNetDevFeatureAvailable(ifname, &cmd);
> > +        if (cmd.data > 0)
> 
> Could just go with cmd.data unless it could be negative...
> 
> >              ignore_value(virBitmapSetBit(*out, cmds[i].feat));
> >      }
> >
> > @@ -3061,14 +3116,22 @@ virNetDevGetFeatures(const char *ifname,
> >      };
> >
> >      cmd.cmd = ETHTOOL_GFLAGS;
> > -    if (virNetDevFeatureAvailable(ifname, &cmd)) {
> > -        for (j = 0; j < ARRAY_CARDINALITY(flags); j++) {
> > -            if (cmd.data & flags[j].cmd)
> > -                ignore_value(virBitmapSetBit(*out, flags[j].feat));
> > +    virNetDevFeatureAvailable(ifname, &cmd);
> > +        if (cmd.data > 0) {
> > +            for (j = 0; j < ARRAY_CARDINALITY(flags); j++) {
> > +                if (cmd.data & flags[j].cmd)
> > +                    ignore_value(virBitmapSetBit(*out, flags[j].feat));
> > +            }
> 
> There are 4 extra spaces for each of the previous 5 lines...
> 
> Also I suppose you could just use "if (cmd.data)", unless of course it could be
> negative...
> 
> >          }
> > -    }
> > -# endif
> 
> What would happen if some arch didn't have ETHTOOL_GFEATURES? or is
> that assumed due to opening "#if"?
I update the configure to check if ETHTOOL_GFEATURES exist and used 
> 
> #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
> 
> 
> > +    g_cmd.cmd = ETHTOOL_GFEATURES;
> > +    g_cmd.size = GFEATURES_SIZE;
> > +    virNetDevFeatureAvailable(ifname, &g_cmd);
> > +    if FEATURE_BIT_IS_SET(g_cmd.features, TX_UDP_TNL, active)
> 
> This looks odd... should be "if (FEATURE_BIT...))
> 
> > +        ignore_value(virBitmapSetBit(*out,
> > + VIR_NET_DEV_FEAT_TX_UDP_TNL_SEGMENTATION));
> 
> Long name...
> 
> Looking forward to a V2
> 
> John
> 
> >
> > +# endif
> > +    if (virNetDevRDMAFeature(ifname, out))
> > +        return -1;
> >      return 0;
> >  }
> >  #else
> > diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index
> > 190b70e..fff881c 100644
> > --- a/src/util/virnetdev.h
> > +++ b/src/util/virnetdev.h
> > @@ -210,6 +210,7 @@ int virNetDevGetRcvAllMulti(const char *ifname,
> bool *receive)
> >      ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> ATTRIBUTE_RETURN_CHECK;
> >
> >  # define SYSFS_NET_DIR "/sys/class/net/"
> > +# define SYSFS_INFINIBAND_DIR "/sys/class/infiniband/"
> >  int virNetDevSysfsFile(char **pf_sysfs_device_link,
> >                         const char *ifname,
> >                         const char *file)
> >




More information about the libvir-list mailing list