[PATCH] util: Add phys_port_name support on virPCIGetNetName

Dmytro Linkin dlinkin at nvidia.com
Mon Aug 31 07:57:09 UTC 2020


+Adrian,Moshe

On Fri, Aug 28, 2020 at 01:53:21PM +0300, Dmytro Linkin wrote:
> Current virPCIGetNetName() logic is to get net device name by checking
> it's phys_port_id, if caller provide it, or by it's index (eg, by it's
> position at sysfs net directory). This approach worked fine up until
> linux kernel version 5.8, where NVIDIA Mellanox driver implemented
> linking of VFs' representors to PCI device in switchdev mode. This mean
> that device's sysfs net directory will hold multiple net devices. Ex.:
> 
> $ ls '/sys/bus/pci/devices/0000:82:00.0/net'
> ens1f0  eth0  eth1
> 
> Most switch devices support phys_port_name instead of phys_port_id, so
> virPCIGetNetName() will try to get PF name by it's index - 0. The
> problem here is that the PF nedev entry may not be the first.
> 
> To fix that, for switch devices, we introduce a new logic to select the
> PF uplink netdev according to the content of phys_port_name. Extend
> virPCIGetNetName() with physPortNameRegex variable to get proper device
> by it's phys_port_name scheme, for ex., "p[0-9]+$" to get PF,
> "pf[0-9]+vf[0-9]+$" to get VF or "p1$" to get exact net device. So now
> virPCIGetNetName() logic work in following sequence:
>  - filter by phys_port_id, if it's provided,
>  or
>  - filter by phys_port_name, if it's regex provided,
>  or
>  - get net device by it's index (position) in sysfs net directory.
> Also, make getting content of iface sysfs files more generic.
> 
> Signed-off-by: Dmytro Linkin <dlinkin at nvidia.com>
> Reviewed-by: Adrian Chiris <adrianc at nvidia.com>
> ---
>  src/hypervisor/virhostdev.c |  2 +-
>  src/util/virnetdev.c        | 74 ++++++++++++++++++++++++++++++++++++---------
>  src/util/virnetdev.h        |  4 +++
>  src/util/virpci.c           | 63 ++++++++++++++++++++++++++++++++++++--
>  src/util/virpci.h           |  6 ++++
>  5 files changed, 130 insertions(+), 19 deletions(-)
> 
> diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c
> index 69102b8..1f5c347 100644
> --- a/src/hypervisor/virhostdev.c
> +++ b/src/hypervisor/virhostdev.c
> @@ -333,7 +333,7 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
>           * type='hostdev'>, and it is only those devices that should
>           * end up calling this function.
>           */
> -        if (virPCIGetNetName(sysfs_path, 0, NULL, linkdev) < 0)
> +        if (virPCIGetNetName(sysfs_path, 0, NULL, NULL, linkdev) < 0)
>              return -1;
>  
>          if (!(*linkdev)) {
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index b42fa86..99e3b35 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1112,6 +1112,29 @@ virNetDevGetPCIDevice(const char *devName)
>  }
>  
>  
> +/* A wrapper to get content of file from ifname SYSFS_NET_DIR
> + */
> +static int
> +virNetDevGetSysfsFileValue(const char *ifname,
> +                           const char *fileName,
> +                           char **sysfsFileData)
> +{
> +    g_autofree char *sysfsFile = NULL;
> +
> +    *sysfsFileData = NULL;
> +
> +    if (virNetDevSysfsFile(&sysfsFile, ifname, fileName) < 0)
> +        return -1;
> +
> +    /* a failure to read just means the driver doesn't support
> +     * <fileName>, so set success now and ignore the return from
> +     * virFileReadAllQuiet().
> +     */
> +
> +    ignore_value(virFileReadAllQuiet(sysfsFile, 1024, sysfsFileData));
> +    return 0;
> +}
> +
>  /**
>   * virNetDevGetPhysPortID:
>   *
> @@ -1130,20 +1153,29 @@ int
>  virNetDevGetPhysPortID(const char *ifname,
>                         char **physPortID)
>  {
> -    g_autofree char *physPortIDFile = NULL;
> -
> -    *physPortID = NULL;
> -
> -    if (virNetDevSysfsFile(&physPortIDFile, ifname, "phys_port_id") < 0)
> -        return -1;
> +    return virNetDevGetSysfsFileValue(ifname, "phys_port_id", physPortID);
> +}
>  
> -    /* a failure to read just means the driver doesn't support
> -     * phys_port_id, so set success now and ignore the return from
> -     * virFileReadAllQuiet().
> -     */
>  
> -    ignore_value(virFileReadAllQuiet(physPortIDFile, 1024, physPortID));
> -    return 0;
> +/**
> + * virNetDevGetPhysPortName:
> + *
> + * @ifname: name of a netdev
> + *
> + * @physPortName: pointer to char* that will receive @ifname's
> + *                phys_port_name from sysfs (null terminated
> + *                string). Could be NULL if @ifname's net driver doesn't
> + *                support phys_port_name (most netdev drivers
> + *                don't). Caller is responsible for freeing the string
> + *                when finished.
> + *
> + * Returns 0 on success or -1 on failure.
> + */
> +int
> +virNetDevGetPhysPortName(const char *ifname,
> +                         char **physPortName)
> +{
> +    return virNetDevGetSysfsFileValue(ifname, "phys_port_name", physPortName);
>  }
>  
>  
> @@ -1200,7 +1232,7 @@ virNetDevGetVirtualFunctions(const char *pfname,
>          }
>  
>          if (virPCIGetNetName(pci_sysfs_device_link, 0,
> -                             pfPhysPortID, &((*vfname)[i])) < 0) {
> +                             pfPhysPortID, NULL, &((*vfname)[i])) < 0) {
>              goto cleanup;
>          }
>  
> @@ -1295,7 +1327,8 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname)
>          return -1;
>  
>      if (virPCIGetNetName(physfn_sysfs_path, 0,
> -                         vfPhysPortID, pfname) < 0) {
> +                         vfPhysPortID,
> +                         VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
>          return -1;
>      }
>  
> @@ -1358,7 +1391,7 @@ virNetDevPFGetVF(const char *pfname, int vf, char **vfname)
>       * isn't bound to a netdev driver, it won't have a netdev name,
>       * and vfname will be NULL).
>       */
> -    return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, vfname);
> +    return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, NULL, vfname);
>  }
>  
>  
> @@ -1403,6 +1436,17 @@ virNetDevGetPhysPortID(const char *ifname G_GNUC_UNUSED,
>  }
>  
>  int
> +virNetDevGetPhysPortName(const char *ifname G_GNUC_UNUSED,
> +                       char **physPortName)
> +{
> +    /* this actually should never be called, and is just here to
> +     * satisfy the linker.
> +     */
> +    *physPortName = NULL;
> +    return 0;
> +}
> +
> +int
>  virNetDevGetVirtualFunctions(const char *pfname G_GNUC_UNUSED,
>                               char ***vfname G_GNUC_UNUSED,
>                               virPCIDeviceAddressPtr **virt_fns G_GNUC_UNUSED,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 55e3948..712421d 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -229,6 +229,10 @@ int virNetDevGetPhysPortID(const char *ifname,
>                             char **physPortID)
>      ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
>      G_GNUC_WARN_UNUSED_RESULT;
> +int virNetDevGetPhysPortName(const char *ifname,
> +                           char **physPortName)
> +    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> +    G_GNUC_WARN_UNUSED_RESULT;
>  
>  int virNetDevGetVirtualFunctions(const char *pfname,
>                                   char ***vfname,
> diff --git a/src/util/virpci.c b/src/util/virpci.c
> index 47c671d..18b3f66 100644
> --- a/src/util/virpci.c
> +++ b/src/util/virpci.c
> @@ -2409,8 +2409,10 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
>   * virPCIGetNetName:
>   * @device_link_sysfs_path: sysfs path to the PCI device
>   * @idx: used to choose which netdev when there are several
> - *       (ignored if physPortID is set)
> + *       (ignored if physPortID or physPortNameRegex is set)
>   * @physPortID: match this string in the netdev's phys_port_id
> + *       (or NULL to ignore and use phys_port_name or idx instead)
> + * @physPortNameRegex: match this regex with netdev's phys_port_name
>   *       (or NULL to ignore and use idx instead)
>   * @netname: used to return the name of the netdev
>   *       (set to NULL (but returns success) if there is no netdev)
> @@ -2421,11 +2423,13 @@ int
>  virPCIGetNetName(const char *device_link_sysfs_path,
>                   size_t idx,
>                   char *physPortID,
> +                 char *physPortNameRegex,
>                   char **netname)
>  {
>      g_autofree char *pcidev_sysfs_net_path = NULL;
>      g_autofree char *firstEntryName = NULL;
>      g_autofree char *thisPhysPortID = NULL;
> +    g_autofree char *thisPhysPortName = NULL;
>      int ret = -1;
>      DIR *dir = NULL;
>      struct dirent *entry = NULL;
> @@ -2466,6 +2470,41 @@ virPCIGetNetName(const char *device_link_sysfs_path,
>  
>                  continue;
>              }
> +        } else if (physPortNameRegex) {
> +            /* Most switch devices use phys_port_name instead of
> +             * phys_port_id.
> +             * NOTE: VFs' representors net devices can be linked to PF's PCI
> +             * device, which mean that there'll be multiple net devices
> +             * instances and to get a proper net device need to match on
> +             * specific regex.
> +             * To get PF netdev, for ex., used following regex:
> +             * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
> +             * or to get exact VF's netdev next regex is used:
> +             * "pf0vf1$"
> +             */
> +            if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
> +                goto cleanup;
> +
> +            if (thisPhysPortName) {
> +                /* if this one doesn't match, keep looking */
> +                if (!virStringMatch(thisPhysPortName, physPortNameRegex)) {
> +                    VIR_FREE(thisPhysPortName);
> +                    /* Save the first entry we find to use as a failsafe
> +                     * in case we fail to match on regex.
> +                     */
> +                    if (!firstEntryName)
> +                        firstEntryName = g_strdup(entry->d_name);
> +
> +                    continue;
> +                }
> +            } else {
> +                /* Save the first entry we find to use as a failsafe in case
> +                 * phys_port_name is not supported.
> +                 */
> +                if (!firstEntryName)
> +                    firstEntryName = g_strdup(entry->d_name);
> +                continue;
> +            }
>          } else {
>              if (i++ < idx)
>                  continue;
> @@ -2494,6 +2533,22 @@ virPCIGetNetName(const char *device_link_sysfs_path,
>                                   "phys_port_id '%s' under PCI device at %s"),
>                                 physPortID, device_link_sysfs_path);
>              }
> +        } else if (physPortNameRegex) {
> +            if (firstEntryName) {
> +                /* We didn't match the provided phys_port_name regex, probably
> +                 * because kernel or NIC driver doesn't support it, so just
> +                 * return first netname we found.
> +                 */
> +                *netname = firstEntryName;
> +                firstEntryName = NULL;
> +                ret = 0;
> +            } else {
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("Could not find network device with "
> +                                 "phys_port_name matching regex '%s' "
> +                                 "under PCI device at %s"),
> +                               physPortNameRegex, device_link_sysfs_path);
> +            }
>          } else {
>              ret = 0; /* no netdev at the given index is *not* an error */
>          }
> @@ -2539,7 +2594,7 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
>       * correct.
>       */
>      if (pfNetDevIdx == -1) {
> -        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
> +        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, NULL, &vfname) < 0)
>              goto cleanup;
>  
>          if (vfname) {
> @@ -2550,7 +2605,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
>      }
>  
>      if (virPCIGetNetName(pf_sysfs_device_path,
> -                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
> +                         pfNetDevIdx, vfPhysPortID,
> +                         VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
>          goto cleanup;
>      }
>  
> @@ -2688,6 +2744,7 @@ int
>  virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
>                   size_t idx G_GNUC_UNUSED,
>                   char *physPortID G_GNUC_UNUSED,
> +                 char *physPortNameScheme G_GNUC_UNUSED,
>                   char **netname G_GNUC_UNUSED)
>  {
>      virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
> diff --git a/src/util/virpci.h b/src/util/virpci.h
> index b3322ba..6ea0873 100644
> --- a/src/util/virpci.h
> +++ b/src/util/virpci.h
> @@ -55,6 +55,11 @@ struct _virZPCIDeviceAddress {
>  
>  #define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
>  
> +/* Represents format of PF's phys_port_name in switchdev mode:
> + * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
> + */
> +# define VIR_PF_PHYS_PORT_NAME_REGEX ((char *)"(p[0-9]+$)|(p[0-9]+s[0-9]+$)")
> +
>  struct _virPCIDeviceAddress {
>      unsigned int domain;
>      unsigned int bus;
> @@ -232,6 +237,7 @@ int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
>  int virPCIGetNetName(const char *device_link_sysfs_path,
>                       size_t idx,
>                       char *physPortID,
> +                     char *physPortNameRegex,
>                       char **netname);
>  
>  int virPCIGetSysfsFile(char *virPCIDeviceName,
> -- 
> 1.8.3.1
> 




More information about the libvir-list mailing list