[PATCH] util: Add phys_port_name support on virPCIGetNetName
Dmytro Linkin
dlinkin at nvidia.com
Mon Aug 31 07:57:09 UTC 2020
+Adrian,Moshe
On Fri, Aug 28, 2020 at 01:53:21PM +0300, Dmytro Linkin wrote:
> Current virPCIGetNetName() logic is to get net device name by checking
> it's phys_port_id, if caller provide it, or by it's index (eg, by it's
> position at sysfs net directory). This approach worked fine up until
> linux kernel version 5.8, where NVIDIA Mellanox driver implemented
> linking of VFs' representors to PCI device in switchdev mode. This mean
> that device's sysfs net directory will hold multiple net devices. Ex.:
>
> $ ls '/sys/bus/pci/devices/0000:82:00.0/net'
> ens1f0 eth0 eth1
>
> Most switch devices support phys_port_name instead of phys_port_id, so
> virPCIGetNetName() will try to get PF name by it's index - 0. The
> problem here is that the PF nedev entry may not be the first.
>
> To fix that, for switch devices, we introduce a new logic to select the
> PF uplink netdev according to the content of phys_port_name. Extend
> virPCIGetNetName() with physPortNameRegex variable to get proper device
> by it's phys_port_name scheme, for ex., "p[0-9]+$" to get PF,
> "pf[0-9]+vf[0-9]+$" to get VF or "p1$" to get exact net device. So now
> virPCIGetNetName() logic work in following sequence:
> - filter by phys_port_id, if it's provided,
> or
> - filter by phys_port_name, if it's regex provided,
> or
> - get net device by it's index (position) in sysfs net directory.
> Also, make getting content of iface sysfs files more generic.
>
> Signed-off-by: Dmytro Linkin <dlinkin at nvidia.com>
> Reviewed-by: Adrian Chiris <adrianc at nvidia.com>
> ---
> src/hypervisor/virhostdev.c | 2 +-
> src/util/virnetdev.c | 74 ++++++++++++++++++++++++++++++++++++---------
> src/util/virnetdev.h | 4 +++
> src/util/virpci.c | 63 ++++++++++++++++++++++++++++++++++++--
> src/util/virpci.h | 6 ++++
> 5 files changed, 130 insertions(+), 19 deletions(-)
>
> diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c
> index 69102b8..1f5c347 100644
> --- a/src/hypervisor/virhostdev.c
> +++ b/src/hypervisor/virhostdev.c
> @@ -333,7 +333,7 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
> * type='hostdev'>, and it is only those devices that should
> * end up calling this function.
> */
> - if (virPCIGetNetName(sysfs_path, 0, NULL, linkdev) < 0)
> + if (virPCIGetNetName(sysfs_path, 0, NULL, NULL, linkdev) < 0)
> return -1;
>
> if (!(*linkdev)) {
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index b42fa86..99e3b35 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1112,6 +1112,29 @@ virNetDevGetPCIDevice(const char *devName)
> }
>
>
> +/* A wrapper to get content of file from ifname SYSFS_NET_DIR
> + */
> +static int
> +virNetDevGetSysfsFileValue(const char *ifname,
> + const char *fileName,
> + char **sysfsFileData)
> +{
> + g_autofree char *sysfsFile = NULL;
> +
> + *sysfsFileData = NULL;
> +
> + if (virNetDevSysfsFile(&sysfsFile, ifname, fileName) < 0)
> + return -1;
> +
> + /* a failure to read just means the driver doesn't support
> + * <fileName>, so set success now and ignore the return from
> + * virFileReadAllQuiet().
> + */
> +
> + ignore_value(virFileReadAllQuiet(sysfsFile, 1024, sysfsFileData));
> + return 0;
> +}
> +
> /**
> * virNetDevGetPhysPortID:
> *
> @@ -1130,20 +1153,29 @@ int
> virNetDevGetPhysPortID(const char *ifname,
> char **physPortID)
> {
> - g_autofree char *physPortIDFile = NULL;
> -
> - *physPortID = NULL;
> -
> - if (virNetDevSysfsFile(&physPortIDFile, ifname, "phys_port_id") < 0)
> - return -1;
> + return virNetDevGetSysfsFileValue(ifname, "phys_port_id", physPortID);
> +}
>
> - /* a failure to read just means the driver doesn't support
> - * phys_port_id, so set success now and ignore the return from
> - * virFileReadAllQuiet().
> - */
>
> - ignore_value(virFileReadAllQuiet(physPortIDFile, 1024, physPortID));
> - return 0;
> +/**
> + * virNetDevGetPhysPortName:
> + *
> + * @ifname: name of a netdev
> + *
> + * @physPortName: pointer to char* that will receive @ifname's
> + * phys_port_name from sysfs (null terminated
> + * string). Could be NULL if @ifname's net driver doesn't
> + * support phys_port_name (most netdev drivers
> + * don't). Caller is responsible for freeing the string
> + * when finished.
> + *
> + * Returns 0 on success or -1 on failure.
> + */
> +int
> +virNetDevGetPhysPortName(const char *ifname,
> + char **physPortName)
> +{
> + return virNetDevGetSysfsFileValue(ifname, "phys_port_name", physPortName);
> }
>
>
> @@ -1200,7 +1232,7 @@ virNetDevGetVirtualFunctions(const char *pfname,
> }
>
> if (virPCIGetNetName(pci_sysfs_device_link, 0,
> - pfPhysPortID, &((*vfname)[i])) < 0) {
> + pfPhysPortID, NULL, &((*vfname)[i])) < 0) {
> goto cleanup;
> }
>
> @@ -1295,7 +1327,8 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname)
> return -1;
>
> if (virPCIGetNetName(physfn_sysfs_path, 0,
> - vfPhysPortID, pfname) < 0) {
> + vfPhysPortID,
> + VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
> return -1;
> }
>
> @@ -1358,7 +1391,7 @@ virNetDevPFGetVF(const char *pfname, int vf, char **vfname)
> * isn't bound to a netdev driver, it won't have a netdev name,
> * and vfname will be NULL).
> */
> - return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, vfname);
> + return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, NULL, vfname);
> }
>
>
> @@ -1403,6 +1436,17 @@ virNetDevGetPhysPortID(const char *ifname G_GNUC_UNUSED,
> }
>
> int
> +virNetDevGetPhysPortName(const char *ifname G_GNUC_UNUSED,
> + char **physPortName)
> +{
> + /* this actually should never be called, and is just here to
> + * satisfy the linker.
> + */
> + *physPortName = NULL;
> + return 0;
> +}
> +
> +int
> virNetDevGetVirtualFunctions(const char *pfname G_GNUC_UNUSED,
> char ***vfname G_GNUC_UNUSED,
> virPCIDeviceAddressPtr **virt_fns G_GNUC_UNUSED,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 55e3948..712421d 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -229,6 +229,10 @@ int virNetDevGetPhysPortID(const char *ifname,
> char **physPortID)
> ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> G_GNUC_WARN_UNUSED_RESULT;
> +int virNetDevGetPhysPortName(const char *ifname,
> + char **physPortName)
> + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> + G_GNUC_WARN_UNUSED_RESULT;
>
> int virNetDevGetVirtualFunctions(const char *pfname,
> char ***vfname,
> diff --git a/src/util/virpci.c b/src/util/virpci.c
> index 47c671d..18b3f66 100644
> --- a/src/util/virpci.c
> +++ b/src/util/virpci.c
> @@ -2409,8 +2409,10 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
> * virPCIGetNetName:
> * @device_link_sysfs_path: sysfs path to the PCI device
> * @idx: used to choose which netdev when there are several
> - * (ignored if physPortID is set)
> + * (ignored if physPortID or physPortNameRegex is set)
> * @physPortID: match this string in the netdev's phys_port_id
> + * (or NULL to ignore and use phys_port_name or idx instead)
> + * @physPortNameRegex: match this regex with netdev's phys_port_name
> * (or NULL to ignore and use idx instead)
> * @netname: used to return the name of the netdev
> * (set to NULL (but returns success) if there is no netdev)
> @@ -2421,11 +2423,13 @@ int
> virPCIGetNetName(const char *device_link_sysfs_path,
> size_t idx,
> char *physPortID,
> + char *physPortNameRegex,
> char **netname)
> {
> g_autofree char *pcidev_sysfs_net_path = NULL;
> g_autofree char *firstEntryName = NULL;
> g_autofree char *thisPhysPortID = NULL;
> + g_autofree char *thisPhysPortName = NULL;
> int ret = -1;
> DIR *dir = NULL;
> struct dirent *entry = NULL;
> @@ -2466,6 +2470,41 @@ virPCIGetNetName(const char *device_link_sysfs_path,
>
> continue;
> }
> + } else if (physPortNameRegex) {
> + /* Most switch devices use phys_port_name instead of
> + * phys_port_id.
> + * NOTE: VFs' representors net devices can be linked to PF's PCI
> + * device, which mean that there'll be multiple net devices
> + * instances and to get a proper net device need to match on
> + * specific regex.
> + * To get PF netdev, for ex., used following regex:
> + * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
> + * or to get exact VF's netdev next regex is used:
> + * "pf0vf1$"
> + */
> + if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
> + goto cleanup;
> +
> + if (thisPhysPortName) {
> + /* if this one doesn't match, keep looking */
> + if (!virStringMatch(thisPhysPortName, physPortNameRegex)) {
> + VIR_FREE(thisPhysPortName);
> + /* Save the first entry we find to use as a failsafe
> + * in case we fail to match on regex.
> + */
> + if (!firstEntryName)
> + firstEntryName = g_strdup(entry->d_name);
> +
> + continue;
> + }
> + } else {
> + /* Save the first entry we find to use as a failsafe in case
> + * phys_port_name is not supported.
> + */
> + if (!firstEntryName)
> + firstEntryName = g_strdup(entry->d_name);
> + continue;
> + }
> } else {
> if (i++ < idx)
> continue;
> @@ -2494,6 +2533,22 @@ virPCIGetNetName(const char *device_link_sysfs_path,
> "phys_port_id '%s' under PCI device at %s"),
> physPortID, device_link_sysfs_path);
> }
> + } else if (physPortNameRegex) {
> + if (firstEntryName) {
> + /* We didn't match the provided phys_port_name regex, probably
> + * because kernel or NIC driver doesn't support it, so just
> + * return first netname we found.
> + */
> + *netname = firstEntryName;
> + firstEntryName = NULL;
> + ret = 0;
> + } else {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Could not find network device with "
> + "phys_port_name matching regex '%s' "
> + "under PCI device at %s"),
> + physPortNameRegex, device_link_sysfs_path);
> + }
> } else {
> ret = 0; /* no netdev at the given index is *not* an error */
> }
> @@ -2539,7 +2594,7 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
> * correct.
> */
> if (pfNetDevIdx == -1) {
> - if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
> + if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, NULL, &vfname) < 0)
> goto cleanup;
>
> if (vfname) {
> @@ -2550,7 +2605,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
> }
>
> if (virPCIGetNetName(pf_sysfs_device_path,
> - pfNetDevIdx, vfPhysPortID, pfname) < 0) {
> + pfNetDevIdx, vfPhysPortID,
> + VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
> goto cleanup;
> }
>
> @@ -2688,6 +2744,7 @@ int
> virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
> size_t idx G_GNUC_UNUSED,
> char *physPortID G_GNUC_UNUSED,
> + char *physPortNameScheme G_GNUC_UNUSED,
> char **netname G_GNUC_UNUSED)
> {
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
> diff --git a/src/util/virpci.h b/src/util/virpci.h
> index b3322ba..6ea0873 100644
> --- a/src/util/virpci.h
> +++ b/src/util/virpci.h
> @@ -55,6 +55,11 @@ struct _virZPCIDeviceAddress {
>
> #define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
>
> +/* Represents format of PF's phys_port_name in switchdev mode:
> + * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
> + */
> +# define VIR_PF_PHYS_PORT_NAME_REGEX ((char *)"(p[0-9]+$)|(p[0-9]+s[0-9]+$)")
> +
> struct _virPCIDeviceAddress {
> unsigned int domain;
> unsigned int bus;
> @@ -232,6 +237,7 @@ int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
> int virPCIGetNetName(const char *device_link_sysfs_path,
> size_t idx,
> char *physPortID,
> + char *physPortNameRegex,
> char **netname);
>
> int virPCIGetSysfsFile(char *virPCIDeviceName,
> --
> 1.8.3.1
>
More information about the libvir-list
mailing list