[PATCH v5 1/2] util: support PCI passthrough net device stats collection

Laine Stump laine at redhat.com
Tue Oct 20 21:31:17 UTC 2020


On 10/15/20 7:21 AM, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
> 
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>   error: internal error: Interface name not provided
> 
> The PCI device(usually SR-IOV virtual function device) is detached

(Actually it is *always* an SR-IOV VF - SR-IOV functionality is required 
to set the MAC address and clan tag of the interface; without that 
capability, it can only be assigned to the guest as a plain <hostdev>, 
and in that case libvirt wouldn't even know that it was a network device.)

> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
> 
> In this patch, libvirt checks net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats gets PF name, then dumps VF stats by
> netlink, compares with VF index and get stats(suggest by Laine,
> original version is implemented by zhenwei which parses VFs info
> of all PFs, compares MAC address until the two MAC addresses match).
> 
> address until the two MAC addresses match.
(you forgot to remove this bit - I'll clean it up before pushing)


> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
> 
> Notice that this feature depends on driver of PF.
> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
> 
> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
> just using defined(__linux__) && WITH_LIBNL is enough.

(I'll also remove these historical artifacts of the review/revision process)

> 
> Thanks to Laine, Daniel<berrange at redhat.com> & DHB for suggestions.
> 
> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
> ---
>   src/libvirt_private.syms |  1 +
>   src/qemu/qemu_driver.c   | 13 ++++++++
>   src/util/virnetdev.c     | 82 ++++++++++++++++++++++++++++++++++++++++++++++--
>   src/util/virnetdev.h     |  4 +++
>   4 files changed, 98 insertions(+), 2 deletions(-)
> 
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 52e9c6313f..ae543589f1 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2584,6 +2584,7 @@ virNetDevSetRootQDisc;
>   virNetDevSetupControl;
>   virNetDevSysfsFile;
>   virNetDevValidateConfig;
> +virNetDevVFInterfaceStats;
>   
>   
>   # util/virnetdevbandwidth.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 825bdd9119..519af71fc1 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -10132,6 +10132,19 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>       if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>           if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
>               goto cleanup;
> +    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> +        virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> +        virPCIDeviceAddressPtr vfAddr;
> +
> +        if (!hostdev) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           "%s", _("hostdev interface missing hostdev data"));
> +            goto cleanup;
> +        }
> +
> +        vfAddr = &hostdev->source.subsys.u.pci.addr;
> +        if (virNetDevVFInterfaceStats(vfAddr, stats) < 0)
> +            goto cleanup;
>       } else {


And extra line below goto cleanup would improve readability

>           if (virNetDevTapInterfaceStats(net->ifname, stats,
>                                          !virDomainNetTypeSharesHostView(net)) < 0)
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index 5c7660dab4..f53e1751b3 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1514,6 +1514,17 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
>                               .maxlen = sizeof(struct ifla_vf_mac) },
>       [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>                               .maxlen = sizeof(struct ifla_vf_vlan) },
> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
> +};
> +
> +
> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
>   };
>   
>   
> @@ -1651,13 +1662,14 @@ virNetDevSetVfConfig(const char *ifname, int vf,
>   
>   static int
>   virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
> -                       int *vlanid)
> +                       int *vlanid, virDomainInterfaceStatsPtr stats)
>   {
>       int rc = -1;
>       struct ifla_vf_mac *vf_mac;
>       struct ifla_vf_vlan *vf_vlan;
>       struct nlattr *tb_vf_info = {NULL, };
>       struct nlattr *tb_vf[IFLA_VF_MAX+1];
> +    struct nlattr *tb_vf_stats[IFLA_VF_STATS_MAX+1];
>       int rem;
>   
>       if (!tb[IFLA_VFINFO_LIST]) {
> @@ -1693,6 +1705,26 @@ virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
>               }
>           }
>   
> +        if (stats && tb_vf[IFLA_VF_STATS] && tb_vf[IFLA_VF_MAC]) {
> +            vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);

Hmm. Looking at this (correct) code made me realize that there has been 
a bug higher up in this function ever since it was first implemented in 
2012 (commit 5095bf06f)! - it has been checking

    if (mac && tb[IFLA_VF_MAC])

instead of

    if (mac && tb_vf[IFLA_VF_MAC])

I'll send a patch for that as soon as I'm finished reviewing and pushing 
your patches.


> +            if (vf_mac && vf_mac->vf == vf)  {
> +                rc = nla_parse_nested(tb_vf_stats, IFLA_VF_STATS_MAX,
> +                                      tb_vf[IFLA_VF_STATS],
> +                                      ifla_vfstats_policy);
> +                if (rc < 0) {
> +                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                   _("error parsing IFLA_VF_STATS"));
> +                     return rc;
> +                }
> +
> +                stats->rx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_BYTES]);
> +                stats->tx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_BYTES]);
> +                stats->rx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_PACKETS]);
> +                stats->tx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_PACKETS]);
> +                rc = 0;
> +            }
> +        }
> +
>           if (rc == 0)
>               break;
>       }
> @@ -1714,7 +1746,43 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac,
>       if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0)
>           return -1;
>   
> -    return virNetDevParseVfConfig(tb, vf, mac, vlanid);
> +    return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL);
> +}
> +
> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @vfAddr: PCI address of a VF
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
> +                          virDomainInterfaceStatsPtr stats)
> +{
> +    g_autofree void *nlData = NULL;
> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> +    g_autofree char *vfSysfsPath = NULL;
> +    g_autofree char *pfname = NULL;
> +    int vf = -1;
> +
> +    if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0)
> +        return -1;
> +
> +    if (!virPCIIsVirtualFunction(vfSysfsPath)) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), vfSysfsPath);
> +       return -1;
> +    }
> +
> +    if (virPCIGetVirtualFunctionInfo(vfSysfsPath, -1, &pfname, &vf) < 0)
> +        return -1;
> +
> +    if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0)
> +        return -1;
> +
> +    return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats);
>   }
>   
>   
> @@ -2330,6 +2398,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
>   }
>   
>   
> +int
> +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr G_GNUC_UNUSED,
> +                          virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
> +{
> +    virReportSystemError(ENOSYS, "%s",
> +                         _("Unable to get VF net device stats on this platform"));
> +    return -1;
> +}
> +
> +
>   #endif /* defined(WITH_LIBNL) */
>   
>   VIR_ENUM_IMPL(virNetDevIfState,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index dfef49938f..53e606c61c 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -316,4 +316,8 @@ int virNetDevSetRootQDisc(const char *ifname,
>                             const char *qdisc)
>       G_GNUC_NO_INLINE;
>   
> +int virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
> +                              virDomainInterfaceStatsPtr stats)
> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> +
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
> 


I'm unable to test on a system that has interface stats, but am assuming 
that you have :-).

This looks fine to me, so:

Reviewed-by: Laine Stump <laine at redhat.com>

and pushed. Thanks for your contribution!




More information about the libvir-list mailing list