[PATCH v5 1/2] util: support PCI passthrough net device stats collection
Laine Stump
laine at redhat.com
Tue Oct 20 21:31:17 UTC 2020
On 10/15/20 7:21 AM, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
>
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
> #virsh domifstat instance --interface=52:54:00:2d:b2:35
> error: Failed to get interface stats instance 52:54:00:2d:b2:35
> error: internal error: Interface name not provided
>
> The PCI device(usually SR-IOV virtual function device) is detached
(Actually it is *always* an SR-IOV VF - SR-IOV functionality is required
to set the MAC address and clan tag of the interface; without that
capability, it can only be assigned to the guest as a plain <hostdev>,
and in that case libvirt wouldn't even know that it was a network device.)
> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
>
> In this patch, libvirt checks net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats gets PF name, then dumps VF stats by
> netlink, compares with VF index and get stats(suggest by Laine,
> original version is implemented by zhenwei which parses VFs info
> of all PFs, compares MAC address until the two MAC addresses match).
>
> address until the two MAC addresses match.
(you forgot to remove this bit - I'll clean it up before pushing)
> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
>
> Notice that this feature depends on driver of PF.
> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
>
> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
> just using defined(__linux__) && WITH_LIBNL is enough.
(I'll also remove these historical artifacts of the review/revision process)
>
> Thanks to Laine, Daniel<berrange at redhat.com> & DHB for suggestions.
>
> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
> ---
> src/libvirt_private.syms | 1 +
> src/qemu/qemu_driver.c | 13 ++++++++
> src/util/virnetdev.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--
> src/util/virnetdev.h | 4 +++
> 4 files changed, 98 insertions(+), 2 deletions(-)
>
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 52e9c6313f..ae543589f1 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2584,6 +2584,7 @@ virNetDevSetRootQDisc;
> virNetDevSetupControl;
> virNetDevSysfsFile;
> virNetDevValidateConfig;
> +virNetDevVFInterfaceStats;
>
>
> # util/virnetdevbandwidth.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 825bdd9119..519af71fc1 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -10132,6 +10132,19 @@ qemuDomainInterfaceStats(virDomainPtr dom,
> if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
> if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
> goto cleanup;
> + } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> + virPCIDeviceAddressPtr vfAddr;
> +
> + if (!hostdev) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + "%s", _("hostdev interface missing hostdev data"));
> + goto cleanup;
> + }
> +
> + vfAddr = &hostdev->source.subsys.u.pci.addr;
> + if (virNetDevVFInterfaceStats(vfAddr, stats) < 0)
> + goto cleanup;
> } else {
And extra line below goto cleanup would improve readability
> if (virNetDevTapInterfaceStats(net->ifname, stats,
> !virDomainNetTypeSharesHostView(net)) < 0)
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index 5c7660dab4..f53e1751b3 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1514,6 +1514,17 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
> .maxlen = sizeof(struct ifla_vf_mac) },
> [IFLA_VF_VLAN] = { .type = NLA_UNSPEC,
> .maxlen = sizeof(struct ifla_vf_vlan) },
> + [IFLA_VF_STATS] = { .type = NLA_NESTED },
> +};
> +
> +
> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
> + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
> + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
> + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
> + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
> + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
> + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
> };
>
>
> @@ -1651,13 +1662,14 @@ virNetDevSetVfConfig(const char *ifname, int vf,
>
> static int
> virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
> - int *vlanid)
> + int *vlanid, virDomainInterfaceStatsPtr stats)
> {
> int rc = -1;
> struct ifla_vf_mac *vf_mac;
> struct ifla_vf_vlan *vf_vlan;
> struct nlattr *tb_vf_info = {NULL, };
> struct nlattr *tb_vf[IFLA_VF_MAX+1];
> + struct nlattr *tb_vf_stats[IFLA_VF_STATS_MAX+1];
> int rem;
>
> if (!tb[IFLA_VFINFO_LIST]) {
> @@ -1693,6 +1705,26 @@ virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
> }
> }
>
> + if (stats && tb_vf[IFLA_VF_STATS] && tb_vf[IFLA_VF_MAC]) {
> + vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);
Hmm. Looking at this (correct) code made me realize that there has been
a bug higher up in this function ever since it was first implemented in
2012 (commit 5095bf06f)! - it has been checking
if (mac && tb[IFLA_VF_MAC])
instead of
if (mac && tb_vf[IFLA_VF_MAC])
I'll send a patch for that as soon as I'm finished reviewing and pushing
your patches.
> + if (vf_mac && vf_mac->vf == vf) {
> + rc = nla_parse_nested(tb_vf_stats, IFLA_VF_STATS_MAX,
> + tb_vf[IFLA_VF_STATS],
> + ifla_vfstats_policy);
> + if (rc < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("error parsing IFLA_VF_STATS"));
> + return rc;
> + }
> +
> + stats->rx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_BYTES]);
> + stats->tx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_BYTES]);
> + stats->rx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_PACKETS]);
> + stats->tx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_PACKETS]);
> + rc = 0;
> + }
> + }
> +
> if (rc == 0)
> break;
> }
> @@ -1714,7 +1746,43 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac,
> if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0)
> return -1;
>
> - return virNetDevParseVfConfig(tb, vf, mac, vlanid);
> + return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL);
> +}
> +
> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @vfAddr: PCI address of a VF
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
> + virDomainInterfaceStatsPtr stats)
> +{
> + g_autofree void *nlData = NULL;
> + struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> + g_autofree char *vfSysfsPath = NULL;
> + g_autofree char *pfname = NULL;
> + int vf = -1;
> +
> + if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0)
> + return -1;
> +
> + if (!virPCIIsVirtualFunction(vfSysfsPath)) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), vfSysfsPath);
> + return -1;
> + }
> +
> + if (virPCIGetVirtualFunctionInfo(vfSysfsPath, -1, &pfname, &vf) < 0)
> + return -1;
> +
> + if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0)
> + return -1;
> +
> + return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats);
> }
>
>
> @@ -2330,6 +2398,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
> }
>
>
> +int
> +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr G_GNUC_UNUSED,
> + virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
> +{
> + virReportSystemError(ENOSYS, "%s",
> + _("Unable to get VF net device stats on this platform"));
> + return -1;
> +}
> +
> +
> #endif /* defined(WITH_LIBNL) */
>
> VIR_ENUM_IMPL(virNetDevIfState,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index dfef49938f..53e606c61c 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -316,4 +316,8 @@ int virNetDevSetRootQDisc(const char *ifname,
> const char *qdisc)
> G_GNUC_NO_INLINE;
>
> +int virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
> + virDomainInterfaceStatsPtr stats)
> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> +
> G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
>
I'm unable to test on a system that has interface stats, but am assuming
that you have :-).
This looks fine to me, so:
Reviewed-by: Laine Stump <laine at redhat.com>
and pushed. Thanks for your contribution!
More information about the libvir-list
mailing list