[PATCH v3] util: support PCI passthrough net device stats collection
Laine Stump
laine at redhat.com
Mon Oct 12 21:20:00 UTC 2020
(After playing around with virPCIDeviceGetVirtualFunctionInfo() for a
few days, I came to a point where I decided it was best to leave its API
as it is (taking a path to the device in sysfs rather than a
virPCIDeviceAddress as the first argument). And *THEN* I took a close
look at virNetDevParseVfStats() and finally realized that it is nearly
identical to virNetDevParseVfConfig(), and that
virNetDevVFInterfaceStats() is just a wrapper around the same code that
is in VirNetDevGetVfConfig() (just searching through all the host
interfaces looking for a VF with matching MAC address.
So I've further changed my mind about the form this patch should take.
Instead of "almost duplicating" so much code, what about this:
0) the changes you've made to the ifla_vf_policy struct are correct and
should remain.
1) Instead of duplicating all that code from virNetDevParseVfConfig()
into a new function (virNetDevParseVfStats()), just expand
virNetDevParseVfConfig() itself with a new arg for stats, and add in the
small bit of new code that retrieves the stats *iff the stats arg is
non-NULL. This way it can still be used for its original purpose, but
also for the new purpose, leaving us less "almost identical" code to
maintain:
static int
virNetDevParseVfConfig(struct nlattr **tb, int32_t vf,
virMacAddrPtr mac,
int *vlanid,
virDomainInterfaceStatsPtr stats) <=== new
...
if (vlanid && tb[IFLA_VF_VLAN]) {
[...]
}
/* new code starts here */
if (stats && tb[IFLA_VF_STATS] && tb[IFLA_VF_MAC]) {
vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);
if (vf_mac && vf_mac->vf == vf) {
ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
t[IFLA_VF_STATS], ifla_vfstats_policy);
if (ret < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("error parsing IFLA_VF_STATS"));
return -1;
}
stats->rx_bytes =
nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
stats->tx_bytes =
nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
stats->rx_packets =
nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
stats->tx_packets =
nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
}
(bonus points for a separate preliminary patch to rename
virNetDevParseVfConfig() to virNetDevParseVFInfo())
2) virNetDevVFInterfaceStats() should take the PCI address of the VF *on
the host* as its first argument, rather than the MAC address. If will
then use that to determine the netdev name of the PF, and the VF# of the
VF, using the pfName to call virNetlinkDumpLink() to get the vfinfo_list
for the correct PF, and then the vf# when calling
virNetDevParseVfConfig() to get the stats from the correct vfinfo entry
in the list, something like this:
int
virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
virDomainInterfaceStatsPtr stats)
{
g_autofree char *vfSysFsPath = NULL;
g_autofree char *pfName = NULL;
int vf = -1;
if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0)
return -1;
if (!virPCIIsVirtualFunction(vfSysfsPath)) {
virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"),
vfSysfsPath);
return -1;
}
if (virPCIGetVirtualFunctionInfo(vsSysfsPath, -1, &pfName, &vf) < 0)
return -1;
/* we know the pfname and vf# - use that to retrieve the correct
vfinfo */
if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0)
return -1;
return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats);
}
... or something like that.
3) When calling virNetDevVFInterfaceStats() (from
qemuDomainInterfaceStats()), send it the PCI address of the device like
this:
[...]
} else if (virDomainNetGetActualType(net) ==
VIR_DOMAIN_NET_TYPE_HOSTDEV) {
virDomainHostdevDef const *hostdev =
virDomainNetGetActualHostdev(net);
if (!hostdev) {
/* some things shouldn't happen. This *definitely* should
never ever be possible */
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("hostdev interface missing hostdev
data"));
goto cleanup;
}
if
virNetDevVFInterfaceStats(&hostdev->source.subsys.u.pci.addr, stats) < 0)
goto cleanup;
...
Does all of that make sense? (Hopefully I haven't skipped over something
crucial).
On 9/30/20 11:06 AM, Laine Stump wrote:
> On 9/24/20 4:18 AM, zhenwei pi wrote:
>> Collect PCI passthrough net device stats from kernel by netlink
>> API.
>>
>> Currently, libvirt can not get PCI passthrough net device stats,
>> run command:
>> #virsh domifstat instance --interface=52:54:00:2d:b2:35
>> error: Failed to get interface stats instance 52:54:00:2d:b2:35
>> error: internal error: Interface name not provided
>>
>> The PCI device(usually SR-IOV virtual function device) is detached
>> while it's used in PCI passthrough mode. And we can not parse this
>> device from /proc/net/dev any more.
>>
>> In this patch, libvirt check net device is VF of not firstly, then
>> query virNetDevVFInterfaceStats(new API).
>> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
>> address until the two MAC addresses match.
>> '#ip -s link show' can get the same result. Instead of parsing the
>> output result, implement this feature by libnl API.
>>
>> Notice that this feature deponds on driver of PF.
>> Test on Mellanox ConnectX-4 Lx, it works well.
>> Also test on Intel Corporation 82599ES, it works, but only get 0.
>> (ip-link command get the same result).
>>
>> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
>> just using defined(__linux__) && WITH_LIBNL is enough.
>>
>> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
>> ---
>> src/libvirt_private.syms | 1 +
>> src/qemu/qemu_driver.c | 3 ++
>> src/util/virnetdev.c | 121
>> +++++++++++++++++++++++++++++++++++++++++++++++
>> src/util/virnetdev.h | 5 ++
>> 4 files changed, 130 insertions(+)
>>
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index bdbe3431b8..bcc40b8d69 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>> virNetDevSetupControl;
>> virNetDevSysfsFile;
>> virNetDevValidateConfig;
>> +virNetDevVFInterfaceStats;
>> # util/virnetdevbandwidth.h
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index ae715c01d7..f554010c40 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>> if (virDomainNetGetActualType(net) ==
>> VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>> if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats)
>> < 0)
>> goto cleanup;
>> + } else if (virDomainNetGetActualType(net) ==
>> VIR_DOMAIN_NET_TYPE_HOSTDEV) {
>> + if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
1) this should be:
>> + goto cleanup;
>> } else {
>> if (virNetDevTapInterfaceStats(net->ifname, stats,
>> !virDomainNetTypeSharesHostView(net)) < 0)
>> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
>> index e1a4cc2bef..3d54d07606 100644
>> --- a/src/util/virnetdev.c
>> +++ b/src/util/virnetdev.c
>> @@ -1489,6 +1489,7 @@ static struct nla_policy
>> ifla_vf_policy[IFLA_VF_MAX+1] = {
>> .maxlen = sizeof(struct ifla_vf_mac) },
>> [IFLA_VF_VLAN] = { .type = NLA_UNSPEC,
>> .maxlen = sizeof(struct ifla_vf_vlan) },
>> + [IFLA_VF_STATS] = { .type = NLA_NESTED },
>> };
>> @@ -2265,6 +2266,116 @@ virNetDevSetNetConfig(const char
>> *linkdev, int vf,
>> return 0;
>> }
>> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
>> + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
>> + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
>> + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
>> + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
>> + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
>> + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
>> +};
>> +
>> +static int
>> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
>> + virDomainInterfaceStatsPtr stats)
>> +{
>> + int ret = -1, len;
>> + struct ifla_vf_mac *vf_lladdr;
>> + struct nlattr *nla, *t[IFLA_VF_MAX+1];
>> + struct nlattr *stb[IFLA_VF_STATS_MAX+1];
>> +
>> + if (tb == NULL || mac == NULL || stats == NULL) {
>> + return -1;
>> + }
>> +
>> + if (!tb[IFLA_VFINFO_LIST])
>> + return -1;
>> +
>> + len = nla_len(tb[IFLA_VFINFO_LIST]);
>> +
>> + for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
>> + nla = nla_next(nla, &len)) {
>> + ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
>> + ifla_vf_policy);
>> + if (ret < 0)
>> + return -1;
>> +
>> + if (t[IFLA_VF_MAC] == NULL) {
>> + continue;
>> + }
>> +
>> + vf_lladdr = nla_data(t[IFLA_VF_MAC]);
>> + if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
>> + continue;
>> + }
>> +
>> + if (t[IFLA_VF_STATS]) {
>> + ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
>> + t[IFLA_VF_STATS],
>> + ifla_vfstats_policy);
>> + if (ret < 0)
>> + return -1;
>> +
>> + stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
>> + stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
>> + stats->rx_packets =
>> nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
>> + stats->tx_packets =
>> nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
>> + }
>> + return 0;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/**
>> + * virNetDevVFInterfaceStats:
>> + * @mac: MAC address of the VF interface
>> + * @stats: returns stats of the VF interface
>> + *
>> + * Get the VF interface from kernel by netlink.
>> + * Returns 0 on success, -1 on failure.
>> + */
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> + virDomainInterfaceStatsPtr stats)
>> +{
>> + int rc = -1;
>> + void *nlData = NULL;
>> + struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
>> + char *sysfsDevicePath = NULL;
>> + DIR *dirp = NULL;
>> + struct dirent *dp;
>> +
>> + if (virDirOpen(&dirp, SYSFS_NET_DIR) < 0)
>> + return -1;
>> +
>> + /* get all PCI net devices, and parse VFs list from netlink API.
>> + * compare MAC address, collect device stats if matching.
>> + */
>
>
> Sorry, I hate to bring this up only on v3 of the patches (In the
> initial version I hadn't looked at implementation details, only at
> build CI, and hadn't gotten back to look at the later versions before
> other people commented on also-macro-level issues), but we shouldn't
> be determining which VF stats of which PF to report by scanning
> through all the PFs until we find a VF with a matching MAC address.
> Aside from being really inefficient (you shouldn't need to get a dump
> of all interfaces, just of the one interface you're interested in),
> also the MAC address of a device isn't unique (although it usually
> is), so you can't be guaranteed this is the correct VF.
>
> Instead we need to start with the PCI address in the
> virDomainHostdevDef associated with the VF (which can be retrieved
> with virDomainNetDefActualHostdev(net)) and then learn the name of the
> PF, and what the VF# is for this VF using
> virPCIGetVirtualFunctionInfo(), (*or even better* - that function
> refactored to be more like virHostdevNetDevice() - it provides exactly
> what you need, but is just located in a file (virhostdev.c) that
> shouldn't be referenced from virnetdev.c).
>
> Once we know the PF device name and which VF, we can get the
> NetlinkDump *only* for that device, and then directly grab the details
> from the particular VFINFO in the list, rather than relying on a match
> of the (unreliable) MAC address.
>
> Since I already have an idea of the change I'd like to see in
> virPCIGetVirtualFunctionInfo(), let me refactor that function (and a
> few surrounding) so that they're more generally useful, and I'll Cc
> the patch for it to you so that you can use the updated function. I'll
> try to do it in the next couple days.
>
>
>> + while (virDirRead(dirp, &dp, SYSFS_NET_DIR) > 0) {
>> + if (virNetDevSysfsFile(&sysfsDevicePath, dp->d_name,
>> "device") < 0)
>> + break;
>> +
>> + if (virNetDevIsPCIDevice(sysfsDevicePath)) {
>> + rc = virNetlinkDumpLink(dp->d_name, -1, &nlData, tb, 0, 0);
>> + if (rc < 0) {
>> + rc = -1;
>> + goto cleanup;
>> + }
>> +
>> + rc = virNetDevParseVfStats(tb, mac, stats);
>> + VIR_FREE(nlData);
>> + if (rc == 0)
>> + goto cleanup;
>> + }
>> + VIR_FREE(sysfsDevicePath);
>> + }
>> +
>> + cleanup:
>> + VIR_FREE(sysfsDevicePath);
>> + VIR_DIR_CLOSE(dirp);
>> + return rc;
>> +}
>> #else /* defined(__linux__) && defined(WITH_LIBNL) &&
>> defined(IFLA_VF_MAX) */
>> @@ -2309,6 +2420,16 @@ virNetDevSetNetConfig(const char *linkdev
>> G_GNUC_UNUSED,
>> }
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
>> + virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
>> +{
>> + virReportSystemError(ENOSYS, "%s",
>> + _("Unable to get VF net device stats on
>> this platform"));
>> + return -1;
>> +}
>> +
>> +
>> #endif /* defined(__linux__) && defined(WITH_LIBNL) &&
>> defined(IFLA_VF_MAX) */
>> VIR_ENUM_IMPL(virNetDevIfState,
>> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
>> index 5f581323ed..ff59d9d341 100644
>> --- a/src/util/virnetdev.h
>> +++ b/src/util/virnetdev.h
>> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>> int virNetDevRunEthernetScript(const char *ifname, const char *script)
>> G_GNUC_NO_INLINE;
>> +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> + virDomainInterfaceStatsPtr stats)
>> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
>> +
>> +
>> G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter,
>> virNetDevRxFilterFree);
>>
>
More information about the libvir-list
mailing list