[PATCH v1 09/10] capabilities: Expose NUMA interconnects
Martin Kletzander
mkletzan at redhat.com
Fri Jun 4 12:50:34 UTC 2021
On Mon, May 31, 2021 at 10:36:10AM +0200, Michal Privoznik wrote:
>Links between NUMA nodes can have different latencies and
>bandwidths. This info is newly defined in ACPI 6.2 under
>Heterogeneous Memory Attribute Table (HMAT) table. Linux kernel
>learned how to report these values under sysfs and thus we can
>expose them in our capabilities XML. The sysfs interface is
>documented in kernel's Documentation/admin-guide/mm/numaperf.rst.
>
>Long story short, two nodes can be in initiator-target
>relationship. A node can be initiator if it has a CPU or a device
>that's capable of initiating memory transfer. Therefore a node
>that has just memory can only be target. An initiator-target link
>can then have any combination of {bandwidth, latency} - {access,
>read, write} attribute (6 in total). However, the standard says
>access is applicable iff read and write values are the same.
>Therefore, we really have just four combinations of attributes:
>bandwidth-read, bandwidth-write, latency-read, latency-write.
>
>This is the combination that kernel reports anyway.
>
>Then, under /sys/system/devices/node/nodeX/acccess0/initiators we
>find values for those 4 attributes and also symlinks named
>"nodeN" which then represent initiators to nodeX. For instance:
>
> /sys/system/node/node1/access0/initiators/node0 -> ../../node0
> /sys/system/node/node1/access0/initiators/read_bandwidth
> /sys/system/node/node1/access0/initiators/read_latency
> /sys/system/node/node1/access0/initiators/write_bandwidth
> /sys/system/node/node1/access0/initiators/write_latency
>
>This means that node0 is initiator and node1 is target and values
>of the interconnect can be read.
>
>In theory, there can be separate links to memory side caches too
>(e.g. one link from node X to node Y's main memory, another from
>node X to node Y's L1 cache, another one to L2 cache and so on).
>But sysfs does not express this relationship just yet.
>
>Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1786309
>Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
>---
> docs/schemas/capability.rng | 3 +
> src/conf/capabilities.c | 181 +++++++++++++++++++++++++++++++++++-
> src/conf/capabilities.h | 1 +
> 3 files changed, 184 insertions(+), 1 deletion(-)
>
>diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
>index 7b60676070..8f2d7b75d7 100644
>--- a/src/conf/capabilities.c
>+++ b/src/conf/capabilities.c
>@@ -1735,6 +1743,174 @@ virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps)
> }
>
>
>+static void
>+virCapabilitiesHostInsertHMAT(GArray *interconnects,
>+ int initiator,
>+ int target,
>+ unsigned int read_bandwidth,
>+ unsigned int write_bandwidth,
>+ unsigned int read_latency,
>+ unsigned int write_latency)
>+{
>+ virNumaInterconnect ni;
>+
>+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
>+ initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth};
>+ g_array_append_val(interconnects, ni);
>+
>+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
>+ initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth};
>+ g_array_append_val(interconnects, ni);
>+
>+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
>+ initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency};
>+ g_array_append_val(interconnects, ni);
>+
>+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
>+ initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency};
>+ g_array_append_val(interconnects, ni);
>+}
>+
>+
>+static int
>+virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects,
>+ int node)
>+{
>+ g_autofree char *path = NULL;
>+ g_autofree char *initPath = NULL;
>+ g_autoptr(DIR) dir = NULL;
>+ int direrr = 0;
>+ struct dirent *entry;
>+ unsigned int read_bandwidth;
>+ unsigned int write_bandwidth;
>+ unsigned int read_latency;
>+ unsigned int write_latency;
>+
>+ path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/access0", node);
>+
How come you are not checking the relationships for the other access
classes? Missing or forgotten code change? Or do I misunderstand the
documentation?
>+ if (!virFileExists(path))
>+ return 0;
>+
>+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+ "read_bandwidth",
>+ &read_bandwidth) < 0)
>+ return -1;
>+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+ "write_bandwidth",
>+ &write_bandwidth) < 0)
>+ return -1;
>+
>+ /* Bandwidths are read in MiB but stored in KiB */
>+ read_bandwidth <<= 10;
>+ write_bandwidth <<= 10;
>+
>+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+ "read_latency",
>+ &read_latency) < 0)
>+ return -1;
>+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+ "write_latency",
>+ &write_latency) < 0)
>+ return -1;
>+
>+ initPath = g_strdup_printf("%s/initiators", path);
>+
>+ if (virDirOpen(&dir, initPath) < 0)
>+ return -1;
>+
>+ while ((direrr = virDirRead(dir, &entry, path)) > 0) {
>+ const char *dname = STRSKIP(entry->d_name, "node");
>+ int initNode;
>+
>+ if (!dname)
>+ continue;
>+
>+ if (virStrToLong_i(dname, NULL, 10, &initNode) < 0) {
I do not see the value having an "unused" state, e.g. with `-1`, so I
think it should be unsigned. Similarly below [1]
>+ virReportError(VIR_ERR_INTERNAL_ERROR,
>+ _("unable to parse %s"),
>+ entry->d_name);
>+ return -1;
>+ }
>+
>+ virCapabilitiesHostInsertHMAT(interconnects,
>+ initNode, node,
>+ read_bandwidth,
>+ write_bandwidth,
>+ read_latency,
>+ write_latency);
>+ }
>+
>+ return 0;
>+}
>+
>+
>+static int
>+virCapsHostNUMAInterconnectComparator(const void *a,
>+ const void *b)
>+{
>+ const virNumaInterconnect *aa = a;
>+ const virNumaInterconnect *bb = b;
>+
>+ if (aa->type != bb->type)
>+ return aa->type - bb->type;
>+
>+ if (aa->initiator != bb->initiator)
>+ return aa->initiator - bb->initiator;
>+
>+ if (aa->target != bb->target)
>+ return aa->target - bb->target;
>+
>+ if (aa->cache != bb->cache)
>+ return aa->cache - bb->cache;
>+
>+ if (aa->accessType != bb->accessType)
>+ return aa->accessType - bb->accessType;
>+
>+ return aa->value - bb->value;
>+}
>+
>+
>+static int
>+virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps)
>+{
>+ g_autoptr(DIR) dir = NULL;
>+ int direrr = 0;
>+ struct dirent *entry;
>+ g_autofree char *path = NULL;
>+ g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE, sizeof(virNumaInterconnect));
>+
>+ path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/");
This could just be a:
const char *path = SYSFS_SYSTEM_PATH "/node/";
>+
>+ if (virDirOpenIfExists(&dir, path) < 0)
>+ return -1;
>+
>+ while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
>+ const char *dname = STRSKIP(entry->d_name, "node");
>+ int node;
>+
>+ if (!dname)
>+ continue;
>+
>+ if (virStrToLong_i(dname, NULL, 10, &node) < 0) {
[1] again, unsigned?
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20210604/4c3bfb0a/attachment-0001.sig>
More information about the libvir-list
mailing list