[PATCH v1 09/10] capabilities: Expose NUMA interconnects

Martin Kletzander mkletzan at redhat.com
Fri Jun 4 12:50:34 UTC 2021


On Mon, May 31, 2021 at 10:36:10AM +0200, Michal Privoznik wrote:
>Links between NUMA nodes can have different latencies and
>bandwidths. This info is newly defined in ACPI 6.2 under
>Heterogeneous Memory Attribute Table (HMAT) table. Linux kernel
>learned how to report these values under sysfs and thus we can
>expose them in our capabilities XML. The sysfs interface is
>documented in kernel's Documentation/admin-guide/mm/numaperf.rst.
>
>Long story short, two nodes can be in initiator-target
>relationship. A node can be initiator if it has a CPU or a device
>that's capable of initiating memory transfer. Therefore a node
>that has just memory can only be target. An initiator-target link
>can then have any combination of {bandwidth, latency} - {access,
>read, write} attribute (6 in total). However, the standard says
>access is applicable iff read and write values are the same.
>Therefore, we really have just four combinations of attributes:
>bandwidth-read, bandwidth-write, latency-read, latency-write.
>
>This is the combination that kernel reports anyway.
>
>Then, under /sys/system/devices/node/nodeX/acccess0/initiators we
>find values for those 4 attributes and also symlinks named
>"nodeN" which then represent initiators to nodeX. For instance:
>
>  /sys/system/node/node1/access0/initiators/node0 -> ../../node0
>  /sys/system/node/node1/access0/initiators/read_bandwidth
>  /sys/system/node/node1/access0/initiators/read_latency
>  /sys/system/node/node1/access0/initiators/write_bandwidth
>  /sys/system/node/node1/access0/initiators/write_latency
>
>This means that node0 is initiator and node1 is target and values
>of the interconnect can be read.
>
>In theory, there can be separate links to memory side caches too
>(e.g. one link from node X to node Y's main memory, another from
>node X to node Y's L1 cache, another one to L2 cache and so on).
>But sysfs does not express this relationship just yet.
>
>Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1786309
>Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
>---
> docs/schemas/capability.rng |   3 +
> src/conf/capabilities.c     | 181 +++++++++++++++++++++++++++++++++++-
> src/conf/capabilities.h     |   1 +
> 3 files changed, 184 insertions(+), 1 deletion(-)
>
>diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
>index 7b60676070..8f2d7b75d7 100644
>--- a/src/conf/capabilities.c
>+++ b/src/conf/capabilities.c
>@@ -1735,6 +1743,174 @@ virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps)
> }
>
>
>+static void
>+virCapabilitiesHostInsertHMAT(GArray *interconnects,
>+                              int initiator,
>+                              int target,
>+                              unsigned int read_bandwidth,
>+                              unsigned int write_bandwidth,
>+                              unsigned int read_latency,
>+                              unsigned int write_latency)
>+{
>+    virNumaInterconnect ni;
>+
>+    ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
>+        initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth};
>+    g_array_append_val(interconnects, ni);
>+
>+    ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
>+        initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth};
>+    g_array_append_val(interconnects, ni);
>+
>+    ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
>+        initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency};
>+    g_array_append_val(interconnects, ni);
>+
>+    ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
>+        initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency};
>+    g_array_append_val(interconnects, ni);
>+}
>+
>+
>+static int
>+virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects,
>+                                             int node)
>+{
>+    g_autofree char *path = NULL;
>+    g_autofree char *initPath = NULL;
>+    g_autoptr(DIR) dir = NULL;
>+    int direrr = 0;
>+    struct dirent *entry;
>+    unsigned int read_bandwidth;
>+    unsigned int write_bandwidth;
>+    unsigned int read_latency;
>+    unsigned int write_latency;
>+
>+    path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/access0", node);
>+

How come you are not checking the relationships for the other access
classes?  Missing or forgotten code change?  Or do I misunderstand the
documentation?

>+    if (!virFileExists(path))
>+        return 0;
>+
>+    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+                                            "read_bandwidth",
>+                                            &read_bandwidth) < 0)
>+        return -1;
>+    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+                                            "write_bandwidth",
>+                                            &write_bandwidth) < 0)
>+        return -1;
>+
>+    /* Bandwidths are read in MiB but stored in KiB */
>+    read_bandwidth <<= 10;
>+    write_bandwidth <<= 10;
>+
>+    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+                                            "read_latency",
>+                                            &read_latency) < 0)
>+        return -1;
>+    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
>+                                            "write_latency",
>+                                            &write_latency) < 0)
>+        return -1;
>+
>+    initPath = g_strdup_printf("%s/initiators", path);
>+
>+    if (virDirOpen(&dir, initPath) < 0)
>+        return -1;
>+
>+    while ((direrr = virDirRead(dir, &entry, path)) > 0) {
>+        const char *dname = STRSKIP(entry->d_name, "node");
>+        int initNode;
>+
>+        if (!dname)
>+            continue;
>+
>+        if (virStrToLong_i(dname, NULL, 10, &initNode) < 0) {

I do not see the value having an "unused" state, e.g. with `-1`, so I
think it should be unsigned.  Similarly below [1]

>+            virReportError(VIR_ERR_INTERNAL_ERROR,
>+                           _("unable to parse %s"),
>+                           entry->d_name);
>+            return -1;
>+        }
>+
>+        virCapabilitiesHostInsertHMAT(interconnects,
>+                                      initNode, node,
>+                                      read_bandwidth,
>+                                      write_bandwidth,
>+                                      read_latency,
>+                                      write_latency);
>+    }
>+
>+    return 0;
>+}
>+
>+
>+static int
>+virCapsHostNUMAInterconnectComparator(const void *a,
>+                                      const void *b)
>+{
>+    const virNumaInterconnect *aa = a;
>+    const virNumaInterconnect *bb = b;
>+
>+    if (aa->type != bb->type)
>+        return aa->type - bb->type;
>+
>+    if (aa->initiator != bb->initiator)
>+        return aa->initiator - bb->initiator;
>+
>+    if (aa->target != bb->target)
>+        return aa->target - bb->target;
>+
>+    if (aa->cache != bb->cache)
>+        return aa->cache - bb->cache;
>+
>+    if (aa->accessType != bb->accessType)
>+        return aa->accessType - bb->accessType;
>+
>+    return aa->value - bb->value;
>+}
>+
>+
>+static int
>+virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps)
>+{
>+    g_autoptr(DIR) dir = NULL;
>+    int direrr = 0;
>+    struct dirent *entry;
>+    g_autofree char *path = NULL;
>+    g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE, sizeof(virNumaInterconnect));
>+
>+    path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/");

This could just be a:

     const char *path = SYSFS_SYSTEM_PATH "/node/";

>+
>+    if (virDirOpenIfExists(&dir, path) < 0)
>+        return -1;
>+
>+    while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
>+        const char *dname = STRSKIP(entry->d_name, "node");
>+        int node;
>+
>+        if (!dname)
>+            continue;
>+
>+        if (virStrToLong_i(dname, NULL, 10, &node) < 0) {

[1] again, unsigned?
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20210604/4c3bfb0a/attachment-0001.sig>


More information about the libvir-list mailing list