[Crash-utility] [PATCH RFC] Add "kmem -r" to display accumulated slab statistics like /proc/slabinfo

Kazuhito Hagio k-hagio at ab.jp.nec.com
Thu Aug 23 20:11:22 UTC 2018


Nowadays, the "kmem -s" output can become very long vertically too,
due to the memcg kmem caches.  It looks like the longer a system has
run, the longer it becomes.

  crash> kmem -s | wc -l
  19855

On the other hand, since /proc/slabinfo accumulates the values of
each slab_root_caches and its children, it is still relatively short.
And I think there are many cases where support folks want to see the
accumulated values like /proc/slabinfo from a vmcore, in order to
grasp the overview of slab activity quickly.

We can use something like the attached script to accumulate them,
but I believe it would be more useful to implement it in crash.

This patch introduces the "kmem -r" option to imitate /proc/slabinfo,
but it is limited to CONFIG_SLUB for now.

I tested this patch with the kmem-s2r.awk script:

  crash> kmem -s | awk -f kmem-s2r.awk > kmem-s2r.txt
  crash> kmem -r > kmem-r.txt

  # diff -u kmem-s2r.txt kmem-r.txt

Supported:
  crash> kmem -r
  crash> kmem -r list
  crash> kmem -r <slab name>

Signed-off-by: Kazuhito Hagio <k-hagio at ab.jp.nec.com>
---
 defs.h    |   5 ++
 help.c    |  10 +--
 memory.c  | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 symbols.c |   9 +++
 4 files changed, 220 insertions(+), 23 deletions(-)

diff --git a/defs.h b/defs.h
index 6fdb478..8687ff1 100644
--- a/defs.h
+++ b/defs.h
@@ -2032,6 +2032,10 @@ struct offset_table {                    /* stash of commonly-used offsets */
 	long bpf_prog_aux_user;
 	long user_struct_uid;
 	long idr_cur;
+	long kmem_cache_memcg_params;
+	long memcg_cache_params___root_caches_node;
+	long memcg_cache_params_children;
+	long memcg_cache_params_children_node;
 };
 
 struct size_table {         /* stash of commonly-used sizes */
@@ -2438,6 +2442,7 @@ struct vm_table {                /* kernel VM-related data */
 #define PAGEFLAGS             (0x4000000)
 #define SLAB_OVERLOAD_PAGE    (0x8000000)
 #define SLAB_CPU_CACHE       (0x10000000)
+#define SLAB_ROOT_CACHES     (0x20000000)
 
 #define IS_FLATMEM()		(vt->flags & FLATMEM)
 #define IS_DISCONTIGMEM()	(vt->flags & DISCONTIGMEM)
diff --git a/help.c b/help.c
index aeeb056..ee8b999 100644
--- a/help.c
+++ b/help.c
@@ -6448,7 +6448,7 @@ char *help_kmem[] = {
 "kmem",
 "kernel memory",
 "[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
-"       [[-s|-S] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
+"       [[-s|-S|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
 "  This command displays information about the use of kernel memory.\n",
 "        -f  displays the contents of the system free memory headers.",
 "            also verifies that the page count equals nr_free_pages.",
@@ -6490,10 +6490,12 @@ char *help_kmem[] = {
 "            slab data for each per-cpu slab is displayed, along with the",
 "            address of each kmem_cache_node, its count of full and partial",
 "            slabs, and a list of all tracked slabs.",
-"      slab  when used with -s or -S, limits the command to only the slab cache",
-"            of name \"slab\".  If the slab argument is \"list\", then",
+"        -r  displays accumulated kmalloc() slab data of each slab_root_caches",
+"            and its children.  Available only if CONFIG_SLUB for now.",
+"      slab  when used with -s, -S or -r, limits the command to only the slab",
+"            cache of name \"slab\".  If the slab argument is \"list\", then",
 "            all slab cache names and addresses are listed.",
-"   -I slab  when used with -s or -S, one or more slab cache names in a",
+"   -I slab  when used with -s, -S or -r, one or more slab cache names in a",
 "            comma-separated list may be specified as slab caches to ignore.",
 "        -g  displays the enumerator value of all bits in the page structure's",
 "            \"flags\" field.",
diff --git a/memory.c b/memory.c
index e02ba68..1501b21 100644
--- a/memory.c
+++ b/memory.c
@@ -167,12 +167,12 @@ static int kmem_cache_downsize(void);
 static int ignore_cache(struct meminfo *, char *);
 static char *is_kmem_cache_addr(ulong, char *);
 static char *is_kmem_cache_addr_common(ulong, char *);
-static void kmem_cache_list(void);
+static void kmem_cache_list(struct meminfo *);
 static void dump_kmem_cache(struct meminfo *);
 static void dump_kmem_cache_percpu_v1(struct meminfo *);
 static void dump_kmem_cache_percpu_v2(struct meminfo *);
 static void dump_kmem_cache_slub(struct meminfo *);
-static void kmem_cache_list_common(void);
+static void kmem_cache_list_common(struct meminfo *);
 static ulong get_cpu_slab_ptr(struct meminfo *, int, ulong *);
 static unsigned int oo_order(ulong);
 static unsigned int oo_objects(ulong);
@@ -276,6 +276,8 @@ static int generic_read_dumpfile(ulonglong, void *, long, char *, ulong);
 static int generic_write_dumpfile(ulonglong, void *, long, char *, ulong);
 static int page_to_nid(ulong);
 static int get_kmem_cache_list(ulong **);
+static int get_kmem_cache_root_list(ulong **);
+static int get_kmem_cache_child_list(ulong **, ulong);
 static int get_kmem_cache_slub_data(long, struct meminfo *);
 static ulong compound_head(ulong);
 static long count_partial(ulong, struct meminfo *, ulong *);
@@ -815,6 +817,23 @@ vm_init(void)
 			"kmem_slab_s", "s_magic");
 	}
 
+	if (kernel_symbol_exists("slab_root_caches")) {
+		MEMBER_OFFSET_INIT(kmem_cache_memcg_params,
+			"kmem_cache", "memcg_params");
+		MEMBER_OFFSET_INIT(memcg_cache_params___root_caches_node,
+			"memcg_cache_params", "__root_caches_node");
+		MEMBER_OFFSET_INIT(memcg_cache_params_children,
+			"memcg_cache_params", "children");
+		MEMBER_OFFSET_INIT(memcg_cache_params_children_node,
+			"memcg_cache_params", "children_node");
+
+		if (VALID_MEMBER(kmem_cache_memcg_params)
+		    && VALID_MEMBER(memcg_cache_params___root_caches_node)
+		    && VALID_MEMBER(memcg_cache_params_children)
+		    && VALID_MEMBER(memcg_cache_params_children_node))
+			vt->flags |= SLAB_ROOT_CACHES;
+	}
+
 	if (!kt->kernel_NR_CPUS) {
 		if (enumerator_value("WORK_CPU_UNBOUND", (long *)&value1))
 			kt->kernel_NR_CPUS = (int)value1;
@@ -4713,6 +4732,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 #define SLAB_OVERLOAD_PAGE_PTR (ADDRESS_SPECIFIED << 24)
 #define SLAB_BITFIELD          (ADDRESS_SPECIFIED << 25)
 #define SLAB_GATHER_FAILURE    (ADDRESS_SPECIFIED << 26)
+#define GET_SLAB_ROOT_CACHES   (ADDRESS_SPECIFIED << 27)
 
 #define GET_ALL \
 	(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -4724,6 +4744,7 @@ cmd_kmem(void)
 	int c;
 	int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag; 
 	int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
+	int rflag;
 	struct meminfo meminfo;
 	ulonglong value[MAXARGS];
 	char buf[BUFSIZE];
@@ -4733,13 +4754,13 @@ cmd_kmem(void)
 	spec_addr = 0;
         sflag =	Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0;
 	vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
-	gflag = hflag = 0;
+	gflag = hflag = rflag = 0;
 	escape = FALSE;
 	BZERO(&meminfo, sizeof(struct meminfo));
 	BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
 	pc->curcmd_flags &= ~HEADER_PRINTED;
 
-        while ((c = getopt(argcnt, args, "gI:sSFfm:pvczCinl:L:PVoh")) != EOF) {
+        while ((c = getopt(argcnt, args, "gI:sSrFfm:pvczCinl:L:PVoh")) != EOF) {
                 switch(c)
 		{
 		case 'V':
@@ -4775,11 +4796,15 @@ cmd_kmem(void)
 			break;
 
 		case 's':
-			sflag = 1; Sflag = 0;
+			sflag = 1; Sflag = rflag = 0;
 			break;
 
 		case 'S':
-			Sflag = 1; sflag = 0;
+			Sflag = 1; sflag = rflag = 0;
+			break;
+
+		case 'r':
+			rflag = 1; sflag = Sflag = 0;
 			break;
 
 		case 'F':
@@ -4859,12 +4884,13 @@ cmd_kmem(void)
 		cmd_usage(pc->curcmd, SYNOPSIS);
 
         if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
-            vflag + Cflag + cflag + iflag + lflag + Lflag + gflag + hflag) > 1) {
+            vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
+            hflag + rflag) > 1) {
 		error(INFO, "only one flag allowed!\n");
 		cmd_usage(pc->curcmd, SYNOPSIS);
 	} 
 
-	if (sflag || Sflag || !(vt->flags & KMEM_CACHE_INIT))
+	if (sflag || Sflag || rflag || !(vt->flags & KMEM_CACHE_INIT))
 		kmem_cache_init();
 
 	while (args[optind]) {
@@ -4881,7 +4907,7 @@ cmd_kmem(void)
 				escape = TRUE;
 			} else
 				meminfo.reqname = args[optind];
-                        if (!sflag && !Sflag)
+                        if (!sflag && !Sflag && !rflag)
                                 cmd_usage(pc->curcmd, SYNOPSIS);
                 }
 
@@ -4994,7 +5020,7 @@ cmd_kmem(void)
                  * no value arguments allowed! 
                  */
                 if (zflag || nflag || iflag || Fflag || Cflag || Lflag || 
-		    Vflag || oflag || hflag) {
+		    Vflag || oflag || hflag || rflag) {
 			error(INFO, 
 			    "no address arguments allowed with this option\n");
                         cmd_usage(pc->curcmd, SYNOPSIS);
@@ -5030,9 +5056,17 @@ cmd_kmem(void)
 	if (hflag == 1) 
 		dump_hstates();
 
-	if (sflag == 1) {
+	if (sflag == 1 || rflag == 1) {
+		if (rflag) {
+			if (!((vt->flags & KMALLOC_SLUB)
+			    && (vt->flags & SLAB_ROOT_CACHES)))
+				error(FATAL,
+			    "-r option doesn't support this kernel\n");
+
+			meminfo.flags = GET_SLAB_ROOT_CACHES;
+		}
 		if (!escape && STREQ(meminfo.reqname, "list"))
-			kmem_cache_list();
+			kmem_cache_list(&meminfo);
                 else if (vt->flags & KMEM_CACHE_UNAVAIL)
                      	error(FATAL, 
 			    "kmem cache slab subsystem not available\n");
@@ -5042,7 +5076,7 @@ cmd_kmem(void)
 
 	if (Sflag == 1) {
 		if (STREQ(meminfo.reqname, "list"))
-			kmem_cache_list();
+			kmem_cache_list(&meminfo);
                 else if (vt->flags & KMEM_CACHE_UNAVAIL)
                      	error(FATAL, 
 			    "kmem cache slab subsystem not available\n");
@@ -5092,7 +5126,8 @@ cmd_kmem(void)
 
 	if (!(sflag + Sflag + pflag + fflag + Fflag + vflag + 
 	      Vflag + zflag + oflag + cflag + Cflag + iflag + 
-	      nflag + lflag + Lflag + gflag + hflag + meminfo.calls))
+	      nflag + lflag + Lflag + gflag + hflag + rflag +
+	      meminfo.calls))
 		cmd_usage(pc->curcmd, SYNOPSIS);
 
 }
@@ -9117,7 +9152,7 @@ is_kmem_cache_addr(ulong vaddr, char *kbuf)
  *  dumps all slab cache names and their addresses.
  */
 static void
-kmem_cache_list(void)
+kmem_cache_list(struct meminfo *mi)
 {
         ulong cache, cache_cache, name;
 	long next_offset, name_offset;
@@ -9132,7 +9167,7 @@ kmem_cache_list(void)
 	}
 
 	if (vt->flags & (KMALLOC_SLUB|KMALLOC_COMMON)) {
-		kmem_cache_list_common();
+		kmem_cache_list_common(mi);
 		return;		
 	}
 
@@ -13564,6 +13599,8 @@ dump_vm_table(int verbose)
 		fprintf(fp, "%sSLAB_OVERLOAD_PAGE", others++ ? "|" : "");\
 	if (vt->flags & SLAB_CPU_CACHE)
 		fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
+	if (vt->flags & SLAB_ROOT_CACHES)
+		fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
 	if (vt->flags & USE_VMAP_AREA)
 		fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
 	if (vt->flags & CONFIG_NUMA)
@@ -18044,14 +18081,17 @@ kmem_cache_init_slub(void)
 }
 
 static void 
-kmem_cache_list_common(void)
+kmem_cache_list_common(struct meminfo *mi)
 {
         int i, cnt;
         ulong *cache_list;
         ulong name;
 	char buf[BUFSIZE];
 
-	cnt = get_kmem_cache_list(&cache_list);
+	if (mi->flags & GET_SLAB_ROOT_CACHES)
+		cnt = get_kmem_cache_root_list(&cache_list);
+	else
+		cnt = get_kmem_cache_list(&cache_list);
 
 	for (i = 0; i < cnt; i++) {
 		fprintf(fp, "%lx ", cache_list[i]);
@@ -18087,7 +18127,11 @@ dump_kmem_cache_slub(struct meminfo *si)
 	}
 
 	order = objects = 0;
-	si->cache_count = get_kmem_cache_list(&si->cache_list);
+	if (si->flags & GET_SLAB_ROOT_CACHES)
+		si->cache_count = get_kmem_cache_root_list(&si->cache_list);
+	else
+		si->cache_count = get_kmem_cache_list(&si->cache_list);
+
 	si->cache_buf = GETBUF(SIZE(kmem_cache));
 
 	if (VALID_MEMBER(page_objects) &&
@@ -18168,6 +18212,79 @@ dump_kmem_cache_slub(struct meminfo *si)
 		    !get_kmem_cache_slub_data(GET_SLUB_OBJECTS, si))
 			si->flags |= SLAB_GATHER_FAILURE;
 
+		/* accumulate children's slabinfo */
+		if (si->flags & GET_SLAB_ROOT_CACHES) {
+			struct meminfo *mi;
+			int j;
+			char buf2[BUFSIZE];
+
+			mi = (struct meminfo *)GETBUF(sizeof(struct meminfo));
+			memcpy(mi, si, sizeof(struct meminfo));
+
+			mi->cache_count = get_kmem_cache_child_list(&mi->cache_list,
+						si->cache_list[i]);
+
+			if (!mi->cache_count)
+				goto no_children;
+
+			mi->cache_buf = GETBUF(SIZE(kmem_cache));
+
+			for (j = 0; j < mi->cache_count; j++) {
+				BZERO(mi->cache_buf, SIZE(kmem_cache));
+				if (!readmem(mi->cache_list[j], KVADDR, mi->cache_buf,
+				    SIZE(kmem_cache), "kmem_cache buffer",
+				    RETURN_ON_ERROR|RETURN_PARTIAL))
+					continue;
+
+				name = ULONG(mi->cache_buf + OFFSET(kmem_cache_name));
+				if (!read_string(name, buf2, BUFSIZE-1))
+					sprintf(buf2, "(unknown)");
+
+				objsize = UINT(mi->cache_buf + OFFSET(kmem_cache_objsize));
+				size = UINT(mi->cache_buf + OFFSET(kmem_cache_size));
+				offset = UINT(mi->cache_buf + OFFSET(kmem_cache_offset));
+				if (VALID_MEMBER(kmem_cache_objects)) {
+					objects = UINT(mi->cache_buf +
+						OFFSET(kmem_cache_objects));
+					order = UINT(mi->cache_buf + OFFSET(kmem_cache_order));
+				} else if (VALID_MEMBER(kmem_cache_oo)) {
+					oo = ULONG(mi->cache_buf + OFFSET(kmem_cache_oo));
+					objects = oo_objects(oo);
+					order = oo_order(oo);
+				} else
+					error(FATAL, "cannot determine "
+						"kmem_cache objects/order values\n");
+
+				mi->cache = mi->cache_list[j];
+				mi->curname = buf2;
+				mi->objsize = objsize;
+				mi->size = size;
+				mi->objects = objects;
+				mi->slabsize = (PAGESIZE() << order);
+				mi->inuse = mi->num_slabs = 0;
+				mi->slab_offset = offset;
+				mi->random = VALID_MEMBER(kmem_cache_random) ?
+					ULONG(mi->cache_buf + OFFSET(kmem_cache_random)) : 0;
+
+				if (!get_kmem_cache_slub_data(GET_SLUB_SLABS, mi) ||
+				    !get_kmem_cache_slub_data(GET_SLUB_OBJECTS, mi)) {
+					si->flags |= SLAB_GATHER_FAILURE;
+					continue;
+				}
+
+				si->inuse += mi->inuse;
+				si->free += mi->free;
+				si->num_slabs += mi->num_slabs;
+
+				if (CRASHDEBUG(1))
+					dump_kmem_cache_info(mi);
+			}
+			FREEBUF(mi->cache_buf);
+			FREEBUF(mi->cache_list);
+no_children:
+			FREEBUF(mi);
+		}
+
 		DUMP_KMEM_CACHE_INFO();
 
 		if (si->flags & SLAB_GATHER_FAILURE) {
@@ -18964,6 +19081,70 @@ get_kmem_cache_list(ulong **cache_buf)
 	return cnt;
 }
 
+static int
+get_kmem_cache_root_list(ulong **cache_buf)
+{
+	int cnt;
+	ulong vaddr;
+	struct list_data list_data, *ld;
+	/* Walk the list headed at "slab_root_caches"; return the do_list() count. */
+	get_symbol_data("slab_root_caches", sizeof(void *), &vaddr);
+
+	ld = &list_data;
+	BZERO(ld, sizeof(struct list_data));
+	ld->flags |= LIST_ALLOCATE;
+	ld->start = vaddr;	/* pointer-sized value fetched via the symbol above */
+	ld->list_head_offset = OFFSET(kmem_cache_memcg_params)
+		+ OFFSET(memcg_cache_params___root_caches_node);	/* kmem_cache.memcg_params.__root_caches_node */
+	ld->end = symbol_value("slab_root_caches");	/* stop at the list head symbol */
+	if (CRASHDEBUG(3))
+		ld->flags |= VERBOSE;
+
+	cnt = do_list(ld);
+	*cache_buf = ld->list_ptr;	/* NOTE(review): presumably the caller releases this buffer -- confirm */
+
+	return cnt;
+}
+
+static int
+get_kmem_cache_child_list(ulong **cache_buf, ulong root)
+{
+	int cnt;
+	ulong vaddr, children;
+	struct list_data list_data, *ld;
+	/* Collect the caches linked on root's memcg_params.children list. */
+	children = root + OFFSET(kmem_cache_memcg_params)
+			+ OFFSET(memcg_cache_params_children);
+
+	readmem(children, KVADDR, &vaddr, sizeof(ulong),
+		"kmem_cache.memcg_params.children",
+		FAULT_ON_ERROR);
+
+	/*
+	 * The list head (children) and the list nodes (children_node)
+	 * sit at different offsets, so on an empty list do_list would
+	 * return a bogus cache_buf[0].  Detect the no-children case
+	 * up front: list_head.next points back at the head itself.
+	 */
+	if (children == vaddr)
+		return 0;	/* *cache_buf is left untouched here */
+
+	ld = &list_data;
+	BZERO(ld, sizeof(struct list_data));
+	ld->flags |= LIST_ALLOCATE;
+	ld->start = vaddr;
+	ld->list_head_offset =
+		OFFSET(kmem_cache_memcg_params)
+		+ OFFSET(memcg_cache_params_children_node);
+	ld->end = children;	/* walk ends back at the root's list head */
+	if (CRASHDEBUG(3))
+		ld->flags |= VERBOSE;
+
+	cnt = do_list(ld);
+	*cache_buf = ld->list_ptr;
+
+	return cnt;
+}
 
 /*
  *  Get the address of the head page of a compound page.
diff --git a/symbols.c b/symbols.c
index bee60ba..2e6713a 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9451,6 +9451,15 @@ dump_offset_table(char *spec, ulong makestruct)
         fprintf(fp, "              kmem_cache_flags: %ld\n",
                 OFFSET(kmem_cache_flags));
 
+	fprintf(fp, "       kmem_cache_memcg_params: %ld\n",
+		OFFSET(kmem_cache_memcg_params));
+	fprintf(fp, "memcg_cache_params___root_caches_node: %ld\n",
+		OFFSET(memcg_cache_params___root_caches_node));
+	fprintf(fp, "          memcg_cache_params_children: %ld\n",
+		OFFSET(memcg_cache_params_children));
+	fprintf(fp, "     memcg_cache_params_children_node: %ld\n",
+		OFFSET(memcg_cache_params_children_node));
+
 	fprintf(fp, "               net_device_next: %ld\n",
         	OFFSET(net_device_next));
 	fprintf(fp, "               net_device_name: %ld\n",
-- 
1.8.3.1
-------------- next part --------------
#!/bin/gawk -f
#
# Fold "kmem -s" output into one line per root cache.
#
# Input columns, matching the header printed in END:
#   $1 CACHE  $2 OBJSIZE  $3 ALLOCATED  $4 TOTAL  $5 SLABS  $6 SSIZE  $7 NAME
#
# A name ending in "(<digits>:...)" is folded into the entry whose name
# is what remains once that suffix is removed.

# The first input line is skipped; every other record is processed.
NR > 1 {
	# sub() edits $7 in place and returns 0 when no suffix was present.
	had_suffix = sub(/\([0-9]+:.*\)$/, "", $7)
	root = $7

	if (had_suffix == 0) {
		# No suffix: treat this record as the root entry and remember
		# its per-cache columns plus its position for ordered output.
		order[nroots++] = root
		addr[root] = $1
		osize[root] = $2
		slabsz[root] = $6
	}

	# Every record contributes to the totals of its (stripped) name.
	inuse[root] += $3
	objs[root] += $4
	nslabs[root] += $5
}

END {
	printf("%-16s %8s  %9s  %8s  %5s  %4s  %s\n",
		"CACHE", "OBJSIZE", "ALLOCATED", "TOTAL",
		"SLABS", "SSIZE", "NAME")

	# Emit roots in the order they were first recorded.
	for (k = 0; k < nroots; k++) {
		root = order[k]
		printf("%16s %8d  %9d  %8d  %5d  %4dk  %s\n",
			addr[root], osize[root], inuse[root],
			objs[root], nslabs[root], slabsz[root], root)
	}
}


More information about the Crash-utility mailing list