[Crash-utility] [PATCH RFC] Add "kmem -r" to display accumulated slab statistics like /proc/slabinfo

Dave Anderson anderson at redhat.com
Thu Aug 23 20:31:26 UTC 2018



----- Original Message -----
> Nowadays, the "kmem -s" output can become very long vertically too,
> due to the memcg kmem caches.  It looks like the longer a system has
> run, the longer it becomes.
> 
>   crash> kmem -s | wc -l
>   19855
> 
> On the other hand, since /proc/slabinfo accumulates the values of
> each slab_root_caches and its children, it's still relatively short.
> And I think there are many cases where support folks want to see the
> accumulated values like /proc/slabinfo from vmcore, in order to
> grasp the overview of slab activity quickly.
> 
> We can use something like the attached script to accumulate them,
> but I believe it would be more useful to implement it in crash.
> 
> This patch introduces the "kmem -r" option to imitate /proc/slabinfo,
> but it is limited to CONFIG_SLUB for now.

And it looks like it's limited to Linux 4.11 and later, correct?

Thanks,
  Dave



> 
> I tested this patch with the kmem-s2r.awk script:
> 
>   crash> kmem -s | awk -f kmem-s2r.awk > kmem-s2r.txt
>   crash> kmem -r > kmem-r.txt
> 
>   # diff -u kmem-s2r.txt kmem-r.txt
> 
> Supported:
>   crash> kmem -r
>   crash> kmem -r list
>   crash> kmem -r <slab name>
> 
> Signed-off-by: Kazuhito Hagio <k-hagio at ab.jp.nec.com>
> ---
>  defs.h    |   5 ++
>  help.c    |  10 +--
>  memory.c  | 219
>  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  symbols.c |   9 +++
>  4 files changed, 220 insertions(+), 23 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index 6fdb478..8687ff1 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2032,6 +2032,10 @@ struct offset_table {                    /* stash of
> commonly-used offsets */
>  	long bpf_prog_aux_user;
>  	long user_struct_uid;
>  	long idr_cur;
> +	long kmem_cache_memcg_params;
> +	long memcg_cache_params___root_caches_node;
> +	long memcg_cache_params_children;
> +	long memcg_cache_params_children_node;
>  };
>  
>  struct size_table {         /* stash of commonly-used sizes */
> @@ -2438,6 +2442,7 @@ struct vm_table {                /* kernel VM-related
> data */
>  #define PAGEFLAGS             (0x4000000)
>  #define SLAB_OVERLOAD_PAGE    (0x8000000)
>  #define SLAB_CPU_CACHE       (0x10000000)
> +#define SLAB_ROOT_CACHES     (0x20000000)
>  
>  #define IS_FLATMEM()		(vt->flags & FLATMEM)
>  #define IS_DISCONTIGMEM()	(vt->flags & DISCONTIGMEM)
> diff --git a/help.c b/help.c
> index aeeb056..ee8b999 100644
> --- a/help.c
> +++ b/help.c
> @@ -6448,7 +6448,7 @@ char *help_kmem[] = {
>  "kmem",
>  "kernel memory",
>  "[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
> -"       [[-s|-S] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
> +"       [[-s|-S|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
>  "  This command displays information about the use of kernel memory.\n",
>  "        -f  displays the contents of the system free memory headers.",
>  "            also verifies that the page count equals nr_free_pages.",
> @@ -6490,10 +6490,12 @@ char *help_kmem[] = {
>  "            slab data for each per-cpu slab is displayed, along with the",
>  "            address of each kmem_cache_node, its count of full and
>  partial",
>  "            slabs, and a list of all tracked slabs.",
> -"      slab  when used with -s or -S, limits the command to only the slab
> cache",
> -"            of name \"slab\".  If the slab argument is \"list\", then",
> +"        -r  displays accumulated kmalloc() slab data of each
> slab_root_caches",
> +"            and its children.  Available only if CONFIG_SLUB for now.",
> +"      slab  when used with -s, -S or -r, limits the command to only the
> slab",
> +"            cache of name \"slab\".  If the slab argument is \"list\",
> then",
>  "            all slab cache names and addresses are listed.",
> -"   -I slab  when used with -s or -S, one or more slab cache names in a",
> +"   -I slab  when used with -s, -S or -r, one or more slab cache names in
> a",
>  "            comma-separated list may be specified as slab caches to
>  ignore.",
>  "        -g  displays the enumerator value of all bits in the page
>  structure's",
>  "            \"flags\" field.",
> diff --git a/memory.c b/memory.c
> index e02ba68..1501b21 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -167,12 +167,12 @@ static int kmem_cache_downsize(void);
>  static int ignore_cache(struct meminfo *, char *);
>  static char *is_kmem_cache_addr(ulong, char *);
>  static char *is_kmem_cache_addr_common(ulong, char *);
> -static void kmem_cache_list(void);
> +static void kmem_cache_list(struct meminfo *);
>  static void dump_kmem_cache(struct meminfo *);
>  static void dump_kmem_cache_percpu_v1(struct meminfo *);
>  static void dump_kmem_cache_percpu_v2(struct meminfo *);
>  static void dump_kmem_cache_slub(struct meminfo *);
> -static void kmem_cache_list_common(void);
> +static void kmem_cache_list_common(struct meminfo *);
>  static ulong get_cpu_slab_ptr(struct meminfo *, int, ulong *);
>  static unsigned int oo_order(ulong);
>  static unsigned int oo_objects(ulong);
> @@ -276,6 +276,8 @@ static int generic_read_dumpfile(ulonglong, void *, long,
> char *, ulong);
>  static int generic_write_dumpfile(ulonglong, void *, long, char *, ulong);
>  static int page_to_nid(ulong);
>  static int get_kmem_cache_list(ulong **);
> +static int get_kmem_cache_root_list(ulong **);
> +static int get_kmem_cache_child_list(ulong **, ulong);
>  static int get_kmem_cache_slub_data(long, struct meminfo *);
>  static ulong compound_head(ulong);
>  static long count_partial(ulong, struct meminfo *, ulong *);
> @@ -815,6 +817,23 @@ vm_init(void)
>  			"kmem_slab_s", "s_magic");
>  	}
>  
> +	if (kernel_symbol_exists("slab_root_caches")) {
> +		MEMBER_OFFSET_INIT(kmem_cache_memcg_params,
> +			"kmem_cache", "memcg_params");
> +		MEMBER_OFFSET_INIT(memcg_cache_params___root_caches_node,
> +			"memcg_cache_params", "__root_caches_node");
> +		MEMBER_OFFSET_INIT(memcg_cache_params_children,
> +			"memcg_cache_params", "children");
> +		MEMBER_OFFSET_INIT(memcg_cache_params_children_node,
> +			"memcg_cache_params", "children_node");
> +
> +		if (VALID_MEMBER(kmem_cache_memcg_params)
> +		    && VALID_MEMBER(memcg_cache_params___root_caches_node)
> +		    && VALID_MEMBER(memcg_cache_params_children)
> +		    && VALID_MEMBER(memcg_cache_params_children_node))
> +			vt->flags |= SLAB_ROOT_CACHES;
> +	}
> +
>  	if (!kt->kernel_NR_CPUS) {
>  		if (enumerator_value("WORK_CPU_UNBOUND", (long *)&value1))
>  			kt->kernel_NR_CPUS = (int)value1;
> @@ -4713,6 +4732,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage
> *tm)
>  #define SLAB_OVERLOAD_PAGE_PTR (ADDRESS_SPECIFIED << 24)
>  #define SLAB_BITFIELD          (ADDRESS_SPECIFIED << 25)
>  #define SLAB_GATHER_FAILURE    (ADDRESS_SPECIFIED << 26)
> +#define GET_SLAB_ROOT_CACHES   (ADDRESS_SPECIFIED << 27)
>  
>  #define GET_ALL \
>  	(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
> @@ -4724,6 +4744,7 @@ cmd_kmem(void)
>  	int c;
>  	int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
>  	int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
> +	int rflag;
>  	struct meminfo meminfo;
>  	ulonglong value[MAXARGS];
>  	char buf[BUFSIZE];
> @@ -4733,13 +4754,13 @@ cmd_kmem(void)
>  	spec_addr = 0;
>          sflag =	Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0;
>  	vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
> -	gflag = hflag = 0;
> +	gflag = hflag = rflag = 0;
>  	escape = FALSE;
>  	BZERO(&meminfo, sizeof(struct meminfo));
>  	BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
>  	pc->curcmd_flags &= ~HEADER_PRINTED;
>  
> -        while ((c = getopt(argcnt, args, "gI:sSFfm:pvczCinl:L:PVoh")) !=
> EOF) {
> +        while ((c = getopt(argcnt, args, "gI:sSrFfm:pvczCinl:L:PVoh")) !=
> EOF) {
>                  switch(c)
>  		{
>  		case 'V':
> @@ -4775,11 +4796,15 @@ cmd_kmem(void)
>  			break;
>  
>  		case 's':
> -			sflag = 1; Sflag = 0;
> +			sflag = 1; Sflag = rflag = 0;
>  			break;
>  
>  		case 'S':
> -			Sflag = 1; sflag = 0;
> +			Sflag = 1; sflag = rflag = 0;
> +			break;
> +
> +		case 'r':
> +			rflag = 1; sflag = Sflag = 0;
>  			break;
>  
>  		case 'F':
> @@ -4859,12 +4884,13 @@ cmd_kmem(void)
>  		cmd_usage(pc->curcmd, SYNOPSIS);
>  
>          if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
> -            vflag + Cflag + cflag + iflag + lflag + Lflag + gflag + hflag) >
> 1) {
> +            vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
> +            hflag + rflag) > 1) {
>  		error(INFO, "only one flag allowed!\n");
>  		cmd_usage(pc->curcmd, SYNOPSIS);
>  	}
>  
> -	if (sflag || Sflag || !(vt->flags & KMEM_CACHE_INIT))
> +	if (sflag || Sflag || rflag || !(vt->flags & KMEM_CACHE_INIT))
>  		kmem_cache_init();
>  
>  	while (args[optind]) {
> @@ -4881,7 +4907,7 @@ cmd_kmem(void)
>  				escape = TRUE;
>  			} else
>  				meminfo.reqname = args[optind];
> -                        if (!sflag && !Sflag)
> +                        if (!sflag && !Sflag && !rflag)
>                                  cmd_usage(pc->curcmd, SYNOPSIS);
>                  }
>  
> @@ -4994,7 +5020,7 @@ cmd_kmem(void)
>                   * no value arguments allowed!
>                   */
>                  if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
> -		    Vflag || oflag || hflag) {
> +		    Vflag || oflag || hflag || rflag) {
>  			error(INFO,
>  			    "no address arguments allowed with this option\n");
>                          cmd_usage(pc->curcmd, SYNOPSIS);
> @@ -5030,9 +5056,17 @@ cmd_kmem(void)
>  	if (hflag == 1)
>  		dump_hstates();
>  
> -	if (sflag == 1) {
> +	if (sflag == 1 || rflag == 1) {
> +		if (rflag) {
> +			if (!((vt->flags & KMALLOC_SLUB)
> +			    && (vt->flags & SLAB_ROOT_CACHES)))
> +				error(FATAL,
> +			    "-r option doesn't support this kernel\n");
> +
> +			meminfo.flags = GET_SLAB_ROOT_CACHES;
> +		}
>  		if (!escape && STREQ(meminfo.reqname, "list"))
> -			kmem_cache_list();
> +			kmem_cache_list(&meminfo);
>                  else if (vt->flags & KMEM_CACHE_UNAVAIL)
>                       	error(FATAL,
>  			    "kmem cache slab subsystem not available\n");
> @@ -5042,7 +5076,7 @@ cmd_kmem(void)
>  
>  	if (Sflag == 1) {
>  		if (STREQ(meminfo.reqname, "list"))
> -			kmem_cache_list();
> +			kmem_cache_list(&meminfo);
>                  else if (vt->flags & KMEM_CACHE_UNAVAIL)
>                       	error(FATAL,
>  			    "kmem cache slab subsystem not available\n");
> @@ -5092,7 +5126,8 @@ cmd_kmem(void)
>  
>  	if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
>  	      Vflag + zflag + oflag + cflag + Cflag + iflag +
> -	      nflag + lflag + Lflag + gflag + hflag + meminfo.calls))
> +	      nflag + lflag + Lflag + gflag + hflag + rflag +
> +	      meminfo.calls))
>  		cmd_usage(pc->curcmd, SYNOPSIS);
>  
>  }
> @@ -9117,7 +9152,7 @@ is_kmem_cache_addr(ulong vaddr, char *kbuf)
>   *  dumps all slab cache names and their addresses.
>   */
>  static void
> -kmem_cache_list(void)
> +kmem_cache_list(struct meminfo *mi)
>  {
>          ulong cache, cache_cache, name;
>  	long next_offset, name_offset;
> @@ -9132,7 +9167,7 @@ kmem_cache_list(void)
>  	}
>  
>  	if (vt->flags & (KMALLOC_SLUB|KMALLOC_COMMON)) {
> -		kmem_cache_list_common();
> +		kmem_cache_list_common(mi);
>  		return;
>  	}
>  
> @@ -13564,6 +13599,8 @@ dump_vm_table(int verbose)
>  		fprintf(fp, "%sSLAB_OVERLOAD_PAGE", others++ ? "|" : "");\
>  	if (vt->flags & SLAB_CPU_CACHE)
>  		fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
> +	if (vt->flags & SLAB_ROOT_CACHES)
> +		fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
>  	if (vt->flags & USE_VMAP_AREA)
>  		fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
>  	if (vt->flags & CONFIG_NUMA)
> @@ -18044,14 +18081,17 @@ kmem_cache_init_slub(void)
>  }
>  
>  static void
> -kmem_cache_list_common(void)
> +kmem_cache_list_common(struct meminfo *mi)
>  {
>          int i, cnt;
>          ulong *cache_list;
>          ulong name;
>  	char buf[BUFSIZE];
>  
> -	cnt = get_kmem_cache_list(&cache_list);
> +	if (mi->flags & GET_SLAB_ROOT_CACHES)
> +		cnt = get_kmem_cache_root_list(&cache_list);
> +	else
> +		cnt = get_kmem_cache_list(&cache_list);
>  
>  	for (i = 0; i < cnt; i++) {
>  		fprintf(fp, "%lx ", cache_list[i]);
> @@ -18087,7 +18127,11 @@ dump_kmem_cache_slub(struct meminfo *si)
>  	}
>  
>  	order = objects = 0;
> -	si->cache_count = get_kmem_cache_list(&si->cache_list);
> +	if (si->flags & GET_SLAB_ROOT_CACHES)
> +		si->cache_count = get_kmem_cache_root_list(&si->cache_list);
> +	else
> +		si->cache_count = get_kmem_cache_list(&si->cache_list);
> +
>  	si->cache_buf = GETBUF(SIZE(kmem_cache));
>  
>  	if (VALID_MEMBER(page_objects) &&
> @@ -18168,6 +18212,79 @@ dump_kmem_cache_slub(struct meminfo *si)
>  		    !get_kmem_cache_slub_data(GET_SLUB_OBJECTS, si))
>  			si->flags |= SLAB_GATHER_FAILURE;
>  
> +		/* accumulate children's slabinfo */
> +		if (si->flags & GET_SLAB_ROOT_CACHES) {
> +			struct meminfo *mi;
> +			int j;
> +			char buf2[BUFSIZE];
> +
> +			mi = (struct meminfo *)GETBUF(sizeof(struct meminfo));
> +			memcpy(mi, si, sizeof(struct meminfo));
> +
> +			mi->cache_count = get_kmem_cache_child_list(&mi->cache_list,
> +						si->cache_list[i]);
> +
> +			if (!mi->cache_count)
> +				goto no_children;
> +
> +			mi->cache_buf = GETBUF(SIZE(kmem_cache));
> +
> +			for (j = 0; j < mi->cache_count; j++) {
> +				BZERO(mi->cache_buf, SIZE(kmem_cache));
> +				if (!readmem(mi->cache_list[j], KVADDR, mi->cache_buf,
> +				    SIZE(kmem_cache), "kmem_cache buffer",
> +				    RETURN_ON_ERROR|RETURN_PARTIAL))
> +					continue;
> +
> +				name = ULONG(mi->cache_buf + OFFSET(kmem_cache_name));
> +				if (!read_string(name, buf2, BUFSIZE-1))
> +					sprintf(buf2, "(unknown)");
> +
> +				objsize = UINT(mi->cache_buf + OFFSET(kmem_cache_objsize));
> +				size = UINT(mi->cache_buf + OFFSET(kmem_cache_size));
> +				offset = UINT(mi->cache_buf + OFFSET(kmem_cache_offset));
> +				if (VALID_MEMBER(kmem_cache_objects)) {
> +					objects = UINT(mi->cache_buf +
> +						OFFSET(kmem_cache_objects));
> +					order = UINT(mi->cache_buf + OFFSET(kmem_cache_order));
> +				} else if (VALID_MEMBER(kmem_cache_oo)) {
> +					oo = ULONG(mi->cache_buf + OFFSET(kmem_cache_oo));
> +					objects = oo_objects(oo);
> +					order = oo_order(oo);
> +				} else
> +					error(FATAL, "cannot determine "
> +						"kmem_cache objects/order values\n");
> +
> +				mi->cache = mi->cache_list[j];
> +				mi->curname = buf2;
> +				mi->objsize = objsize;
> +				mi->size = size;
> +				mi->objects = objects;
> +				mi->slabsize = (PAGESIZE() << order);
> +				mi->inuse = mi->num_slabs = 0;
> +				mi->slab_offset = offset;
> +				mi->random = VALID_MEMBER(kmem_cache_random) ?
> +					ULONG(mi->cache_buf + OFFSET(kmem_cache_random)) : 0;
> +
> +				if (!get_kmem_cache_slub_data(GET_SLUB_SLABS, mi) ||
> +				    !get_kmem_cache_slub_data(GET_SLUB_OBJECTS, mi)) {
> +					si->flags |= SLAB_GATHER_FAILURE;
> +					continue;
> +				}
> +
> +				si->inuse += mi->inuse;
> +				si->free += mi->free;
> +				si->num_slabs += mi->num_slabs;
> +
> +				if (CRASHDEBUG(1))
> +					dump_kmem_cache_info(mi);
> +			}
> +			FREEBUF(mi->cache_buf);
> +			FREEBUF(mi->cache_list);
> +no_children:
> +			FREEBUF(mi);
> +		}
> +
>  		DUMP_KMEM_CACHE_INFO();
>  
>  		if (si->flags & SLAB_GATHER_FAILURE) {
> @@ -18964,6 +19081,70 @@ get_kmem_cache_list(ulong **cache_buf)
>  	return cnt;
>  }
>  
> +static int
> +get_kmem_cache_root_list(ulong **cache_buf)
> +{
> +	int cnt;
> +	ulong vaddr;
> +	struct list_data list_data, *ld;
> +
> +	get_symbol_data("slab_root_caches", sizeof(void *), &vaddr);
> +
> +	ld = &list_data;
> +	BZERO(ld, sizeof(struct list_data));
> +	ld->flags |= LIST_ALLOCATE;
> +	ld->start = vaddr;
> +	ld->list_head_offset = OFFSET(kmem_cache_memcg_params)
> +		+ OFFSET(memcg_cache_params___root_caches_node);
> +	ld->end = symbol_value("slab_root_caches");
> +	if (CRASHDEBUG(3))
> +		ld->flags |= VERBOSE;
> +
> +	cnt = do_list(ld);
> +	*cache_buf = ld->list_ptr;
> +
> +	return cnt;
> +}
> +
> +static int
> +get_kmem_cache_child_list(ulong **cache_buf, ulong root)
> +{
> +	int cnt;
> +	ulong vaddr, children;
> +	struct list_data list_data, *ld;
> +
> +	children = root + OFFSET(kmem_cache_memcg_params)
> +			+ OFFSET(memcg_cache_params_children);
> +
> +	readmem(children, KVADDR, &vaddr, sizeof(ulong),
> +		"kmem_cache.memcg_params.children",
> +		FAULT_ON_ERROR);
> +
> +	/*
> +	 * When there are no children, do_list returns an incorrect
> +	 * cache_buf[0], since the offset of the children list differs
> +	 * between root and child. So we determine whether it has
> +	 * children or not from the value of list_head.next.
> +	 */
> +	if (children == vaddr)
> +		return 0;
> +
> +	ld = &list_data;
> +	BZERO(ld, sizeof(struct list_data));
> +	ld->flags |= LIST_ALLOCATE;
> +	ld->start = vaddr;
> +	ld->list_head_offset =
> +		OFFSET(kmem_cache_memcg_params)
> +		+ OFFSET(memcg_cache_params_children_node);
> +	ld->end = children;
> +	if (CRASHDEBUG(3))
> +		ld->flags |= VERBOSE;
> +
> +	cnt = do_list(ld);
> +	*cache_buf = ld->list_ptr;
> +
> +	return cnt;
> +}
>  
>  /*
>   *  Get the address of the head page of a compound page.
> diff --git a/symbols.c b/symbols.c
> index bee60ba..2e6713a 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -9451,6 +9451,15 @@ dump_offset_table(char *spec, ulong makestruct)
>          fprintf(fp, "              kmem_cache_flags: %ld\n",
>                  OFFSET(kmem_cache_flags));
>  
> +	fprintf(fp, "       kmem_cache_memcg_params: %ld\n",
> +		OFFSET(kmem_cache_memcg_params));
> +	fprintf(fp, "memcg_cache_params___root_caches_node: %ld\n",
> +		OFFSET(memcg_cache_params___root_caches_node));
> +	fprintf(fp, "          memcg_cache_params_children: %ld\n",
> +		OFFSET(memcg_cache_params_children));
> +	fprintf(fp, "     memcg_cache_params_children_node: %ld\n",
> +		OFFSET(memcg_cache_params_children_node));
> +
>  	fprintf(fp, "               net_device_next: %ld\n",
>          	OFFSET(net_device_next));
>  	fprintf(fp, "               net_device_name: %ld\n",
> --
> 1.8.3.1
> 
> --
> Crash-utility mailing list
> Crash-utility at redhat.com
> https://www.redhat.com/mailman/listinfo/crash-utility




More information about the Crash-utility mailing list