[Crash-utility] [PATCH 1/2] Support for "dev -d|-D" options by parsing bitmap in blk-mq layer

Lianbo Jiang lijiang at redhat.com
Wed Apr 27 03:37:03 UTC 2022


Currently, crash does not support displaying disk I/O statistics
for blk-mq devices. For more details, please refer to the following
commit: <98b417fc6346> ("Handle blk_mq_ctx member changes for kernels
5.16-rc1 and later").

Let's parse the bitmap in the blk-mq layer to achieve it.

Signed-off-by: Lianbo Jiang <lijiang at redhat.com>
---
 defs.h    |  13 +++
 dev.c     | 302 +++++++++++++++++++++++++++++++++++++++++++++++-------
 symbols.c |  26 +++++
 3 files changed, 302 insertions(+), 39 deletions(-)

diff --git a/defs.h b/defs.h
index 81ac0498dac7..f3c05fb44e62 100644
--- a/defs.h
+++ b/defs.h
@@ -2168,6 +2168,18 @@ struct offset_table {                    /* stash of commonly-used offsets */
 	long sbitmap_queue_min_shallow_depth;
 	long sbq_wait_state_wait_cnt;
 	long sbq_wait_state_wait;
+	long request_q;
+	long request_cmd_flags;
+	long request_queue_queue_hw_ctx;
+	long request_queue_nr_hw_queues;
+	long blk_mq_hw_ctx_tags;
+	long blk_mq_hw_ctx_sched_tags;
+	long blk_mq_tags_bitmap_tags;
+	long blk_mq_tags_breserved_tags;
+	long blk_mq_tags_nr_reserved_tags;
+	long blk_mq_tags_nr_tags;
+	long blk_mq_tags_rqs;
+	long blk_mq_tags_static_rqs;
 };
 
 struct size_table {         /* stash of commonly-used sizes */
@@ -2337,6 +2349,7 @@ struct size_table {         /* stash of commonly-used sizes */
 	long sbitmap;
 	long sbitmap_queue;
 	long sbq_wait_state;
+	long blk_mq_tags;
 };
 
 struct array_table {
diff --git a/dev.c b/dev.c
index a493e51ac95c..4d574f06494f 100644
--- a/dev.c
+++ b/dev.c
@@ -4238,19 +4238,224 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io)
 	io->write = (dispatch[1] - comp[1]);
 }
 
+struct blk_mq_tags_context {	/* values gathered from one kernel blk_mq_tags */
+	uint nr_tags;		/* blk_mq_tags.nr_tags */
+	uint nr_reserved_tags;	/* blk_mq_tags.nr_reserved_tags */
+	ulong bitmap_tags;	/* address of the blk_mq_tags.bitmap_tags member */
+	ulong breserved_tags;	/* address of the blk_mq_tags.breserved_tags member */
+	ulong rqs;		/* value read from blk_mq_tags.rqs */
+	ulong static_rqs;	/* value read from blk_mq_tags.static_rqs */
+};
+
+/*
+ * Fill bmtc from the blk_mq_tags at kernel address addr (counters, bitmap
+ * member addresses, rqs pointer values); a failed read is a FATAL error.
+ */
+static void load_blk_mq_tags_context(ulong addr, struct blk_mq_tags_context *bmtc)
+{
+	char *buf = GETBUF(SIZE(blk_mq_tags));
+
+	if (!buf)
+		error(FATAL, "fail to get memory for blk_mq_tags\n");
+	if (!readmem(addr, KVADDR, buf, SIZE(blk_mq_tags), "blk_mq_tags", RETURN_ON_ERROR)) {
+		FREEBUF(buf);
+		error(FATAL, "cannot read blk_mq_tags\n");
+	}
+	bmtc->nr_tags = UINT(buf + OFFSET(blk_mq_tags_nr_tags));
+	bmtc->nr_reserved_tags = UINT(buf + OFFSET(blk_mq_tags_nr_reserved_tags));
+	bmtc->bitmap_tags = addr + OFFSET(blk_mq_tags_bitmap_tags);
+	bmtc->breserved_tags = addr + OFFSET(blk_mq_tags_breserved_tags);
+	bmtc->rqs = ULONG(buf + OFFSET(blk_mq_tags_rqs));
+	bmtc->static_rqs = ULONG(buf + OFFSET(blk_mq_tags_static_rqs));
+	FREEBUF(buf);
+}
+
+/*
+ * Copy counts pointers from kernel address rqs_addr into a GETBUF buffer
+ * and store it in *rqs.  *rqs is set to NULL instead when rqs_addr is not
+ * a kernel virtual address; a failed read is a FATAL error.
+ */
+static void load_blk_mq_rqs(ulong rqs_addr, uint counts, ulong **rqs)
+{
+	ulong *ptrs = NULL;
+	uint nbytes = sizeof(void *) * counts;
+
+	if (IS_KVADDR(rqs_addr)) {
+		ptrs = (ulong *)GETBUF(nbytes);
+		if (!ptrs)
+			error(FATAL, "fail to get memory for the rqs buf\n");
+		if (!readmem(rqs_addr, KVADDR, ptrs, nbytes,
+				"blk_mq_tags.[static_]rqs", RETURN_ON_ERROR)) {
+			FREEBUF(ptrs);
+			error(FATAL, "fail to load blk_mq_tags.[static_]rqs\n");
+		}
+	}
+	*rqs = ptrs;
+}
+
+/* Return 1 if the opcode in the low REQ_OP_BITS bits of op is odd, else 0. */
+#define REQ_OP_BITS     8
+#define REQ_OP_MASK     ((1 << REQ_OP_BITS) - 1)
+static uint op_is_write(uint op)
+{
+	return ((op & REQ_OP_MASK) % 2) != 0;
+}
+
+/*
+ * Count the requests of request_queue q that are tagged in the
+ * sbitmap_queue at address tags.  Bit n of the bitmap stands for
+ * rqs[offset + n]; each set, non-cleared bit whose request.q is q adds to
+ * (*io_counts)->write or ->read, as decided by op_is_write(cmd_flags).
+ * Unreadable requests are skipped; an unreadable sbitmap_word ends the scan.
+ */
+static void find_mq_diskio_by_parsing_bitmap(ulong q, ulong tags, uint offset, ulong *rqs, struct diskio **io_counts)
+{
+	int i, j;
+	uint cmd_flags;
+	ulong depth, word, cleared, rq, queue;
+	struct sbitmap_context sc = {0};
+	ulong addr = tags + OFFSET(sbitmap_queue_sb);
+	ulong sbitmap_word_size = SIZE(sbitmap_word);
+	char *sbitmap_word_buf = GETBUF(sbitmap_word_size);
+
+	if (!sbitmap_word_buf)
+		error(FATAL, "fail to get memory for the sbitmap word\n");
+
+	sbitmap_context_load(addr, &sc);
+	if (sc.map_nr == 0 || !IS_KVADDR(sc.map_addr))
+		goto sbitmap_word_buf_free;
+
+	addr = sc.map_addr;
+	for (i = 0; i < sc.map_nr; i++, addr += sbitmap_word_size) {
+		if (!readmem(addr, KVADDR, sbitmap_word_buf, sbitmap_word_size,
+				"sbitmap_word", RETURN_ON_ERROR)) {
+			error(INFO, "cannot read sbitmap_word\n");
+			goto sbitmap_word_buf_free;
+		}
+
+		word = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_word));
+		cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+		word &= ~cleared;
+
+		if (i == sc.map_nr - 1)
+			depth = sc.depth - (i << sc.shift);
+		else
+			depth = 1U << sc.shift;
+
+		/*
+		 * Index rqs[] by word number, not by a running counter, so an
+		 * empty word cannot shift the index of every later tag.
+		 */
+		for (j = 0; word && j < depth; j++, word >>= 1) {
+			rq = (word & 1) ? rqs[offset + (i << sc.shift) + j] : 0;
+			if (!rq)
+				continue;
+			if (!readmem(rq + OFFSET(request_q), KVADDR, &queue,
+					sizeof(ulong), "request.q", RETURN_ON_ERROR))
+				continue;
+			if (!readmem(rq + OFFSET(request_cmd_flags), KVADDR, &cmd_flags,
+					sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR))
+				continue;
+			if (q != queue)
+				continue;
+			if (op_is_write(cmd_flags))
+				(*io_counts)->write++;
+			else
+				(*io_counts)->read++;
+		}
+	}
+
+sbitmap_word_buf_free:
+	FREEBUF(sbitmap_word_buf);
+}
+
+/*
+ * Add to io_counts the read/write requests of request_queue q found by
+ * scanning the tag bitmaps of each hardware queue: tags (indexed through
+ * rqs) and sched_tags (indexed through static_rqs), reserved bitmap first.
+ */
+static void get_mq_diskio_from_hw_queues(unsigned long q, struct diskio *io_counts)
+{
+	unsigned long *queue_hw_ctx = NULL;
+	unsigned long hctx_array = 0;
+	unsigned int i, nr_hw_queues;
+
+	readmem(q + OFFSET(request_queue_nr_hw_queues), KVADDR, &nr_hw_queues,
+		sizeof(uint), "request_queue.nr_hw_queues", FAULT_ON_ERROR);
+
+	/*
+	 * request_queue.queue_hw_ctx points to an array of blk_mq_hw_ctx
+	 * pointers, so read the pointer first and then the array behind it.
+	 */
+	if (!readmem(q + OFFSET(request_queue_queue_hw_ctx), KVADDR, &hctx_array,
+		     sizeof(ulong), "request_queue.queue_hw_ctx", RETURN_ON_ERROR) ||
+	    !nr_hw_queues || !IS_KVADDR(hctx_array))
+		return;
+
+	queue_hw_ctx = (ulong *)GETBUF(sizeof(void *) * nr_hw_queues);
+	if (!queue_hw_ctx)
+		error(FATAL, "fail to get memory for the queue_hw_ctx\n");
+
+	if (!readmem(hctx_array, KVADDR, queue_hw_ctx, sizeof(void *) * nr_hw_queues,
+		     "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR))
+		goto queue_hw_ctx_free;
+
+	for (i = 0; i < nr_hw_queues; i++) {
+		ulong tags = 0, sched_tags = 0, hctx = queue_hw_ctx[i];
+		unsigned long *rqs = NULL, *static_rqs = NULL;
+		struct blk_mq_tags_context ctx = {0};
+
+		if (!IS_KVADDR(hctx))
+			continue;
+
+		if (!readmem(hctx + OFFSET(blk_mq_hw_ctx_tags), KVADDR, &tags,
+				sizeof(ulong), "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
+			goto queue_hw_ctx_free;
+		if (!readmem(hctx + OFFSET(blk_mq_hw_ctx_sched_tags), KVADDR, &sched_tags,
+				sizeof(ulong), "blk_mq_hw_ctx.sched_tags", RETURN_ON_ERROR))
+			goto queue_hw_ctx_free;
+
+		if (IS_KVADDR(tags)) {
+			load_blk_mq_tags_context(tags, &ctx);
+			load_blk_mq_rqs(ctx.rqs, ctx.nr_tags, &rqs);
+		}
+		if (rqs) {
+			find_mq_diskio_by_parsing_bitmap(q, ctx.breserved_tags, 0, rqs, &io_counts);
+			find_mq_diskio_by_parsing_bitmap(q, ctx.bitmap_tags, ctx.nr_reserved_tags, rqs, &io_counts);
+			FREEBUF(rqs);
+		}
+
+		if (IS_KVADDR(sched_tags)) {
+			load_blk_mq_tags_context(sched_tags, &ctx);
+			load_blk_mq_rqs(ctx.static_rqs, ctx.nr_tags, &static_rqs);
+		}
+		if (static_rqs) {
+			find_mq_diskio_by_parsing_bitmap(q, ctx.breserved_tags, 0, static_rqs, &io_counts);
+			find_mq_diskio_by_parsing_bitmap(q, ctx.bitmap_tags, ctx.nr_reserved_tags,
+							static_rqs, &io_counts);
+			FREEBUF(static_rqs);
+		}
+	}
+
+queue_hw_ctx_free:
+	FREEBUF(queue_hw_ctx);
+}
+
 static void
 get_mq_diskio(unsigned long q, unsigned long *mq_count)
 {
 	int cpu;
 	unsigned long queue_ctx;
 	unsigned long mctx_addr;
-	struct diskio tmp;
+	struct diskio tmp = {0};
 
 	if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
-	    INVALID_MEMBER(blk_mq_ctx_rq_completed))
+	    INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
+		get_mq_diskio_from_hw_queues(q, &tmp);
+		mq_count[0] = tmp.read;
+		mq_count[1] = tmp.write;
 		return;
-
-	memset(&tmp, 0x00, sizeof(struct diskio));
+	}
 
 	readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
 		sizeof(ulong), "request_queue.queue_ctx",
@@ -4479,41 +4684,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags)
 		&& (io.read + io.write == 0))
 		return;
 
-	if (use_mq_interface(queue_addr) &&
-	    (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
-	     INVALID_MEMBER(blk_mq_ctx_rq_completed)))
-		fprintf(fp, "%s%s%s  %s%s%s%s  %s%s%s",
-			mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
-			space(MINSPACE),
-			mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
-			space(MINSPACE),
-			mkstring(buf2, 10, LJUST, disk_name),
-			space(MINSPACE),
-			mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
-				 LJUST|LONG_HEX, (char *)queue_addr),
-			space(MINSPACE),
-			mkstring(buf4, 17, RJUST, "(not supported)"),
-			space(MINSPACE));
-
-	else
-		fprintf(fp, "%s%s%s  %s%s%s%s  %s%5d%s%s%s%s%s",
-			mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
-			space(MINSPACE),
-			mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
-			space(MINSPACE),
-			mkstring(buf2, 10, LJUST, disk_name),
-			space(MINSPACE),
-			mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
-				 LJUST|LONG_HEX, (char *)queue_addr),
-			space(MINSPACE),
-			io.read + io.write,
-			space(MINSPACE),
-			mkstring(buf4, 5, RJUST|INT_DEC,
-				(char *)(unsigned long)io.read),
-			space(MINSPACE),
-			mkstring(buf5, 5, RJUST|INT_DEC,
-				(char *)(unsigned long)io.write),
-			space(MINSPACE));
+	fprintf(fp, "%s%s%s  %s%s%s%s  %s%5d%s%s%s%s%s",
+		mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
+		space(MINSPACE),
+		mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
+		space(MINSPACE),
+		mkstring(buf2, 10, LJUST, disk_name),
+		space(MINSPACE),
+		mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
+			 LJUST|LONG_HEX, (char *)queue_addr),
+		space(MINSPACE),
+		io.read + io.write,
+		space(MINSPACE),
+		mkstring(buf4, 5, RJUST|INT_DEC,
+			(char *)(unsigned long)io.read),
+		space(MINSPACE),
+		mkstring(buf5, 5, RJUST|INT_DEC,
+			(char *)(unsigned long)io.write),
+		space(MINSPACE));
 
 	if (VALID_MEMBER(request_queue_in_flight)) {
 		if (!use_mq_interface(queue_addr)) {
@@ -4603,15 +4791,51 @@ void diskio_init(void)
 		MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "rq");
 	else
 		MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "root_rl");
+	if (MEMBER_EXISTS("request", "q"))
+		MEMBER_OFFSET_INIT(request_q, "request", "q");
+	if (MEMBER_EXISTS("request", "cmd_flags"))
+		MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags");
 	if (MEMBER_EXISTS("request_queue", "mq_ops")) {
 		MEMBER_OFFSET_INIT(request_queue_mq_ops, "request_queue",
 			"mq_ops");
 		ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx,
 			"request_queue", "queue_ctx");
+		MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx,
+			"request_queue", "queue_hw_ctx");
+		MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
+			"request_queue", "nr_hw_queues");
 		MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
 			"rq_dispatched");
 		MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
 			"rq_completed");
+		MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx",
+			"tags");
+		MEMBER_OFFSET_INIT(blk_mq_hw_ctx_sched_tags, "blk_mq_hw_ctx",
+			"sched_tags");
+		MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags",
+			"bitmap_tags");
+		MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags",
+			"breserved_tags");
+		MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags",
+			"nr_reserved_tags");
+		MEMBER_OFFSET_INIT(blk_mq_tags_nr_tags, "blk_mq_tags",
+			"nr_tags");
+		MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags",
+			"rqs");
+		MEMBER_OFFSET_INIT(blk_mq_tags_static_rqs, "blk_mq_tags",
+			"static_rqs");
+		STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags");
+		STRUCT_SIZE_INIT(sbitmap, "sbitmap");
+		STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word");
+		MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word", "depth");
+		MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word");
+		MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared");
+		MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth");
+		MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
+		MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
+		MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
+		MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
+
 	}
 	MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private",
 		"klist_devices");
diff --git a/symbols.c b/symbols.c
index ba5e2741347d..0612255b6e34 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10385,6 +10385,10 @@ dump_offset_table(char *spec, ulong makestruct)
 		OFFSET(kset_list));
 	fprintf(fp, "            request_list_count: %ld\n",
 		OFFSET(request_list_count));
+	fprintf(fp, "            request_q: %ld\n",
+		OFFSET(request_q));
+	fprintf(fp, "            request_cmd_flags: %ld\n",
+		OFFSET(request_cmd_flags));
 	fprintf(fp, "       request_queue_in_flight: %ld\n",
 		OFFSET(request_queue_in_flight));
 	fprintf(fp, "              request_queue_rq: %ld\n",
@@ -10393,10 +10397,31 @@ dump_offset_table(char *spec, ulong makestruct)
 		OFFSET(request_queue_mq_ops));
 	fprintf(fp, "       request_queue_queue_ctx: %ld\n",
 		OFFSET(request_queue_queue_ctx));
+	fprintf(fp, "       request_queue_queue_hw_ctx: %ld\n",
+		OFFSET(request_queue_queue_hw_ctx));
+	fprintf(fp, "       request_queue_nr_hw_queues: %ld\n",
+		OFFSET(request_queue_nr_hw_queues));
 	fprintf(fp, "      blk_mq_ctx_rq_dispatched: %ld\n",
 		OFFSET(blk_mq_ctx_rq_dispatched));
 	fprintf(fp, "       blk_mq_ctx_rq_completed: %ld\n",
 		OFFSET(blk_mq_ctx_rq_completed));
+	fprintf(fp, "       blk_mq_hw_ctx_tags: %ld\n",
+		OFFSET(blk_mq_hw_ctx_tags));
+	fprintf(fp, "       blk_mq_hw_ctx_sched_tags: %ld\n",
+		OFFSET(blk_mq_hw_ctx_sched_tags));
+	fprintf(fp, "       blk_mq_tags_bitmap_tags: %ld\n",
+		OFFSET(blk_mq_tags_bitmap_tags));
+	fprintf(fp, "       blk_mq_tags_breserved_tags: %ld\n",
+		OFFSET(blk_mq_tags_breserved_tags));
+	fprintf(fp, "       blk_mq_tags_nr_reserved_tags: %ld\n",
+		OFFSET(blk_mq_tags_nr_reserved_tags));
+	fprintf(fp, "       blk_mq_tags_nr_tags: %ld\n",
+		OFFSET(blk_mq_tags_nr_tags));
+	fprintf(fp, "       blk_mq_tags_rqs: %ld\n",
+		OFFSET(blk_mq_tags_rqs));
+	fprintf(fp, "       blk_mq_tags_static_rqs: %ld\n",
+		OFFSET(blk_mq_tags_static_rqs));
+
 	fprintf(fp, "  subsys_private_klist_devices: %ld\n",
 		OFFSET(subsys_private_klist_devices));
 	fprintf(fp, "                subsystem_kset: %ld\n",
@@ -10999,6 +11024,7 @@ dump_offset_table(char *spec, ulong makestruct)
 	fprintf(fp, "                       sbitmap: %ld\n", SIZE(sbitmap));
 	fprintf(fp, "                 sbitmap_queue: %ld\n", SIZE(sbitmap_queue));
 	fprintf(fp, "                sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
+	fprintf(fp, "                blk_mq_tags: %ld\n", SIZE(blk_mq_tags));
 
         fprintf(fp, "\n                   array_table:\n");
 	/*
-- 
2.20.1



More information about the Crash-utility mailing list