[dm-devel] [patch 4/5] dm: use percpu counters

Mikulas Patocka mpatocka at redhat.com
Tue Nov 6 21:35:02 UTC 2018


Use percpu inflight counters for bio-based dm to avoid the cache line
bouncing caused by the shared atomic part0.in_flight[READ/WRITE]
counters, improving performance on multi-core systems. md_in_flight()
now sums the per-cpu counters across all possible CPUs. Request-based
dm continues to use the atomic in_flight counters; a new helper
rq_md_in_flight() is added in dm-rq.c to read them.

Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>

---
 drivers/md/dm-core.h |    5 ++++
 drivers/md/dm-rq.c   |   10 +++++++--
 drivers/md/dm.c      |   52 +++++++++++++++++++++++++++++++++++----------------
 3 files changed, 49 insertions(+), 18 deletions(-)

Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h	2018-11-06 00:45:06.320000000 +0100
+++ linux-2.6/drivers/md/dm-core.h	2018-11-06 00:45:35.640000000 +0100
@@ -24,6 +24,10 @@ struct dm_kobject_holder {
 	struct completion completion;
 };
 
+struct dm_percpu {
+	unsigned inflight[2];
+};
+
 /*
  * DM core internal structure that used directly by dm.c and dm-rq.c
  * DM targets must _not_ deference a mapped_device to directly access its members!
@@ -63,6 +67,7 @@ struct mapped_device {
 	/*
 	 * A list of ios that arrived while we were suspended.
 	 */
+	struct dm_percpu __percpu *counters;
 	struct work_struct work;
 	wait_queue_head_t wait;
 	spinlock_t deferred_lock;
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c	2018-11-06 00:45:06.320000000 +0100
+++ linux-2.6/drivers/md/dm.c	2018-11-06 22:31:33.980000000 +0100
@@ -597,19 +597,33 @@ static void free_tio(struct dm_target_io
 
 int md_in_flight(struct mapped_device *md)
 {
-	return atomic_read(&dm_disk(md)->part0.in_flight[READ]) +
-	       atomic_read(&dm_disk(md)->part0.in_flight[WRITE]);
+	int cpu;
+	unsigned sum = 0;
+	for_each_possible_cpu(cpu) {
+		struct dm_percpu *p = per_cpu_ptr(md->counters, cpu);
+		sum += p->inflight[READ] + p->inflight[WRITE];
+	}
+	return (int)sum;
 }
 
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->orig_bio;
+	struct hd_struct *part;
+	int sgrp, cpu;
 
 	io->start_time = jiffies;
 
-	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
-			      &dm_disk(md)->part0);
+	part = &dm_disk(md)->part0;
+	sgrp = op_stat_group(bio_op(bio));
+	cpu = part_stat_lock();
+	part_round_stats(md->queue, cpu, part);
+	part_stat_inc(cpu, part, ios[sgrp]);
+	part_stat_add(cpu, part, sectors[sgrp], bio_sectors(bio));
+	part_stat_unlock();
+
+	this_cpu_inc(md->counters->inflight[bio_data_dir(bio)]);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -622,25 +636,25 @@ static void end_io_acct(struct dm_io *io
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->orig_bio;
 	unsigned long duration = jiffies - io->start_time;
+	struct hd_struct *part;
+	int sgrp, cpu;
 
-	/*
-	 * make sure that atomic_dec in generic_end_io_acct is not reordered
-	 * with previous writes
-	 */
-	smp_mb__before_atomic();
-	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
-			    io->start_time);
-	/*
-	 * generic_end_io_acct does atomic_dec, this barrier makes sure that
-	 * atomic_dec is not reordered with waitqueue_active
-	 */
-	smp_mb__after_atomic();
+	part = &dm_disk(md)->part0;
+	sgrp = op_stat_group(bio_op(bio));
+	cpu = part_stat_lock();
+	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
+	part_round_stats(md->queue, cpu, part);
+	part_stat_unlock();
+
+	smp_wmb();
+	this_cpu_dec(md->counters->inflight[bio_data_dir(bio)]);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
 				    bio->bi_iter.bi_sector, bio_sectors(bio),
 				    true, duration, &io->stats_aux);
 
+	smp_mb();
 	/* nudge anyone waiting on suspend queue */
 	if (unlikely(waitqueue_active(&md->wait))) {
 		if (!md_in_flight(md))
@@ -1828,6 +1842,8 @@ static void cleanup_mapped_device(struct
 	if (md->queue)
 		blk_cleanup_queue(md->queue);
 
+	free_percpu(md->counters);
+
 	cleanup_srcu_struct(&md->io_barrier);
 
 	if (md->bdev) {
@@ -1899,6 +1915,10 @@ static struct mapped_device *alloc_dev(i
 	if (!md->disk)
 		goto bad;
 
+	md->counters = alloc_percpu(struct dm_percpu);
+	if (!md->counters)
+		goto bad;
+
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
Index: linux-2.6/drivers/md/dm-rq.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-rq.c	2018-10-30 15:09:45.770000000 +0100
+++ linux-2.6/drivers/md/dm-rq.c	2018-11-06 00:53:13.870000000 +0100
@@ -172,6 +172,12 @@ static void rq_end_stats(struct mapped_d
 	}
 }
 
+static unsigned rq_md_in_flight(struct mapped_device *md)
+{
+	return	atomic_read(&dm_disk(md)->part0.in_flight[READ]) +
+		atomic_read(&dm_disk(md)->part0.in_flight[WRITE]);
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -185,7 +191,7 @@ static void rq_completed(struct mapped_d
 	atomic_dec(&dm_disk(md)->part0.in_flight[rw]);
 
 	/* nudge anyone waiting on suspend queue */
-	if (!md_in_flight(md))
+	if (!rq_md_in_flight(md))
 		wake_up(&md->wait);
 
 	/*
@@ -674,7 +680,7 @@ static void dm_old_request_fn(struct req
 			pos = blk_rq_pos(rq);
 
 		if ((dm_old_request_peeked_before_merge_deadline(md) &&
-		     md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
+		     rq_md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
 		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
 		    (ti->type->busy && ti->type->busy(ti))) {
 			blk_delay_queue(q, 10);




More information about the dm-devel mailing list