[dm-devel] [PATCH 1/1] block: Convert hd_struct in_flight from atomic to percpu

Brian King brking at linux.vnet.ibm.com
Wed Jun 28 21:12:39 UTC 2017


This patch converts the in_flight counter in struct hd_struct from a pair of
atomics to a pair of percpu counters. This eliminates a couple of atomics from
the hot path. When running this on a Power system, to a single null_blk device
with 80 submission queues, irq mode 0, with 80 fio jobs, I saw IOPs go from
1.5M IO/s to 11.4M IO/s.

Signed-off-by: Brian King <brking at linux.vnet.ibm.com>
---

 block/bio.c               |    4 ++--
 block/blk-core.c          |    4 ++--
 block/blk-merge.c         |    2 +-
 block/genhd.c             |    2 +-
 block/partition-generic.c |    6 +++---
 drivers/md/dm.c           |   10 ++++++----
 include/linux/genhd.h     |   18 +++++++++---------
 7 files changed, 24 insertions(+), 22 deletions(-)

diff -puN include/linux/genhd.h~blk_in_flight_atomic_remove include/linux/genhd.h
--- linux-block/include/linux/genhd.h~blk_in_flight_atomic_remove	2017-06-28 16:06:43.037948079 -0500
+++ linux-block-bjking1/include/linux/genhd.h	2017-06-28 16:06:43.064947978 -0500
@@ -87,6 +87,7 @@ struct disk_stats {
 	unsigned long ticks[2];
 	unsigned long io_ticks;
 	unsigned long time_in_queue;
+	unsigned long in_flight[2];
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH	64
@@ -120,7 +121,6 @@ struct hd_struct {
 	int make_it_fail;
 #endif
 	unsigned long stamp;
-	atomic_t in_flight[2];
 #ifdef	CONFIG_SMP
 	struct disk_stats __percpu *dkstats;
 #else
@@ -362,23 +362,23 @@ static inline void free_part_stats(struc
 #define part_stat_sub(cpu, gendiskp, field, subnd)			\
 	part_stat_add(cpu, gendiskp, field, -subnd)
 
-static inline void part_inc_in_flight(struct hd_struct *part, int rw)
+static inline void part_inc_in_flight(int cpu, struct hd_struct *part, int rw)
 {
-	atomic_inc(&part->in_flight[rw]);
+	part_stat_inc(cpu, part, in_flight[rw]);
 	if (part->partno)
-		atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
+		part_stat_inc(cpu, &part_to_disk(part)->part0, in_flight[rw]);
 }
 
-static inline void part_dec_in_flight(struct hd_struct *part, int rw)
+static inline void part_dec_in_flight(int cpu, struct hd_struct *part, int rw)
 {
-	atomic_dec(&part->in_flight[rw]);
+	part_stat_dec(cpu, part, in_flight[rw]);
 	if (part->partno)
-		atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
+		part_stat_dec(cpu, &part_to_disk(part)->part0, in_flight[rw]);
 }
 
-static inline int part_in_flight(struct hd_struct *part)
+static inline unsigned long part_in_flight(struct hd_struct *part)
 {
-	return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]);
+	return part_stat_read(part, in_flight[0]) + part_stat_read(part, in_flight[1]);
 }
 
 static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
diff -puN block/bio.c~blk_in_flight_atomic_remove block/bio.c
--- linux-block/block/bio.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.041948064 -0500
+++ linux-block-bjking1/block/bio.c	2017-06-28 16:06:43.065947974 -0500
@@ -1737,7 +1737,7 @@ void generic_start_io_acct(int rw, unsig
 	part_round_stats(cpu, part);
 	part_stat_inc(cpu, part, ios[rw]);
 	part_stat_add(cpu, part, sectors[rw], sectors);
-	part_inc_in_flight(part, rw);
+	part_inc_in_flight(cpu, part, rw);
 
 	part_stat_unlock();
 }
@@ -1751,7 +1751,7 @@ void generic_end_io_acct(int rw, struct
 
 	part_stat_add(cpu, part, ticks[rw], duration);
 	part_round_stats(cpu, part);
-	part_dec_in_flight(part, rw);
+	part_dec_in_flight(cpu, part, rw);
 
 	part_stat_unlock();
 }
diff -puN block/blk-core.c~blk_in_flight_atomic_remove block/blk-core.c
--- linux-block/block/blk-core.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.045948049 -0500
+++ linux-block-bjking1/block/blk-core.c	2017-06-28 16:06:43.066947970 -0500
@@ -2435,7 +2435,7 @@ void blk_account_io_done(struct request
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part, rw);
+		part_dec_in_flight(cpu, part, rw);
 
 		hd_struct_put(part);
 		part_stat_unlock();
@@ -2493,7 +2493,7 @@ void blk_account_io_start(struct request
 			hd_struct_get(part);
 		}
 		part_round_stats(cpu, part);
-		part_inc_in_flight(part, rw);
+		part_inc_in_flight(cpu, part, rw);
 		rq->part = part;
 	}
 
diff -puN block/blk-merge.c~blk_in_flight_atomic_remove block/blk-merge.c
--- linux-block/block/blk-merge.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.048948038 -0500
+++ linux-block-bjking1/block/blk-merge.c	2017-06-28 16:06:43.067947967 -0500
@@ -634,7 +634,7 @@ static void blk_account_io_merge(struct
 		part = req->part;
 
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part, rq_data_dir(req));
+		part_dec_in_flight(cpu, part, rq_data_dir(req));
 
 		hd_struct_put(part);
 		part_stat_unlock();
diff -puN block/genhd.c~blk_in_flight_atomic_remove block/genhd.c
--- linux-block/block/genhd.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.052948023 -0500
+++ linux-block-bjking1/block/genhd.c	2017-06-28 16:06:43.068947963 -0500
@@ -1220,7 +1220,7 @@ static int diskstats_show(struct seq_fil
 		part_round_stats(cpu, hd);
 		part_stat_unlock();
 		seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
-			   "%u %lu %lu %lu %u %u %u %u\n",
+			   "%u %lu %lu %lu %u %lu %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[READ]),
diff -puN block/partition-generic.c~blk_in_flight_atomic_remove block/partition-generic.c
--- linux-block/block/partition-generic.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.055948012 -0500
+++ linux-block-bjking1/block/partition-generic.c	2017-06-28 16:06:43.069947959 -0500
@@ -120,7 +120,7 @@ ssize_t part_stat_show(struct device *de
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8lu %8u %8u"
 		"\n",
 		part_stat_read(p, ios[READ]),
 		part_stat_read(p, merges[READ]),
@@ -140,8 +140,8 @@ ssize_t part_inflight_show(struct device
 {
 	struct hd_struct *p = dev_to_part(dev);
 
-	return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
-		atomic_read(&p->in_flight[1]));
+	return sprintf(buf, "%8lu %8lu\n", part_stat_read(p, in_flight[0]),
+		part_stat_read(p, in_flight[1]));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff -puN drivers/md/dm.c~blk_in_flight_atomic_remove drivers/md/dm.c
--- linux-block/drivers/md/dm.c~blk_in_flight_atomic_remove	2017-06-28 16:06:43.058948000 -0500
+++ linux-block-bjking1/drivers/md/dm.c	2017-06-28 16:06:43.070947955 -0500
@@ -517,9 +517,9 @@ static void start_io_acct(struct dm_io *
 
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_inc_in_flight(cpu, &dm_disk(md)->part0, rw);
+	atomic_inc(&md->pending[rw]);
 	part_stat_unlock();
-	atomic_set(&dm_disk(md)->part0.in_flight[rw],
-		atomic_inc_return(&md->pending[rw]));
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -532,7 +532,7 @@ static void end_io_acct(struct dm_io *io
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
 	unsigned long duration = jiffies - io->start_time;
-	int pending;
+	int pending, cpu;
 	int rw = bio_data_dir(bio);
 
 	generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
@@ -546,9 +546,11 @@ static void end_io_acct(struct dm_io *io
 	 * After this is decremented the bio must not be touched if it is
 	 * a flush.
 	 */
+	cpu = part_stat_lock();
 	pending = atomic_dec_return(&md->pending[rw]);
-	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
+	part_dec_in_flight(cpu, &dm_disk(md)->part0, rw);
 	pending += atomic_read(&md->pending[rw^0x1]);
+	part_stat_unlock();
 
 	/* nudge anyone waiting on suspend queue */
 	if (!pending)
_




More information about the dm-devel mailing list