[dm-devel] [PATCH 2/4] dm-latency: add I/O latency measurement in device mapper

Coly Li colyli at gmail.com
Thu Feb 26 06:42:33 UTC 2015


From: Coly Li <bosong.ly at alibaba-inc.com>

This patch adds I/O latency measurement code in dm.c. The basic idea is,
1) add timestamp in struct dm_io as unsigned long start_time_usec
2) set timestamp when dm_io issued to underlaying block layer.
3) when all sub-I/O completed, calculate time difference against
start_time_usec as I/O latency.
4) account the latency to corresponding counters like,
    - latency_stats_us
    - latency_stats_ms
    - latency_stats_s

Signed-off-by: Coly Li <bosong.ly at alibaba-inc.com>
Reviewed-by: Robin Dong <sanbai at alibaba-inc.com>
Reviewed-by: Tao Ma <boyu.mt at taobao.com>
---
  drivers/md/dm.c |   69
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  drivers/md/dm.h |   13 +++++++++++++
  2 files changed, 82 insertions(+)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e2532c8..200954a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
  #include <linux/hdreg.h>
  #include <linux/delay.h>
  #include <linux/wait.h>
+#include <linux/clocksource.h>

  #include <trace/events/block.h>

@@ -67,6 +68,7 @@ struct dm_io {
  	atomic_t io_count;
  	struct bio *bio;
  	unsigned long start_time;
+	unsigned long start_time_usec;
  	spinlock_t endio_lock;
  	struct dm_stats_aux stats_aux;
  };
@@ -521,14 +523,72 @@ static int md_in_flight(struct mapped_device *md)
  	       atomic_read(&md->pending[WRITE]);
  }

+static unsigned long long us2msecs(unsigned long long usec)
+{
+	usec += 500;
+	do_div(usec, 1000);
+	return usec;
+}
+
+static unsigned long long us2secs(unsigned long long usec)
+{
+	usec += 500;
+	do_div(usec, 1000);
+	usec += 500;
+	do_div(usec, 1000);
+	return usec;
+}
+
+static void update_latency_stats(struct mapped_device *md, struct dm_io
*io)
+{
+	uint64_t now, latency;
+	int idx;
+	ktime_t ts;
+
+	ts = ktime_get();
+	now = (uint64_t)ktime_to_us(ts);
+
+	/*
+	 * if now <= io->start_time_usec, it means counter
+	 * in ktime_get() over flows, just ignore this I/O
+	 */
+	if (unlikely(now < io->start_time_usec))
+		return;
+
+	latency = (uint64_t)(now - io->start_time_usec);
+	if (latency < 1000) {
+		/* microseconds */
+		idx = latency/DM_LATENCY_STATS_US_GRAINSIZE;
+		if (idx > (DM_LATENCY_STATS_US_NR - 1))
+			idx = DM_LATENCY_STATS_US_NR - 1;
+		atomic_inc(&(md->latency_stats_us[idx]));
+	} else if (latency < 1000000) {
+		/* milliseconds */
+		idx = us2msecs(latency)/DM_LATENCY_STATS_MS_GRAINSIZE;
+		if (idx > (DM_LATENCY_STATS_MS_NR - 1))
+			idx = DM_LATENCY_STATS_MS_NR - 1;
+		atomic_inc(&(md->latency_stats_ms[idx]));
+	} else {
+		/* seconds */
+		idx = us2secs(latency)/DM_LATENCY_STATS_S_GRAINSIZE;
+		if (idx > (DM_LATENCY_STATS_S_NR - 1))
+			idx = DM_LATENCY_STATS_S_NR - 1;
+		atomic_inc(&(md->latency_stats_s[idx]));
+	}
+}
+
  static void start_io_acct(struct dm_io *io)
  {
  	struct mapped_device *md = io->md;
  	struct bio *bio = io->bio;
+	ktime_t ts;
+
  	int cpu;
  	int rw = bio_data_dir(bio);

  	io->start_time = jiffies;
+	ts = ktime_get();
+	io->start_time_usec = (unsigned long)ktime_to_us(ts);

  	cpu = part_stat_lock();
  	part_round_stats(cpu, &dm_disk(md)->part0);
@@ -549,6 +609,7 @@ static void end_io_acct(struct dm_io *io)
  	int pending;
  	int rw = bio_data_dir(bio);

+	update_latency_stats(md, io);
  	generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);

  	if (unlikely(dm_stats_used(&md->stats)))
@@ -2118,6 +2179,14 @@ static struct mapped_device *alloc_dev(int minor)

  	dm_stats_init(&md->stats);

+	/* initial latency stats buckets */
+	for (r = 0; r < DM_LATENCY_STATS_S_NR; r++)
+		atomic_set(&(md->latency_stats_s[r]), 0);
+	for (r = 0; r < DM_LATENCY_STATS_MS_NR; r++)
+		atomic_set(&(md->latency_stats_ms[r]), 0);
+	for (r = 0; r < DM_LATENCY_STATS_US_NR; r++)
+		atomic_set(&(md->latency_stats_us[r]), 0);
+
  	/* Populate the mapping, nobody knows we exist yet */
  	spin_lock(&_minor_lock);
  	old_md = idr_replace(&_minor_idr, md, minor);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 3014667..6c254f4 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -32,6 +32,14 @@
   */
  #define DM_STATUS_NOFLUSH_FLAG		(1 << 0)

+/* 100s is long enough to measure disk I/O latency */
+#define DM_LATENCY_STATS_S_NR		100
+#define DM_LATENCY_STATS_S_GRAINSIZE	(1000/DM_LATENCY_STATS_S_NR)
+#define DM_LATENCY_STATS_MS_NR		100
+#define DM_LATENCY_STATS_MS_GRAINSIZE	(1000/DM_LATENCY_STATS_MS_NR)
+#define DM_LATENCY_STATS_US_NR		100
+#define DM_LATENCY_STATS_US_GRAINSIZE	(1000/DM_LATENCY_STATS_US_NR)
+
  /*
   * Type of table and mapped_device's mempool
   */
@@ -135,6 +143,11 @@ struct mapped_device {

  	struct kthread_worker kworker;
  	struct task_struct *kworker_task;
+
+	/* latency statistic buckets */
+	atomic_t latency_stats_s[DM_LATENCY_STATS_S_NR];
+	atomic_t latency_stats_ms[DM_LATENCY_STATS_MS_NR];
+	atomic_t latency_stats_us[DM_LATENCY_STATS_US_NR];
  };

  /*




More information about the dm-devel mailing list