[dm-devel] [PATCH] dm writecache: make writeback pause configurable

Mikulas Patocka mpatocka at redhat.com
Mon Jun 28 13:59:37 UTC 2021


The patch 95b88f4d71cb953e02206be3c757083601391a0f ("dm writecache: pause
writeback if cache full and origin being written directly") introduced
code that pauses cache flushing while we are issuing writes directly to
the origin.

This patch makes the pause timeout configurable (via the option
"pause_writeback"). It also changes the default from 1s to 3s because the
longer pause performed better.

Fixes: 95b88f4d71cb ("dm writecache: pause writeback if cache full and origin being written directly")

---
 Documentation/admin-guide/device-mapper/writecache.rst |    5 +
 drivers/md/dm-io-tracker.h                             |   12 ++++
 drivers/md/dm-writecache.c                             |   47 ++++++++++++++---
 3 files changed, 54 insertions(+), 10 deletions(-)

Index: linux-dm/drivers/md/dm-writecache.c
===================================================================
--- linux-dm.orig/drivers/md/dm-writecache.c	2021-06-28 14:13:15.000000000 +0200
+++ linux-dm/drivers/md/dm-writecache.c	2021-06-28 14:13:15.000000000 +0200
@@ -30,6 +30,7 @@
 #define AUTOCOMMIT_MSEC			1000
 #define MAX_AGE_DIV			16
 #define MAX_AGE_UNSPECIFIED		-1UL
+#define PAUSE_WRITEBACK			(HZ * 3)
 
 #define BITMAP_GRANULARITY	65536
 #if BITMAP_GRANULARITY < PAGE_SIZE
@@ -125,6 +126,7 @@ struct dm_writecache {
 	size_t freelist_high_watermark;
 	size_t freelist_low_watermark;
 	unsigned long max_age;
+	unsigned long pause;
 
 	unsigned uncommitted_blocks;
 	unsigned autocommit_blocks;
@@ -174,11 +176,13 @@ struct dm_writecache {
 	bool cleaner:1;
 	bool cleaner_set:1;
 	bool metadata_only:1;
+	bool pause_set:1;
 
 	unsigned high_wm_percent_value;
 	unsigned low_wm_percent_value;
 	unsigned autocommit_time_value;
 	unsigned max_age_value;
+	unsigned pause_value;
 
 	unsigned writeback_all;
 	struct workqueue_struct *writeback_wq;
@@ -1470,9 +1474,11 @@ bio_copy:
 	}
 
 unlock_remap_origin:
-	if (bio_data_dir(bio) != READ) {
-		dm_iot_io_begin(&wc->iot, 1);
-		bio->bi_private = (void *)2;
+	if (likely(wc->pause != 0)) {
+		 if (bio_op(bio) == REQ_OP_WRITE) {
+			dm_iot_io_begin(&wc->iot, 1);
+			bio->bi_private = (void *)2;
+		}
 	}
 	bio_set_dev(bio, wc->dev->bdev);
 	wc_unlock(wc);
@@ -1837,10 +1843,18 @@ static void writecache_writeback(struct
 		dm_kcopyd_client_flush(wc->dm_kcopyd);
 	}
 
-	if (!wc->writeback_all && !dm_suspended(wc->ti)) {
-		while (!dm_iot_idle_for(&wc->iot, HZ)) {
-			cond_resched();
-			msleep(1000);
+	if (likely(wc->pause != 0)) {
+		while (1) {
+			unsigned long id;
+			if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) || unlikely(dm_suspended(wc->ti)))
+				break;
+			id = dm_iot_idle_time(&wc->iot);
+			if (id >= wc->pause)
+				break;
+			id = wc->pause - id;
+			if (id > HZ)
+				id = HZ;
+			schedule_timeout_idle(id);
 		}
 	}
 
@@ -2113,7 +2127,7 @@ static int writecache_ctr(struct dm_targ
 	struct wc_memory_superblock s;
 
 	static struct dm_arg _args[] = {
-		{0, 17, "Invalid number of feature args"},
+		{0, 18, "Invalid number of feature args"},
 	};
 
 	as.argc = argc;
@@ -2206,6 +2220,7 @@ static int writecache_ctr(struct dm_targ
 			goto bad;
 		}
 	} else {
+		wc->pause = PAUSE_WRITEBACK;
 		r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
 		if (r) {
 			ti->error = "Could not allocate mempool";
@@ -2344,6 +2359,18 @@ static int writecache_ctr(struct dm_targ
 			} else goto invalid_optional;
 		} else if (!strcasecmp(string, "metadata_only")) {
 			wc->metadata_only = true;
+		} else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
+			unsigned pause_msecs;
+			if (WC_MODE_PMEM(wc))
+				goto invalid_optional;
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
+				goto invalid_optional;
+			if (pause_msecs > 60000)
+				goto invalid_optional;
+			wc->pause = msecs_to_jiffies(pause_msecs);
+			wc->pause_set = true;
+			wc->pause_value = pause_msecs;
 		} else {
 invalid_optional:
 			r = -EINVAL;
@@ -2569,6 +2596,8 @@ static void writecache_status(struct dm_
 			extra_args++;
 		if (wc->metadata_only)
 			extra_args++;
+		if (wc->pause_set)
+			extra_args += 2;
 
 		DMEMIT("%u", extra_args);
 		if (wc->start_sector_set)
@@ -2591,6 +2620,8 @@ static void writecache_status(struct dm_
 			DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
 		if (wc->metadata_only)
 			DMEMIT(" metadata_only");
+		if (wc->pause_set)
+			DMEMIT(" pause_writeback %u", wc->pause_value);
 		break;
 	}
 }
Index: linux-dm/drivers/md/dm-io-tracker.h
===================================================================
--- linux-dm.orig/drivers/md/dm-io-tracker.h	2021-06-28 14:13:15.000000000 +0200
+++ linux-dm/drivers/md/dm-io-tracker.h	2021-06-28 14:13:15.000000000 +0200
@@ -45,6 +45,18 @@ static inline bool dm_iot_idle_for(struc
 	return r;
 }
 
+static inline unsigned long dm_iot_idle_time(struct dm_io_tracker *iot)	/* jiffies elapsed since iot->idle_time, or 0 while I/O is in flight */
+{
+	unsigned long r = 0;	/* stays 0 when iot->in_flight is non-zero */
+
+	spin_lock_irq(&iot->lock);	/* in_flight and idle_time are read under iot->lock */
+	if (!iot->in_flight)
+		r = jiffies - iot->idle_time;	/* unsigned subtraction of the recorded idle_time from the current jiffies */
+	spin_unlock_irq(&iot->lock);
+
+	return r;
+}
+
 static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len)
 {
 	spin_lock_irq(&iot->lock);
Index: linux-dm/Documentation/admin-guide/device-mapper/writecache.rst
===================================================================
--- linux-dm.orig/Documentation/admin-guide/device-mapper/writecache.rst	2021-06-28 14:12:30.000000000 +0200
+++ linux-dm/Documentation/admin-guide/device-mapper/writecache.rst	2021-06-28 14:19:55.000000000 +0200
@@ -12,7 +12,6 @@ first sector should contain valid superb
 Constructor parameters:
 
 1. type of the cache device - "p" or "s"
-
 	- p - persistent memory
 	- s - SSD
 2. the underlying device that will be cached
@@ -21,7 +20,6 @@ Constructor parameters:
    size)
 5. the number of optional parameters (the parameters with an argument
    count as two)
-
 	start_sector n		(default: 0)
 		offset from the start of cache device in 512-byte sectors
 	high_watermark n	(default: 50)
@@ -71,6 +69,9 @@ Constructor parameters:
 	metadata_only
 		only metadata is promoted to the cache. This option
 		improves performance for heavier REQ_META workloads.
+	pause_writeback n	(default: 3000)
+		pause writeback if there was some write I/O redirected to
+		the origin volume in the last n milliseconds
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number




More information about the dm-devel mailing list