[dm-devel] [PATCH] dm writecache: make writeback pause configurable
Mikulas Patocka
mpatocka at redhat.com
Mon Jun 28 13:59:37 UTC 2021
The patch 95b88f4d71cb953e02206be3c757083601391a0f ("dm writecache: pause
writeback if cache full and origin being written directly") introduced
code that pauses cache flushing if we are issuing writes directly to the
origin.
This patch makes the pause timeout configurable (via the option
"pause_writeback", given in milliseconds). It also changes the default
from 1s to 3s because it performed better.
Fixes: 95b88f4d71cb ("dm writecache: pause writeback if cache full and origin being written directly")
---
Documentation/admin-guide/device-mapper/writecache.rst | 5 +
drivers/md/dm-io-tracker.h | 12 ++++
drivers/md/dm-writecache.c | 47 ++++++++++++++---
3 files changed, 54 insertions(+), 10 deletions(-)
Index: linux-dm/drivers/md/dm-writecache.c
===================================================================
--- linux-dm.orig/drivers/md/dm-writecache.c 2021-06-28 14:13:15.000000000 +0200
+++ linux-dm/drivers/md/dm-writecache.c 2021-06-28 14:13:15.000000000 +0200
@@ -30,6 +30,7 @@
#define AUTOCOMMIT_MSEC 1000
#define MAX_AGE_DIV 16
#define MAX_AGE_UNSPECIFIED -1UL
+#define PAUSE_WRITEBACK (HZ * 3)
#define BITMAP_GRANULARITY 65536
#if BITMAP_GRANULARITY < PAGE_SIZE
@@ -125,6 +126,7 @@ struct dm_writecache {
size_t freelist_high_watermark;
size_t freelist_low_watermark;
unsigned long max_age;
+ unsigned long pause;
unsigned uncommitted_blocks;
unsigned autocommit_blocks;
@@ -174,11 +176,13 @@ struct dm_writecache {
bool cleaner:1;
bool cleaner_set:1;
bool metadata_only:1;
+ bool pause_set:1;
unsigned high_wm_percent_value;
unsigned low_wm_percent_value;
unsigned autocommit_time_value;
unsigned max_age_value;
+ unsigned pause_value;
unsigned writeback_all;
struct workqueue_struct *writeback_wq;
@@ -1470,9 +1474,11 @@ bio_copy:
}
unlock_remap_origin:
- if (bio_data_dir(bio) != READ) {
- dm_iot_io_begin(&wc->iot, 1);
- bio->bi_private = (void *)2;
+ if (likely(wc->pause != 0)) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
+ dm_iot_io_begin(&wc->iot, 1);
+ bio->bi_private = (void *)2;
+ }
}
bio_set_dev(bio, wc->dev->bdev);
wc_unlock(wc);
@@ -1837,10 +1843,18 @@ static void writecache_writeback(struct
dm_kcopyd_client_flush(wc->dm_kcopyd);
}
- if (!wc->writeback_all && !dm_suspended(wc->ti)) {
- while (!dm_iot_idle_for(&wc->iot, HZ)) {
- cond_resched();
- msleep(1000);
+ if (likely(wc->pause != 0)) {
+ while (1) {
+ unsigned long id;
+ if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) || unlikely(dm_suspended(wc->ti)))
+ break;
+ id = dm_iot_idle_time(&wc->iot);
+ if (id >= wc->pause)
+ break;
+ id = wc->pause - id;
+ if (id > HZ)
+ id = HZ;
+ schedule_timeout_idle(id);
}
}
@@ -2113,7 +2127,7 @@ static int writecache_ctr(struct dm_targ
struct wc_memory_superblock s;
static struct dm_arg _args[] = {
- {0, 17, "Invalid number of feature args"},
+ {0, 18, "Invalid number of feature args"},
};
as.argc = argc;
@@ -2206,6 +2220,7 @@ static int writecache_ctr(struct dm_targ
goto bad;
}
} else {
+ wc->pause = PAUSE_WRITEBACK;
r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
if (r) {
ti->error = "Could not allocate mempool";
@@ -2344,6 +2359,18 @@ static int writecache_ctr(struct dm_targ
} else goto invalid_optional;
} else if (!strcasecmp(string, "metadata_only")) {
wc->metadata_only = true;
+ } else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
+ unsigned pause_msecs;
+ if (WC_MODE_PMEM(wc))
+ goto invalid_optional;
+ string = dm_shift_arg(&as), opt_params--;
+ if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
+ goto invalid_optional;
+ if (pause_msecs > 60000)
+ goto invalid_optional;
+ wc->pause = msecs_to_jiffies(pause_msecs);
+ wc->pause_set = true;
+ wc->pause_value = pause_msecs;
} else {
invalid_optional:
r = -EINVAL;
@@ -2569,6 +2596,8 @@ static void writecache_status(struct dm_
extra_args++;
if (wc->metadata_only)
extra_args++;
+ if (wc->pause_set)
+ extra_args += 2;
DMEMIT("%u", extra_args);
if (wc->start_sector_set)
@@ -2591,6 +2620,8 @@ static void writecache_status(struct dm_
DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
if (wc->metadata_only)
DMEMIT(" metadata_only");
+ if (wc->pause_set)
+ DMEMIT(" pause_writeback %u", wc->pause_value);
break;
}
}
Index: linux-dm/drivers/md/dm-io-tracker.h
===================================================================
--- linux-dm.orig/drivers/md/dm-io-tracker.h 2021-06-28 14:13:15.000000000 +0200
+++ linux-dm/drivers/md/dm-io-tracker.h 2021-06-28 14:13:15.000000000 +0200
@@ -45,6 +45,18 @@ static inline bool dm_iot_idle_for(struc
return r;
}
+static inline unsigned long dm_iot_idle_time(struct dm_io_tracker *iot)
+{
+ unsigned long r = 0;
+
+ spin_lock_irq(&iot->lock);
+ if (!iot->in_flight)
+ r = jiffies - iot->idle_time;
+ spin_unlock_irq(&iot->lock);
+
+ return r;
+}
+
static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len)
{
spin_lock_irq(&iot->lock);
Index: linux-dm/Documentation/admin-guide/device-mapper/writecache.rst
===================================================================
--- linux-dm.orig/Documentation/admin-guide/device-mapper/writecache.rst 2021-06-28 14:12:30.000000000 +0200
+++ linux-dm/Documentation/admin-guide/device-mapper/writecache.rst 2021-06-28 14:19:55.000000000 +0200
@@ -12,7 +12,6 @@ first sector should contain valid superb
Constructor parameters:
1. type of the cache device - "p" or "s"
-
- p - persistent memory
- s - SSD
2. the underlying device that will be cached
@@ -21,7 +20,6 @@ Constructor parameters:
size)
5. the number of optional parameters (the parameters with an argument
count as two)
-
start_sector n (default: 0)
offset from the start of cache device in 512-byte sectors
high_watermark n (default: 50)
@@ -71,6 +69,9 @@ Constructor parameters:
metadata_only
only metadata is promoted to the cache. This option
improves performance for heavier REQ_META workloads.
+ pause_writeback n (default: 3000)
+ pause writeback if there was some write I/O redirected to
+ the origin volume in the last n milliseconds
Status:
1. error indicator - 0 if there was no error, otherwise error number
More information about the dm-devel
mailing list