[dm-devel] [PATCH 1/2] dm-zoned: cache device for zones
Hannes Reinecke
hare at suse.de
Mon Mar 23 15:03:51 UTC 2020
Implement 'cache' zones which reside on a different device.
The device is logically split into zones, which are then used
as 'cache' zones, similar to the existing random write zones.
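
For reference (not part of the patch), a minimal userspace sketch of the
zone ID layout this introduces, mirroring the updated dmz_start_sect()
below: cache zones are appended after the zoned device's zones, and their
IDs are rebased before computing a start sector on the cache device. The
struct, names and values are illustrative stand-ins, and a plain multiply
replaces the kernel's zone_nr_sectors_shift.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the fields of struct dmz_dev used here. */
struct dev_info {
	unsigned int nr_zones;		/* zones on the zoned device */
	unsigned int zone_nr_sectors;	/* sectors per zone */
};

/*
 * Zone IDs [0, nr_zones) live on the zoned device; IDs at or above
 * nr_zones address 'cache' zones carved out of the regular device.
 */
static bool zone_is_cache(const struct dev_info *dev, unsigned int zone_id)
{
	return zone_id >= dev->nr_zones;
}

/*
 * Mirrors dmz_start_sect(): cache zone IDs are rebased so that the
 * first cache zone starts at sector 0 of the cache device.
 */
static unsigned long long zone_start_sect(const struct dev_info *dev,
					  unsigned int zone_id)
{
	if (zone_is_cache(dev, zone_id))
		zone_id -= dev->nr_zones;
	return (unsigned long long)zone_id * dev->zone_nr_sectors;
}

int main(void)
{
	struct dev_info dev = { .nr_zones = 4, .zone_nr_sectors = 524288 };

	/* Zone 5 is the second cache zone: it maps to sector 524288 of
	 * the cache device, not of the zoned device. */
	printf("zone 5 -> %s device, sector %llu\n",
	       zone_is_cache(&dev, 5) ? "cache" : "zoned",
	       zone_start_sect(&dev, 5));
	return 0;
}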
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
drivers/md/dm-zoned-metadata.c | 174 ++++++++++++++++++++++++++++-----
drivers/md/dm-zoned-reclaim.c | 76 +++++++++++---
drivers/md/dm-zoned-target.c | 109 ++++++++++++++++++---
drivers/md/dm-zoned.h | 31 +++++-
4 files changed, 339 insertions(+), 51 deletions(-)
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 369de15c4e80..41cc3a29db0b 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -132,6 +132,8 @@ struct dmz_sb {
struct dmz_metadata {
struct dmz_dev *dev;
+ struct dmz_cdev *cdev;
+
sector_t zone_bitmap_size;
unsigned int zone_nr_bitmap_blocks;
unsigned int zone_bits_per_mblk;
@@ -139,10 +141,12 @@ struct dmz_metadata {
unsigned int nr_bitmap_blocks;
unsigned int nr_map_blocks;
+ unsigned int nr_zones;
unsigned int nr_useable_zones;
unsigned int nr_meta_blocks;
unsigned int nr_meta_zones;
unsigned int nr_data_zones;
+ unsigned int nr_cache_zones;
unsigned int nr_rnd_zones;
unsigned int nr_reserved_seq;
unsigned int nr_chunks;
@@ -173,6 +177,11 @@ struct dmz_metadata {
struct list_head unmap_rnd_list;
struct list_head map_rnd_list;
+ unsigned int nr_cache;
+ atomic_t unmap_nr_cache;
+ struct list_head unmap_cache_list;
+ struct list_head map_cache_list;
+
unsigned int nr_seq;
atomic_t unmap_nr_seq;
struct list_head unmap_seq_list;
@@ -189,17 +198,25 @@ struct dmz_metadata {
*/
unsigned int dmz_id(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return ((unsigned int)(zone - zmd->zones));
+ return zone->id;
}
sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_sectors_shift;
+ sector_t zone_id = dmz_id(zmd, zone);
+
+ if (dmz_is_cache(zone))
+ zone_id -= zmd->dev->nr_zones;
+ return zone_id << zmd->dev->zone_nr_sectors_shift;
}
sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_blocks_shift;
+ sector_t zone_id = dmz_id(zmd, zone);
+
+ if (dmz_is_cache(zone))
+ zone_id -= zmd->dev->nr_zones;
+ return zone_id << zmd->dev->zone_nr_blocks_shift;
}
unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
@@ -217,6 +234,16 @@ unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
return atomic_read(&zmd->unmap_nr_rnd);
}
+unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
+{
+ return zmd->nr_cache;
+}
+
+unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
+{
+ return atomic_read(&zmd->unmap_nr_cache);
+}
+
/*
* Lock/unlock mapping table.
* The map lock also protects all the zone lists.
@@ -865,6 +892,10 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_super *sb)
dmz_dev_err(dev, "Invalid number of metadata blocks");
return -ENXIO;
}
+ if (zmd->nr_cache_zones && nr_meta_zones >= zmd->nr_cache_zones) {
+ dmz_dev_err(dev, "Cache too small to hold metadata");
+ return -ENXIO;
+ }
if (!le32_to_cpu(sb->nr_reserved_seq) ||
le32_to_cpu(sb->nr_reserved_seq) >= (zmd->nr_useable_zones - nr_meta_zones)) {
@@ -1104,6 +1135,7 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
INIT_LIST_HEAD(&zone->link);
atomic_set(&zone->refcount, 0);
+ zone->id = idx;
zone->chunk = DMZ_MAP_UNMAPPED;
switch (blkz->type) {
@@ -1157,6 +1189,7 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
static int dmz_init_zones(struct dmz_metadata *zmd)
{
struct dmz_dev *dev = zmd->dev;
+ struct dmz_cdev *cdev = zmd->cdev;
int ret;
/* Init */
@@ -1167,12 +1200,16 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
DMZ_BLOCK_SIZE_BITS);
/* Allocate zone array */
- zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
+ zmd->nr_zones = dev->nr_zones;
+ if (cdev)
+ zmd->nr_zones += cdev->capacity / dev->zone_nr_sectors;
+
+ zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
if (!zmd->zones)
return -ENOMEM;
dmz_dev_info(dev, "Using %zu B for zone information",
- sizeof(struct dm_zone) * dev->nr_zones);
+ sizeof(struct dm_zone) * zmd->nr_zones);
/*
* Get zone information and initialize zone descriptors. At the same
@@ -1185,7 +1222,26 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
dmz_drop_zones(zmd);
return ret;
}
+ if (cdev) {
+ int idx;
+
+ for (idx = dev->nr_zones; idx < zmd->nr_zones; idx++) {
+ struct dm_zone *zone = &zmd->zones[idx];
+ INIT_LIST_HEAD(&zone->link);
+ atomic_set(&zone->refcount, 0);
+ zone->id = idx;
+ zone->chunk = DMZ_MAP_UNMAPPED;
+ set_bit(DMZ_CACHE, &zone->flags);
+ zmd->nr_cache_zones++;
+ zone->wp_block = 0;
+ zmd->nr_useable_zones++;
+ if (!zmd->sb_zone) {
+ /* Super block zone */
+ zmd->sb_zone = zone;
+ }
+ }
+ }
return 0;
}
@@ -1216,6 +1272,9 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
unsigned int noio_flag;
int ret;
+ if (dmz_is_cache(zone))
+ return 0;
+
/*
* Get zone information from disk. Since blkdev_report_zones() uses
* GFP_KERNEL by default for memory allocations, set the per-task
@@ -1283,7 +1342,8 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
*/
if (dmz_is_offline(zone) ||
dmz_is_readonly(zone) ||
- dmz_is_rnd(zone))
+ dmz_is_rnd(zone) ||
+ dmz_is_cache(zone))
return 0;
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
@@ -1345,7 +1405,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dzone_id == DMZ_MAP_UNMAPPED)
goto next;
- if (dzone_id >= dev->nr_zones) {
+ if (dzone_id >= zmd->nr_zones) {
dmz_dev_err(dev, "Chunk %u mapping: invalid data zone ID %u",
chunk, dzone_id);
return -EIO;
@@ -1358,6 +1418,8 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dmz_is_rnd(dzone))
list_add_tail(&dzone->link, &zmd->map_rnd_list);
+ else if (dmz_is_cache(dzone))
+ list_add_tail(&dzone->link, &zmd->map_cache_list);
else
list_add_tail(&dzone->link, &zmd->map_seq_list);
@@ -1366,14 +1428,14 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (bzone_id == DMZ_MAP_UNMAPPED)
goto next;
- if (bzone_id >= dev->nr_zones) {
+ if (bzone_id >= zmd->nr_zones) {
dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone ID %u",
chunk, bzone_id);
return -EIO;
}
bzone = dmz_get(zmd, bzone_id);
- if (!dmz_is_rnd(bzone)) {
+ if (!dmz_is_rnd(bzone) && !dmz_is_cache(bzone)) {
dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone %u",
chunk, bzone_id);
return -EIO;
@@ -1385,7 +1447,10 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
bzone->bzone = dzone;
dzone->bzone = bzone;
dmz_get_zone_weight(zmd, bzone);
- list_add_tail(&bzone->link, &zmd->map_rnd_list);
+ if (dmz_is_cache(bzone))
+ list_add_tail(&bzone->link, &zmd->map_cache_list);
+ else
+ list_add_tail(&bzone->link, &zmd->map_rnd_list);
next:
chunk++;
e++;
@@ -1398,13 +1463,15 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
* fully initialized. All remaining zones are unmapped data
* zones. Finish initializing those here.
*/
- for (i = 0; i < dev->nr_zones; i++) {
+ for (i = 0; i < zmd->nr_zones; i++) {
dzone = dmz_get(zmd, i);
if (dmz_is_meta(dzone))
continue;
if (dmz_is_rnd(dzone))
zmd->nr_rnd++;
+ else if (dmz_is_cache(dzone))
+ zmd->nr_cache++;
else
zmd->nr_seq++;
@@ -1419,6 +1486,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dmz_is_rnd(dzone)) {
list_add_tail(&dzone->link, &zmd->unmap_rnd_list);
atomic_inc(&zmd->unmap_nr_rnd);
+ } else if (dmz_is_cache(dzone)) {
+ list_add_tail(&dzone->link, &zmd->unmap_cache_list);
+ atomic_inc(&zmd->unmap_nr_cache);
} else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) {
list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list);
atomic_inc(&zmd->nr_reserved_seq_zones);
@@ -1460,6 +1530,9 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (dmz_is_seq(zone)) {
/* LRU rotate sequential zone */
list_add_tail(&zone->link, &zmd->map_seq_list);
+ } else if (dmz_is_cache(zone)) {
+ /* LRU rotate cache zone */
+ list_add_tail(&zone->link, &zmd->map_cache_list);
} else {
/* LRU rotate random zone */
list_add_tail(&zone->link, &zmd->map_rnd_list);
@@ -1557,6 +1630,29 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
return ERR_PTR(-EBUSY);
}
+/*
+ * Select a cache zone for reclaim.
+ */
+static struct dm_zone *dmz_get_cache_zone_for_reclaim(struct dmz_metadata *zmd)
+{
+ struct dm_zone *dzone = NULL;
+ struct dm_zone *zone;
+
+ if (list_empty(&zmd->map_cache_list))
+ return ERR_PTR(-EBUSY);
+
+ list_for_each_entry(zone, &zmd->map_cache_list, link) {
+ if (dmz_is_buf(zone))
+ dzone = zone->bzone;
+ else
+ dzone = zone;
+ if (dmz_lock_zone_reclaim(dzone))
+ return dzone;
+ }
+
+ return ERR_PTR(-EBUSY);
+}
+
/*
* Select a buffered sequential zone for reclaim.
*/
@@ -1590,13 +1686,17 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd)
* cannot be reclaimed. So choose a sequential zone to reclaim so
* that afterward a random zone can be reclaimed.
* (2) At least one free sequential zone is available, then choose
- * the oldest random zone (data or buffer) that can be locked.
+ * either the oldest cache zone, or, failing that, the oldest
+ * random zone (data or buffer) that can be locked.
*/
dmz_lock_map(zmd);
if (list_empty(&zmd->reserved_seq_zones_list))
zone = dmz_get_seq_zone_for_reclaim(zmd);
- else
- zone = dmz_get_rnd_zone_for_reclaim(zmd);
+ else {
+ zone = dmz_get_cache_zone_for_reclaim(zmd);
+ if (IS_ERR(zone))
+ zone = dmz_get_rnd_zone_for_reclaim(zmd);
+ }
dmz_unlock_map(zmd);
return zone;
@@ -1629,8 +1729,12 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
if (op != REQ_OP_WRITE)
goto out;
- /* Allocate a random zone */
- dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+ /* Try to allocate a cache zone first */
+ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_CACHE);
+ if (!dzone) {
+ /* Allocate a random zone */
+ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+ }
if (!dzone) {
if (dmz_bdev_is_dying(zmd->dev)) {
dzone = ERR_PTR(-EIO);
@@ -1730,8 +1834,12 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
if (bzone)
goto out;
- /* Allocate a random zone */
- bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+ /* Try to allocate a cache zone first */
+ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_CACHE);
+ if (!bzone) {
+ /* Allocate a random zone */
+ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+ }
if (!bzone) {
if (dmz_bdev_is_dying(zmd->dev)) {
bzone = ERR_PTR(-EIO);
@@ -1749,7 +1857,10 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
bzone->chunk = dzone->chunk;
bzone->bzone = dzone;
dzone->bzone = bzone;
- list_add_tail(&bzone->link, &zmd->map_rnd_list);
+ if (dmz_is_cache(bzone))
+ list_add_tail(&bzone->link, &zmd->map_cache_list);
+ else
+ list_add_tail(&bzone->link, &zmd->map_rnd_list);
out:
dmz_unlock_map(zmd);
@@ -1765,7 +1876,9 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
struct list_head *list;
struct dm_zone *zone;
- if (flags & DMZ_ALLOC_RND)
+ if (flags & DMZ_ALLOC_CACHE)
+ list = &zmd->unmap_cache_list;
+ else if (flags & DMZ_ALLOC_RND)
list = &zmd->unmap_rnd_list;
else
list = &zmd->unmap_seq_list;
@@ -1791,6 +1904,8 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
if (dmz_is_rnd(zone))
atomic_dec(&zmd->unmap_nr_rnd);
+ else if (dmz_is_cache(zone))
+ atomic_dec(&zmd->unmap_nr_cache);
else
atomic_dec(&zmd->unmap_nr_seq);
@@ -1817,6 +1932,9 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (dmz_is_rnd(zone)) {
list_add_tail(&zone->link, &zmd->unmap_rnd_list);
atomic_inc(&zmd->unmap_nr_rnd);
+ } else if (dmz_is_cache(zone)) {
+ list_add_tail(&zone->link, &zmd->unmap_cache_list);
+ atomic_inc(&zmd->unmap_nr_cache);
} else if (atomic_read(&zmd->nr_reserved_seq_zones) <
zmd->nr_reserved_seq) {
list_add_tail(&zone->link, &zmd->reserved_seq_zones_list);
@@ -1842,6 +1960,8 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone,
dzone->chunk = chunk;
if (dmz_is_rnd(dzone))
list_add_tail(&dzone->link, &zmd->map_rnd_list);
+ else if (dmz_is_cache(dzone))
+ list_add_tail(&dzone->link, &zmd->map_cache_list);
else
list_add_tail(&dzone->link, &zmd->map_seq_list);
}
@@ -2360,7 +2480,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
/*
* Initialize the zoned metadata.
*/
-int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata)
+int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_cdev *cdev,
+ struct dmz_metadata **metadata)
{
struct dmz_metadata *zmd;
unsigned int i, zid;
@@ -2372,6 +2493,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata)
return -ENOMEM;
zmd->dev = dev;
+ zmd->cdev = cdev;
zmd->mblk_rbtree = RB_ROOT;
init_rwsem(&zmd->mblk_sem);
mutex_init(&zmd->mblk_flush_lock);
@@ -2384,6 +2506,10 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata)
INIT_LIST_HEAD(&zmd->unmap_rnd_list);
INIT_LIST_HEAD(&zmd->map_rnd_list);
+ atomic_set(&zmd->unmap_nr_cache, 0);
+ INIT_LIST_HEAD(&zmd->unmap_cache_list);
+ INIT_LIST_HEAD(&zmd->map_cache_list);
+
atomic_set(&zmd->unmap_nr_seq, 0);
INIT_LIST_HEAD(&zmd->unmap_seq_list);
INIT_LIST_HEAD(&zmd->map_seq_list);
@@ -2407,7 +2533,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata)
zid = dmz_id(zmd, zmd->sb_zone);
for (i = 0; i < zmd->nr_meta_zones << 1; i++) {
zone = dmz_get(zmd, zid + i);
- if (!dmz_is_rnd(zone))
+ if (!dmz_is_rnd(zone) && !dmz_is_cache(zone))
goto err;
set_bit(DMZ_META, &zone->flags);
}
@@ -2449,6 +2575,8 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata)
zmd->nr_data_zones, zmd->nr_chunks);
dmz_dev_info(dev, " %u random zones (%u unmapped)",
zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd));
+ dmz_dev_info(dev, " %u cache zones (%u unmapped)",
+ zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache));
dmz_dev_info(dev, " %u sequential zones (%u unmapped)",
zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq));
dmz_dev_info(dev, " %u reserved sequential data zones",
@@ -2495,7 +2623,7 @@ int dmz_resume_metadata(struct dmz_metadata *zmd)
int ret;
/* Check zones */
- for (i = 0; i < dev->nr_zones; i++) {
+ for (i = 0; i < zmd->nr_zones; i++) {
zone = dmz_get(zmd, i);
if (!zone) {
dmz_dev_err(dev, "Unable to get zone %u", i);
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index e7ace908a9b7..c394ba19cf9b 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -14,6 +14,7 @@
struct dmz_reclaim {
struct dmz_metadata *metadata;
struct dmz_dev *dev;
+ struct dmz_cdev *cdev;
struct delayed_work work;
struct workqueue_struct *wq;
@@ -44,13 +45,15 @@ enum {
* Percentage of unmapped (free) random zones below which reclaim starts
* even if the target is busy.
*/
-#define DMZ_RECLAIM_LOW_UNMAP_RND 30
+#define DMZ_RECLAIM_LOW_UNMAP_RND 25
+#define DMZ_RECLAIM_LOW_UNMAP_CACHE 40
/*
* Percentage of unmapped (free) random zones above which reclaim will
* stop if the target is busy.
*/
#define DMZ_RECLAIM_HIGH_UNMAP_RND 50
+#define DMZ_RECLAIM_HIGH_UNMAP_CACHE 60
/*
* Align a sequential zone write pointer to chunk_block.
@@ -117,6 +120,7 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
{
struct dmz_metadata *zmd = zrc->metadata;
struct dmz_dev *dev = zrc->dev;
+ struct dmz_cdev *cdev = zrc->cdev;
struct dm_io_region src, dst;
sector_t block = 0, end_block;
sector_t nr_blocks;
@@ -156,11 +160,17 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
return ret;
}
- src.bdev = dev->bdev;
+ if (dmz_is_cache(src_zone))
+ src.bdev = cdev->bdev;
+ else
+ src.bdev = dev->bdev;
src.sector = dmz_blk2sect(src_zone_block + block);
src.count = dmz_blk2sect(nr_blocks);
- dst.bdev = dev->bdev;
+ if (dmz_is_cache(dst_zone))
+ dst.bdev = cdev->bdev;
+ else
+ dst.bdev = dev->bdev;
dst.sector = dmz_blk2sect(dst_zone_block + block);
dst.count = src.count;
@@ -354,7 +364,7 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
start = jiffies;
- if (dmz_is_rnd(dzone)) {
+ if (dmz_is_rnd(dzone) || dmz_is_cache(dzone)) {
if (!dmz_weight(dzone)) {
/* Empty zone */
dmz_reclaim_empty(zrc, dzone);
@@ -423,7 +433,7 @@ static inline int dmz_target_idle(struct dmz_reclaim *zrc)
/*
* Test if reclaim is necessary.
*/
-static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
+static bool dmz_should_rnd_reclaim(struct dmz_reclaim *zrc)
{
struct dmz_metadata *zmd = zrc->metadata;
unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
@@ -445,6 +455,32 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
}
+static bool dmz_should_cache_reclaim(struct dmz_reclaim *zrc)
+{
+ struct dmz_metadata *zmd = zrc->metadata;
+ unsigned int nr_cache = dmz_nr_cache_zones(zmd);
+ unsigned int nr_unmap_cache = dmz_nr_unmap_cache_zones(zmd);
+ unsigned int p_unmap_cache;
+
+ if (!nr_cache)
+ return false;
+
+ /* Reclaim when idle */
+ if (dmz_target_idle(zrc) && nr_unmap_cache < nr_cache)
+ return true;
+
+ /* If there are still plenty of cache zones, do not reclaim */
+ p_unmap_cache = nr_unmap_cache * 100 / nr_cache;
+ if (p_unmap_cache >= DMZ_RECLAIM_HIGH_UNMAP_CACHE)
+ return false;
+
+ /*
+ * If the percentage of unmapped cache zones is low,
+ * reclaim even if the target is busy.
+ */
+ return p_unmap_cache <= DMZ_RECLAIM_LOW_UNMAP_CACHE;
+}
+
/*
* Reclaim work function.
*/
@@ -452,14 +488,14 @@ static void dmz_reclaim_work(struct work_struct *work)
{
struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
struct dmz_metadata *zmd = zrc->metadata;
- unsigned int nr_rnd, nr_unmap_rnd;
- unsigned int p_unmap_rnd;
+ unsigned int nr_rnd, nr_unmap_rnd, nr_cache, nr_unmap_cache;
+ unsigned int p_unmap_rnd, p_unmap_cache = 100;
int ret;
if (dmz_bdev_is_dying(zrc->dev))
return;
- if (!dmz_should_reclaim(zrc)) {
+ if (!dmz_should_cache_reclaim(zrc) && !dmz_should_rnd_reclaim(zrc)) {
mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
return;
}
@@ -470,21 +506,33 @@ static void dmz_reclaim_work(struct work_struct *work)
* and slower if there are still some free random zones to avoid
* as much as possible to negatively impact the user workload.
*/
+ nr_cache = dmz_nr_cache_zones(zmd);
+ if (nr_cache) {
+ nr_unmap_cache = dmz_nr_unmap_cache_zones(zmd);
+ p_unmap_cache = nr_unmap_cache * 100 / nr_cache;
+ }
nr_rnd = dmz_nr_rnd_zones(zmd);
nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
- if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
- /* Idle or very low percentage: go fast */
+ if (dmz_target_idle(zrc)) {
+ /* Idle; go fast */
+ zrc->kc_throttle.throttle = 100;
+ } else if (p_unmap_cache < DMZ_RECLAIM_LOW_UNMAP_CACHE / 2) {
+ /* Low percentage of cache zones; go fast */
zrc->kc_throttle.throttle = 100;
+ } else if (p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
+ /* Busy, but low percentage of random zones; throttle */
+ zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_cache / 2);
} else {
/* Busy but we still have some random zone: throttle */
zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2);
}
dmz_dev_debug(zrc->dev,
- "Reclaim (%u): %s, %u%% free rnd zones (%u/%u)",
+ "Reclaim (%u): %s, free zones: cache %u%% (%u/%u) rnd %uy%% (%u/%u)",
zrc->kc_throttle.throttle,
(dmz_target_idle(zrc) ? "Idle" : "Busy"),
+ p_unmap_cache, nr_unmap_cache, nr_cache,
p_unmap_rnd, nr_unmap_rnd, nr_rnd);
ret = dmz_do_reclaim(zrc);
@@ -500,7 +548,8 @@ static void dmz_reclaim_work(struct work_struct *work)
/*
* Initialize reclaim.
*/
-int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
+int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_cdev *cdev,
+ struct dmz_metadata *zmd,
struct dmz_reclaim **reclaim)
{
struct dmz_reclaim *zrc;
@@ -511,6 +560,7 @@ int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
return -ENOMEM;
zrc->dev = dev;
+ zrc->cdev = cdev;
zrc->metadata = zmd;
zrc->atime = jiffies;
@@ -583,7 +633,7 @@ void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
*/
void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
{
- if (dmz_should_reclaim(zrc))
+ if (dmz_should_cache_reclaim(zrc) || dmz_should_rnd_reclaim(zrc))
mod_delayed_work(zrc->wq, &zrc->work, 0);
}
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index f4f83d39b3dc..3f25953672b9 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -39,12 +39,16 @@ struct dm_chunk_work {
*/
struct dmz_target {
struct dm_dev *ddev;
+ struct dm_dev *cddev;
unsigned long flags;
/* Zoned block device information */
struct dmz_dev *dev;
+ /* Cache block device information */
+ struct dmz_cdev *cdev;
+
/* For metadata handling */
struct dmz_metadata *metadata;
@@ -125,7 +129,10 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
if (!clone)
return -ENOMEM;
- bio_set_dev(clone, dmz->dev->bdev);
+ if (dmz_is_cache(zone))
+ bio_set_dev(clone, dmz->cdev->bdev);
+ else
+ bio_set_dev(clone, dmz->dev->bdev);
clone->bi_iter.bi_sector =
dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
@@ -179,7 +186,8 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
dmz_dev_debug(dmz->dev, "READ chunk %llu -> %s zone %u, block %llu, %u blocks",
(unsigned long long)dmz_bio_chunk(dmz->dev, bio),
- (dmz_is_rnd(zone) ? "RND" : "SEQ"),
+ (dmz_is_cache(zone) ?
+ "CACHE" : (dmz_is_rnd(zone) ? "RND" : "SEQ")),
dmz_id(dmz->metadata, zone),
(unsigned long long)chunk_block, nr_blocks);
@@ -187,7 +195,8 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
bzone = zone->bzone;
while (chunk_block < end_block) {
nr_blocks = 0;
- if (dmz_is_rnd(zone) || chunk_block < zone->wp_block) {
+ if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
+ chunk_block < zone->wp_block) {
/* Test block validity in the data zone */
ret = dmz_block_valid(dmz->metadata, zone, chunk_block);
if (ret < 0)
@@ -316,11 +325,13 @@ static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
dmz_dev_debug(dmz->dev, "WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
(unsigned long long)dmz_bio_chunk(dmz->dev, bio),
- (dmz_is_rnd(zone) ? "RND" : "SEQ"),
+ (dmz_is_cache(zone) ?
+ "CACHE" : (dmz_is_rnd(zone) ? "RND" : "SEQ")),
dmz_id(dmz->metadata, zone),
(unsigned long long)chunk_block, nr_blocks);
- if (dmz_is_rnd(zone) || chunk_block == zone->wp_block) {
+ if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
+ chunk_block == zone->wp_block) {
/*
* zone is a random zone or it is a sequential zone
* and the BIO is aligned to the zone write pointer:
@@ -364,7 +375,8 @@ static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
* Invalidate blocks in the data zone and its
* buffer zone if one is mapped.
*/
- if (dmz_is_rnd(zone) || chunk_block < zone->wp_block)
+ if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
+ chunk_block < zone->wp_block)
ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
if (ret == 0 && zone->bzone)
ret = dmz_invalidate_blocks(zmd, zone->bzone,
@@ -714,8 +726,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
aligned_capacity = dev->capacity &
~((sector_t)blk_queue_zone_sectors(q) - 1);
- if (ti->begin ||
- ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
+ if (ti->begin || ti->len < dev->capacity) {
ti->error = "Partial mapping not supported";
ret = -EINVAL;
goto err;
@@ -751,6 +762,64 @@ static void dmz_put_zoned_device(struct dm_target *ti)
dmz->dev = NULL;
}
+/*
+ * Get cache device information.
+ */
+static int dmz_get_cache_device(struct dm_target *ti, char *path)
+{
+ struct dmz_target *dmz = ti->private;
+ struct dmz_cdev *cdev;
+ int ret;
+
+ /* Get the target device */
+ ret = dm_get_device(ti, path, dm_table_get_mode(ti->table),
+ &dmz->cddev);
+ if (ret) {
+ ti->error = "Get target device failed";
+ dmz->cdev = NULL;
+ return ret;
+ }
+
+ cdev = kzalloc(sizeof(struct dmz_cdev), GFP_KERNEL);
+ if (!cdev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ cdev->bdev = dmz->cddev->bdev;
+ (void)bdevname(cdev->bdev, cdev->name);
+
+ if (bdev_zoned_model(cdev->bdev) != BLK_ZONED_NONE) {
+ ti->error = "Cache device must not be a zoned block device";
+ ret = -EINVAL;
+ goto err;
+ }
+
+ cdev->capacity = i_size_read(cdev->bdev->bd_inode) >> SECTOR_SHIFT;
+ dmz->cdev = cdev;
+
+ return 0;
+err:
+ dm_put_device(ti, dmz->cddev);
+ kfree(cdev);
+
+ return ret;
+}
+
+/*
+ * Cleanup cache device information.
+ */
+static void dmz_put_cache_device(struct dm_target *ti)
+{
+ struct dmz_target *dmz = ti->private;
+
+ if (!dmz->cdev)
+ return;
+ dm_put_device(ti, dmz->cddev);
+ kfree(dmz->cdev);
+ dmz->cdev = NULL;
+}
+
/*
* Setup target.
*/
@@ -758,10 +827,11 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dmz_target *dmz;
struct dmz_dev *dev;
+ struct dmz_cdev *cdev;
int ret;
/* Check arguments */
- if (argc != 1) {
+ if (argc > 2) {
ti->error = "Invalid argument count";
return -EINVAL;
}
@@ -781,12 +851,23 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err;
}
+ /* Get the cache device for random zones */
+ if (argc == 2) {
+ ret = dmz_get_cache_device(ti, argv[1]);
+ if (ret) {
+ dmz->cdev = NULL;
+ ti->error = "Cache device failed";
+ goto err_dev;
+ }
+ }
+
/* Initialize metadata */
dev = dmz->dev;
- ret = dmz_ctr_metadata(dev, &dmz->metadata);
+ cdev = dmz->cdev;
+ ret = dmz_ctr_metadata(dev, cdev, &dmz->metadata);
if (ret) {
ti->error = "Metadata initialization failed";
- goto err_dev;
+ goto err_cdev;
}
/* Set target (no write same support) */
@@ -833,7 +914,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
/* Initialize reclaim */
- ret = dmz_ctr_reclaim(dev, dmz->metadata, &dmz->reclaim);
+ ret = dmz_ctr_reclaim(dev, cdev, dmz->metadata, &dmz->reclaim);
if (ret) {
ti->error = "Zone reclaim initialization failed";
goto err_fwq;
@@ -853,6 +934,8 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bioset_exit(&dmz->bio_set);
err_meta:
dmz_dtr_metadata(dmz->metadata);
+err_cdev:
+ dmz_put_cache_device(ti);
err_dev:
dmz_put_zoned_device(ti);
err:
@@ -882,6 +965,8 @@ static void dmz_dtr(struct dm_target *ti)
bioset_exit(&dmz->bio_set);
+ dmz_put_cache_device(ti);
+
dmz_put_zoned_device(ti);
mutex_destroy(&dmz->chunk_lock);
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 5b5e493d479c..c2b6a919681a 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -66,6 +66,21 @@ struct dmz_dev {
sector_t zone_nr_blocks_shift;
};
+/*
+ * Cache block device information.
+ */
+struct dmz_cdev {
+ struct block_device *bdev;
+
+ char name[BDEVNAME_SIZE];
+
+ sector_t capacity;
+
+ unsigned int nr_zones;
+
+ unsigned int flags;
+};
+
#define dmz_bio_chunk(dev, bio) ((bio)->bi_iter.bi_sector >> \
(dev)->zone_nr_sectors_shift)
#define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1))
@@ -73,6 +88,7 @@ struct dmz_dev {
/* Device flags. */
#define DMZ_BDEV_DYING (1 << 0)
#define DMZ_CHECK_BDEV (2 << 0)
+#define DMZ_BDEV_CACHE (4 << 0)
/*
* Zone descriptor.
@@ -87,6 +103,9 @@ struct dm_zone {
/* Zone activation reference count */
atomic_t refcount;
+ /* Zone id */
+ unsigned int id;
+
/* Zone write pointer block (relative to the zone start block) */
unsigned int wp_block;
@@ -111,6 +130,7 @@ enum {
/* Zone write type */
DMZ_RND,
DMZ_SEQ,
+ DMZ_CACHE,
/* Zone critical condition */
DMZ_OFFLINE,
@@ -131,6 +151,7 @@ enum {
*/
#define dmz_is_rnd(z) test_bit(DMZ_RND, &(z)->flags)
#define dmz_is_seq(z) test_bit(DMZ_SEQ, &(z)->flags)
+#define dmz_is_cache(z) test_bit(DMZ_CACHE, &(z)->flags)
#define dmz_is_empty(z) ((z)->wp_block == 0)
#define dmz_is_offline(z) test_bit(DMZ_OFFLINE, &(z)->flags)
#define dmz_is_readonly(z) test_bit(DMZ_READ_ONLY, &(z)->flags)
@@ -164,7 +185,8 @@ struct dmz_reclaim;
/*
* Functions defined in dm-zoned-metadata.c
*/
-int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **zmd);
+int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_cdev *cdev,
+ struct dmz_metadata **zmd);
void dmz_dtr_metadata(struct dmz_metadata *zmd);
int dmz_resume_metadata(struct dmz_metadata *zmd);
@@ -183,6 +205,7 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd);
#define DMZ_ALLOC_RND 0x01
#define DMZ_ALLOC_RECLAIM 0x02
+#define DMZ_ALLOC_CACHE 0x04
struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags);
void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
@@ -192,6 +215,8 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd);
+unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd);
+unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd);
/*
* Activate a zone (increment its reference count).
@@ -244,8 +269,8 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
/*
* Functions defined in dm-zoned-reclaim.c
*/
-int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
- struct dmz_reclaim **zrc);
+int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_cdev *cdev,
+ struct dmz_metadata *zmd, struct dmz_reclaim **zrc);
void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
void dmz_resume_reclaim(struct dmz_reclaim *zrc);
--
2.25.0