[dm-devel] [RFC PATCH v2 2/3] dm zoned: introduce regular device to dm-zoned-target
Damien Le Moal
Damien.LeMoal at wdc.com
Wed Mar 25 06:20:47 UTC 2020
On 2020/03/24 20:03, Bob Liu wrote:
> Introduce a regular device for storing metadata and buffer write, zoned
> device is used by default if no regular device was set by dmsetup.
>
> The corresponding dmsetup cmd is:
> echo "0 $size zoned $regular_device $zoned_device" | dmsetup create $dm-zoned-name
>
> Signed-off-by: Bob Liu <bob.liu at oracle.com>
> ---
> drivers/md/dm-zoned-target.c | 141 +++++++++++++++++++++++++------------------
> drivers/md/dm-zoned.h | 50 +++++++++++++--
> 2 files changed, 127 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
> index 28f4d00..cae4bfe 100644
> --- a/drivers/md/dm-zoned-target.c
> +++ b/drivers/md/dm-zoned-target.c
> @@ -35,38 +35,6 @@ struct dm_chunk_work {
> };
>
> /*
> - * Target descriptor.
> - */
> -struct dmz_target {
> - struct dm_dev *ddev;
> -
> - unsigned long flags;
> -
> - /* Zoned block device information */
> - struct dmz_dev *zoned_dev;
> -
> - /* For metadata handling */
> - struct dmz_metadata *metadata;
> -
> - /* For reclaim */
> - struct dmz_reclaim *reclaim;
> -
> - /* For chunk work */
> - struct radix_tree_root chunk_rxtree;
> - struct workqueue_struct *chunk_wq;
> - struct mutex chunk_lock;
> -
> - /* For cloned BIOs to zones */
> - struct bio_set bio_set;
> -
> - /* For flush */
> - spinlock_t flush_lock;
> - struct bio_list flush_list;
> - struct delayed_work flush_work;
> - struct workqueue_struct *flush_wq;
> -};
I am not sure I understand why this needs to be moved from here
into dm-zoned.h...
> -
> -/*
> * Flush intervals (seconds).
> */
> #define DMZ_FLUSH_PERIOD (10 * HZ)
> @@ -679,7 +647,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
> /*
> * Get zoned device information.
> */
> -static int dmz_get_zoned_device(struct dm_target *ti, char *path)
> +static int dmz_get_device(struct dm_target *ti, char *path, bool zoned)
I do not think you need the zoned argument here. You can easily detect this
using bdev_is_zoned() once you get the bdev.
> {
> struct dmz_target *dmz = ti->private;
> struct request_queue *q;
> @@ -688,11 +656,22 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
> int ret;
>
> /* Get the target device */
> - ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
> - if (ret) {
> - ti->error = "Get target device failed";
> - dmz->ddev = NULL;
> - return ret;
> + if (zoned) {
> + ret = dm_get_device(ti, path, dm_table_get_mode(ti->table),
> + &dmz->ddev);
> + if (ret) {
> + ti->error = "Get target device failed";
> + dmz->ddev = NULL;
> + return ret;
> + }
> + } else {
> + ret = dm_get_device(ti, path, dm_table_get_mode(ti->table),
> + &dmz->regu_dm_dev);
> + if (ret) {
> + ti->error = "Get target device failed";
> + dmz->regu_dm_dev = NULL;
> + return ret;
> + }
If you use a local variable ddev, you do not need to duplicate this hunk.
All you need is:
if (zoned)
dmz->zddev = ddev;
else
dmz->cddev = ddev;
> }
>
> dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
> @@ -701,39 +680,61 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
> goto err;
> }
>
> - dev->bdev = dmz->ddev->bdev;
> - (void)bdevname(dev->bdev, dev->name);
> -
> - if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
> - ti->error = "Not a zoned block device";
> - ret = -EINVAL;
> - goto err;
> + if (zoned) {
> + dev->bdev = dmz->ddev->bdev;
> + if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
> + ti->error = "Not a zoned block device";
> + ret = -EINVAL;
> + goto err;
> + }
> }
> + else
> + dev->bdev = dmz->regu_dm_dev->bdev;
> +
> + (void)bdevname(dev->bdev, dev->name);
> + dev->target = dmz;
>
> q = bdev_get_queue(dev->bdev);
> dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
> aligned_capacity = dev->capacity &
> ~((sector_t)blk_queue_zone_sectors(q) - 1);
> - if (ti->begin ||
> - ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
> - ti->error = "Partial mapping not supported";
> - ret = -EINVAL;
> - goto err;
> - }
>
> - dev->zone_nr_sectors = blk_queue_zone_sectors(q);
> - dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors);
> + if (zoned) {
> + if (ti->begin || ((ti->len != dev->capacity) &&
> + (ti->len != aligned_capacity))) {
> + ti->error = "Partial mapping not supported";
> + ret = -EINVAL;
> + goto err;
> + }
> + dev->zone_nr_sectors = blk_queue_zone_sectors(q);
> + dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors);
> +
> + dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
> + dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
>
> - dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
> - dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
> + dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
>
> - dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
> + dmz->zoned_dev = dev;
> + } else {
> + /* Emulate regular device zone info by using the same zone size.*/
> + dev->zone_nr_sectors = dmz->zoned_dev->zone_nr_sectors;
> + dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors);
>
> - dmz->zoned_dev = dev;
> + dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
> + dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
> +
> + dev->nr_zones = (get_capacity(dev->bdev->bd_disk) >>
> + ilog2(dev->zone_nr_sectors));
> +
> + dmz->regu_dmz_dev = dev;
> + }
>
> return 0;
> err:
> - dm_put_device(ti, dmz->ddev);
> + if (zoned)
> + dm_put_device(ti, dmz->ddev);
> + else
> + dm_put_device(ti, dmz->regu_dm_dev);
A local ddev variable will avoid the need for the if/else here.
> kfree(dev);
>
> return ret;
> @@ -746,6 +747,12 @@ static void dmz_put_zoned_device(struct dm_target *ti)
> {
> struct dmz_target *dmz = ti->private;
>
> + if (dmz->regu_dm_dev)
> + dm_put_device(ti, dmz->regu_dm_dev);
> + if (dmz->regu_dmz_dev) {
> + kfree(dmz->regu_dmz_dev);
> + dmz->regu_dmz_dev = NULL;
> + }
> dm_put_device(ti, dmz->ddev);
> kfree(dmz->zoned_dev);
> dmz->zoned_dev = NULL;
> @@ -761,7 +768,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> int ret;
>
> /* Check arguments */
> - if (argc != 1) {
> + if ((argc != 1) && (argc != 2)) {
> ti->error = "Invalid argument count";
> return -EINVAL;
> }
> @@ -775,12 +782,25 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> ti->private = dmz;
>
> /* Get the target zoned block device */
> - ret = dmz_get_zoned_device(ti, argv[0]);
> + ret = dmz_get_device(ti, argv[0], 1);
> if (ret) {
> dmz->ddev = NULL;
> goto err;
> }
>
> + snprintf(dmz->name, BDEVNAME_SIZE, "%s", dmz->zoned_dev->name);
> + dmz->nr_zones = dmz->zoned_dev->nr_zones;
> + if (argc == 2) {
> + ret = dmz_get_device(ti, argv[1], 0);
> + if (ret) {
> + dmz->regu_dm_dev = NULL;
> + goto err;
> + }
> + snprintf(dmz->name, BDEVNAME_SIZE * 2, "%s:%s",
> + dmz->zoned_dev->name, dmz->regu_dmz_dev->name);
> + dmz->nr_zones += dmz->regu_dmz_dev->nr_zones;
> + }
> +
> /* Initialize metadata */
> dev = dmz->zoned_dev;
> ret = dmz_ctr_metadata(dev, &dmz->metadata);
> @@ -962,6 +982,7 @@ static int dmz_iterate_devices(struct dm_target *ti,
> struct dmz_dev *dev = dmz->zoned_dev;
> sector_t capacity = dev->capacity & ~(dev->zone_nr_sectors - 1);
>
> + /* Todo: fn(dmz->regu_dm_dev) */
> return fn(ti, dmz->ddev, 0, capacity, data);
> }
>
> diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
> index 5b5e493..a3535bc 100644
> --- a/drivers/md/dm-zoned.h
> +++ b/drivers/md/dm-zoned.h
> @@ -46,9 +46,51 @@
> #define dmz_bio_blocks(bio) dmz_sect2blk(bio_sectors(bio))
>
> /*
> + * Target descriptor.
> + */
> +struct dmz_target {
> + struct dm_dev *ddev;
> + /*
> + * Regular device for store metdata and buffer write, use zoned device
> + * by default if no regular device was set.
> + */
> + struct dm_dev *regu_dm_dev;
rddev is shorter...
> + struct dmz_dev *regu_dmz_dev;
And rdev here ? Or "cdev" with the c standing for "cache" and "conventional
(=not zoned)" at the same time.
> + /* Total nr_zones. */
> + unsigned int nr_zones;
> + char name[BDEVNAME_SIZE * 2];
I would define 2 fields rather than doubling the name length. The field names
can follow the same pattern as zdev/cdev: you add zname and cname. Anyway, this
string is already in dmz_dev, so why add it?
> +
> + unsigned long flags;
Flags are currently for both the target and the backend device. This needs to be
sorted out because there will be a need for per backend device flags (e.g. dying
flag etc), so this needs to be split, one flag field for each cache and zoned dev.
> +
> + /* Zoned block device information */
> + struct dmz_dev *zoned_dev;
Similarly to regu_dmz_dev, it would be better to pair this one with struct
dm_dev *ddev above and rename ddev field to zddev.
And to simplify everything, you could move ddev to struct dmz_dev and add a
flags field there. Then all you need in struct dmz_target is:
struct dm_dev *cdev;
struct dm_dev *zdev;
> +
> + /* For metadata handling */
> + struct dmz_metadata *metadata;
> +
> + /* For reclaim */
> + struct dmz_reclaim *reclaim;
> +
> + /* For chunk work */
> + struct radix_tree_root chunk_rxtree;
> + struct workqueue_struct *chunk_wq;
> + struct mutex chunk_lock;
> +
> + /* For cloned BIOs to zones */
> + struct bio_set bio_set;
> +
> + /* For flush */
> + spinlock_t flush_lock;
> + struct bio_list flush_list;
> + struct delayed_work flush_work;
> + struct workqueue_struct *flush_wq;
> +};
> +
> +/*
> * Zoned block device information.
> */
> struct dmz_dev {
> + struct dmz_target *target;
> struct block_device *bdev;
>
> char name[BDEVNAME_SIZE];
> @@ -147,16 +189,16 @@ enum {
> * Message functions.
> */
> #define dmz_dev_info(dev, format, args...) \
> - DMINFO("(%s): " format, (dev)->name, ## args)
> + DMINFO("(%s): " format, (dev)->target->name, ## args)
>
> #define dmz_dev_err(dev, format, args...) \
> - DMERR("(%s): " format, (dev)->name, ## args)
> + DMERR("(%s): " format, (dev)->target->name, ## args)
>
> #define dmz_dev_warn(dev, format, args...) \
> - DMWARN("(%s): " format, (dev)->name, ## args)
> + DMWARN("(%s): " format, (dev)->target->name, ## args)
>
> #define dmz_dev_debug(dev, format, args...) \
> - DMDEBUG("(%s): " format, (dev)->name, ## args)
> + DMDEBUG("(%s): " format, (dev)->target->name, ## args)
>
> struct dmz_metadata;
> struct dmz_reclaim;
>
--
Damien Le Moal
Western Digital Research
More information about the dm-devel
mailing list