[dm-devel] [PATCH 11/11] dm-zoned: metadata version 2

Hannes Reinecke hare at suse.de
Tue Apr 14 05:46:56 UTC 2020


On 4/13/20 6:55 AM, Damien Le Moal wrote:
> On 2020/04/09 15:45, Hannes Reinecke wrote:
>> Implement handling for metadata version 2. The new metadata adds
>> a label and UUID for the device mapper device, and additional UUID
>> for the underlying block devices.
>> It also allows for an additional regular drive to be used for
>> emulating random access zones. The emulated zones will be placed
>> logically in front of the zones from the zoned block device, causing
>> the superblocks and metadata to be stored on that device.
>> The first zone of the original zoned device will be used to hold
>> another, tertiary copy of the metadata; this copy carries a
>> generation number of 0 and is never updated; it's just used
>> for identification.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
>>   drivers/md/dm-zoned-metadata.c | 277 +++++++++++++++++++++++++++------
>>   drivers/md/dm-zoned-target.c   | 126 ++++++++-------
>>   drivers/md/dm-zoned.h          |   7 +-
>>   3 files changed, 306 insertions(+), 104 deletions(-)
>>
>> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
>> index 554ff32abada..36a71f50165d 100644
>> --- a/drivers/md/dm-zoned-metadata.c
>> +++ b/drivers/md/dm-zoned-metadata.c
>> @@ -16,7 +16,7 @@
>>   /*
>>    * Metadata version.
>>    */
>> -#define DMZ_META_VER	1
>> +#define DMZ_META_VER	2
>>   
>>   /*
>>    * On-disk super block magic.
>> @@ -69,8 +69,17 @@ struct dmz_super {
>>   	/* Checksum */
>>   	__le32		crc;			/*  48 */
>>   
>> +	/* DM-Zoned label */
>> +	u8		dmz_label[32];		/*  80 */
>> +
>> +	/* DM-Zoned UUID */
>> +	u8		dmz_uuid[16];		/*  96 */
>> +
>> +	/* Device UUID */
>> +	u8		dev_uuid[16];		/* 112 */
>> +
>>   	/* Padding to full 512B sector */
>> -	u8		reserved[464];		/* 512 */
>> +	u8		reserved[400];		/* 512 */
>>   };
>>   
>>   /*
>> @@ -135,6 +144,8 @@ struct dmz_metadata {
>>   	struct dmz_dev		*dev;
>>   
>>   	char			devname[BDEVNAME_SIZE];
>> +	char			label[BDEVNAME_SIZE];
>> +	uuid_t			uuid;
>>   
>>   	sector_t		zone_bitmap_size;
>>   	unsigned int		zone_nr_bitmap_blocks;
>> @@ -161,8 +172,9 @@ struct dmz_metadata {
>>   	/* Zone information array */
>>   	struct dm_zone		*zones;
>>   
>> -	struct dmz_sb		sb[2];
>> +	struct dmz_sb		sb[3];
>>   	unsigned int		mblk_primary;
>> +	unsigned int		sb_version;
>>   	u64			sb_gen;
>>   	unsigned int		min_nr_mblks;
>>   	unsigned int		max_nr_mblks;
>> @@ -195,31 +207,56 @@ struct dmz_metadata {
>>   };
>>   
>>   #define dmz_zmd_info(zmd, format, args...)	\
>> -	DMINFO("(%s): " format, (zmd)->devname, ## args)
>> +	DMINFO("(%s): " format, (zmd)->label, ## args)
>>   
>>   #define dmz_zmd_err(zmd, format, args...)	\
>> -	DMERR("(%s): " format, (zmd)->devname, ## args)
>> +	DMERR("(%s): " format, (zmd)->label, ## args)
>>   
>>   #define dmz_zmd_warn(zmd, format, args...)	\
>> -	DMWARN("(%s): " format, (zmd)->devname, ## args)
>> +	DMWARN("(%s): " format, (zmd)->label, ## args)
>>   
>>   #define dmz_zmd_debug(zmd, format, args...)	\
>> -	DMDEBUG("(%s): " format, (zmd)->devname, ## args)
>> +	DMDEBUG("(%s): " format, (zmd)->label, ## args)
>>   /*
>>    * Various accessors
>>    */
>> +unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
>> +{
>> +	unsigned int zone_id;
>> +
>> +	if (WARN_ON(!zone))
>> +		return 0;
>> +
>> +	zone_id = zone->id;
>> +	if (zmd->dev[0].zone_offset &&
> 
> The name zone_offset is maybe a little confusing as one could think it is a
> sector offset. May be call that zone_id_offset ?
> 
> 
Yeah, right. I'm not utterly happy with the naming myself.

>> +	    (zone_id >= zmd->dev[0].zone_offset))
>> +		zone_id -= zmd->dev[0].zone_offset;
>> +	return zone_id;
>> +}
>> +
>>   sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
>>   {
>> -	return (sector_t)zone->id << zmd->zone_nr_sectors_shift;
>> +	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
>> +
>> +	return (sector_t)zone_id << zmd->zone_nr_sectors_shift;
>>   }
>>   
>>   sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
>>   {
>> -	return (sector_t)zone->id << zmd->zone_nr_blocks_shift;
>> +	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
>> +
>> +	return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
>>   }
>>   
>>   struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
>>   {
>> +	if (WARN_ON(!zone))
>> +		return &zmd->dev[0];
> 
> return NULL here so that we get EIO (or other error) from caller ? Otherwise,
> the bug here could go unnoticed and bad IOs go to dev[0].
> 
Ok.

>> +
>> +	if (zmd->dev[0].zone_offset &&
>> +	    zone->id < zmd->dev[0].zone_offset)
>> +			return &zmd->dev[1];
>> +
>>   	return &zmd->dev[0];
>>   }
> 
> OK. This one still confuses me. I think we need to have a comment here reminding
> that when there are 2 devices, the second one holds the low ID zones and the
> first one (the SMR drive) the high ID zones. While I think it is OK as is (with
> the comment), I still think we should reverse that as the reverse would be more
> natural...
> 
Sure, we can reverse the order of devices by using the regular disk 
device as first and the SMR drive as second argument when creating the 
device-mapper device:

0 <size> zoned <regular device> <zoned device>

That would make it more natural, and we don't have to rearrange disks 
within the code.

We'd lose compability with v1, but one could argue it's not a bad thing.

[ .. ]

>> @@ -693,60 +697,47 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
>>   /*
>>    * Get zoned device information.
>>    */
>> -static int dmz_get_zoned_device(struct dm_target *ti, char *path)
>> +static int dmz_get_zoned_device(struct dm_target *ti, char *path,
>> +				struct dmz_dev *dev, int num)
>>   {
>>   	struct dmz_target *dmz = ti->private;
>>   	struct request_queue *q;
>> -	struct dmz_dev *dev;
>> -	sector_t aligned_capacity;
>>   	int ret;
>>   
>>   	/* Get the target device */
>> -	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
>> +	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table),
>> +			    &dmz->ddev[num]);
>>   	if (ret) {
>>   		ti->error = "Get target device failed";
>> -		dmz->ddev = NULL;
>> +		dmz->ddev[num] = NULL;
>>   		return ret;
>>   	}
>>   
>> -	dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
>> -	if (!dev) {
>> -		ret = -ENOMEM;
>> -		goto err;
>> -	}
>> -
>> -	dev->bdev = dmz->ddev->bdev;
>> +	dev->bdev = dmz->ddev[num]->bdev;
>>   	(void)bdevname(dev->bdev, dev->name);
>>   
>> -	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
>> -		ti->error = "Not a zoned block device";
>> -		ret = -EINVAL;
>> -		goto err;
>> -	}
>> +	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
>> +		dev->flags = DMZ_BDEV_REGULAR;
>>   
>>   	q = bdev_get_queue(dev->bdev);
>>   	dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
>> -	aligned_capacity = dev->capacity &
>> -				~((sector_t)blk_queue_zone_sectors(q) - 1);
>> -	if (ti->begin ||
>> -	    ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
>> -		ti->error = "Partial mapping not supported";
>> -		ret = -EINVAL;
>> -		goto err;
>> +	if (ti->begin) {
>> +		ti->error = "Partial mapping is not supported";
>> +		dm_put_device(ti, dmz->ddev[num]);
>> +		dmz->ddev[num] = NULL;
>> +		return -EINVAL;
>>   	}
>>   
>> -	dev->zone_nr_sectors = blk_queue_zone_sectors(q);
>> -
>> -	dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
>> -
>> -	dmz->dev = dev;
>> -
>> +	if (num == 1) {
>> +		dev->zone_nr_sectors = dmz->dev[0].zone_nr_sectors;
>> +		dev->nr_zones = dev->capacity / dev->zone_nr_sectors;
>> +		if (dev->capacity % dev->nr_zones)
>> +			dev->nr_zones++;
> 
> dev->nr_zones =
> 	(dev->capacity + dev->zone_nr_sectors - 1) / dev->zone_nr_sectors;
> 
> or use DIV_ROUND_UP() ?
> 
OK.

> And may be add a comment to remind (again) that znd->dev[1] is the normal disk
> so we cannot use blk_queue_zone_sectors() and blkdev_nr_zones().
> 
Or, better still, check if it's a regular device and drop the magic 'num 
== 1' comparison.

Thanks for the review, will be sending a v4.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer





More information about the dm-devel mailing list