[dm-devel] [PATCH 10/12] dm-zoned: support arbitrary number of devices

Hannes Reinecke hare at suse.de
Mon May 25 07:52:20 UTC 2020


On 5/25/20 4:36 AM, Damien Le Moal wrote:
> On 2020/05/23 0:39, Hannes Reinecke wrote:
>> Remove the hard-coded limit of two devices and support an unlimited
>> number of additional zoned devices.
>> With that we need to increase the device-mapper version number to
>> 3.0.0 as we've modified the interface.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
>>   drivers/md/dm-zoned-metadata.c |  68 +++++++++++-----------
>>   drivers/md/dm-zoned-reclaim.c  |  28 ++++++---
>>   drivers/md/dm-zoned-target.c   | 129 +++++++++++++++++++++++++----------------
>>   drivers/md/dm-zoned.h          |   9 +--
>>   4 files changed, 139 insertions(+), 95 deletions(-)
>>
>> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
>> index 5f44970a6187..87784e7785bc 100644
>> --- a/drivers/md/dm-zoned-metadata.c
>> +++ b/drivers/md/dm-zoned-metadata.c
>> @@ -260,6 +260,11 @@ unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd)
>>   	return zmd->zone_nr_sectors_shift;
>>   }
>>   
>> +unsigned int dmz_nr_devs(struct dmz_metadata *zmd)
>> +{
>> +	return zmd->nr_devs;
>> +}
> 
> Is this helper really needed ?
> 

Yes, in dm-zoned-reclaim.c

>> +
>>   unsigned int dmz_nr_zones(struct dmz_metadata *zmd)
>>   {
>>   	return zmd->nr_zones;
>> @@ -270,24 +275,14 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
>>   	return zmd->nr_chunks;
>>   }
>>   
>> -unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
>> +unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx)
>>   {
>> -	unsigned int nr_rnd_zones = 0;
>> -	int i;
>> -
>> -	for (i = 0; i < zmd->nr_devs; i++)
>> -		nr_rnd_zones += zmd->dev[i].nr_rnd;
>> -	return nr_rnd_zones;
>> +	return zmd->dev[idx].nr_rnd;
> 
> AH. OK. So my comment on patch 8 is voided :)
> 
Yeah, the patch arrangement could be improved; I'll see to roll both 
changes into one patch.

>>   }
>>   
>> -unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
>> +unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx)
>>   {
>> -	unsigned int nr_unmap_rnd_zones = 0;
>> -	int i;
>> -
>> -	for (i = 0; i < zmd->nr_devs; i++)
>> -		nr_unmap_rnd_zones += atomic_read(&zmd->dev[i].unmap_nr_rnd);
>> -	return nr_unmap_rnd_zones;
>> +	return atomic_read(&zmd->dev[idx].unmap_nr_rnd);
>>   }
>>   
>>   unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
>> @@ -300,24 +295,14 @@ unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
>>   	return atomic_read(&zmd->unmap_nr_cache);
>>   }
>>   
>> -unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
>> +unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx)
>>   {
>> -	unsigned int nr_seq_zones = 0;
>> -	int i;
>> -
>> -	for (i = 0; i < zmd->nr_devs; i++)
>> -		nr_seq_zones += zmd->dev[i].nr_seq;
>> -	return nr_seq_zones;
>> +	return zmd->dev[idx].nr_seq;
>>   }
>>   
>> -unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
>> +unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx)
>>   {
>> -	unsigned int nr_unmap_seq_zones = 0;
>> -	int i;
>> -
>> -	for (i = 0; i < zmd->nr_devs; i++)
>> -		nr_unmap_seq_zones += atomic_read(&zmd->dev[i].unmap_nr_seq);
>> -	return nr_unmap_seq_zones;
>> +	return atomic_read(&zmd->dev[idx].unmap_nr_seq);
>>   }
>>   
>>   static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
>> @@ -1530,7 +1515,20 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
>>   		 */
>>   		zmd->sb[0].zone = dmz_get(zmd, 0);
>>   
>> -		zoned_dev = &zmd->dev[1];
>> +		for (i = 1; i < zmd->nr_devs; i++) {
>> +			zoned_dev = &zmd->dev[i];
>> +
>> +			ret = blkdev_report_zones(zoned_dev->bdev, 0,
>> +						  BLK_ALL_ZONES,
>> +						  dmz_init_zone, zoned_dev);
>> +			if (ret < 0) {
>> +				DMDEBUG("(%s): Failed to report zones, error %d",
>> +					zmd->devname, ret);
>> +				dmz_drop_zones(zmd);
>> +				return ret;
>> +			}
>> +		}
>> +		return 0;
>>   	}
>>   
>>   	/*
>> @@ -2921,10 +2919,14 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
>>   		      zmd->nr_data_zones, zmd->nr_chunks);
>>   	dmz_zmd_debug(zmd, "    %u cache zones (%u unmapped)",
>>   		      zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache));
>> -	dmz_zmd_debug(zmd, "    %u random zones (%u unmapped)",
>> -		      dmz_nr_rnd_zones(zmd), dmz_nr_unmap_rnd_zones(zmd));
>> -	dmz_zmd_debug(zmd, "    %u sequential zones (%u unmapped)",
>> -		      dmz_nr_seq_zones(zmd), dmz_nr_unmap_seq_zones(zmd));
>> +	for (i = 0; i < zmd->nr_devs; i++) {
>> +		dmz_zmd_debug(zmd, "    %u random zones (%u unmapped)",
>> +			      dmz_nr_rnd_zones(zmd, i),
>> +			      dmz_nr_unmap_rnd_zones(zmd, i));
>> +		dmz_zmd_debug(zmd, "    %u sequential zones (%u unmapped)",
>> +			      dmz_nr_seq_zones(zmd, i),
>> +			      dmz_nr_unmap_seq_zones(zmd, i));
>> +	}
>>   	dmz_zmd_debug(zmd, "  %u reserved sequential data zones",
>>   		      zmd->nr_reserved_seq);
>>   	dmz_zmd_debug(zmd, "Format:");
>> diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
>> index fba0d48e38a7..f2e053b5f2db 100644
>> --- a/drivers/md/dm-zoned-reclaim.c
>> +++ b/drivers/md/dm-zoned-reclaim.c
>> @@ -442,15 +442,18 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
>>   {
>>   	struct dmz_metadata *zmd = zrc->metadata;
>>   	unsigned int nr_cache = dmz_nr_cache_zones(zmd);
>> -	unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
>> -	unsigned int nr_unmap, nr_zones;
>> +	unsigned int nr_unmap = 0, nr_zones = 0;
>>   
>>   	if (nr_cache) {
>>   		nr_zones = nr_cache;
>>   		nr_unmap = dmz_nr_unmap_cache_zones(zmd);
>>   	} else {
>> -		nr_zones = nr_rnd;
>> -		nr_unmap = dmz_nr_unmap_rnd_zones(zmd);
>> +		int i;
>> +
>> +		for (i = 0; i < dmz_nr_devs(zmd); i++) {
>> +			nr_zones += dmz_nr_rnd_zones(zmd, i);
> 
> May be not... We could keep constant totals in zmd to avoid this.
> 
>> +			nr_unmap += dmz_nr_unmap_rnd_zones(zmd, i);
>> +		}
>>   	}
>>   	return nr_unmap * 100 / nr_zones;
>>   }
>> @@ -460,7 +463,11 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
>>    */
>>   static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
>>   {
>> -	unsigned int nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
>> +	int i;
>> +	unsigned int nr_reclaim = 0;
>> +
>> +	for (i = 0; i < dmz_nr_devs(zrc->metadata); i++)
>> +		nr_reclaim += dmz_nr_rnd_zones(zrc->metadata, i);
>>   
>>   	if (dmz_nr_cache_zones(zrc->metadata))
>>   		nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
>> @@ -487,8 +494,8 @@ static void dmz_reclaim_work(struct work_struct *work)
>>   {
>>   	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
>>   	struct dmz_metadata *zmd = zrc->metadata;
>> -	unsigned int p_unmap;
>> -	int ret;
>> +	unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
>> +	int ret, i;
>>   
>>   	if (dmz_dev_is_dying(zmd))
>>   		return;
>> @@ -513,14 +520,17 @@ static void dmz_reclaim_work(struct work_struct *work)
>>   		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
>>   	}
>>   
>> +	for (i = 0; i < dmz_nr_devs(zmd); i++) {
>> +		nr_unmap_rnd += dmz_nr_unmap_rnd_zones(zmd, i);
>> +		nr_rnd += dmz_nr_rnd_zones(zmd, i);
>> +	}
>>   	DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
>>   		dmz_metadata_label(zmd),
>>   		zrc->kc_throttle.throttle,
>>   		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
>>   		p_unmap, dmz_nr_unmap_cache_zones(zmd),
>>   		dmz_nr_cache_zones(zmd),
>> -		dmz_nr_unmap_rnd_zones(zmd),
>> -		dmz_nr_rnd_zones(zmd));
>> +		nr_unmap_rnd, nr_rnd);
>>   
>>   	ret = dmz_do_reclaim(zrc);
>>   	if (ret && ret != -EINTR) {

In the light of this I guess there is a benefit from having the counters
in the metadata; that indeed would save us to having to export the 
number of devices.
I'll give it a go with the next round.

>> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
>> index bca9a611b8dd..f34fcc3f7cc6 100644
>> --- a/drivers/md/dm-zoned-target.c
>> +++ b/drivers/md/dm-zoned-target.c
>> @@ -13,8 +13,6 @@
>>   
>>   #define DMZ_MIN_BIOS		8192
>>   
>> -#define DMZ_MAX_DEVS		2
>> -
>>   /*
>>    * Zone BIO context.
>>    */
>> @@ -40,9 +38,10 @@ struct dm_chunk_work {
>>    * Target descriptor.
>>    */
>>   struct dmz_target {
>> -	struct dm_dev		*ddev[DMZ_MAX_DEVS];
>> +	struct dm_dev		**ddev;
>> +	unsigned int		nr_ddevs;
>>   
>> -	unsigned long		flags;
>> +	unsigned int		flags;
>>   
>>   	/* Zoned block device information */
>>   	struct dmz_dev		*dev;
>> @@ -764,7 +763,7 @@ static void dmz_put_zoned_device(struct dm_target *ti)
>>   	struct dmz_target *dmz = ti->private;
>>   	int i;
>>   
>> -	for (i = 0; i < DMZ_MAX_DEVS; i++) {
>> +	for (i = 0; i < dmz->nr_ddevs; i++) {
>>   		if (dmz->ddev[i]) {
>>   			dm_put_device(ti, dmz->ddev[i]);
>>   			dmz->ddev[i] = NULL;
>> @@ -777,21 +776,35 @@ static int dmz_fixup_devices(struct dm_target *ti)
>>   	struct dmz_target *dmz = ti->private;
>>   	struct dmz_dev *reg_dev, *zoned_dev;
>>   	struct request_queue *q;
>> +	sector_t zone_nr_sectors = 0;
>> +	int i;
>>   
>>   	/*
>> -	 * When we have two devices, the first one must be a regular block
>> -	 * device and the second a zoned block device.
>> +	 * When we have more than on devices, the first one must be a
>> +	 * regular block device and the others zoned block devices.
>>   	 */
>> -	if (dmz->ddev[0] && dmz->ddev[1]) {
>> +	if (dmz->nr_ddevs > 1) {
>>   		reg_dev = &dmz->dev[0];
>>   		if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
>>   			ti->error = "Primary disk is not a regular device";
>>   			return -EINVAL;
>>   		}
>> -		zoned_dev = &dmz->dev[1];
>> -		if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
>> -			ti->error = "Secondary disk is not a zoned device";
>> -			return -EINVAL;
>> +		for (i = 1; i < dmz->nr_ddevs; i++) {
>> +			zoned_dev = &dmz->dev[i];
>> +			if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
>> +				ti->error = "Secondary disk is not a zoned device";
>> +				return -EINVAL;
>> +			}
>> +			q = bdev_get_queue(zoned_dev->bdev);
> 
> May be add a comment here that we must check that all zoned devices have the
> same zone size ?
> 

I thought it was self-explanatory; but maybe not.
Will be adding it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer





More information about the dm-devel mailing list