[dm-devel] [PATCH 2/2] dm-zoned: split off random and cache zones

Hannes Reinecke hare at suse.de
Thu May 14 06:41:28 UTC 2020


On 5/13/20 2:44 PM, Damien Le Moal wrote:
> On 2020/05/13 16:07, Hannes Reinecke wrote:
>> Instead of emulating zones on the regular disk as random zones,
>> this patch adds a new 'cache' zone type.
>> This allows us to use the random zones on the zoned disk as
>> data zones (if cache zones are present), and improves performance
>> as the zones on the (slower) zoned disk are then never used
>> for caching.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
[ .. ]
>> @@ -1830,17 +1861,19 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone)
>>   }
>>   
>>   /*
>> - * Select a random write zone for reclaim.
>> + * Select a cache or random write zone for reclaim.
>>    */
>>   static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
>>   {
>>   	struct dm_zone *dzone = NULL;
>>   	struct dm_zone *zone;
>> +	struct list_head *zone_list = &zmd->map_rnd_list;
>>   
>> -	if (list_empty(&zmd->map_rnd_list))
>> -		return ERR_PTR(-EBUSY);
>> +	/* If we have cache zones select from the cache zone list */
>> +	if (zmd->nr_cache)
>> +		zone_list = &zmd->map_cache_list;
>>   
>> -	list_for_each_entry(zone, &zmd->map_rnd_list, link) {
>> +	list_for_each_entry(zone, zone_list, link) {
>>   		if (dmz_is_buf(zone))
>>   			dzone = zone->bzone;
>>   		else
>> @@ -1853,15 +1886,21 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
>>   }
>>   
>>   /*
>> - * Select a buffered sequential zone for reclaim.
>> + * Select a buffered random write or sequential zone for reclaim.
> 
> Random write zones should never be "buffered", or to be very precise, they will
> be buffered only during the time reclaim moves a cache zone's data to a random
> zone. That is visible in the dmz_handle_write() change that executes
> dmz_handle_direct_write() for cache or buffered zones instead of using
> dmz_handle_buffered_write(). So I think this comment can stay as is.
> 
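Right; for reference, here is my reading of the resulting
dmz_handle_write() dispatch, as an untested sketch (the argument names
are assumed, not copied from the patch):

	if (dmz_is_cache(zone) || dmz_is_rnd(zone) ||
	    chunk_block == zone->wp_block)
		/* Cache zone, random zone, or write aligned to the
		 * write pointer: write the data zone directly. */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);

	/* Unaligned write to a sequential zone: use a buffer zone. */
	return dmz_handle_buffered_write(dmz, zone, bio,
					 chunk_block, nr_blocks);

So a random data zone carries a buffer zone only transiently, while
reclaim drains a cache zone into it.
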
>>    */
>>   static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
>>   {
>>   	struct dm_zone *zone;
>>   
>> -	if (list_empty(&zmd->map_seq_list))
>> -		return ERR_PTR(-EBUSY);
>> -
>> +	if (zmd->nr_cache) {
>> +		/* If we have cache zones start with random zones */
>> +		list_for_each_entry(zone, &zmd->map_rnd_list, link) {
>> +			if (!zone->bzone)
>> +				continue;
>> +			if (dmz_lock_zone_reclaim(zone))
>> +				return zone;
>> +		}
>> +	}
> 
> For the reason stated above, I think this change is not necessary either.
> 
Ah, indeed. The above hunk makes us reclaim the random zones, too, which
strictly speaking isn't necessary.
I'll drop it and see how things behave.
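The function should then reduce back to just walking the sequential
list, roughly like this (untested sketch; the NULL return at the end is
my assumption, matching the dropped ERR_PTR(-EBUSY) check above):

	/*
	 * Select a buffered sequential zone for reclaim.
	 */
	static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
	{
		struct dm_zone *zone;

		list_for_each_entry(zone, &zmd->map_seq_list, link) {
			if (!zone->bzone)
				continue;
			if (dmz_lock_zone_reclaim(zone))
				return zone;
		}

		/* No buffered sequential zone found. */
		return NULL;
	}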

>>   	list_for_each_entry(zone, &zmd->map_seq_list, link) {
>>   		if (!zone->bzone)
>>   			continue;
>> @@ -1911,6 +1950,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
>>   	unsigned int dzone_id;
>>   	struct dm_zone *dzone = NULL;
>>   	int ret = 0;
>> +	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
>>   
>>   	dmz_lock_map(zmd);
>>   again:
>> @@ -1925,7 +1965,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
>>   			goto out;
>>   
>>   		/* Allocate a random zone */
>> -		dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
>> +		dzone = dmz_alloc_zone(zmd, alloc_flags);
>>   		if (!dzone) {
>>   			if (dmz_dev_is_dying(zmd)) {
>>   				dzone = ERR_PTR(-EIO);
>> @@ -2018,6 +2058,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>>   				     struct dm_zone *dzone)
>>   {
>>   	struct dm_zone *bzone;
>> +	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
>>   
>>   	dmz_lock_map(zmd);
>>   again:
>> @@ -2026,7 +2067,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>>   		goto out;
>>   
>>   	/* Allocate a random zone */
>> -	bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
>> +	bzone = dmz_alloc_zone(zmd, alloc_flags);
>>   	if (!bzone) {
>>   		if (dmz_dev_is_dying(zmd)) {
>>   			bzone = ERR_PTR(-EIO);
>> @@ -2043,7 +2084,10 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>>   	bzone->chunk = dzone->chunk;
>>   	bzone->bzone = dzone;
>>   	dzone->bzone = bzone;
>> -	list_add_tail(&bzone->link, &zmd->map_rnd_list);
>> +	if (alloc_flags == DMZ_ALLOC_CACHE)
>> +		list_add_tail(&bzone->link, &zmd->map_cache_list);
>> +	else
>> +		list_add_tail(&bzone->link, &zmd->map_rnd_list);
>>   out:
>>   	dmz_unlock_map(zmd);
>>   
>> @@ -2059,31 +2103,53 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
>>   	struct list_head *list;
>>   	struct dm_zone *zone;
>>   
>> -	if (flags & DMZ_ALLOC_RND)
>> +	switch (flags) {
>> +	case DMZ_ALLOC_CACHE:
>> +		list = &zmd->unmap_cache_list;
>> +		break;
>> +	case DMZ_ALLOC_RND:
>>   		list = &zmd->unmap_rnd_list;
>> -	else
>> -		list = &zmd->unmap_seq_list;
>> +		break;
>> +	default:
>> +		if (zmd->nr_cache)
>> +			list = &zmd->unmap_rnd_list;
>> +		else
>> +			list = &zmd->unmap_seq_list;
>> +		break;
>> +	}
>>   again:
>>   	if (list_empty(list)) {
>>   		/*
>> -		 * No free zone: if this is for reclaim, allow using the
>> -		 * reserved sequential zones.
>> +		 * No free zone: return NULL if this is for not reclaim.
> 
> s/for not reclaim/not for reclaim
> 
[ .. ]
> 
> Apart from the nits above, it all looks good. I am running this right now and it
> is running at SMR drive speed! Awesome! Will send a plot once the run is over.
> 
Thanks. I'll be respinning the patch and reposting it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer