[dm-devel] [PATCH 08/12] dm-zoned: move random and sequential zones into struct dmz_dev
Hannes Reinecke
hare at suse.de
Mon May 25 07:47:25 UTC 2020
On 5/25/20 4:27 AM, Damien Le Moal wrote:
> On 2020/05/23 0:39, Hannes Reinecke wrote:
>> Random and sequential zones should be part of the respective
>> device structure to make arbitration between devices possible.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
>> drivers/md/dm-zoned-metadata.c | 143 +++++++++++++++++++++++++----------------
>> drivers/md/dm-zoned.h | 10 +++
>> 2 files changed, 99 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
>> index 1b9da698a812..5f44970a6187 100644
>> --- a/drivers/md/dm-zoned-metadata.c
>> +++ b/drivers/md/dm-zoned-metadata.c
>> @@ -192,21 +192,12 @@ struct dmz_metadata {
>> /* Zone allocation management */
>> struct mutex map_lock;
>> struct dmz_mblock **map_mblk;
>> - unsigned int nr_rnd;
>> - atomic_t unmap_nr_rnd;
>> - struct list_head unmap_rnd_list;
>> - struct list_head map_rnd_list;
>>
>> unsigned int nr_cache;
>> atomic_t unmap_nr_cache;
>> struct list_head unmap_cache_list;
>> struct list_head map_cache_list;
>>
>> - unsigned int nr_seq;
>> - atomic_t unmap_nr_seq;
>> - struct list_head unmap_seq_list;
>> - struct list_head map_seq_list;
>> -
>> atomic_t nr_reserved_seq_zones;
>> struct list_head reserved_seq_zones_list;
>>
>> @@ -281,12 +272,22 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
>>
>> unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
>> {
>> - return zmd->nr_rnd;
>> + unsigned int nr_rnd_zones = 0;
>> + int i;
>> +
>> + for (i = 0; i < zmd->nr_devs; i++)
>> + nr_rnd_zones += zmd->dev[i].nr_rnd;
>
> We could keep the total nr_rnd_zones in dmz_metadata to avoid this one since the
> value will never change at run time.
>
Yeah, we could, but in the end this is only used for logging, so it's
hardly performance critical.
And I have an aversion to keeping two counters for the same thing;
they inevitably get out of sync.
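(For reference, if the totals ever did become hot, a one-shot sum
computed once after load would avoid both the per-call loop and the
double bookkeeping; untested sketch, field and function names made up:)

	/* hypothetical: cache the (immutable) totals once at init */
	static void dmz_sum_zone_counts(struct dmz_metadata *zmd)
	{
		int i;

		zmd->total_nr_rnd = 0;
		zmd->total_nr_seq = 0;
		for (i = 0; i < zmd->nr_devs; i++) {
			zmd->total_nr_rnd += zmd->dev[i].nr_rnd;
			zmd->total_nr_seq += zmd->dev[i].nr_seq;
		}
	}

But as said, for logging-only paths the loop is cheap enough.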
>> + return nr_rnd_zones;
>> }
>>
>> unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
>> {
>> - return atomic_read(&zmd->unmap_nr_rnd);
>> + unsigned int nr_unmap_rnd_zones = 0;
>> + int i;
>> +
>> + for (i = 0; i < zmd->nr_devs; i++)
>> + nr_unmap_rnd_zones += atomic_read(&zmd->dev[i].unmap_nr_rnd);
>> + return nr_unmap_rnd_zones;
>> }
>>
>> unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
>> @@ -301,12 +302,22 @@ unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
>>
>> unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
>> {
>> - return zmd->nr_seq;
>> + unsigned int nr_seq_zones = 0;
>> + int i;
>> +
>> + for (i = 0; i < zmd->nr_devs; i++)
>> + nr_seq_zones += zmd->dev[i].nr_seq;
>
> Same here. This value does not change at runtime.
>
>> + return nr_seq_zones;
>> }
>>
>> unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
>> {
>> - return atomic_read(&zmd->unmap_nr_seq);
>> + unsigned int nr_unmap_seq_zones = 0;
>> + int i;
>> +
>> + for (i = 0; i < zmd->nr_devs; i++)
>> + nr_unmap_seq_zones += atomic_read(&zmd->dev[i].unmap_nr_seq);
>> + return nr_unmap_seq_zones;
>> }
>>
>> static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
>> @@ -1485,6 +1496,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
>>
>> dev->metadata = zmd;
>> zmd->nr_zones += dev->nr_zones;
>> +
>> + atomic_set(&dev->unmap_nr_rnd, 0);
>> + INIT_LIST_HEAD(&dev->unmap_rnd_list);
>> + INIT_LIST_HEAD(&dev->map_rnd_list);
>> +
>> + atomic_set(&dev->unmap_nr_seq, 0);
>> + INIT_LIST_HEAD(&dev->unmap_seq_list);
>> + INIT_LIST_HEAD(&dev->map_seq_list);
>> }
>>
>> if (!zmd->nr_zones) {
>> @@ -1702,9 +1721,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>> if (dmz_is_cache(dzone))
>> list_add_tail(&dzone->link, &zmd->map_cache_list);
>> else if (dmz_is_rnd(dzone))
>> - list_add_tail(&dzone->link, &zmd->map_rnd_list);
>> + list_add_tail(&dzone->link, &dzone->dev->map_rnd_list);
>> else
>> - list_add_tail(&dzone->link, &zmd->map_seq_list);
>> + list_add_tail(&dzone->link, &dzone->dev->map_seq_list);
>>
>> /* Check buffer zone */
>> bzone_id = le32_to_cpu(dmap[e].bzone_id);
>> @@ -1738,7 +1757,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>> if (dmz_is_cache(bzone))
>> list_add_tail(&bzone->link, &zmd->map_cache_list);
>> else
>> - list_add_tail(&bzone->link, &zmd->map_rnd_list);
>> + list_add_tail(&bzone->link, &bzone->dev->map_rnd_list);
>> next:
>> chunk++;
>> e++;
>> @@ -1763,9 +1782,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>> if (dmz_is_cache(dzone))
>> zmd->nr_cache++;
>> else if (dmz_is_rnd(dzone))
>> - zmd->nr_rnd++;
>> + dzone->dev->nr_rnd++;
>> else
>> - zmd->nr_seq++;
>> + dzone->dev->nr_seq++;
>>
>> if (dmz_is_data(dzone)) {
>> /* Already initialized */
>> @@ -1779,16 +1798,18 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>> list_add_tail(&dzone->link, &zmd->unmap_cache_list);
>> atomic_inc(&zmd->unmap_nr_cache);
>> } else if (dmz_is_rnd(dzone)) {
>> - list_add_tail(&dzone->link, &zmd->unmap_rnd_list);
>> - atomic_inc(&zmd->unmap_nr_rnd);
>> + list_add_tail(&dzone->link,
>> + &dzone->dev->unmap_rnd_list);
>> + atomic_inc(&dzone->dev->unmap_nr_rnd);
>> } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) {
>> list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list);
>> set_bit(DMZ_RESERVED, &dzone->flags);
>> atomic_inc(&zmd->nr_reserved_seq_zones);
>> - zmd->nr_seq--;
>> + dzone->dev->nr_seq--;
>> } else {
>> - list_add_tail(&dzone->link, &zmd->unmap_seq_list);
>> - atomic_inc(&zmd->unmap_nr_seq);
>> + list_add_tail(&dzone->link,
>> + &dzone->dev->unmap_seq_list);
>> + atomic_inc(&dzone->dev->unmap_nr_seq);
>> }
>> }
>>
>> @@ -1822,13 +1843,13 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
>> list_del_init(&zone->link);
>> if (dmz_is_seq(zone)) {
>> /* LRU rotate sequential zone */
>> - list_add_tail(&zone->link, &zmd->map_seq_list);
>> + list_add_tail(&zone->link, &zone->dev->map_seq_list);
>> } else if (dmz_is_cache(zone)) {
>> /* LRU rotate cache zone */
>> list_add_tail(&zone->link, &zmd->map_cache_list);
>> } else {
>> /* LRU rotate random zone */
>> - list_add_tail(&zone->link, &zmd->map_rnd_list);
>> + list_add_tail(&zone->link, &zone->dev->map_rnd_list);
>> }
>> }
>>
>> @@ -1910,14 +1931,24 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
>> {
>> struct dm_zone *dzone = NULL;
>> struct dm_zone *zone;
>> - struct list_head *zone_list = &zmd->map_rnd_list;
>> + struct list_head *zone_list;
>>
>> /* If we have cache zones select from the cache zone list */
>> if (zmd->nr_cache) {
>> zone_list = &zmd->map_cache_list;
>> /* Try to reclaim random zones, too, when idle */
>> - if (idle && list_empty(zone_list))
>> - zone_list = &zmd->map_rnd_list;
>> + if (idle && list_empty(zone_list)) {
>> + int i;
>> +
>> + for (i = 1; i < zmd->nr_devs; i++) {
>> + zone_list = &zmd->dev[i].map_rnd_list;
>> + if (!list_empty(zone_list))
>> + break;
>> + }
>
> This is going to use the first zoned dev until it has no more random zones, then
> switch to the next zoned dev. What about going round-robin on the devices to
> increase parallelism between the drives?
>
>
That will happen in a later patch.
This patch just puts the basic infrastructure in place.
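Just to sketch the direction (rough, untested sketch; the cursor field
and helper name are invented and the real patch may look different):

	/* hypothetical round-robin over the zoned devices */
	static struct list_head *dmz_rr_rnd_list(struct dmz_metadata *zmd)
	{
		unsigned int i, n;

		/* zoned devices start at index 1 when cache zones exist */
		for (n = 0; n < zmd->nr_devs - 1; n++) {
			i = 1 + (zmd->last_rnd_dev + n) % (zmd->nr_devs - 1);
			if (!list_empty(&zmd->dev[i].map_rnd_list)) {
				zmd->last_rnd_dev = i;
				return &zmd->dev[i].map_rnd_list;
			}
		}
		return NULL;
	}

The cursor makes successive reclaim runs start on the device after the
one used last, so the drives get picked in turn instead of draining the
first one completely.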
>> + }
>> + } else {
>> + /* Otherwise the random zones are on the first disk */
>> + zone_list = &zmd->dev[0].map_rnd_list;
>> }
>>
>> list_for_each_entry(zone, zone_list, link) {
>> @@ -1938,12 +1969,17 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
>> static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
>> {
>> struct dm_zone *zone;
>> + int i;
>>
>> - list_for_each_entry(zone, &zmd->map_seq_list, link) {
>> - if (!zone->bzone)
>> - continue;
>> - if (dmz_lock_zone_reclaim(zone))
>> - return zone;
>> + for (i = 0; i < zmd->nr_devs; i++) {
>> + struct dmz_dev *dev = &zmd->dev[i];
>> +
>> + list_for_each_entry(zone, &dev->map_seq_list, link) {
>> + if (!zone->bzone)
>> + continue;
>> + if (dmz_lock_zone_reclaim(zone))
>> + return zone;
>> + }
>
> Same comment here.
>
Same response here :-)
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer