[dm-devel] [PATCH 2/2] dm-zoned: split off random and cache zones

Damien Le Moal <Damien.LeMoal at wdc.com>
Wed May 13 12:44:42 UTC 2020


On 2020/05/13 16:07, Hannes Reinecke wrote:
> Instead of emulating zones on the regular disk as random zones
> this patch adds a new 'cache' zone type.
> This allows us to use the random zones on the zoned disk as
> data zones (if cache zones are present), and improves performance
> as the zones on the (slower) zoned disk are then never used
> for caching.
> 
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
>  .../admin-guide/device-mapper/dm-zoned.rst    |  17 +-
>  drivers/md/dm-zoned-metadata.c                | 145 ++++++++++++++----
>  drivers/md/dm-zoned-reclaim.c                 |  70 +++++----
>  drivers/md/dm-zoned-target.c                  |  19 ++-
>  drivers/md/dm-zoned.h                         |   7 +-
>  5 files changed, 181 insertions(+), 77 deletions(-)
> 
> diff --git a/Documentation/admin-guide/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst
> index 553752ea2521..d4933638737a 100644
> --- a/Documentation/admin-guide/device-mapper/dm-zoned.rst
> +++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst
> @@ -174,17 +174,18 @@ Ex::
>  
>  will return a line
>  
> -	0 <size> zoned <nr_zones> zones <nr_unmap_rnd>/<nr_rnd> random <nr_unmap_seq>/<nr_seq> sequential
> +	0 <size> zoned <nr_zones> zones <nr_unmap>/<nr_total> cache <nr_unmap>/<nr_total> random <nr_unmap>/<nr_total> sequential
>  
> -where <nr_zones> is the total number of zones, <nr_unmap_rnd> is the number
> -of unmapped (ie free) random zones, <nr_rnd> the total number of zones,
> -<nr_unmap_seq> the number of unmapped sequential zones, and <nr_seq> the
> -total number of sequential zones.
> +where <nr_zones> is the total number of zones, followed by statistics for
> +each zone type (cache, random, and sequential): <nr_unmap>/<nr_total> is
> +the number of unmapped (ie free) zones versus the total number of zones
> +of that type. 'cache' zones are located on the regular disk, 'random' and
> +'sequential' zones on the zoned disk.
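
To illustrate (made-up numbers): a setup with 128 cache zones on the
regular disk in front of a zoned disk would report something like

	0 <size> zoned 10240 zones 126/128 cache 4000/4096 random 5980/6000 sequential

with 126 of the 128 cache zones still unmapped.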
>  
>  Normally the reclaim process will be started once there are less than 50
> -percent free random zones. In order to start the reclaim process manually
> -even before reaching this threshold the 'dmsetup message' function can be
> -used:
> +percent free cache or random zones. In order to start the reclaim process
> +manually even before reaching this threshold the 'dmsetup message' function
> +can be used:
>  
>  Ex::
>  
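
(The example following this hunk is, unless I misremember the doc, the
existing

	dmsetup message /dev/dm-X 0 reclaim

command, which this patch leaves unchanged.)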
> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
> index 9b93d7ff1dfc..dbcbcb0ddf56 100644
> --- a/drivers/md/dm-zoned-metadata.c
> +++ b/drivers/md/dm-zoned-metadata.c
> @@ -166,6 +166,7 @@ struct dmz_metadata {
>  	unsigned int		nr_meta_blocks;
>  	unsigned int		nr_meta_zones;
>  	unsigned int		nr_data_zones;
> +	unsigned int		nr_cache_zones;
>  	unsigned int		nr_rnd_zones;
>  	unsigned int		nr_reserved_seq;
>  	unsigned int		nr_chunks;
> @@ -196,6 +197,11 @@ struct dmz_metadata {
>  	struct list_head	unmap_rnd_list;
>  	struct list_head	map_rnd_list;
>  
> +	unsigned int		nr_cache;
> +	atomic_t		unmap_nr_cache;
> +	struct list_head	unmap_cache_list;
> +	struct list_head	map_cache_list;
> +
>  	unsigned int		nr_seq;
>  	atomic_t		unmap_nr_seq;
>  	struct list_head	unmap_seq_list;
> @@ -301,6 +307,16 @@ unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
>  	return atomic_read(&zmd->unmap_nr_rnd);
>  }
>  
> +unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
> +{
> +	return zmd->nr_cache;
> +}
> +
> +unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
> +{
> +	return atomic_read(&zmd->unmap_nr_cache);
> +}
> +
>  unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
>  {
>  	return zmd->nr_seq;
> @@ -1390,9 +1406,9 @@ static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
>  		atomic_set(&zone->refcount, 0);
>  		zone->id = idx;
>  		zone->chunk = DMZ_MAP_UNMAPPED;
> -		set_bit(DMZ_RND, &zone->flags);
> +		set_bit(DMZ_CACHE, &zone->flags);
>  		zone->wp_block = 0;
> -		zmd->nr_rnd_zones++;
> +		zmd->nr_cache_zones++;
>  		zmd->nr_useable_zones++;
>  		if (dev->capacity - zone_offset < zmd->zone_nr_sectors) {
>  			/* Disable runt zone */
> @@ -1651,7 +1667,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>  		dzone->chunk = chunk;
>  		dmz_get_zone_weight(zmd, dzone);
>  
> -		if (dmz_is_rnd(dzone))
> +		if (dmz_is_cache(dzone))
> +			list_add_tail(&dzone->link, &zmd->map_cache_list);
> +		else if (dmz_is_rnd(dzone))
>  			list_add_tail(&dzone->link, &zmd->map_rnd_list);
>  		else
>  			list_add_tail(&dzone->link, &zmd->map_seq_list);
> @@ -1668,7 +1686,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>  		}
>  
>  		bzone = dmz_get(zmd, bzone_id);
> -		if (!dmz_is_rnd(bzone)) {
> +		if (!dmz_is_rnd(bzone) && !dmz_is_cache(bzone)) {
>  			dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u",
>  				    chunk, bzone_id);
>  			return -EIO;
> @@ -1680,7 +1698,10 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>  		bzone->bzone = dzone;
>  		dzone->bzone = bzone;
>  		dmz_get_zone_weight(zmd, bzone);
> -		list_add_tail(&bzone->link, &zmd->map_rnd_list);
> +		if (dmz_is_cache(bzone))
> +			list_add_tail(&bzone->link, &zmd->map_cache_list);
> +		else
> +			list_add_tail(&bzone->link, &zmd->map_rnd_list);
>  next:
>  		chunk++;
>  		e++;
> @@ -1697,8 +1718,12 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>  		dzone = dmz_get(zmd, i);
>  		if (dmz_is_meta(dzone))
>  			continue;
> +		if (dmz_is_offline(dzone))
> +			continue;
>  
> -		if (dmz_is_rnd(dzone))
> +		if (dmz_is_cache(dzone))
> +			zmd->nr_cache++;
> +		else if (dmz_is_rnd(dzone))
>  			zmd->nr_rnd++;
>  		else
>  			zmd->nr_seq++;
> @@ -1711,7 +1736,10 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
>  		/* Unmapped data zone */
>  		set_bit(DMZ_DATA, &dzone->flags);
>  		dzone->chunk = DMZ_MAP_UNMAPPED;
> -		if (dmz_is_rnd(dzone)) {
> +		if (dmz_is_cache(dzone)) {
> +			list_add_tail(&dzone->link, &zmd->unmap_cache_list);
> +			atomic_inc(&zmd->unmap_nr_cache);
> +		} else if (dmz_is_rnd(dzone)) {
>  			list_add_tail(&dzone->link, &zmd->unmap_rnd_list);
>  			atomic_inc(&zmd->unmap_nr_rnd);
>  		} else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) {
> @@ -1755,6 +1783,9 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
>  	if (dmz_is_seq(zone)) {
>  		/* LRU rotate sequential zone */
>  		list_add_tail(&zone->link, &zmd->map_seq_list);
> +	} else if (dmz_is_cache(zone)) {
> +		/* LRU rotate cache zone */
> +		list_add_tail(&zone->link, &zmd->map_cache_list);
>  	} else {
>  		/* LRU rotate random zone */
>  		list_add_tail(&zone->link, &zmd->map_rnd_list);
> @@ -1830,17 +1861,19 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone)
>  }
>  
>  /*
> - * Select a random write zone for reclaim.
> + * Select a cache or random write zone for reclaim.
>   */
>  static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
>  {
>  	struct dm_zone *dzone = NULL;
>  	struct dm_zone *zone;
> +	struct list_head *zone_list = &zmd->map_rnd_list;
>  
> -	if (list_empty(&zmd->map_rnd_list))
> -		return ERR_PTR(-EBUSY);
> +	/* If we have cache zones select from the cache zone list */
> +	if (zmd->nr_cache)
> +		zone_list = &zmd->map_cache_list;
>  
> -	list_for_each_entry(zone, &zmd->map_rnd_list, link) {
> +	list_for_each_entry(zone, zone_list, link) {
>  		if (dmz_is_buf(zone))
>  			dzone = zone->bzone;
>  		else
> @@ -1853,15 +1886,21 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
>  }
>  
>  /*
> - * Select a buffered sequential zone for reclaim.
> + * Select a buffered random write or sequential zone for reclaim.

Random write zones should never be "buffered", or to be very precise, they
will be only during the time reclaim moves cache zone data to a random
zone. That is visible in the dmz_handle_write() change that executes
dmz_handle_direct_write() for cache or random zones instead of using
dmz_handle_buffered_write(). So I think this comment can stay as is.
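
For reference, the decision in dmz_handle_write() after this patch reads
roughly:

	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
	    chunk_block == zone->wp_block)
		/* Cache/random zone, or write aligned to the write
		 * pointer of a sequential zone: write directly.
		 */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);

	/* Unaligned write to a sequential zone: use a buffer zone */
	return dmz_handle_buffered_write(dmz, zone, bio,
					 chunk_block, nr_blocks);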

>   */
>  static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
>  {
>  	struct dm_zone *zone;
>  
> -	if (list_empty(&zmd->map_seq_list))
> -		return ERR_PTR(-EBUSY);
> -
> +	if (zmd->nr_cache) {
> +		/* If we have cache zones start with random zones */
> +		list_for_each_entry(zone, &zmd->map_rnd_list, link) {
> +			if (!zone->bzone)
> +				continue;
> +			if (dmz_lock_zone_reclaim(zone))
> +				return zone;
> +		}
> +	}

For the reason stated above, I think this change is not necessary either.

>  	list_for_each_entry(zone, &zmd->map_seq_list, link) {
>  		if (!zone->bzone)
>  			continue;
> @@ -1911,6 +1950,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
>  	unsigned int dzone_id;
>  	struct dm_zone *dzone = NULL;
>  	int ret = 0;
> +	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
>  
>  	dmz_lock_map(zmd);
>  again:
> @@ -1925,7 +1965,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
>  			goto out;
>  
>  		/* Allocate a random zone */
> -		dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
> +		dzone = dmz_alloc_zone(zmd, alloc_flags);
>  		if (!dzone) {
>  			if (dmz_dev_is_dying(zmd)) {
>  				dzone = ERR_PTR(-EIO);
> @@ -2018,6 +2058,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>  				     struct dm_zone *dzone)
>  {
>  	struct dm_zone *bzone;
> +	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
>  
>  	dmz_lock_map(zmd);
>  again:
> @@ -2026,7 +2067,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>  		goto out;
>  
>  	/* Allocate a random zone */
> -	bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
> +	bzone = dmz_alloc_zone(zmd, alloc_flags);
>  	if (!bzone) {
>  		if (dmz_dev_is_dying(zmd)) {
>  			bzone = ERR_PTR(-EIO);
> @@ -2043,7 +2084,10 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
>  	bzone->chunk = dzone->chunk;
>  	bzone->bzone = dzone;
>  	dzone->bzone = bzone;
> -	list_add_tail(&bzone->link, &zmd->map_rnd_list);
> +	if (alloc_flags == DMZ_ALLOC_CACHE)
> +		list_add_tail(&bzone->link, &zmd->map_cache_list);
> +	else
> +		list_add_tail(&bzone->link, &zmd->map_rnd_list);
>  out:
>  	dmz_unlock_map(zmd);
>  
> @@ -2059,31 +2103,53 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
>  	struct list_head *list;
>  	struct dm_zone *zone;
>  
> -	if (flags & DMZ_ALLOC_RND)
> +	switch (flags) {
> +	case DMZ_ALLOC_CACHE:
> +		list = &zmd->unmap_cache_list;
> +		break;
> +	case DMZ_ALLOC_RND:
>  		list = &zmd->unmap_rnd_list;
> -	else
> -		list = &zmd->unmap_seq_list;
> +		break;
> +	default:
> +		if (zmd->nr_cache)
> +			list = &zmd->unmap_rnd_list;
> +		else
> +			list = &zmd->unmap_seq_list;
> +		break;
> +	}
>  again:
>  	if (list_empty(list)) {
>  		/*
> -		 * No free zone: if this is for reclaim, allow using the
> -		 * reserved sequential zones.
> +		 * No free zone: return NULL if this is for not reclaim.

s/for not reclaim/not for reclaim

>  		 */
> -		if (!(flags & DMZ_ALLOC_RECLAIM) ||
> -		    list_empty(&zmd->reserved_seq_zones_list))
> +		if (!(flags & DMZ_ALLOC_RECLAIM))
>  			return NULL;
> -
> -		zone = list_first_entry(&zmd->reserved_seq_zones_list,
> -					struct dm_zone, link);
> -		list_del_init(&zone->link);
> -		atomic_dec(&zmd->nr_reserved_seq_zones);
> +		/*
> +		 * Use sequential write zones if we started off with random
> +		 * zones and the list is empty
> +		 */
> +		if (list == &zmd->unmap_rnd_list) {
> +			list = &zmd->unmap_seq_list;
> +			goto again;
> +		}
> +		/*
> +		 * Fallback to the reserved sequential zones
> +		 */
> +		zone = list_first_entry_or_null(&zmd->reserved_seq_zones_list,
> +						struct dm_zone, link);
> +		if (zone) {
> +			list_del_init(&zone->link);
> +			atomic_dec(&zmd->nr_reserved_seq_zones);
> +		}
>  		return zone;
>  	}
>  
>  	zone = list_first_entry(list, struct dm_zone, link);
>  	list_del_init(&zone->link);
>  
> -	if (dmz_is_rnd(zone))
> +	if (dmz_is_cache(zone))
> +		atomic_dec(&zmd->unmap_nr_cache);
> +	else if (dmz_is_rnd(zone))
>  		atomic_dec(&zmd->unmap_nr_rnd);
>  	else
>  		atomic_dec(&zmd->unmap_nr_seq);
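
If I read the new dmz_alloc_zone() correctly, a reclaim allocation
(DMZ_ALLOC_RECLAIM alone) with cache zones present now falls back in the
order

	unmap_rnd_list -> unmap_seq_list -> reserved_seq_zones_list

while without cache zones the old unmap_seq_list ->
reserved_seq_zones_list behavior is kept. That matches the idea of using
the random zones on the zoned disk as data zones.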
> @@ -2114,7 +2180,10 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
>  		dmz_reset_zone(zmd, zone);
>  
>  	/* Return the zone to its type unmap list */
> -	if (dmz_is_rnd(zone)) {
> +	if (dmz_is_cache(zone)) {
> +		list_add_tail(&zone->link, &zmd->unmap_cache_list);
> +		atomic_inc(&zmd->unmap_nr_cache);
> +	} else if (dmz_is_rnd(zone)) {
>  		list_add_tail(&zone->link, &zmd->unmap_rnd_list);
>  		atomic_inc(&zmd->unmap_nr_rnd);
>  	} else if (atomic_read(&zmd->nr_reserved_seq_zones) <
> @@ -2140,7 +2209,9 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone,
>  	dmz_set_chunk_mapping(zmd, chunk, dzone->id,
>  			      DMZ_MAP_UNMAPPED);
>  	dzone->chunk = chunk;
> -	if (dmz_is_rnd(dzone))
> +	if (dmz_is_cache(dzone))
> +		list_add_tail(&dzone->link, &zmd->map_cache_list);
> +	else if (dmz_is_rnd(dzone))
>  		list_add_tail(&dzone->link, &zmd->map_rnd_list);
>  	else
>  		list_add_tail(&dzone->link, &zmd->map_seq_list);
> @@ -2714,6 +2785,10 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
>  	INIT_LIST_HEAD(&zmd->unmap_rnd_list);
>  	INIT_LIST_HEAD(&zmd->map_rnd_list);
>  
> +	atomic_set(&zmd->unmap_nr_cache, 0);
> +	INIT_LIST_HEAD(&zmd->unmap_cache_list);
> +	INIT_LIST_HEAD(&zmd->map_cache_list);
> +
>  	atomic_set(&zmd->unmap_nr_seq, 0);
>  	INIT_LIST_HEAD(&zmd->unmap_seq_list);
>  	INIT_LIST_HEAD(&zmd->map_seq_list);
> @@ -2736,7 +2811,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
>  	/* Set metadata zones starting from sb_zone */
>  	for (i = 0; i < zmd->nr_meta_zones << 1; i++) {
>  		zone = dmz_get(zmd, zmd->sb[0].zone->id + i);
> -		if (!dmz_is_rnd(zone)) {
> +		if (!dmz_is_rnd(zone) && !dmz_is_cache(zone)) {
>  			dmz_zmd_err(zmd,
>  				    "metadata zone %d is not random", i);
>  			ret = -ENXIO;
> @@ -2788,6 +2863,8 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
>  		      zmd->nr_meta_zones * 2);
>  	dmz_zmd_debug(zmd, "  %u data zones for %u chunks",
>  		      zmd->nr_data_zones, zmd->nr_chunks);
> +	dmz_zmd_debug(zmd, "    %u cache zones (%u unmapped)",
> +		      zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache));
>  	dmz_zmd_debug(zmd, "    %u random zones (%u unmapped)",
>  		      zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd));
>  	dmz_zmd_debug(zmd, "    %u sequential zones (%u unmapped)",
> diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
> index 39ea0d5d4706..6004cf71a000 100644
> --- a/drivers/md/dm-zoned-reclaim.c
> +++ b/drivers/md/dm-zoned-reclaim.c
> @@ -43,13 +43,13 @@ enum {
>   * Percentage of unmapped (free) random zones below which reclaim starts
>   * even if the target is busy.
>   */
> -#define DMZ_RECLAIM_LOW_UNMAP_RND	30
> +#define DMZ_RECLAIM_LOW_UNMAP_ZONES	30
>  
>  /*
>   * Percentage of unmapped (free) random zones above which reclaim will
>   * stop if the target is busy.
>   */
> -#define DMZ_RECLAIM_HIGH_UNMAP_RND	50
> +#define DMZ_RECLAIM_HIGH_UNMAP_ZONES	50
>  
>  /*
>   * Align a sequential zone write pointer to chunk_block.
> @@ -289,9 +289,11 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
>  	if (!szone)
>  		return -ENOSPC;
>  
> -	DMDEBUG("(%s): Chunk %u, move rnd zone %u (weight %u) to seq zone %u",
> -		dmz_metadata_label(zmd),
> -		chunk, dzone->id, dmz_weight(dzone), szone->id);
> +	DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
> +		dmz_metadata_label(zmd), chunk,
> +		dmz_is_cache(dzone) ? "cache" : "rnd",
> +		dzone->id, dmz_weight(dzone),
> +		dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
>  
>  	/* Flush the random data zone into the sequential zone */
>  	ret = dmz_reclaim_copy(zrc, dzone, szone);
> @@ -358,7 +360,7 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
>  
>  	start = jiffies;
>  	dev = dmz_zone_to_dev(zmd, dzone);
> -	if (dmz_is_rnd(dzone)) {
> +	if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) {
>  		if (!dmz_weight(dzone)) {
>  			/* Empty zone */
>  			dmz_reclaim_empty(zrc, dzone);
> @@ -424,29 +426,41 @@ static inline int dmz_target_idle(struct dmz_reclaim *zrc)
>  	return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
>  }
>  
> -/*
> - * Test if reclaim is necessary.
> - */
> -static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
> +static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
>  {
>  	struct dmz_metadata *zmd = zrc->metadata;
> +	unsigned int nr_cache = dmz_nr_cache_zones(zmd);
>  	unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
> -	unsigned int nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
> -	unsigned int p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
> +	unsigned int nr_unmap, nr_zones;
>  
> +	if (nr_cache) {
> +		nr_zones = nr_cache;
> +		nr_unmap = dmz_nr_unmap_cache_zones(zmd);
> +	} else {
> +		nr_zones = nr_rnd;
> +		nr_unmap = dmz_nr_unmap_rnd_zones(zmd);
> +	}
> +	return nr_unmap * 100 / nr_zones;
> +}
> +
> +/*
> + * Test if reclaim is necessary.
> + */
> +static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
> +{
>  	/* Reclaim when idle */
> -	if (dmz_target_idle(zrc) && nr_unmap_rnd < nr_rnd)
> +	if (dmz_target_idle(zrc) && p_unmap < 100)
>  		return true;
>  
> -	/* If there are still plenty of random zones, do not reclaim */
> -	if (p_unmap_rnd >= DMZ_RECLAIM_HIGH_UNMAP_RND)
> +	/* If there are still plenty of cache zones, do not reclaim */
> +	if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES)
>  		return false;
>  
>  	/*
> -	 * If the percentage of unmapped random zones is low,
> +	 * If the percentage of unmapped cache zones is low,
>  	 * reclaim even if the target is busy.
>  	 */
> -	return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
> +	return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES;
>  }
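
Checking the math here: with 128 cache zones of which 32 are unmapped,
dmz_reclaim_percentage() returns 32 * 100 / 128 = 25, so a busy target
keeps reclaiming (25 <= DMZ_RECLAIM_LOW_UNMAP_ZONES); with 80 unmapped it
returns 62 and reclaim stops (62 >= DMZ_RECLAIM_HIGH_UNMAP_ZONES). Same
behavior as before, just generalized to cache zones.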
>  
>  /*
> @@ -456,14 +470,14 @@ static void dmz_reclaim_work(struct work_struct *work)
>  {
>  	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
>  	struct dmz_metadata *zmd = zrc->metadata;
> -	unsigned int nr_rnd, nr_unmap_rnd;
> -	unsigned int p_unmap_rnd;
> +	unsigned int p_unmap;
>  	int ret;
>  
>  	if (dmz_dev_is_dying(zmd))
>  		return;
>  
> -	if (!dmz_should_reclaim(zrc)) {
> +	p_unmap = dmz_reclaim_percentage(zrc);
> +	if (!dmz_should_reclaim(zrc, p_unmap)) {
>  		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
>  		return;
>  	}
> @@ -474,22 +488,20 @@ static void dmz_reclaim_work(struct work_struct *work)
>  	 * and slower if there are still some free random zones to avoid
>  	 * as much as possible to negatively impact the user workload.
>  	 */
> -	nr_rnd = dmz_nr_rnd_zones(zmd);
> -	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
> -	p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
> -	if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
> +	if (dmz_target_idle(zrc) || p_unmap < DMZ_RECLAIM_LOW_UNMAP_ZONES / 2) {
>  		/* Idle or very low percentage: go fast */
>  		zrc->kc_throttle.throttle = 100;
>  	} else {
>  		/* Busy but we still have some random zone: throttle */
> -		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2);
> +		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
>  	}
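
Worked through: when idle, or when p_unmap is below 30 / 2 = 15, dm-kcopyd
runs at full speed (throttle = 100); a busy target at e.g. p_unmap = 20
gets min(75, 100 - 20 / 2) = 75.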
>  
> -	DMDEBUG("(%s): Reclaim (%u): %s, %u%% free rnd zones (%u/%u)",
> +	DMDEBUG("(%s): Reclaim (%u): %s, %u%% free cache zones (%u/%u)",
>  		dmz_metadata_label(zmd),
>  		zrc->kc_throttle.throttle,
>  		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
> -		p_unmap_rnd, nr_unmap_rnd, nr_rnd);
> +		p_unmap, dmz_nr_unmap_cache_zones(zmd),
> +		dmz_nr_cache_zones(zmd));
>  
>  	ret = dmz_do_reclaim(zrc);
>  	if (ret) {
> @@ -587,7 +599,9 @@ void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
>   */
>  void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
>  {
> -	if (dmz_should_reclaim(zrc))
> +	unsigned int p_unmap = dmz_reclaim_percentage(zrc);
> +
> +	if (dmz_should_reclaim(zrc, p_unmap))
>  		mod_delayed_work(zrc->wq, &zrc->work, 0);
>  }
>  
> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
> index ea43f6892ced..8999de07cddb 100644
> --- a/drivers/md/dm-zoned-target.c
> +++ b/drivers/md/dm-zoned-target.c
> @@ -190,7 +190,8 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
>  	DMDEBUG("(%s): READ chunk %llu -> %s zone %u, block %llu, %u blocks",
>  		dmz_metadata_label(zmd),
>  		(unsigned long long)dmz_bio_chunk(zmd, bio),
> -		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
> +		(dmz_is_rnd(zone) ? "RND" :
> +		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
>  		zone->id,
>  		(unsigned long long)chunk_block, nr_blocks);
>  
> @@ -198,7 +199,8 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
>  	bzone = zone->bzone;
>  	while (chunk_block < end_block) {
>  		nr_blocks = 0;
> -		if (dmz_is_rnd(zone) || chunk_block < zone->wp_block) {
> +		if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
> +		    chunk_block < zone->wp_block) {
>  			/* Test block validity in the data zone */
>  			ret = dmz_block_valid(zmd, zone, chunk_block);
>  			if (ret < 0)
> @@ -331,11 +333,13 @@ static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
>  	DMDEBUG("(%s): WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
>  		dmz_metadata_label(zmd),
>  		(unsigned long long)dmz_bio_chunk(zmd, bio),
> -		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
> +		(dmz_is_rnd(zone) ? "RND" :
> +		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
>  		zone->id,
>  		(unsigned long long)chunk_block, nr_blocks);
>  
> -	if (dmz_is_rnd(zone) || chunk_block == zone->wp_block) {
> +	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
> +	    chunk_block == zone->wp_block) {
>  		/*
>  		 * zone is a random zone or it is a sequential zone
>  		 * and the BIO is aligned to the zone write pointer:
> @@ -381,7 +385,8 @@ static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
>  	 * Invalidate blocks in the data zone and its
>  	 * buffer zone if one is mapped.
>  	 */
> -	if (dmz_is_rnd(zone) || chunk_block < zone->wp_block)
> +	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
> +	    chunk_block < zone->wp_block)
>  		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
>  	if (ret == 0 && zone->bzone)
>  		ret = dmz_invalidate_blocks(zmd, zone->bzone,
> @@ -1064,8 +1069,10 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
>  
>  	switch (type) {
>  	case STATUSTYPE_INFO:
> -		DMEMIT("%u zones %u/%u random %u/%u sequential",
> +		DMEMIT("%u zones %u/%u cache %u/%u random %u/%u sequential",
>  		       dmz_nr_zones(dmz->metadata),
> +		       dmz_nr_unmap_cache_zones(dmz->metadata),
> +		       dmz_nr_cache_zones(dmz->metadata),
>  		       dmz_nr_unmap_rnd_zones(dmz->metadata),
>  		       dmz_nr_rnd_zones(dmz->metadata),
>  		       dmz_nr_unmap_seq_zones(dmz->metadata),
> diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
> index 4971a765be55..b1bdfa3c957a 100644
> --- a/drivers/md/dm-zoned.h
> +++ b/drivers/md/dm-zoned.h
> @@ -111,6 +111,7 @@ struct dm_zone {
>   */
>  enum {
>  	/* Zone write type */
> +	DMZ_CACHE,
>  	DMZ_RND,
>  	DMZ_SEQ,
>  
> @@ -131,6 +132,7 @@ enum {
>  /*
>   * Zone data accessors.
>   */
> +#define dmz_is_cache(z)		test_bit(DMZ_CACHE, &(z)->flags)
>  #define dmz_is_rnd(z)		test_bit(DMZ_RND, &(z)->flags)
>  #define dmz_is_seq(z)		test_bit(DMZ_SEQ, &(z)->flags)
>  #define dmz_is_empty(z)		((z)->wp_block == 0)
> @@ -189,7 +191,8 @@ bool dmz_check_dev(struct dmz_metadata *zmd);
>  bool dmz_dev_is_dying(struct dmz_metadata *zmd);
>  
>  #define DMZ_ALLOC_RND		0x01
> -#define DMZ_ALLOC_RECLAIM	0x02
> +#define DMZ_ALLOC_CACHE		0x02
> +#define DMZ_ALLOC_RECLAIM	0x04
>  
>  struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags);
>  void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
> @@ -198,6 +201,8 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
>  		  unsigned int chunk);
>  void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
>  unsigned int dmz_nr_zones(struct dmz_metadata *zmd);
> +unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd);
> +unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd);
>  unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd);
>  unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd);
>  unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd);
> 

Apart from the nits above, all looks good. I am running this right now and
it is running at SMR drive speed! Awesome! Will send a plot once the run
is over.

Cheers.

-- 
Damien Le Moal
Western Digital Research
