[dm-devel] [PATCH v2 10/11] dm: introduce zone append emulation
Damien Le Moal
Damien.LeMoal at wdc.com
Thu May 20 06:25:16 UTC 2021
On 2021/05/20 15:10, Hannes Reinecke wrote:
[...]
>> +/*
>> + * First phase of BIO mapping for targets with zone append emulation:
>> + * check all BIO that change a zone writer pointer and change zone
>> + * append operations into regular write operations.
>> + */
>> +static bool dm_zone_map_bio_begin(struct mapped_device *md,
>> + struct bio *orig_bio, struct bio *clone)
>> +{
>> + sector_t zone_sectors = blk_queue_zone_sectors(md->queue);
>> + unsigned int zno = bio_zone_no(orig_bio);
>> + unsigned long flags;
>> + bool good_io = false;
>> +
>> + spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> + /*
>> + * If the target zone is in an error state, recover by inspecting the
>> + * zone to get its current write pointer position. Note that since the
>> + * target zone is already locked, a BIO issuing context should never
>> + * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
>> + */
>> + if (md->zwp_offset[zno] == DM_ZONE_INVALID_WP_OFST) {
>> + unsigned int wp_offset;
>> + int ret;
>> +
>> + md->zwp_offset[zno] = DM_ZONE_UPDATING_WP_OFST;
>> +
>> + spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>> + ret = dm_update_zone_wp_offset(md, zno, &wp_offset);
>> + spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> + if (ret) {
>> + md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> + goto out;
>> + }
>> + md->zwp_offset[zno] = wp_offset;
>> + } else if (md->zwp_offset[zno] == DM_ZONE_UPDATING_WP_OFST) {
>> + DMWARN_LIMIT("Invalid DM_ZONE_UPDATING_WP_OFST state");
>> + goto out;
>> + }
>> +
>> + switch (bio_op(orig_bio)) {
>> + case REQ_OP_WRITE_ZEROES:
>> + case REQ_OP_WRITE_SAME:
>> + case REQ_OP_WRITE:
>> + break;
>> + case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_FINISH:
>> + goto good;
>> + case REQ_OP_ZONE_APPEND:
>> + /*
>> + * Change zone append operations into a non-mergeable regular
>> + * writes directed at the current write pointer position of the
>> + * target zone.
>> + */
>> + clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
>> + (orig_bio->bi_opf & (~REQ_OP_MASK));
>> + clone->bi_iter.bi_sector =
>> + orig_bio->bi_iter.bi_sector + md->zwp_offset[zno];
>> + break;
>> + default:
>> + DMWARN_LIMIT("Invalid BIO operation");
>> + goto out;
>> + }
>> +
>> + /* Cannot write to a full zone */
>> + if (md->zwp_offset[zno] >= zone_sectors)
>> + goto out;
>> +
>> + /* Writes must be aligned to the zone write pointer */
>> + if ((clone->bi_iter.bi_sector & (zone_sectors - 1)) != md->zwp_offset[zno])
>> + goto out;
>> +
>> +good:
>> + good_io = true;
>> +
>> +out:
>> + spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>
> I'm not happy with the spinlock. Can't the same effect be achieved with
> some clever READ_ONCE()/WRITE_ONCE/cmpexch magic?
> Especially as you have a separate 'zone lock' mechanism ...
hmmm... Let me see. Given that what the bio completion is relatively simple, it
may be possible. With more coffee, I amy be able to come up with something clever.
>
>> +
>> + return good_io;
>> +}
>> +
>> +/*
>> + * Second phase of BIO mapping for targets with zone append emulation:
>> + * update the zone write pointer offset array to account for the additional
>> + * data written to a zone. Note that at this point, the remapped clone BIO
>> + * may already have completed, so we do not touch it.
>> + */
>> +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
>> + struct bio *orig_bio,
>> + unsigned int nr_sectors)
>> +{
>> + unsigned int zno = bio_zone_no(orig_bio);
>> + blk_status_t sts = BLK_STS_OK;
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> + /* Update the zone wp offset */
>> + switch (bio_op(orig_bio)) {
>> + case REQ_OP_ZONE_RESET:
>> + md->zwp_offset[zno] = 0;
>> + break;
>> + case REQ_OP_ZONE_FINISH:
>> + md->zwp_offset[zno] = blk_queue_zone_sectors(md->queue);
>> + break;
>> + case REQ_OP_WRITE_ZEROES:
>> + case REQ_OP_WRITE_SAME:
>> + case REQ_OP_WRITE:
>> + md->zwp_offset[zno] += nr_sectors;
>> + break;
>> + case REQ_OP_ZONE_APPEND:
>> + /*
>> + * Check that the target did not truncate the write operation
>> + * emulating a zone append.
>> + */
>> + if (nr_sectors != bio_sectors(orig_bio)) {
>> + DMWARN_LIMIT("Truncated write for zone append");
>> + sts = BLK_STS_IOERR;
>> + break;
>> + }
>> + md->zwp_offset[zno] += nr_sectors;
>> + break;
>> + default:
>> + DMWARN_LIMIT("Invalid BIO operation");
>> + sts = BLK_STS_IOERR;
>> + break;
>> + }
>> +
>> + spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>
> You don't need the spinlock here; using WRITE_ONCE() should be sufficient.
If other references to zwp_offset use READ_ONCE(), no ?
[...]
>> +void dm_zone_endio(struct dm_io *io, struct bio *clone)
>> +{
>> + struct mapped_device *md = io->md;
>> + struct request_queue *q = md->queue;
>> + struct bio *orig_bio = io->orig_bio;
>> + unsigned long flags;
>> + unsigned int zno;
>> +
>> + /*
>> + * For targets that do not emulate zone append, we only need to
>> + * handle native zone-append bios.
>> + */
>> + if (!dm_emulate_zone_append(md)) {
>> + /*
>> + * Get the offset within the zone of the written sector
>> + * and add that to the original bio sector position.
>> + */
>> + if (clone->bi_status == BLK_STS_OK &&
>> + bio_op(clone) == REQ_OP_ZONE_APPEND) {
>> + sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1;
>> +
>> + orig_bio->bi_iter.bi_sector +=
>> + clone->bi_iter.bi_sector & mask;
>> + }
>> +
>> + return;
>> + }
>> +
>> + /*
>> + * For targets that do emulate zone append, if the clone BIO does not
>> + * own the target zone write lock, we have nothing to do.
>> + */
>> + if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
>> + return;
>> +
>> + zno = bio_zone_no(orig_bio);
>> +
>> + spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> + if (clone->bi_status != BLK_STS_OK) {
>> + /*
>> + * BIOs that modify a zone write pointer may leave the zone
>> + * in an unknown state in case of failure (e.g. the write
>> + * pointer was only partially advanced). In this case, set
>> + * the target zone write pointer as invalid unless it is
>> + * already being updated.
>> + */
>> + if (md->zwp_offset[zno] != DM_ZONE_UPDATING_WP_OFST)
>> + md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> + } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
>> + /*
>> + * Get the written sector for zone append operation that were
>> + * emulated using regular write operations.
>> + */
>> + if (WARN_ON_ONCE(md->zwp_offset[zno] < bio_sectors(orig_bio)))
>> + md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> + else
>> + orig_bio->bi_iter.bi_sector +=
>> + md->zwp_offset[zno] - bio_sectors(orig_bio);
>> + }
>> + spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>> +
>
> Similar comments to the spinlock here.
OK.
Thanks for the review.
--
Damien Le Moal
Western Digital Research
More information about the dm-devel
mailing list