[dm-devel] [PATCH v2] dm thin: Fix bug wrt FUA request completion
Joe Thornber
thornber at redhat.com
Fri Feb 15 13:54:07 UTC 2019
Ack.
Thanks for this. I was under the mistaken impression that FUA requests got split
by core dm into separate payload and PREFLUSH requests.
I've audited dm-cache and that looks ok.
How did you test this patch? That missing bio_list_init() in V1 must
have caused memory corruption?
- Joe
On Fri, Feb 15, 2019 at 01:21:38AM +0200, Nikos Tsironis wrote:
> When provisioning a new data block for a virtual block, either because
> the block was previously unallocated or because we are breaking sharing,
> if the whole block of data is being overwritten the bio that triggered
> the provisioning is issued immediately, skipping copying or zeroing of
> the data block.
>
> When this bio completes the new mapping is inserted in to the pool's
> metadata by process_prepared_mapping(), where the bio completion is
> signaled to the upper layers.
>
> This completion is signaled without first committing the metadata. If
> the bio in question has the REQ_FUA flag set and the system crashes
> right after its completion and before the next metadata commit, then the
> write is lost despite the REQ_FUA flag requiring that I/O completion for
> this request is only signaled after the data has been committed to
> non-volatile storage.
>
> Fix this by deferring the completion of overwrite bios that have the REQ_FUA
> flag set until after the metadata has been committed.
>
> Signed-off-by: Nikos Tsironis <ntsironis at arrikto.com>
> ---
> drivers/md/dm-thin.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 50 insertions(+), 5 deletions(-)
>
> Changes in v2:
> - Add missing bio_list_init() in pool_create()
>
> v1: https://www.redhat.com/archives/dm-devel/2019-February/msg00064.html
>
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index ca8af21bf644..e83b63608262 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -257,6 +257,7 @@ struct pool {
>
> spinlock_t lock;
> struct bio_list deferred_flush_bios;
> + struct bio_list deferred_flush_completions;
> struct list_head prepared_mappings;
> struct list_head prepared_discards;
> struct list_head prepared_discards_pt2;
> @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
> mempool_free(m, &m->tc->pool->mapping_pool);
> }
>
> +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
> +{
> + struct pool *pool = tc->pool;
> + unsigned long flags;
> +
> + /*
> + * If the bio has the REQ_FUA flag set we must commit the metadata
> + * before signaling its completion.
> + */
> + if (!bio_triggers_commit(tc, bio)) {
> + bio_endio(bio);
> + return;
> + }
> +
> + /*
> + * Complete bio with an error if earlier I/O caused changes to the
> + * metadata that can't be committed, e.g, due to I/O errors on the
> + * metadata device.
> + */
> + if (dm_thin_aborted_changes(tc->td)) {
> + bio_io_error(bio);
> + return;
> + }
> +
> + /*
> + * Batch together any bios that trigger commits and then issue a
> + * single commit for them in process_deferred_bios().
> + */
> + spin_lock_irqsave(&pool->lock, flags);
> + bio_list_add(&pool->deferred_flush_completions, bio);
> + spin_unlock_irqrestore(&pool->lock, flags);
> +}
> +
> static void process_prepared_mapping(struct dm_thin_new_mapping *m)
> {
> struct thin_c *tc = m->tc;
> @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
> */
> if (bio) {
> inc_remap_and_issue_cell(tc, m->cell, m->data_block);
> - bio_endio(bio);
> + complete_overwrite_bio(tc, bio);
> } else {
> inc_all_io_entry(tc->pool, m->cell->holder);
> remap_and_issue(tc, m->cell->holder, m->data_block);
> @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool)
> {
> unsigned long flags;
> struct bio *bio;
> - struct bio_list bios;
> + struct bio_list bios, bio_completions;
> struct thin_c *tc;
>
> tc = get_first_thin(pool);
> @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool)
> }
>
> /*
> - * If there are any deferred flush bios, we must commit
> - * the metadata before issuing them.
> + * If there are any deferred flush bios, we must commit the metadata
> + * before issuing them or signaling their completion.
> */
> bio_list_init(&bios);
> + bio_list_init(&bio_completions);
> +
> spin_lock_irqsave(&pool->lock, flags);
> bio_list_merge(&bios, &pool->deferred_flush_bios);
> bio_list_init(&pool->deferred_flush_bios);
> +
> + bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
> + bio_list_init(&pool->deferred_flush_completions);
> spin_unlock_irqrestore(&pool->lock, flags);
>
> - if (bio_list_empty(&bios) &&
> + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
> !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
> return;
>
> if (commit(pool)) {
> + bio_list_merge(&bios, &bio_completions);
> +
> while ((bio = bio_list_pop(&bios)))
> bio_io_error(bio);
> return;
> }
> pool->last_commit_jiffies = jiffies;
>
> + while ((bio = bio_list_pop(&bio_completions)))
> + bio_endio(bio);
> +
> while ((bio = bio_list_pop(&bios)))
> generic_make_request(bio);
> }
> @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
> INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
> spin_lock_init(&pool->lock);
> bio_list_init(&pool->deferred_flush_bios);
> + bio_list_init(&pool->deferred_flush_completions);
> INIT_LIST_HEAD(&pool->prepared_mappings);
> INIT_LIST_HEAD(&pool->prepared_discards);
> INIT_LIST_HEAD(&pool->prepared_discards_pt2);
> --
> 2.11.0
>
More information about the dm-devel
mailing list