[dm-devel] [PATCH v2] dm thin: Fix bug wrt FUA request completion

Nikos Tsironis ntsironis at arrikto.com
Fri Feb 15 14:33:29 UTC 2019


On 2/15/19 3:54 PM, Joe Thornber wrote:
> Ack.
> 
> Thanks for this I was under the mistaken impression that FUA requests got split 
> by core dm into separate payload and PREFLUSH requests.
> 
> I've audited dm-cache and that looks ok.
> 
> How did you test this patch?  That missing bio_list_init() in V1 must
> have caused memory corruption?
> 
> - Joe

Hi Joe,

bio_list_init() initializes the bio list's head and tail pointers to
NULL and pool_create() allocates the struct pool structure using
kzalloc() so the bio list was implicitly correctly initialized and no
memory corruption occurred.

Nikos

> 
> 
> On Fri, Feb 15, 2019 at 01:21:38AM +0200, Nikos Tsironis wrote:
>> When provisioning a new data block for a virtual block, either because
>> the block was previously unallocated or because we are breaking sharing,
>> if the whole block of data is being overwritten the bio that triggered
>> the provisioning is issued immediately, skipping copying or zeroing of
>> the data block.
>>
>> When this bio completes the new mapping is inserted in to the pool's
>> metadata by process_prepared_mapping(), where the bio completion is
>> signaled to the upper layers.
>>
>> This completion is signaled without first committing the metadata. If
>> the bio in question has the REQ_FUA flag set and the system crashes
>> right after its completion and before the next metadata commit, then the
>> write is lost despite the REQ_FUA flag requiring that I/O completion for
>> this request is only signaled after the data has been committed to
>> non-volatile storage.
>>
>> Fix this by deferring the completion of overwrite bios, with the REQ_FUA
>> flag set, after the metadata has been committed.
>>
>> Signed-off-by: Nikos Tsironis <ntsironis at arrikto.com>
>> ---
>>  drivers/md/dm-thin.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 50 insertions(+), 5 deletions(-)
>>
>> Changes in v2:
>>   - Add missing bio_list_init() in pool_create()
>>
>> v1: https://www.redhat.com/archives/dm-devel/2019-February/msg00064.html
>>
>> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
>> index ca8af21bf644..e83b63608262 100644
>> --- a/drivers/md/dm-thin.c
>> +++ b/drivers/md/dm-thin.c
>> @@ -257,6 +257,7 @@ struct pool {
>>  
>>  	spinlock_t lock;
>>  	struct bio_list deferred_flush_bios;
>> +	struct bio_list deferred_flush_completions;
>>  	struct list_head prepared_mappings;
>>  	struct list_head prepared_discards;
>>  	struct list_head prepared_discards_pt2;
>> @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
>>  	mempool_free(m, &m->tc->pool->mapping_pool);
>>  }
>>  
>> +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
>> +{
>> +	struct pool *pool = tc->pool;
>> +	unsigned long flags;
>> +
>> +	/*
>> +	 * If the bio has the REQ_FUA flag set we must commit the metadata
>> +	 * before signaling its completion.
>> +	 */
>> +	if (!bio_triggers_commit(tc, bio)) {
>> +		bio_endio(bio);
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Complete bio with an error if earlier I/O caused changes to the
>> +	 * metadata that can't be committed, e.g, due to I/O errors on the
>> +	 * metadata device.
>> +	 */
>> +	if (dm_thin_aborted_changes(tc->td)) {
>> +		bio_io_error(bio);
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Batch together any bios that trigger commits and then issue a
>> +	 * single commit for them in process_deferred_bios().
>> +	 */
>> +	spin_lock_irqsave(&pool->lock, flags);
>> +	bio_list_add(&pool->deferred_flush_completions, bio);
>> +	spin_unlock_irqrestore(&pool->lock, flags);
>> +}
>> +
>>  static void process_prepared_mapping(struct dm_thin_new_mapping *m)
>>  {
>>  	struct thin_c *tc = m->tc;
>> @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
>>  	 */
>>  	if (bio) {
>>  		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
>> -		bio_endio(bio);
>> +		complete_overwrite_bio(tc, bio);
>>  	} else {
>>  		inc_all_io_entry(tc->pool, m->cell->holder);
>>  		remap_and_issue(tc, m->cell->holder, m->data_block);
>> @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool)
>>  {
>>  	unsigned long flags;
>>  	struct bio *bio;
>> -	struct bio_list bios;
>> +	struct bio_list bios, bio_completions;
>>  	struct thin_c *tc;
>>  
>>  	tc = get_first_thin(pool);
>> @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool)
>>  	}
>>  
>>  	/*
>> -	 * If there are any deferred flush bios, we must commit
>> -	 * the metadata before issuing them.
>> +	 * If there are any deferred flush bios, we must commit the metadata
>> +	 * before issuing them or signaling their completion.
>>  	 */
>>  	bio_list_init(&bios);
>> +	bio_list_init(&bio_completions);
>> +
>>  	spin_lock_irqsave(&pool->lock, flags);
>>  	bio_list_merge(&bios, &pool->deferred_flush_bios);
>>  	bio_list_init(&pool->deferred_flush_bios);
>> +
>> +	bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
>> +	bio_list_init(&pool->deferred_flush_completions);
>>  	spin_unlock_irqrestore(&pool->lock, flags);
>>  
>> -	if (bio_list_empty(&bios) &&
>> +	if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
>>  	    !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
>>  		return;
>>  
>>  	if (commit(pool)) {
>> +		bio_list_merge(&bios, &bio_completions);
>> +
>>  		while ((bio = bio_list_pop(&bios)))
>>  			bio_io_error(bio);
>>  		return;
>>  	}
>>  	pool->last_commit_jiffies = jiffies;
>>  
>> +	while ((bio = bio_list_pop(&bio_completions)))
>> +		bio_endio(bio);
>> +
>>  	while ((bio = bio_list_pop(&bios)))
>>  		generic_make_request(bio);
>>  }
>> @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
>>  	INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
>>  	spin_lock_init(&pool->lock);
>>  	bio_list_init(&pool->deferred_flush_bios);
>> +	bio_list_init(&pool->deferred_flush_completions);
>>  	INIT_LIST_HEAD(&pool->prepared_mappings);
>>  	INIT_LIST_HEAD(&pool->prepared_discards);
>>  	INIT_LIST_HEAD(&pool->prepared_discards_pt2);
>> -- 
>> 2.11.0
>>
> 
> --
> dm-devel mailing list
> dm-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
> 




More information about the dm-devel mailing list