[dm-devel] [PATCH 14/14] barriers

Mon Mar 30 06:36:01 UTC 2009

Hi Mikulas,

This patch doesn't apply to 2.6.29.
Probably you are using a pretty older kernel?

Thanks,
Kiyoshi Ueda

On 2009/03/27 15:11 +0900, Mikulas Patocka wrote:
> Barrier support.
> 
> Barriers are submitted to a worker thread that issues them in-order.
> 
> __split_and_process_bio function is modified that when it sees a barrier
> request, it waits for all pending IO before the request, then submits
> the barrier and waits for it (we must wait, otherwise it could be
> intermixed with following requests).
> 
> Errors from the barrier request are recorded in per-device barrier_error
> variable. There may be only one barrier request in progress, so using
> a per-device variable is correct.
> 
> The barrier request is converted to non-barrier request when sending it
> to the underlying device.
> 
> This patch guarantees correct barrier behavior if the underlying device
> doesn't perform write-back caching. The same requirement existed before
> barriers were supported in dm.
> 
> Bottom layer barrier support (sending barriers by target drivers) and
> handling devices with write-back caches will be done in further patches.
> 
> Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
> 
> ---
>  drivers/md/dm.c |   79 +++++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 61 insertions(+), 18 deletions(-)
> 
> Index: linux-2.6.29-rc8-devel/drivers/md/dm.c
> ===================================================================
> --- linux-2.6.29-rc8-devel.orig/drivers/md/dm.c	2009-03-27 06:51:29.000000000 +0100
> +++ linux-2.6.29-rc8-devel/drivers/md/dm.c	2009-03-27 06:51:33.000000000 +0100
> @@ -125,6 +125,11 @@ struct mapped_device {
>  	spinlock_t deferred_lock;
>  
>  	/*
> +	 * An error from the barrier request currently being processed.
> +	 */
> +	int barrier_error;
> +
> +	/*
>  	 * Processing queue (flush/barriers)
>  	 */
>  	struct workqueue_struct *wq;
> @@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
>  	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
>  	part_stat_unlock();
>  
> +	/*
> +	 * after this is decremented, the bio must not be touched if it is
> +	 * barrier bio
> +	 */
>  	dm_disk(md)->part0.in_flight = pending =
>  		atomic_dec_return(&md->pending);
>  
> @@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
>  			 */
>  			spin_lock_irqsave(&io->md->deferred_lock, flags);
>  			if (__noflush_suspending(io->md))
> -				bio_list_add(&io->md->deferred, io->bio);
> +				bio_list_add_head(&io->md->deferred, io->bio);
>  			else
>  				/* noflush suspend was interrupted. */
>  				io->error = -EIO;
>  			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
>  		}
>  
> -		end_io_acct(io);
> +		if (bio_barrier(io->bio)) {
> +			/*
> +			 * There could be just one barrier request, so we use
> +			 * per-device variable for error reporting is OK.
> +			 * Note that you can't touch the bio after end_io_acct
> +			 */
> +			io->md->barrier_error = io->error;
> +			end_io_acct(io);
> +		} else {
> +			end_io_acct(io);
>  
> -		if (io->error != DM_ENDIO_REQUEUE) {
> -			trace_block_bio_complete(io->md->queue, io->bio);
> +			if (io->error != DM_ENDIO_REQUEUE) {
> +				trace_block_bio_complete(io->md->queue, io->bio);
>  
> -			bio_endio(io->bio, io->error);
> +				bio_endio(io->bio, io->error);
> +			}
>  		}
>  
>  		free_io(io->md, io);
> @@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
>  
>  	clone->bi_sector = sector;
>  	clone->bi_bdev = bio->bi_bdev;
> -	clone->bi_rw = bio->bi_rw;
> +	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
>  	clone->bi_vcnt = 1;
>  	clone->bi_size = to_bytes(len);
>  	clone->bi_io_vec->bv_offset = offset;
> @@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
>  
>  	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
>  	__bio_clone(clone, bio);
> +	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
>  	clone->bi_destructor = dm_bio_destructor;
>  	clone->bi_sector = sector;
>  	clone->bi_idx = idx;
> @@ -823,7 +843,10 @@ static void __split_and_process_bio(stru
>  
>  	ci.map = dm_get_table(md);
>  	if (unlikely(!ci.map)) {
> -		bio_io_error(bio);
> +		if (!bio_barrier(bio))
> +			bio_io_error(bio);
> +		else
> +			md->barrier_error = -EIO;
>  		return;
>  	}
>  
> @@ -907,15 +930,6 @@ static int dm_request(struct request_que
>  	struct mapped_device *md = q->queuedata;
>  	int cpu;
>  
> -	/*
> -	 * There is no use in forwarding any barrier request since we can't
> -	 * guarantee it is (or can be) handled by the targets correctly.
> -	 */
> -	if (unlikely(bio_barrier(bio))) {
> -		bio_endio(bio, -EOPNOTSUPP);
> -		return 0;
> -	}
> -
>  	down_read(&md->io_lock);
>  
>  	cpu = part_stat_lock();
> @@ -927,7 +941,8 @@ static int dm_request(struct request_que
>  	 * If we're suspended or the thread is processing barriers
>  	 * we have to queue this io for later.
>  	 */
> -	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags))) {
> +	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags)) ||
> +	    unlikely(bio_barrier(bio))) {
>  		up_read(&md->io_lock);
>  
>  		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
> @@ -1393,6 +1408,12 @@ static int dm_wait_for_completion(struct
>  	return r;
>  }
>  
> +static int dm_flush(struct mapped_device *md)
> +{
> +	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> +	return 0;
> +}
> +
>  /*
>   * Process the deferred bios
>   */
> @@ -1415,8 +1436,30 @@ static void dm_wq_work(struct work_struc
>  			break;
>  		}
>  
> -		__split_and_process_bio(md, c);
> +		if (!bio_barrier(c))
> +			__split_and_process_bio(md, c);
> +		else {
> +			int error = dm_flush(md);
> +			if (unlikely(error)) {
> +				bio_endio(c, error);
> +				goto next_bio;
> +			}
> +			if (bio_empty_barrier(c)) {
> +				bio_endio(c, 0);
> +				goto next_bio;
> +			}
> +
> +			__split_and_process_bio(md, c);
> +
> +			error = dm_flush(md);
> +			if (!error && md->barrier_error)
> +				error = md->barrier_error;
> +
> +			if (md->barrier_error != DM_ENDIO_REQUEUE)
> +				bio_endio(c, error);
> +		}
>  
> +next_bio:
>  		down_write(&md->io_lock);
>  	}
>  }
> 
> --
> dm-devel mailing list
> dm-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel