[Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization

Fri Nov 13 13:48:45 UTC 2015

----- Original Message -----
> Here is an updated version of this patch, please review.
> 
> Thanks,
> Andreas
> 
> --
> 
> Instead of submitting a READ_SYNC bio for the inode and a READA bio for
> the inode's extended attributes through submit_bh, submit a single READ
> bio for both through submit_bio when possible.  This can be more
> efficient on some kinds of block devices.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
> ---
>  fs/gfs2/meta_io.c | 81
>  ++++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 63 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 0f24828..e137d96 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -187,19 +187,50 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock
> *gl, u64 blkno)
>  	return bh;
>  }
>  
> -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno)
> +static void gfs2_meta_read_endio(struct bio *bio)
>  {
> -	struct buffer_head *bh;
> +	struct bio_vec *bvec;
> +	int i;
> +
> +	bio_for_each_segment_all(bvec, bio, i) {
> +		struct page *page = bvec->bv_page;
> +		struct buffer_head *bh = page_buffers(page);
> +		unsigned int len = bvec->bv_len;
> +
> +		while (bh_offset(bh) < bvec->bv_offset)
> +			bh = bh->b_this_page;
> +		do {
> +			struct buffer_head *next = bh->b_this_page;
> +			len -= bh->b_size;
> +			bh->b_end_io(bh, !bio->bi_error);
> +			bh = next;
> +		} while (bh && len);
> +	}
> +	bio_put(bio);
> +}
>  
> -	bh = gfs2_getbuf(gl, blkno, 1);
> -	lock_buffer(bh);
> -	if (buffer_uptodate(bh)) {
> -		unlock_buffer(bh);
> -		brelse(bh);
> +/*
> + * Submit several consecutive buffer head I/O requests as a single bio I/O
> + * request.  (See submit_bh_wbc.)
> + */
> +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
> +{
> +	struct buffer_head *bh = bhs[0];
> +	struct bio *bio;
> +	int i;
> +
> +	if (!num)
>  		return;
> +
> +	bio = bio_alloc(GFP_NOIO, num);
> +	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> +	bio->bi_bdev = bh->b_bdev;
> +	for (i = 0; i < num; i++) {
> +		bh = bhs[i];
> +		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
>  	}
> -	bh->b_end_io = end_buffer_read_sync;
> -	submit_bh(READA | REQ_META | REQ_PRIO, bh);
> +	bio->bi_end_io = gfs2_meta_read_endio;
> +	submit_bio(rw, bio);
>  }
>  
>  /**
> @@ -216,7 +247,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int
> flags,
>  		   int rahead, struct buffer_head **bhp)
>  {
>  	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -	struct buffer_head *bh;
> +	struct buffer_head *bh, *bhs[2];
> +	int num = 0;
>  
>  	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
>  		*bhp = NULL;
> @@ -228,18 +260,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
> int flags,
>  	lock_buffer(bh);
>  	if (buffer_uptodate(bh)) {
>  		unlock_buffer(bh);
> -		if (rahead)
> -			gfs2_meta_readahead(gl, blkno + 1);
> -		return 0;
> +		flags &= ~DIO_WAIT;
> +	} else {
> +		bh->b_end_io = end_buffer_read_sync;
> +		get_bh(bh);
> +		bhs[num++] = bh;
>  	}
> -	bh->b_end_io = end_buffer_read_sync;
> -	get_bh(bh);
> -	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
> -	if (rahead)
> -		gfs2_meta_readahead(gl, blkno + 1);
> +
> +	if (rahead) {
> +		bh = gfs2_getbuf(gl, blkno + 1, CREATE);
> +
> +		lock_buffer(bh);
> +		if (buffer_uptodate(bh)) {
> +			unlock_buffer(bh);
> +			brelse(bh);
> +		} else {
> +			bh->b_end_io = end_buffer_read_sync;
> +			bhs[num++] = bh;
> +		}
> +	}
> +
> +	gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
>  	if (!(flags & DIO_WAIT))
>  		return 0;
>  
> +	bh = *bhp;
>  	wait_on_buffer(bh);
>  	if (unlikely(!buffer_uptodate(bh))) {
>  		struct gfs2_trans *tr = current->journal_info;
> --
> 2.5.0
> 
> 
Hi,

ACK to both patches

Looks good. I'll hold onto them until this merge window closes.

Regards,

Bob Peterson
Red Hat File Systems