[Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization

Steven Whitehouse swhiteho at redhat.com
Thu Nov 12 13:44:22 UTC 2015


Hi,

On 01/11/15 19:02, Andreas Gruenbacher wrote:
> Instead of submitting separate bio for the inode and its extended
> attributes, submit a single bio for both when possible.  The entire
> request becomes a normal read, not a readahead.
>
> To keep track of the buffer heads that make up the bio, we allocate
> temporary buffer head arrays: in the endio handler, it would also be
> possible to compute the buffer head block numbers from the bio and to
> grab the buffer heads with gfs2_getbuf, but the code would become even
> messier.
> ---
>   fs/gfs2/meta_io.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 76 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 0f24828..e650127 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -187,19 +187,63 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
>   	return bh;
>   }
>   
> -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno)
> +struct gfs2_meta_read {
> +	int num;
> +	struct buffer_head *bhs[0];
> +};
> +
> +static void gfs2_meta_read_endio(struct bio *bio) {
> +	struct gfs2_meta_read *r = bio->bi_private;
> +	int i;
> +
> +	for (i = 0; i < r->num; i++) {
> +		struct buffer_head *bh = r->bhs[i];
> +
> +		if (unlikely(bio_flagged(bio, BIO_QUIET)))
> +			set_bit(BH_Quiet, &bh->b_state);
> +
> +		bh->b_end_io(bh, !bio->bi_error);
> +	}
> +	bio_put(bio);
> +	kfree(r);
> +}
> +
> +/*
> + * (See submit_bh_wbc.)
> + */
> +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
>   {
> -	struct buffer_head *bh;
> +	struct gfs2_meta_read *r;
> +	struct buffer_head *bh = bhs[0];
> +	struct bio *bio;
> +	int i;
>   
> -	bh = gfs2_getbuf(gl, blkno, 1);
> -	lock_buffer(bh);
> -	if (buffer_uptodate(bh)) {
> -		unlock_buffer(bh);
> -		brelse(bh);
> +	if (!num)
> +		return;
> +
> +	if (num == 1) {
> +		bh->b_end_io = end_buffer_read_sync;
> +		submit_bh(rw, bh);
>   		return;
>   	}
> -	bh->b_end_io = end_buffer_read_sync;
> -	submit_bh(READA | REQ_META | REQ_PRIO, bh);
> +
> +	r = kmalloc(sizeof(*r) + num * sizeof(r->bhs[0]),
> +		    GFP_NOIO | __GFP_NOFAIL);
Can we avoid doing this I wonder? I would like to avoid adding 
GFP_NOFAIL allocations, and I think it should be possible to figure out 
where the buffers are using the bio itself (i.e. iterating over it, 
similar to gfs2_end_log_write() in lops.c) so that we don't need to 
allocate this additional memory. Otherwise this looks like the right 
approach I think,

Steve.

> +	r->num = num;
> +	for (i = 0; i < num; i++)
> +		r->bhs[i] = bhs[i];
> +
> +	bio = bio_alloc(GFP_NOIO, num);
> +	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> +	bio->bi_bdev = bh->b_bdev;
> +	for (i = 0; i < num; i++) {
> +		bh = bhs[i];
> +		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> +	}
> +	bio->bi_end_io = gfs2_meta_read_endio;
> +	bio->bi_private = r;
> +
> +	submit_bio(rw, bio);
>   }
>   
>   /**
> @@ -216,7 +260,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
>   		   int rahead, struct buffer_head **bhp)
>   {
>   	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -	struct buffer_head *bh;
> +	struct buffer_head *bh, *bhs[2];
> +	int num = 0;
>   
>   	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
>   		*bhp = NULL;
> @@ -228,18 +273,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
>   	lock_buffer(bh);
>   	if (buffer_uptodate(bh)) {
>   		unlock_buffer(bh);
> -		if (rahead)
> -			gfs2_meta_readahead(gl, blkno + 1);
> -		return 0;
> +		flags &= ~DIO_WAIT;
> +	} else {
> +		bh->b_end_io = end_buffer_read_sync;
> +		get_bh(bh);
> +		bhs[num++] = bh;
>   	}
> -	bh->b_end_io = end_buffer_read_sync;
> -	get_bh(bh);
> -	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
> -	if (rahead)
> -		gfs2_meta_readahead(gl, blkno + 1);
> +
> +	if (rahead) {
> +		bh = gfs2_getbuf(gl, blkno + 1, CREATE);
> +
> +		lock_buffer(bh);
> +		if (buffer_uptodate(bh)) {
> +			unlock_buffer(bh);
> +			brelse(bh);
> +		} else {
> +			bh->b_end_io = end_buffer_read_sync;
> +			bhs[num++] = bh;
> +		}
> +	}
> +
> +	gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
>   	if (!(flags & DIO_WAIT))
>   		return 0;
>   
> +	bh = *bhp;
>   	wait_on_buffer(bh);
>   	if (unlikely(!buffer_uptodate(bh))) {
>   		struct gfs2_trans *tr = current->journal_info;




More information about the Cluster-devel mailing list