[Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization
Bob Peterson
rpeterso at redhat.com
Fri Nov 13 13:48:45 UTC 2015
----- Original Message -----
> Here is an updated version of this patch, please review.
>
> Thanks,
> Andreas
>
> --
>
> Instead of submitting a READ_SYNC bio for the inode and a READA bio for
> the inode's extended attributes through submit_bh, submit a single READ
> bio for both through submit_bio when possible. This can be more
> efficient on some kinds of block devices.
>
> Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
> ---
> fs/gfs2/meta_io.c | 81 ++++++++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 63 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 0f24828..e137d96 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -187,19 +187,50 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
> return bh;
> }
>
> -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno)
> +static void gfs2_meta_read_endio(struct bio *bio)
> {
> - struct buffer_head *bh;
> + struct bio_vec *bvec;
> + int i;
> +
> + bio_for_each_segment_all(bvec, bio, i) {
> + struct page *page = bvec->bv_page;
> + struct buffer_head *bh = page_buffers(page);
> + unsigned int len = bvec->bv_len;
> +
> + while (bh_offset(bh) < bvec->bv_offset)
> + bh = bh->b_this_page;
> + do {
> + struct buffer_head *next = bh->b_this_page;
> + len -= bh->b_size;
> + bh->b_end_io(bh, !bio->bi_error);
> + bh = next;
> + } while (bh && len);
> + }
> + bio_put(bio);
> +}
>
> - bh = gfs2_getbuf(gl, blkno, 1);
> - lock_buffer(bh);
> - if (buffer_uptodate(bh)) {
> - unlock_buffer(bh);
> - brelse(bh);
> +/*
> + * Submit several consecutive buffer head I/O requests as a single bio I/O
> + * request. (See submit_bh_wbc.)
> + */
> +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
> +{
> + struct buffer_head *bh = bhs[0];
> + struct bio *bio;
> + int i;
> +
> + if (!num)
> return;
> +
> + bio = bio_alloc(GFP_NOIO, num);
> + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> + bio->bi_bdev = bh->b_bdev;
> + for (i = 0; i < num; i++) {
> + bh = bhs[i];
> + bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> }
> - bh->b_end_io = end_buffer_read_sync;
> - submit_bh(READA | REQ_META | REQ_PRIO, bh);
> + bio->bi_end_io = gfs2_meta_read_endio;
> + submit_bio(rw, bio);
> }
>
> /**
> @@ -216,7 +247,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> int rahead, struct buffer_head **bhp)
> {
> struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> - struct buffer_head *bh;
> + struct buffer_head *bh, *bhs[2];
> + int num = 0;
>
> if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
> *bhp = NULL;
> @@ -228,18 +260,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> lock_buffer(bh);
> if (buffer_uptodate(bh)) {
> unlock_buffer(bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> - return 0;
> + flags &= ~DIO_WAIT;
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + get_bh(bh);
> + bhs[num++] = bh;
> }
> - bh->b_end_io = end_buffer_read_sync;
> - get_bh(bh);
> - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> +
> + if (rahead) {
> + bh = gfs2_getbuf(gl, blkno + 1, CREATE);
> +
> + lock_buffer(bh);
> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + brelse(bh);
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + bhs[num++] = bh;
> + }
> + }
> +
> + gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
> if (!(flags & DIO_WAIT))
> return 0;
>
> + bh = *bhp;
> wait_on_buffer(bh);
> if (unlikely(!buffer_uptodate(bh))) {
> struct gfs2_trans *tr = current->journal_info;
> --
> 2.5.0
>
>
Hi,
ACK to both patches
Looks good. I'll hold onto them until this merge window closes.
Regards,
Bob Peterson
Red Hat File Systems
More information about the Cluster-devel mailing list