[Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization
Steven Whitehouse
swhiteho at redhat.com
Thu Nov 12 13:44:22 UTC 2015
Hi,
On 01/11/15 19:02, Andreas Gruenbacher wrote:
> Instead of submitting separate bio for the inode and its extended
> attributes, submit a single bio for both when possible. The entire
> request becomes a normal read, not a readahead.
>
> To keep track of the buffer heads that make up the bio, we allocate
> temporary buffer head arrays: in the endio handler, it would also be
> possible to compute the buffer head block numbers from the bio and to
> grab the buffer heads with gfs2_getbuf, but the code would become even
> messier.
> ---
> fs/gfs2/meta_io.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 76 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 0f24828..e650127 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -187,19 +187,63 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
> return bh;
> }
>
> -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno)
> +struct gfs2_meta_read {
> + int num;
> + struct buffer_head *bhs[0];
> +};
> +
> +static void gfs2_meta_read_endio(struct bio *bio) {
> + struct gfs2_meta_read *r = bio->bi_private;
> + int i;
> +
> + for (i = 0; i < r->num; i++) {
> + struct buffer_head *bh = r->bhs[i];
> +
> + if (unlikely(bio_flagged(bio, BIO_QUIET)))
> + set_bit(BH_Quiet, &bh->b_state);
> +
> + bh->b_end_io(bh, !bio->bi_error);
> + }
> + bio_put(bio);
> + kfree(r);
> +}
> +
> +/*
> + * (See submit_bh_wbc.)
> + */
> +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
> {
> - struct buffer_head *bh;
> + struct gfs2_meta_read *r;
> + struct buffer_head *bh = bhs[0];
> + struct bio *bio;
> + int i;
>
> - bh = gfs2_getbuf(gl, blkno, 1);
> - lock_buffer(bh);
> - if (buffer_uptodate(bh)) {
> - unlock_buffer(bh);
> - brelse(bh);
> + if (!num)
> + return;
> +
> + if (num == 1) {
> + bh->b_end_io = end_buffer_read_sync;
> + submit_bh(rw, bh);
> return;
> }
> - bh->b_end_io = end_buffer_read_sync;
> - submit_bh(READA | REQ_META | REQ_PRIO, bh);
> +
> + r = kmalloc(sizeof(*r) + num * sizeof(r->bhs[0]),
> + GFP_NOIO | __GFP_NOFAIL);
Can we avoid doing this I wonder? I would like to avoid adding
GFP_NOFAIL allocations, and I think it should be possible to figure out
where the buffers are using the bio itself (i.e. iterating over it,
similar to gfs2_end_log_write() in lops.c) so that we don't need to
allocate this additional memory. Otherwise this looks like the right
approach I think,
Steve.
> + r->num = num;
> + for (i = 0; i < num; i++)
> + r->bhs[i] = bhs[i];
> +
> + bio = bio_alloc(GFP_NOIO, num);
> + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> + bio->bi_bdev = bh->b_bdev;
> + for (i = 0; i < num; i++) {
> + bh = bhs[i];
> + bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> + }
> + bio->bi_end_io = gfs2_meta_read_endio;
> + bio->bi_private = r;
> +
> + submit_bio(rw, bio);
> }
>
> /**
> @@ -216,7 +260,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> int rahead, struct buffer_head **bhp)
> {
> struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> - struct buffer_head *bh;
> + struct buffer_head *bh, *bhs[2];
> + int num = 0;
>
> if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
> *bhp = NULL;
> @@ -228,18 +273,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> lock_buffer(bh);
> if (buffer_uptodate(bh)) {
> unlock_buffer(bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> - return 0;
> + flags &= ~DIO_WAIT;
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + get_bh(bh);
> + bhs[num++] = bh;
> }
> - bh->b_end_io = end_buffer_read_sync;
> - get_bh(bh);
> - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> +
> + if (rahead) {
> + bh = gfs2_getbuf(gl, blkno + 1, CREATE);
> +
> + lock_buffer(bh);
> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + brelse(bh);
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + bhs[num++] = bh;
> + }
> + }
> +
> + gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
> if (!(flags & DIO_WAIT))
> return 0;
>
> + bh = *bhp;
> wait_on_buffer(bh);
> if (unlikely(!buffer_uptodate(bh))) {
> struct gfs2_trans *tr = current->journal_info;
More information about the Cluster-devel
mailing list