[Cluster-devel] [GFS2 PATCH 4/4] gfs2: read journal in large chunks to locate the head
Andreas Gruenbacher
agruenba at redhat.com
Fri Sep 7 12:14:29 UTC 2018
Abhi,
On 6 September 2018 at 19:02, Abhi Das <adas at redhat.com> wrote:
> Use bio(s) to read in the journal sequentially in large chunks and
> locate the head of the journal.
> This is faster in most cases when compared to the existing bisect
> method which operates one block at a time.
>
> Signed-off-by: Abhi Das <adas at redhat.com>
> ---
> fs/gfs2/incore.h | 8 +++-
> fs/gfs2/lops.c | 122 +++++++++++++++++++++++++++++++++++++++++++++------
> fs/gfs2/lops.h | 1 +
> fs/gfs2/ops_fstype.c | 1 +
> fs/gfs2/recovery.c | 115 +++++-------------------------------------------
> 5 files changed, 129 insertions(+), 118 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index b96d39c..b24c105 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -529,6 +529,11 @@ struct gfs2_journal_extent {
> u64 blocks;
> };
>
> +enum {
> + JDF_RECOVERY = 1,
> + JDF_JHEAD = 2,
> +};
> +
> struct gfs2_jdesc {
> struct list_head jd_list;
> struct list_head extent_list;
> @@ -536,12 +541,13 @@ struct gfs2_jdesc {
> struct work_struct jd_work;
> struct inode *jd_inode;
> unsigned long jd_flags;
> -#define JDF_RECOVERY 1
> unsigned int jd_jid;
> unsigned int jd_blocks;
> int jd_recover_error;
> /* Replay stuff */
>
> + struct gfs2_log_header_host jd_jhead;
> + struct bio *jd_rd_bio; /* bio used for reading this journal */
> unsigned int jd_found_blocks;
> unsigned int jd_found_revokes;
> unsigned int jd_replayed_blocks;
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 4cc19af..21979b2 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -18,6 +18,7 @@
> #include <linux/fs.h>
> #include <linux/list_sort.h>
>
> +#include "bmap.h"
> #include "dir.h"
> #include "gfs2.h"
> #include "incore.h"
> @@ -227,6 +228,50 @@ static void gfs2_end_log_write(struct bio *bio)
> wake_up(&sdp->sd_log_flush_wait);
> }
>
> +static void gfs2_end_log_read(struct bio *bio)
> +{
> + struct gfs2_jdesc *jd = bio->bi_private;
> + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> + struct page *page;
> + struct bio_vec *bvec;
> + int i, last;
> +
> + if (bio->bi_status) {
> + fs_err(sdp, "Error %d reading from journal, jid=%u\n",
> + bio->bi_status, jd->jd_jid);
> + }
> +
> + bio_for_each_segment_all(bvec, bio, i) {
> + struct gfs2_log_header_host uninitialized_var(lh);
> + void *ptr;
> +
> + page = bvec->bv_page;
> + ptr = page_address(page);
> + last = page_private(page);
> +
> + if (!test_bit(JDF_JHEAD, &jd->jd_flags)) {
> + mempool_free(page, gfs2_page_pool);
> + continue;
> + }
> +
> + if (!__get_log_header(sdp, ptr, 0, &lh)) {
> + if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
> + jd->jd_jhead = lh;
> + else
> + goto found;
> + }
> +
> + if (last) {
> + found:
> + clear_bit(JDF_JHEAD, &jd->jd_flags);
> + wake_up_bit(&jd->jd_flags, JDF_JHEAD);
> + }
> + mempool_free(page, gfs2_page_pool);
> + }
> +
> + bio_put(bio);
> +}
> +
> /**
> * gfs2_log_flush_bio - Submit any pending log bio
> * @biop: Address of the bio pointer
> @@ -241,8 +286,10 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
> {
> struct bio *bio = *biop;
> if (bio) {
> - struct gfs2_sbd *sdp = bio->bi_private;
> - atomic_inc(&sdp->sd_log_in_flight);
> + if (op != REQ_OP_READ) {
> + struct gfs2_sbd *sdp = bio->bi_private;
> + atomic_inc(&sdp->sd_log_in_flight);
> + }
> bio_set_op_attrs(bio, op, op_flags);
> submit_bio(bio);
> *biop = NULL;
> @@ -253,6 +300,7 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
> * gfs2_log_alloc_bio - Allocate a new bio for log writing
> * @jd: The journal descriptor
> * @blkno: The next device block number we want to write to
> + * @op: REQ_OP
> *
> * This should never be called when there is a cached bio in the
> * super block. When it returns, there will be a cached bio in the
> @@ -262,21 +310,24 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
> * Returns: Newly allocated bio
> */
>
> -static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
> +static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno, int op)
> {
> struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> struct super_block *sb = sdp->sd_vfs;
> struct bio *bio;
>
> - BUG_ON(sdp->sd_log_bio);
> + BUG_ON((op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio));
>
> bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
> bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
> bio_set_dev(bio, sb->s_bdev);
> - bio->bi_end_io = gfs2_end_log_write;
> - bio->bi_private = sdp;
> + bio->bi_end_io = op == REQ_OP_READ ? gfs2_end_log_read : gfs2_end_log_write;
> + bio->bi_private = op == REQ_OP_READ ? (void*)jd : (void*)sdp;
>
> - sdp->sd_log_bio = bio;
> + if (op == REQ_OP_READ)
> + jd->jd_rd_bio = bio;
> + else
> + sdp->sd_log_bio = bio;
>
> return bio;
> }
> @@ -285,6 +336,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
> * gfs2_log_get_bio - Get cached log bio, or allocate a new one
> * @jd: The journal descriptor
> * @blkno: The device block number we want to write to
> + * @op: REQ_OP
> *
> * If there is a cached bio, then if the next block number is sequential
> * with the previous one, return it, otherwise flush the bio to the
> @@ -294,10 +346,10 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
> * Returns: The bio to use for log writes
> */
>
> -static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
> +static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int op)
> {
> struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> - struct bio *bio = sdp->sd_log_bio;
> + struct bio *bio = op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio;
> u64 nblk;
>
> if (bio) {
> @@ -305,10 +357,12 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
> nblk >>= sdp->sd_fsb2bb_shift;
> if (blkno == nblk)
> return bio;
> - gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> + gfs2_log_flush_bio(op == REQ_OP_READ ? &jd->jd_rd_bio
> + : &sdp->sd_log_bio, REQ_OP_WRITE, 0);
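Shouldn't this pass "op" instead of the hard-coded "REQ_OP_WRITE"? As written, when op is REQ_OP_READ and the requested block is not contiguous with the cached bio, the pending read bio (jd->jd_rd_bio) is submitted as a write.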
> }
>
> - return gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
> + return gfs2_log_alloc_bio(op == REQ_OP_READ ? jd : sdp->sd_jdesc,
> + blkno, op);
> }
>
> /**
> @@ -330,11 +384,11 @@ void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
> struct bio *bio;
> int ret;
>
> - bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno);
> + bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE);
> ret = bio_add_page(bio, page, size, offset);
> if (ret == 0) {
> gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> - bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
> + bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE);
> ret = bio_add_page(bio, page, size, offset);
> WARN_ON(ret == 0);
> }
> @@ -374,6 +428,48 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
> gfs2_log_bmap(sdp));
> }
>
> +static void gfs2_log_read_extent(struct gfs2_jdesc *jd, u64 dblock,
> + unsigned int blocks, int last)
> +{
> + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> + struct super_block *sb = sdp->sd_vfs;
> + struct page *page;
> + int i, ret;
> + struct bio *bio;
> +
> + for (i=0; i<blocks; i++) {
> + page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
> + /* flag the last page of the journal we plan to read in */
> + page_private(page) = (last && i == (blocks - 1));
> +
> + bio = gfs2_log_get_bio(jd, dblock + i, REQ_OP_READ);
> + ret = bio_add_page(bio, page, sb->s_blocksize, 0);
> + if (ret == 0) {
> + gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0);
> + bio = gfs2_log_alloc_bio(jd, dblock + i, REQ_OP_READ);
> + ret = bio_add_page(bio, page, sb->s_blocksize, 0);
> + WARN_ON(ret == 0);
> + }
> + bio->bi_private = jd;
> + }
> +}
> +
> +void gfs2_log_read(struct gfs2_jdesc *jd)
> +{
> + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> + int last = 0;
> + struct gfs2_journal_extent *je;
> +
> + if (list_empty(&jd->extent_list))
> + gfs2_map_journal_extents(sdp, jd);
> +
> + list_for_each_entry(je, &jd->extent_list, list) {
> + last = list_is_last(&je->list, &jd->extent_list);
> + gfs2_log_read_extent(jd, je->dblock, je->blocks, last);
> + gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0);
> + }
> +}
> +
> static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
> u32 ld_length, u32 ld_data1)
> {
> diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
> index d709d99..23392c5d 100644
> --- a/fs/gfs2/lops.h
> +++ b/fs/gfs2/lops.h
> @@ -32,6 +32,7 @@ extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
> extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
> extern void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags);
> extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
> +extern void gfs2_log_read(struct gfs2_jdesc *jd);
>
> static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
> {
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index c2469833b..dcc488b4 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -578,6 +578,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
> kfree(jd);
> break;
> }
> + jd->jd_rd_bio = NULL;
>
> spin_lock(&sdp->sd_jindex_spin);
> jd->jd_jid = sdp->sd_journals++;
> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
> index 1b95294..e90abe6 100644
> --- a/fs/gfs2/recovery.c
> +++ b/fs/gfs2/recovery.c
> @@ -182,85 +182,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
> }
>
> /**
> - * find_good_lh - find a good log header
> - * @jd: the journal
> - * @blk: the segment to start searching from
> - * @lh: the log header to fill in
> - * @forward: if true search forward in the log, else search backward
> - *
> - * Call get_log_header() to get a log header for a segment, but if the
> - * segment is bad, either scan forward or backward until we find a good one.
> - *
> - * Returns: errno
> - */
> -
> -static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
> - struct gfs2_log_header_host *head)
> -{
> - unsigned int orig_blk = *blk;
> - int error;
> -
> - for (;;) {
> - error = get_log_header(jd, *blk, head);
> - if (error <= 0)
> - return error;
> -
> - if (++*blk == jd->jd_blocks)
> - *blk = 0;
> -
> - if (*blk == orig_blk) {
> - gfs2_consist_inode(GFS2_I(jd->jd_inode));
> - return -EIO;
> - }
> - }
> -}
> -
> -/**
> - * jhead_scan - make sure we've found the head of the log
> - * @jd: the journal
> - * @head: this is filled in with the log descriptor of the head
> - *
> - * At this point, seg and lh should be either the head of the log or just
> - * before. Scan forward until we find the head.
> - *
> - * Returns: errno
> - */
> -
> -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
> -{
> - unsigned int blk = head->lh_blkno;
> - struct gfs2_log_header_host lh;
> - int error;
> -
> - for (;;) {
> - if (++blk == jd->jd_blocks)
> - blk = 0;
> -
> - error = get_log_header(jd, blk, &lh);
> - if (error < 0)
> - return error;
> - if (error == 1)
> - continue;
> -
> - if (lh.lh_sequence == head->lh_sequence) {
> - gfs2_consist_inode(GFS2_I(jd->jd_inode));
> - return -EIO;
> - }
> - if (lh.lh_sequence < head->lh_sequence)
> - break;
> -
> - *head = lh;
> - }
> -
> - return 0;
> -}
> -
> -/**
> * gfs2_find_jhead - find the head of a log
> * @jd: the journal
> * @head: the log descriptor for the head of the log is returned here
> *
> - * Do a binary search of a journal and find the valid log entry with the
> + * Do a search of a journal and find the valid log entry with the
> * highest sequence number. (i.e. the log head)
> *
> * Returns: errno
> @@ -268,38 +194,19 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
>
> int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
> {
> - struct gfs2_log_header_host lh_1, lh_m;
> - u32 blk_1, blk_2, blk_m;
> - int error;
> -
> - blk_1 = 0;
> - blk_2 = jd->jd_blocks - 1;
> -
> - for (;;) {
> - blk_m = (blk_1 + blk_2) / 2;
> -
> - error = find_good_lh(jd, &blk_1, &lh_1);
> - if (error)
> - return error;
> -
> - error = find_good_lh(jd, &blk_m, &lh_m);
> - if (error)
> - return error;
> -
> - if (blk_1 == blk_m || blk_m == blk_2)
> - break;
> + int error = 0;
>
> - if (lh_1.lh_sequence <= lh_m.lh_sequence)
> - blk_1 = blk_m;
> - else
> - blk_2 = blk_m;
> - }
> + memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host));
> + set_bit(JDF_JHEAD, &jd->jd_flags);
> + gfs2_log_read(jd);
>
> - error = jhead_scan(jd, &lh_1);
> - if (error)
> - return error;
> + if (test_bit(JDF_JHEAD, &jd->jd_flags))
> + wait_on_bit(&jd->jd_flags, JDF_JHEAD, TASK_INTERRUPTIBLE);
>
> - *head = lh_1;
> + if (jd->jd_jhead.lh_sequence == 0)
> + error = 1;
> + else
> + *head = jd->jd_jhead;
>
> return error;
> }
> --
> 2.4.11
>
Thanks,
Andreas
More information about the Cluster-devel mailing list