[Cluster-devel] [GFS2 PATCH 4/4] gfs2: read journal in large chunks to locate the head

Andreas Gruenbacher agruenba at redhat.com
Fri Sep 7 12:14:29 UTC 2018


Abhi,

On 6 September 2018 at 19:02, Abhi Das <adas at redhat.com> wrote:
> Use bio(s) to read in the journal sequentially in large chunks and
> locate the head of the journal.
> This is faster in most cases when compared to the existing bisect
> method which operates one block at a time.
>
> Signed-off-by: Abhi Das <adas at redhat.com>
> ---
>  fs/gfs2/incore.h     |   8 +++-
>  fs/gfs2/lops.c       | 122 +++++++++++++++++++++++++++++++++++++++++++++------
>  fs/gfs2/lops.h       |   1 +
>  fs/gfs2/ops_fstype.c |   1 +
>  fs/gfs2/recovery.c   | 115 +++++-------------------------------------------
>  5 files changed, 129 insertions(+), 118 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index b96d39c..b24c105 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -529,6 +529,11 @@ struct gfs2_journal_extent {
>         u64 blocks;
>  };
>
> +enum {
> +       JDF_RECOVERY = 1,
> +       JDF_JHEAD    = 2,
> +};
> +
>  struct gfs2_jdesc {
>         struct list_head jd_list;
>         struct list_head extent_list;
> @@ -536,12 +541,13 @@ struct gfs2_jdesc {
>         struct work_struct jd_work;
>         struct inode *jd_inode;
>         unsigned long jd_flags;
> -#define JDF_RECOVERY 1
>         unsigned int jd_jid;
>         unsigned int jd_blocks;
>         int jd_recover_error;
>         /* Replay stuff */
>
> +       struct gfs2_log_header_host jd_jhead;
> +       struct bio *jd_rd_bio; /* bio used for reading this journal */
>         unsigned int jd_found_blocks;
>         unsigned int jd_found_revokes;
>         unsigned int jd_replayed_blocks;
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 4cc19af..21979b2 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -18,6 +18,7 @@
>  #include <linux/fs.h>
>  #include <linux/list_sort.h>
>
> +#include "bmap.h"
>  #include "dir.h"
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -227,6 +228,50 @@ static void gfs2_end_log_write(struct bio *bio)
>                 wake_up(&sdp->sd_log_flush_wait);
>  }
>
> +static void gfs2_end_log_read(struct bio *bio)
> +{
> +       struct gfs2_jdesc *jd = bio->bi_private;
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> +       struct page *page;
> +       struct bio_vec *bvec;
> +       int i, last;
> +
> +       if (bio->bi_status) {
> +               fs_err(sdp, "Error %d reading from journal, jid=%u\n",
> +                      bio->bi_status, jd->jd_jid);
> +       }
> +
> +       bio_for_each_segment_all(bvec, bio, i) {
> +               struct gfs2_log_header_host uninitialized_var(lh);
> +               void *ptr;
> +
> +               page = bvec->bv_page;
> +               ptr = page_address(page);
> +               last = page_private(page);
> +
> +               if (!test_bit(JDF_JHEAD, &jd->jd_flags)) {
> +                       mempool_free(page, gfs2_page_pool);
> +                       continue;
> +               }
> +
> +               if (!__get_log_header(sdp, ptr, 0, &lh)) {
> +                       if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
> +                               jd->jd_jhead = lh;
> +                       else
> +                               goto found;
> +               }
> +
> +               if (last) {
> +               found:
> +                       clear_bit(JDF_JHEAD, &jd->jd_flags);
> +                       wake_up_bit(&jd->jd_flags, JDF_JHEAD);
> +               }
> +               mempool_free(page, gfs2_page_pool);
> +       }
> +
> +       bio_put(bio);
> +}
> +
>  /**
>   * gfs2_log_flush_bio - Submit any pending log bio
>   * @biop: Address of the bio pointer
> @@ -241,8 +286,10 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
>  {
>         struct bio *bio = *biop;
>         if (bio) {
> -               struct gfs2_sbd *sdp = bio->bi_private;
> -               atomic_inc(&sdp->sd_log_in_flight);
> +               if (op != REQ_OP_READ) {
> +                       struct gfs2_sbd *sdp = bio->bi_private;
> +                       atomic_inc(&sdp->sd_log_in_flight);
> +               }
>                 bio_set_op_attrs(bio, op, op_flags);
>                 submit_bio(bio);
>                 *biop = NULL;
> @@ -253,6 +300,7 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
>   * gfs2_log_alloc_bio - Allocate a new bio for log writing
>   * @jd: The journal descriptor
>   * @blkno: The next device block number we want to write to
> + * @op: REQ_OP
>   *
>   * This should never be called when there is a cached bio in the
>   * super block. When it returns, there will be a cached bio in the
> @@ -262,21 +310,24 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags)
>   * Returns: Newly allocated bio
>   */
>
> -static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
> +static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno, int op)
>  {
>         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
>         struct super_block *sb = sdp->sd_vfs;
>         struct bio *bio;
>
> -       BUG_ON(sdp->sd_log_bio);
> +       BUG_ON((op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio));
>
>         bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
>         bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
>         bio_set_dev(bio, sb->s_bdev);
> -       bio->bi_end_io = gfs2_end_log_write;
> -       bio->bi_private = sdp;
> +       bio->bi_end_io = op == REQ_OP_READ ? gfs2_end_log_read : gfs2_end_log_write;
> +       bio->bi_private = op == REQ_OP_READ ? (void*)jd : (void*)sdp;
>
> -       sdp->sd_log_bio = bio;
> +       if (op == REQ_OP_READ)
> +               jd->jd_rd_bio = bio;
> +       else
> +               sdp->sd_log_bio = bio;
>
>         return bio;
>  }
> @@ -285,6 +336,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
>   * gfs2_log_get_bio - Get cached log bio, or allocate a new one
>   * @jd: The journal descriptor
>   * @blkno: The device block number we want to write to
> + * @op: REQ_OP
>   *
>   * If there is a cached bio, then if the next block number is sequential
>   * with the previous one, return it, otherwise flush the bio to the
> @@ -294,10 +346,10 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
>   * Returns: The bio to use for log writes
>   */
>
> -static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
> +static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int op)
>  {
>         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> -       struct bio *bio = sdp->sd_log_bio;
> +       struct bio *bio = op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio;
>         u64 nblk;
>
>         if (bio) {
> @@ -305,10 +357,12 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
>                 nblk >>= sdp->sd_fsb2bb_shift;
>                 if (blkno == nblk)
>                         return bio;
> -               gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> +               gfs2_log_flush_bio(op == REQ_OP_READ ? &jd->jd_rd_bio
> +                                  : &sdp->sd_log_bio, REQ_OP_WRITE, 0);

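Shouldn't this pass "op" instead of the hard-coded "REQ_OP_WRITE"? As written, when op is REQ_OP_READ the pending read bio (jd->jd_rd_bio) gets submitted as a write, and gfs2_log_flush_bio() also takes its non-read path, treating bio->bi_private as the sdp even though it points to the jd for read bios.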

>         }
>
> -       return gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
> +       return gfs2_log_alloc_bio(op == REQ_OP_READ ? jd : sdp->sd_jdesc,
> +                                 blkno, op);
>  }
>
>  /**
> @@ -330,11 +384,11 @@ void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
>         struct bio *bio;
>         int ret;
>
> -       bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno);
> +       bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE);
>         ret = bio_add_page(bio, page, size, offset);
>         if (ret == 0) {
>                 gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> -               bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
> +               bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE);
>                 ret = bio_add_page(bio, page, size, offset);
>                 WARN_ON(ret == 0);
>         }
> @@ -374,6 +428,48 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
>                        gfs2_log_bmap(sdp));
>  }
>
> +static void gfs2_log_read_extent(struct gfs2_jdesc *jd, u64 dblock,
> +                         unsigned int blocks, int last)
> +{
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> +       struct super_block *sb = sdp->sd_vfs;
> +       struct page *page;
> +       int i, ret;
> +       struct bio *bio;
> +
> +       for (i=0; i<blocks; i++) {
> +               page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
> +               /* flag the last page of the journal we plan to read in */
> +               page_private(page) = (last && i == (blocks - 1));
> +
> +               bio = gfs2_log_get_bio(jd, dblock + i, REQ_OP_READ);
> +               ret = bio_add_page(bio, page, sb->s_blocksize, 0);
> +               if (ret == 0) {
> +                       gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0);
> +                       bio = gfs2_log_alloc_bio(jd, dblock + i, REQ_OP_READ);
> +                       ret = bio_add_page(bio, page, sb->s_blocksize, 0);
> +                       WARN_ON(ret == 0);
> +               }
> +               bio->bi_private = jd;
> +       }
> +}
> +
> +void gfs2_log_read(struct gfs2_jdesc *jd)
> +{
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> +       int last = 0;
> +       struct gfs2_journal_extent *je;
> +
> +       if (list_empty(&jd->extent_list))
> +               gfs2_map_journal_extents(sdp, jd);
> +
> +       list_for_each_entry(je, &jd->extent_list, list) {
> +               last = list_is_last(&je->list, &jd->extent_list);
> +               gfs2_log_read_extent(jd, je->dblock, je->blocks, last);
> +               gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0);
> +       }
> +}
> +
>  static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
>                                       u32 ld_length, u32 ld_data1)
>  {
> diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
> index d709d99..23392c5d 100644
> --- a/fs/gfs2/lops.h
> +++ b/fs/gfs2/lops.h
> @@ -32,6 +32,7 @@ extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
>  extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
>  extern void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags);
>  extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
> +extern void gfs2_log_read(struct gfs2_jdesc *jd);
>
>  static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
>  {
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index c2469833b..dcc488b4 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -578,6 +578,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
>                         kfree(jd);
>                         break;
>                 }
> +               jd->jd_rd_bio = NULL;
>
>                 spin_lock(&sdp->sd_jindex_spin);
>                 jd->jd_jid = sdp->sd_journals++;
> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
> index 1b95294..e90abe6 100644
> --- a/fs/gfs2/recovery.c
> +++ b/fs/gfs2/recovery.c
> @@ -182,85 +182,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
>  }
>
>  /**
> - * find_good_lh - find a good log header
> - * @jd: the journal
> - * @blk: the segment to start searching from
> - * @lh: the log header to fill in
> - * @forward: if true search forward in the log, else search backward
> - *
> - * Call get_log_header() to get a log header for a segment, but if the
> - * segment is bad, either scan forward or backward until we find a good one.
> - *
> - * Returns: errno
> - */
> -
> -static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
> -                       struct gfs2_log_header_host *head)
> -{
> -       unsigned int orig_blk = *blk;
> -       int error;
> -
> -       for (;;) {
> -               error = get_log_header(jd, *blk, head);
> -               if (error <= 0)
> -                       return error;
> -
> -               if (++*blk == jd->jd_blocks)
> -                       *blk = 0;
> -
> -               if (*blk == orig_blk) {
> -                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
> -                       return -EIO;
> -               }
> -       }
> -}
> -
> -/**
> - * jhead_scan - make sure we've found the head of the log
> - * @jd: the journal
> - * @head: this is filled in with the log descriptor of the head
> - *
> - * At this point, seg and lh should be either the head of the log or just
> - * before.  Scan forward until we find the head.
> - *
> - * Returns: errno
> - */
> -
> -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
> -{
> -       unsigned int blk = head->lh_blkno;
> -       struct gfs2_log_header_host lh;
> -       int error;
> -
> -       for (;;) {
> -               if (++blk == jd->jd_blocks)
> -                       blk = 0;
> -
> -               error = get_log_header(jd, blk, &lh);
> -               if (error < 0)
> -                       return error;
> -               if (error == 1)
> -                       continue;
> -
> -               if (lh.lh_sequence == head->lh_sequence) {
> -                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
> -                       return -EIO;
> -               }
> -               if (lh.lh_sequence < head->lh_sequence)
> -                       break;
> -
> -               *head = lh;
> -       }
> -
> -       return 0;
> -}
> -
> -/**
>   * gfs2_find_jhead - find the head of a log
>   * @jd: the journal
>   * @head: the log descriptor for the head of the log is returned here
>   *
> - * Do a binary search of a journal and find the valid log entry with the
> + * Do a search of a journal and find the valid log entry with the
>   * highest sequence number.  (i.e. the log head)
>   *
>   * Returns: errno
> @@ -268,38 +194,19 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
>
>  int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
>  {
> -       struct gfs2_log_header_host lh_1, lh_m;
> -       u32 blk_1, blk_2, blk_m;
> -       int error;
> -
> -       blk_1 = 0;
> -       blk_2 = jd->jd_blocks - 1;
> -
> -       for (;;) {
> -               blk_m = (blk_1 + blk_2) / 2;
> -
> -               error = find_good_lh(jd, &blk_1, &lh_1);
> -               if (error)
> -                       return error;
> -
> -               error = find_good_lh(jd, &blk_m, &lh_m);
> -               if (error)
> -                       return error;
> -
> -               if (blk_1 == blk_m || blk_m == blk_2)
> -                       break;
> +       int error = 0;
>
> -               if (lh_1.lh_sequence <= lh_m.lh_sequence)
> -                       blk_1 = blk_m;
> -               else
> -                       blk_2 = blk_m;
> -       }
> +       memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host));
> +       set_bit(JDF_JHEAD, &jd->jd_flags);
> +       gfs2_log_read(jd);
>
> -       error = jhead_scan(jd, &lh_1);
> -       if (error)
> -               return error;
> +       if (test_bit(JDF_JHEAD, &jd->jd_flags))
> +               wait_on_bit(&jd->jd_flags, JDF_JHEAD, TASK_INTERRUPTIBLE);
>
> -       *head = lh_1;
> +       if (jd->jd_jhead.lh_sequence == 0)
> +               error = 1;
> +       else
> +               *head = jd->jd_jhead;
>
>         return error;
>  }
> --
> 2.4.11
>

Thanks,
Andreas




More information about the Cluster-devel mailing list