[Cluster-devel] [PATCH] gfs2: read journal in large chunks to locate the head

Andreas Gruenbacher agruenba at redhat.com
Fri May 3 11:49:50 UTC 2019


On Thu, 2 May 2019 at 21:17, Abhi Das <adas at redhat.com> wrote:
> Use bio(s) to read in the journal sequentially in large chunks and
> locate the head of the journal.

I've tweaked the commit message a bit before pushing this into for-next.

Thanks,
Andreas

> Signed-off-by: Abhi Das <adas at redhat.com>
> Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
> ---
>  fs/gfs2/glops.c      |   3 +-
>  fs/gfs2/log.c        |   4 +-
>  fs/gfs2/lops.c       | 212 +++++++++++++++++++++++++++++++++++++++++--
>  fs/gfs2/lops.h       |   4 +-
>  fs/gfs2/ops_fstype.c |   3 +-
>  fs/gfs2/recovery.c   | 125 +------------------------
>  fs/gfs2/recovery.h   |   2 -
>  fs/gfs2/super.c      |   5 +-
>  8 files changed, 219 insertions(+), 139 deletions(-)
>
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index 78510ab91835..24ada3ccc525 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -28,6 +28,7 @@
>  #include "util.h"
>  #include "trans.h"
>  #include "dir.h"
> +#include "lops.h"
>
>  struct workqueue_struct *gfs2_freeze_wq;
>
> @@ -531,7 +532,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
>         if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
>                 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
>
> -               error = gfs2_find_jhead(sdp->sd_jdesc, &head);
> +               error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
>                 if (error)
>                         gfs2_consist(sdp);
>                 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index a7febb4bd400..a2e1df488df0 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -744,7 +744,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
>         lh->lh_crc = cpu_to_be32(crc);
>
>         gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
> -       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
> +       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
>         log_flush_wait(sdp);
>  }
>
> @@ -821,7 +821,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
>
>         gfs2_ordered_write(sdp);
>         lops_before_commit(sdp, tr);
> -       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> +       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
>
>         if (sdp->sd_log_head != sdp->sd_log_flush_head) {
>                 log_flush_wait(sdp);
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 6af6a3cea967..ce048a9e058d 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -17,7 +17,9 @@
>  #include <linux/bio.h>
>  #include <linux/fs.h>
>  #include <linux/list_sort.h>
> +#include <linux/blkdev.h>
>
> +#include "bmap.h"
>  #include "dir.h"
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -194,7 +196,6 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
>  /**
>   * gfs2_end_log_write - end of i/o to the log
>   * @bio: The bio
> - * @error: Status of i/o request
>   *
>   * Each bio_vec contains either data from the pagecache or data
>   * relating to the log itself. Here we iterate over the bio_vec
> @@ -232,20 +233,19 @@ static void gfs2_end_log_write(struct bio *bio)
>  /**
>   * gfs2_log_submit_bio - Submit any pending log bio
>   * @biop: Address of the bio pointer
> - * @op: REQ_OP
> - * @op_flags: req_flag_bits
> + * @opf: REQ_OP | op_flags
>   *
>   * Submit any pending part-built or full bio to the block device. If
>   * there is no pending bio, then this is a no-op.
>   */
>
> -void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
> +void gfs2_log_submit_bio(struct bio **biop, int opf)
>  {
>         struct bio *bio = *biop;
>         if (bio) {
>                 struct gfs2_sbd *sdp = bio->bi_private;
>                 atomic_inc(&sdp->sd_log_in_flight);
> -               bio_set_op_attrs(bio, op, op_flags);
> +               bio->bi_opf = opf;
>                 submit_bio(bio);
>                 *biop = NULL;
>         }
> @@ -306,7 +306,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
>                 nblk >>= sdp->sd_fsb2bb_shift;
>                 if (blkno == nblk && !flush)
>                         return bio;
> -               gfs2_log_submit_bio(biop, op, 0);
> +               gfs2_log_submit_bio(biop, op);
>         }
>
>         *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
> @@ -377,6 +377,206 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
>                        gfs2_log_bmap(sdp));
>  }
>
> +/**
> + * gfs2_end_log_read - end I/O callback for reads from the log
> + * @bio: The bio
> + *
> + * Simply unlock the pages in the bio. The main thread will wait on them and
> + * process them in order as necessary.
> + */
> +
> +static void gfs2_end_log_read(struct bio *bio)
> +{
> +       struct page *page;
> +       struct bio_vec *bvec;
> +       int i;
> +       struct bvec_iter_all iter_all;
> +
> +       bio_for_each_segment_all(bvec, bio, i, iter_all) {
> +               page = bvec->bv_page;
> +               if (bio->bi_status) {
> +                       int err = blk_status_to_errno(bio->bi_status);
> +
> +                       SetPageError(page);
> +                       mapping_set_error(page->mapping, err);
> +               }
> +               unlock_page(page);
> +       }
> +
> +       bio_put(bio);
> +}
> +
> +/**
> + * gfs2_jhead_pg_srch - Look for the journal head in a given page.
> + * @jd: The journal descriptor
> + * @page: The page to look in
> + *
> + * Returns: true if found, false otherwise.
> + */
> +
> +static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
> +                             struct gfs2_log_header_host *head,
> +                             struct page *page)
> +{
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> +       struct gfs2_log_header_host uninitialized_var(lh);
> +       void *kaddr = kmap_atomic(page);
> +       unsigned int offset;
> +       bool ret = false;
> +
> +       for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
> +               if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
> +                       if (lh.lh_sequence > head->lh_sequence)
> +                               *head = lh;
> +                       else {
> +                               ret = true;
> +                               break;
> +                       }
> +               }
> +       }
> +       kunmap_atomic(kaddr);
> +       return ret;
> +}
> +
> +/**
> + * gfs2_jhead_process_page - Search/cleanup a page
> + * @jd: The journal descriptor
> + * @index: Index of the page to look into
> + * @done: If set, perform only cleanup, else search and set if found.
> + *
> + * Find the page with 'index' in the journal's mapping. Search the page for
> + * the journal head if requested (*done == false). Release refs on the
> + * page so the page cache can reclaim it (put_page() twice). We grabbed a
> + * reference on this page two times, first when we did a find_or_create_page()
> + * to obtain the page to add it to the bio and second when we do a
> + * find_get_page() here to get the page to wait on while I/O on it is being
> + * completed.
> + * This function is also used to free up a page we might've grabbed but not
> + * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
> + * submitted the I/O, but we already found the jhead so we only need to drop
> + * our references to the page.
> + */
> +
> +static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
> +                                   struct gfs2_log_header_host *head,
> +                                   bool *done)
> +{
> +       struct page *page;
> +
> +       page = find_get_page(jd->jd_inode->i_mapping, index);
> +       wait_on_page_locked(page);
> +
> +       if (PageError(page))
> +               *done = true;
> +
> +       if (!*done)
> +               *done = gfs2_jhead_pg_srch(jd, head, page);
> +
> +       put_page(page); /* Once for find_get_page */
> +       put_page(page); /* Once more for find_or_create_page */
> +}
> +
> +/**
> + * gfs2_find_jhead - find the head of a log
> + * @jd: The journal descriptor
> + * @head: The log descriptor for the head of the log is returned here
> + *
> + * Do a search of a journal by reading it in large chunks using bios and find
> + * the valid log entry with the highest sequence number (i.e. the log head).
> + *
> + * Returns: 0 on success, errno otherwise
> + */
> +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
> +                   bool keep_cache)
> +{
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> +       struct address_space *mapping = jd->jd_inode->i_mapping;
> +       unsigned int block = 0, blocks_submitted = 0, blocks_read = 0;
> +       unsigned int bsize = sdp->sd_sb.sb_bsize;
> +       unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
> +       unsigned int shift = PAGE_SHIFT - bsize_shift;
> +       unsigned int readhead_blocks = BIO_MAX_PAGES << shift;
> +       struct gfs2_journal_extent *je;
> +       int sz, ret = 0;
> +       struct bio *bio = NULL;
> +       struct page *page = NULL;
> +       bool done = false;
> +       errseq_t since;
> +
> +       memset(head, 0, sizeof(*head));
> +       if (list_empty(&jd->extent_list))
> +               gfs2_map_journal_extents(sdp, jd);
> +
> +       since = filemap_sample_wb_err(mapping);
> +       list_for_each_entry(je, &jd->extent_list, list) {
> +               for (; block < je->lblock + je->blocks; block++) {
> +                       u64 dblock;
> +
> +                       if (!page) {
> +                               page = find_or_create_page(mapping,
> +                                               block >> shift, GFP_NOFS);
> +                               if (!page) {
> +                                       ret = -ENOMEM;
> +                                       done = true;
> +                                       goto out;
> +                               }
> +                       }
> +
> +                       if (bio) {
> +                               unsigned int off;
> +
> +                               off = (block << bsize_shift) & ~PAGE_MASK;
> +                               sz = bio_add_page(bio, page, bsize, off);
> +                               if (sz == bsize) { /* block added */
> +                                       if (off + bsize == PAGE_SIZE) {
> +                                               page = NULL;
> +                                               goto page_added;
> +                                       }
> +                                       continue;
> +                               }
> +                               blocks_submitted = block + 1;
> +                               submit_bio(bio);
> +                               bio = NULL;
> +                       }
> +
> +                       dblock = je->dblock + (block - je->lblock);
> +                       bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
> +                       bio->bi_opf = REQ_OP_READ;
> +                       sz = bio_add_page(bio, page, bsize, 0);
> +                       gfs2_assert_warn(sdp, sz == bsize);
> +                       if (bsize == PAGE_SIZE)
> +                               page = NULL;
> +
> +page_added:
> +                       if (blocks_submitted < blocks_read + readhead_blocks) {
> +                               /* Keep at least one bio in flight */
> +                               continue;
> +                       }
> +
> +                       gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
> +                       blocks_read += PAGE_SIZE >> bsize_shift;
> +                       if (done)
> +                               goto out;  /* found */
> +               }
> +       }
> +
> +out:
> +       if (bio)
> +               submit_bio(bio);
> +       while (blocks_read < block) {
> +               gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
> +               blocks_read += PAGE_SIZE >> bsize_shift;
> +       }
> +
> +       if (!ret)
> +               ret = filemap_check_wb_err(mapping, since);
> +
> +       if (!keep_cache)
> +               truncate_inode_pages(mapping, 0);
> +
> +       return ret;
> +}
> +
>  static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
>                                       u32 ld_length, u32 ld_data1)
>  {
> diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
> index 320fbf28d2fb..f195ffb435ac 100644
> --- a/fs/gfs2/lops.h
> +++ b/fs/gfs2/lops.h
> @@ -25,8 +25,10 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
>  extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
>                            unsigned size, unsigned offset, u64 blkno);
>  extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
> -extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags);
> +extern void gfs2_log_submit_bio(struct bio **biop, int opf);
>  extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
> +extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
> +                          struct gfs2_log_header_host *head, bool keep_cache);
>
>  static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
>  {
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index abfaecde0e3d..46f6615eaf12 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -41,6 +41,7 @@
>  #include "dir.h"
>  #include "meta_io.h"
>  #include "trace_gfs2.h"
> +#include "lops.h"
>
>  #define DO 0
>  #define UNDO 1
> @@ -616,7 +617,7 @@ static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
>                 fs_err(sdp, "Error checking journal for spectator mount.\n");
>                 goto out_unlock;
>         }
> -       error = gfs2_find_jhead(jd, &head);
> +       error = gfs2_find_jhead(jd, &head, false);
>         if (error) {
>                 fs_err(sdp, "Error parsing journal for spectator mount.\n");
>                 goto out_unlock;
> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
> index fa575d1676b9..389b3ef77e20 100644
> --- a/fs/gfs2/recovery.c
> +++ b/fs/gfs2/recovery.c
> @@ -181,129 +181,6 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
>         return error;
>  }
>
> -/**
> - * find_good_lh - find a good log header
> - * @jd: the journal
> - * @blk: the segment to start searching from
> - * @lh: the log header to fill in
> - * @forward: if true search forward in the log, else search backward
> - *
> - * Call get_log_header() to get a log header for a segment, but if the
> - * segment is bad, either scan forward or backward until we find a good one.
> - *
> - * Returns: errno
> - */
> -
> -static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
> -                       struct gfs2_log_header_host *head)
> -{
> -       unsigned int orig_blk = *blk;
> -       int error;
> -
> -       for (;;) {
> -               error = get_log_header(jd, *blk, head);
> -               if (error <= 0)
> -                       return error;
> -
> -               if (++*blk == jd->jd_blocks)
> -                       *blk = 0;
> -
> -               if (*blk == orig_blk) {
> -                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
> -                       return -EIO;
> -               }
> -       }
> -}
> -
> -/**
> - * jhead_scan - make sure we've found the head of the log
> - * @jd: the journal
> - * @head: this is filled in with the log descriptor of the head
> - *
> - * At this point, seg and lh should be either the head of the log or just
> - * before.  Scan forward until we find the head.
> - *
> - * Returns: errno
> - */
> -
> -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
> -{
> -       unsigned int blk = head->lh_blkno;
> -       struct gfs2_log_header_host lh;
> -       int error;
> -
> -       for (;;) {
> -               if (++blk == jd->jd_blocks)
> -                       blk = 0;
> -
> -               error = get_log_header(jd, blk, &lh);
> -               if (error < 0)
> -                       return error;
> -               if (error == 1)
> -                       continue;
> -
> -               if (lh.lh_sequence == head->lh_sequence) {
> -                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
> -                       return -EIO;
> -               }
> -               if (lh.lh_sequence < head->lh_sequence)
> -                       break;
> -
> -               *head = lh;
> -       }
> -
> -       return 0;
> -}
> -
> -/**
> - * gfs2_find_jhead - find the head of a log
> - * @jd: the journal
> - * @head: the log descriptor for the head of the log is returned here
> - *
> - * Do a binary search of a journal and find the valid log entry with the
> - * highest sequence number.  (i.e. the log head)
> - *
> - * Returns: errno
> - */
> -
> -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
> -{
> -       struct gfs2_log_header_host lh_1, lh_m;
> -       u32 blk_1, blk_2, blk_m;
> -       int error;
> -
> -       blk_1 = 0;
> -       blk_2 = jd->jd_blocks - 1;
> -
> -       for (;;) {
> -               blk_m = (blk_1 + blk_2) / 2;
> -
> -               error = find_good_lh(jd, &blk_1, &lh_1);
> -               if (error)
> -                       return error;
> -
> -               error = find_good_lh(jd, &blk_m, &lh_m);
> -               if (error)
> -                       return error;
> -
> -               if (blk_1 == blk_m || blk_m == blk_2)
> -                       break;
> -
> -               if (lh_1.lh_sequence <= lh_m.lh_sequence)
> -                       blk_1 = blk_m;
> -               else
> -                       blk_2 = blk_m;
> -       }
> -
> -       error = jhead_scan(jd, &lh_1);
> -       if (error)
> -               return error;
> -
> -       *head = lh_1;
> -
> -       return error;
> -}
> -
>  /**
>   * foreach_descriptor - go through the active part of the log
>   * @jd: the journal
> @@ -469,7 +346,7 @@ void gfs2_recover_func(struct work_struct *work)
>         if (error)
>                 goto fail_gunlock_ji;
>
> -       error = gfs2_find_jhead(jd, &head);
> +       error = gfs2_find_jhead(jd, &head, true);
>         if (error)
>                 goto fail_gunlock_ji;
>         t_jhd = ktime_get();
> diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
> index 5932d4b6f43e..1831a1974c8c 100644
> --- a/fs/gfs2/recovery.h
> +++ b/fs/gfs2/recovery.h
> @@ -27,8 +27,6 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
>  extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
>  extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
>
> -extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
> -                   struct gfs2_log_header_host *head);
>  extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
>  extern void gfs2_recover_func(struct work_struct *work);
>  extern int __get_log_header(struct gfs2_sbd *sdp,
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index ceec631efa49..43e7c2c87014 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -45,6 +45,7 @@
>  #include "util.h"
>  #include "sys.h"
>  #include "xattr.h"
> +#include "lops.h"
>
>  #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
>
> @@ -425,7 +426,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
>
>         j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
>
> -       error = gfs2_find_jhead(sdp->sd_jdesc, &head);
> +       error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
>         if (error)
>                 goto fail;
>
> @@ -680,7 +681,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
>                 error = gfs2_jdesc_check(jd);
>                 if (error)
>                         break;
> -               error = gfs2_find_jhead(jd, &lh);
> +               error = gfs2_find_jhead(jd, &lh, false);
>                 if (error)
>                         break;
>                 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
> --
> 2.20.1
>




More information about the Cluster-devel mailing list