[Cluster-devel] [RFC v2 PATCH 4/5] gfs2: read journal in large chunks to locate the head

Abhi Das adas at redhat.com
Mon Aug 13 04:48:48 UTC 2018


Use bios to read the journal sequentially in large chunks and locate
the head of the journal.
This is faster in most cases than the existing bisect method, which
reads one block at a time.

Signed-off-by: Abhi Das <adas at redhat.com>
---
 fs/gfs2/incore.h     |   8 +++-
 fs/gfs2/lops.c       | 121 +++++++++++++++++++++++++++++++++++++++++++++------
 fs/gfs2/lops.h       |  13 ++++++
 fs/gfs2/ops_fstype.c |   1 +
 fs/gfs2/recovery.c   | 118 +++++--------------------------------------------
 fs/gfs2/recovery.h   |   1 +
 6 files changed, 142 insertions(+), 120 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index f303616..31188c0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -494,18 +494,24 @@ struct gfs2_journal_extent {
 	u64 blocks;
 };
 
+enum {
+	JDF_RECOVERY = 1,
+	JDF_JHEAD    = 2,
+};
+
 struct gfs2_jdesc {
 	struct list_head jd_list;
 	struct list_head extent_list;
 	struct work_struct jd_work;
 	struct inode *jd_inode;
 	unsigned long jd_flags;
-#define JDF_RECOVERY 1
 	unsigned int jd_jid;
 	unsigned int jd_blocks;
 	int jd_recover_error;
 	/* Replay stuff */
 
+	struct gfs2_log_header_host jd_jhead;
+	struct bio *jd_rd_bio; /* bio used for reading this journal */
 	unsigned int jd_found_blocks;
 	unsigned int jd_found_revokes;
 	unsigned int jd_replayed_blocks;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0284648..518b786 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -228,6 +228,53 @@ static void gfs2_end_log_write(struct bio *bio, int error)
 		wake_up(&sdp->sd_log_flush_wait);
 }
 
+static void gfs2_end_log_read(struct bio *bio, int error)
+{
+	struct gfs2_jdesc *jd = bio->bi_private;
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct page *page;
+	struct bio_vec *bvec;
+	int i, last;
+
+	if (error) {
+		sdp->sd_log_error = error;
+		fs_err(sdp, "Error %d reading from journal, jid=%u\n", error,
+		       jd->jd_jid);
+	}
+
+	bio_for_each_segment_all(bvec, bio, i) {
+		struct gfs2_log_header_host uninitialized_var(lh);
+		void *ptr;
+
+		page = bvec->bv_page;
+		ptr = page_address(page);
+		error = gfs2_log_header_in(&lh, ptr);
+		last = page_private(page);
+
+		if (!test_bit(JDF_JHEAD, &jd->jd_flags)) {
+			mempool_free(page, gfs2_page_pool);
+			continue;
+		}
+
+		if (!error && lh.lh_hash == compute_hash(ptr)) {
+			if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
+				jd->jd_jhead = lh;
+			else
+				goto found;
+		}
+
+		if (last) {
+		found:
+			clear_bit(JDF_JHEAD, &jd->jd_flags);
+			smp_mb__after_clear_bit();
+			wake_up_bit(&jd->jd_flags, JDF_JHEAD);
+		}
+		mempool_free(page, gfs2_page_pool);
+	}
+
+	bio_put(bio);
+}
+
 /**
  * gfs2_log_flush_bio - Submit any pending log bio
  * @biop: Pointer to the bio we want to flush
@@ -241,8 +288,10 @@ void gfs2_log_flush_bio(struct bio **biop, int rw)
 {
 	struct bio *bio = *biop;
 	if (bio) {
-		struct gfs2_sbd *sdp = bio->bi_private;
-		atomic_inc(&sdp->sd_log_in_flight);
+		if (rw != READ) {
+			struct gfs2_sbd *sdp = bio->bi_private;
+			atomic_inc(&sdp->sd_log_in_flight);
+		}
 		submit_bio(rw, bio);
 		*biop = NULL;
 	}
@@ -261,14 +310,14 @@ void gfs2_log_flush_bio(struct bio **biop, int rw)
  * Returns: Newly allocated bio
  */
 
-static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
+static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno, int rw)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct super_block *sb = sdp->sd_vfs;
 	unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
 	struct bio *bio;
 
-	BUG_ON(sdp->sd_log_bio);
+	BUG_ON((rw == READ ? jd->jd_rd_bio : sdp->sd_log_bio));
 
 	while (1) {
 		bio = bio_alloc(GFP_NOIO, nrvecs);
@@ -279,10 +328,13 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
 
 	bio->bi_sector = blkno * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
-	bio->bi_end_io = gfs2_end_log_write;
-	bio->bi_private = sdp;
+	bio->bi_end_io = rw == READ ? gfs2_end_log_read : gfs2_end_log_write;
+	bio->bi_private = rw == READ ? (void*)jd : (void*)sdp;
 
-	sdp->sd_log_bio = bio;
+	if (rw == READ)
+		jd->jd_rd_bio = bio;
+	else
+		sdp->sd_log_bio = bio;
 
 	return bio;
 }
@@ -300,10 +352,10 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
  * Returns: The bio to use for log writes
  */
 
-static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
+static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int rw)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-	struct bio *bio = sdp->sd_log_bio;
+	struct bio *bio = rw == READ ? jd->jd_rd_bio : sdp->sd_log_bio;
 	u64 nblk;
 
 	if (bio) {
@@ -311,10 +363,11 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
 		nblk >>= sdp->sd_fsb2bb_shift;
 		if (blkno == nblk)
 			return bio;
-		gfs2_log_flush_bio(&sdp->sd_log_bio, WRITE);
+		gfs2_log_flush_bio(rw == READ ? &jd->jd_rd_bio
+				   : &sdp->sd_log_bio, rw);
 	}
 
-	return gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
+	return gfs2_log_alloc_bio(rw == READ ? jd : sdp->sd_jdesc, blkno, rw);
 }
 
 
@@ -337,11 +390,11 @@ static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
 	struct bio *bio;
 	int ret;
 
-	bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno);
+	bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno, WRITE);
 	ret = bio_add_page(bio, page, size, offset);
 	if (ret == 0) {
 		gfs2_log_flush_bio(&sdp->sd_log_bio, WRITE);
-		bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno);
+		bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno, WRITE);
 		ret = bio_add_page(bio, page, size, offset);
 		WARN_ON(ret == 0);
 	}
@@ -379,6 +432,48 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 	gfs2_log_write(sdp, page, sb->s_blocksize, 0);
 }
 
+void gfs2_log_read_extent(struct gfs2_jdesc *jd, u64 dblock,
+			  unsigned int blocks, int last)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct super_block *sb = sdp->sd_vfs;
+	struct page *page;
+	int i, ret;
+	struct bio *bio;
+
+	for (i=0; i<blocks; i++) {
+		page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+		/* flag the last page of the journal we plan to read in */
+		page_private(page) = (last && i == (blocks - 1));
+
+		bio = gfs2_log_get_bio(jd, dblock + i, READ);
+		ret = bio_add_page(bio, page, sb->s_blocksize, 0);
+		if (ret == 0) {
+			gfs2_log_flush_bio(&jd->jd_rd_bio, READ);
+			bio = gfs2_log_alloc_bio(jd, dblock + i, READ);
+			ret = bio_add_page(bio, page, sb->s_blocksize, 0);
+			WARN_ON(ret == 0);
+		}
+		bio->bi_private = jd;
+	}
+}
+
+void gfs2_log_read(struct gfs2_jdesc *jd)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	int last = 0;
+	struct gfs2_journal_extent *je;
+
+	if (list_empty(&jd->extent_list))
+		map_journal_extents(sdp, jd);
+
+	list_for_each_entry(je, &jd->extent_list, extent_list) {
+		last = list_is_last(&je->extent_list, &jd->extent_list);
+		gfs2_log_read_extent(jd, je->dblock, je->blocks, last);
+		gfs2_log_flush_bio(&jd->jd_rd_bio, READ);
+	}
+}
+
 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
 				      u32 ld_length, u32 ld_data1)
 {
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 3044347..4d7841f 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -11,6 +11,7 @@
 #define __LOPS_DOT_H__
 
 #include <linux/list.h>
+#include <linux/crc32.h>
 #include "incore.h"
 
 #define BUF_OFFSET \
@@ -30,6 +31,7 @@ extern const struct gfs2_log_operations *gfs2_log_ops[];
 extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
 extern void gfs2_log_flush_bio(struct bio **biop, int rw);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern void gfs2_log_read(struct gfs2_jdesc *jd);
 
 static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
 {
@@ -101,5 +103,16 @@ static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
 			gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
 }
 
+static inline u32 compute_hash(const void *ptr)
+{
+	const u32 nothing = 0;
+	u32 hash;
+
+	hash = crc32_le((u32)~0, ptr, sizeof(struct gfs2_log_header) - sizeof(u32));
+	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+	hash ^= (u32)~0;
+
+	return hash;
+}
 #endif /* __LOPS_DOT_H__ */
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fd460c1..4a17eaf 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -642,6 +642,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 			kfree(jd);
 			break;
 		}
+		jd->jd_rd_bio = NULL;
 
 		spin_lock(&sdp->sd_jindex_spin);
 		jd->jd_jid = sdp->sd_journals++;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4b042db..7a844c4 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -118,7 +118,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 	}
 }
 
-static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 {
 	const struct gfs2_log_header *str = buf;
 
@@ -177,85 +177,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 }
 
 /**
- * find_good_lh - find a good log header
- * @jd: the journal
- * @blk: the segment to start searching from
- * @lh: the log header to fill in
- * @forward: if true search forward in the log, else search backward
- *
- * Call get_log_header() to get a log header for a segment, but if the
- * segment is bad, either scan forward or backward until we find a good one.
- *
- * Returns: errno
- */
-
-static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
-			struct gfs2_log_header_host *head)
-{
-	unsigned int orig_blk = *blk;
-	int error;
-
-	for (;;) {
-		error = get_log_header(jd, *blk, head);
-		if (error <= 0)
-			return error;
-
-		if (++*blk == jd->jd_blocks)
-			*blk = 0;
-
-		if (*blk == orig_blk) {
-			gfs2_consist_inode(GFS2_I(jd->jd_inode));
-			return -EIO;
-		}
-	}
-}
-
-/**
- * jhead_scan - make sure we've found the head of the log
- * @jd: the journal
- * @head: this is filled in with the log descriptor of the head
- *
- * At this point, seg and lh should be either the head of the log or just
- * before.  Scan forward until we find the head.
- *
- * Returns: errno
- */
-
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
-	unsigned int blk = head->lh_blkno;
-	struct gfs2_log_header_host lh;
-	int error;
-
-	for (;;) {
-		if (++blk == jd->jd_blocks)
-			blk = 0;
-
-		error = get_log_header(jd, blk, &lh);
-		if (error < 0)
-			return error;
-		if (error == 1)
-			continue;
-
-		if (lh.lh_sequence == head->lh_sequence) {
-			gfs2_consist_inode(GFS2_I(jd->jd_inode));
-			return -EIO;
-		}
-		if (lh.lh_sequence < head->lh_sequence)
-			break;
-
-		*head = lh;
-	}
-
-	return 0;
-}
-
-/**
  * gfs2_find_jhead - find the head of a log
  * @jd: the journal
  * @head: the log descriptor for the head of the log is returned here
  *
- * Do a binary search of a journal and find the valid log entry with the
+ * Search the journal sequentially and find the valid log entry with the
  * highest sequence number.  (i.e. the log head)
  *
  * Returns: errno
@@ -263,39 +189,19 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 
 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-	struct gfs2_log_header_host lh_1, lh_m;
-	u32 blk_1, blk_2, blk_m;
-	int error;
-
-	blk_1 = 0;
-	blk_2 = jd->jd_blocks - 1;
-
-	for (;;) {
-		blk_m = (blk_1 + blk_2) / 2;
-
-		error = find_good_lh(jd, &blk_1, &lh_1);
-		if (error)
-			return error;
-
-		error = find_good_lh(jd, &blk_m, &lh_m);
-		if (error)
-			return error;
-
-		if (blk_1 == blk_m || blk_m == blk_2)
-			break;
-
-		if (lh_1.lh_sequence <= lh_m.lh_sequence)
-			blk_1 = blk_m;
-		else
-			blk_2 = blk_m;
-	}
+	int error = 0;
 
-	error = jhead_scan(jd, &lh_1);
-	if (error)
-		return error;
+	memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host));
+	set_bit(JDF_JHEAD, &jd->jd_flags);
+	gfs2_log_read(jd);
 
-	*head = lh_1;
+	if (test_bit(JDF_JHEAD, &jd->jd_flags))
+		wait_on_bit(&jd->jd_flags, JDF_JHEAD, TASK_INTERRUPTIBLE);
 
+	if (jd->jd_jhead.lh_sequence == 0)
+		error = 1;
+	else
+		*head = jd->jd_jhead;
 	return error;
 }
 
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 11fdfab..cd691ff 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -29,6 +29,7 @@ extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
 
 extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 		    struct gfs2_log_header_host *head);
+extern int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf);
 extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
 extern void gfs2_recover_func(struct work_struct *work);
 
-- 
2.4.11




More information about the Cluster-devel mailing list