[Cluster-devel] libgfs2: Add readahead for rgrp headers

Steven Whitehouse swhiteho at redhat.com
Mon Feb 18 10:27:00 UTC 2013


This adds readahead to rgrp headers, greatly improving the speed with
which they can be read in during fsck. Also, the multiple reads which
were used before are replaced with a single read per resource group.

This is an example of the kind of speedup which may well be possible
elsewhere in the code. I started with this example simply because it was
the easiest one to do.

An alternative implementation might use O_DIRECT and aio, but I'm not
sure that there would be much benefit compared with this method. A
further thought would be to use drop-behind in places where we know that
we will not be looking at the data again.

Taking timings for just the rgrp reading section of fsck, I see almost a
10x speed up for that section of code using this patch on a 500G
filesystem.

Signed-off-by: Steven Whitehouse <swhiteho at redhat.com>

diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
index 5bc1a4e..68f0731 100644
--- a/gfs2/libgfs2/buf.c
+++ b/gfs2/libgfs2/buf.c
@@ -7,6 +7,7 @@
 #include <inttypes.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/time.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
@@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num)
 	return bh;
 }
 
-struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
-				 const char *caller)
+int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n,
+	     uint64_t block, int line, const char *caller)
 {
-	struct gfs2_buffer_head *bh = bget(sdp, num);
-	if (bh == NULL)
-		return bh;
-	if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
-	    num * sdp->bsize) {
-		fprintf(stderr, "bad seek: %s from %s:%d: block "
-			"%llu (0x%llx)\n", strerror(errno),
-			caller, line, (unsigned long long)num,
-			(unsigned long long)num);
-		exit(-1);
+	struct iovec *iov = alloca(n * sizeof(struct iovec));
+	struct iovec *iovbase = iov;
+	uint64_t b = block;
+	size_t size = 0;
+	size_t i;
+	int ret;
+
+	for (i = 0; i < n; i++) {
+		bhs[i] = bget(sdp, b++);
+		if (bhs[i] == NULL)
+			return -1;
+		*iov++ = bhs[i]->iov;
+		size += bhs[i]->iov.iov_len;
 	}
-	if (readv(sdp->device_fd, &bh->iov, 1) < 0) {
+
+	ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize);
+
+	if (ret != size) {
 		fprintf(stderr, "bad read: %s from %s:%d: block "
-			"%llu (0x%llx)\n", strerror(errno),
-			caller, line, (unsigned long long)num,
-			(unsigned long long)num);
+				"%llu (0x%llx)\n", strerror(errno),
+				caller, line, (unsigned long long)block,
+				(unsigned long long)block);
 		exit(-1);
 	}
-	return bh;
+
+	return 0;
+}
+
+struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
+				 const char *caller)
+{
+	struct gfs2_buffer_head *bh;
+	int ret;
+
+	ret = __breadm(sdp, &bh, 1, num, line, caller);
+	if (ret >= 0)
+		return bh;
+	return NULL;
 }
 
 int bwrite(struct gfs2_buffer_head *bh)
 {
 	struct gfs2_sbd *sdp = bh->sdp;
 
-	if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
-	    bh->b_blocknr * sdp->bsize) {
-		return -1;
-	}
-	if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len)
+	if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len)
 		return -1;
 	sdp->writes++;
 	bh->b_modified = 0;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 2b109fb..46d4d67 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist,
 extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num);
 extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
 					int line, const char *caller);
+extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller);
 extern int bwrite(struct gfs2_buffer_head *bh);
 extern int brelse(struct gfs2_buffer_head *bh);
 extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
@@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
 #define bmodified(bh) do { bh->b_modified = 1; } while(0)
 
 #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__)
+#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__)
 
 /* device_geometry.c */
 extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i);
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index cbab2a3..f7dc01e 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
 		return -1;
 	if (gfs2_check_range(sdp, rgd->ri.ri_addr))
 		return -1;
+	if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr))
+		return -1;
 	for (x = 0; x < length; x++){
-		rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
-		if(gfs2_check_meta(rgd->bh[x],
-				   (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
+		if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
 		{
 			uint64_t error;
 
diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
index 8317862..21c9f7b 100644
--- a/gfs2/libgfs2/super.c
+++ b/gfs2/libgfs2/super.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <fcntl.h>
 
 #include "libgfs2.h"
 #include "osi_list.h"
@@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
 	return 0;
 }
 
+#define RA_WINDOW 32
+
+static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window,
+				struct osi_node *n)
+{
+	struct rgrp_tree *rgd;
+	unsigned i;
+	off_t start, len;
+
+	for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
+		if (n == NULL)
+			return i;
+		if (i < cur_window)
+			continue;
+		rgd = (struct rgrp_tree *)n;
+		start = rgd->ri.ri_addr * sdp->bsize;
+		len = rgd->ri.ri_length * sdp->bsize;
+		posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
+	}
+
+	return i;
+}
+
 /**
  * ri_update - attach rgrps to the super block
  * @sdp: incore superblock data
@@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
 	uint64_t errblock = 0;
 	uint64_t rmax = 0;
 	struct osi_node *n, *next = NULL;
+	unsigned ra_window = 0;
+
+	/* Turn off generic readahead */
+	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
 
 	if (rindex_read(sdp, fd, &count1, sane))
 		goto fail;
 	for (n = osi_first(&sdp->rgtree); n; n = next) {
 		next = osi_next(n);
 		rgd = (struct rgrp_tree *)n;
+		/* Readahead resource group headers */
+		if (ra_window < RA_WINDOW/2)
+			ra_window = gfs2_rgrp_reada(sdp, ra_window, n);
+		/* Read resource group header */
 		errblock = gfs2_rgrp_read(sdp, rgd);
 		if (errblock)
 			return errblock;
+		ra_window--;
 		count2++;
 		if (!quiet && count2 % 100 == 0) {
 			printf(".");
@@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
 	if (count1 != count2)
 		goto fail;
 
+	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
 	return 0;
 
  fail:
+	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
 	gfs2_rgrp_free(&sdp->rgtree);
 	return -1;
 }





More information about the Cluster-devel mailing list