[Cluster-devel] libgfs2: Add readahead for rgrp headers

Andrew Price anprice at redhat.com
Mon Feb 18 10:55:59 UTC 2013


Hi,

On 18/02/13 10:27, Steven Whitehouse wrote:
>
> This adds readahead to rgrp headers, greatly improving the speed with
> which they can be read in during fsck. Also, the multiple reads which
> were used before are replaced with a single read per resource group.
>
> This is an example of the kind of speed-up which may well be possible
> elsewhere in the code. I started with this example simply because it was
> the easiest one to do.
>
> An alternative implementation might use O_DIRECT and aio, but I'm not sure
> that there would be much benefit compared with this method. A further
> thought would be to use drop behind in places where we know that we will
> not be looking at the data again.
>
> Taking timings for just the rgrp reading section of fsck, I see almost a
> 10x speed up for that section of code using this patch on a 500G
> filesystem.

Looks good to me,

Andy

>
> Signed-off-by: Steven Whitehouse <swhiteho at redhat.com>
>
> diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
> index 5bc1a4e..68f0731 100644
> --- a/gfs2/libgfs2/buf.c
> +++ b/gfs2/libgfs2/buf.c
> @@ -7,6 +7,7 @@
>   #include <inttypes.h>
>   #include <sys/types.h>
>   #include <sys/stat.h>
> +#include <sys/time.h>
>   #include <fcntl.h>
>   #include <unistd.h>
>   #include <errno.h>
> @@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num)
>   	return bh;
>   }
>
> -struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> -				 const char *caller)
> +int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n,
> +	     uint64_t block, int line, const char *caller)
>   {
> -	struct gfs2_buffer_head *bh = bget(sdp, num);
> -	if (bh == NULL)
> -		return bh;
> -	if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
> -	    num * sdp->bsize) {
> -		fprintf(stderr, "bad seek: %s from %s:%d: block "
> -			"%llu (0x%llx)\n", strerror(errno),
> -			caller, line, (unsigned long long)num,
> -			(unsigned long long)num);
> -		exit(-1);
> +	struct iovec *iov = alloca(n * sizeof(struct iovec));
> +	struct iovec *iovbase = iov;
> +	uint64_t b = block;
> +	size_t size = 0;
> +	size_t i;
> +	int ret;
> +
> +	for (i = 0; i < n; i++) {
> +		bhs[i] = bget(sdp, b++);
> +		if (bhs[i] == NULL)
> +			return -1;
> +		*iov++ = bhs[i]->iov;
> +		size += bhs[i]->iov.iov_len;
>   	}
> -	if (readv(sdp->device_fd, &bh->iov, 1) < 0) {
> +
> +	ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize);
> +
> +	if (ret != size) {
>   		fprintf(stderr, "bad read: %s from %s:%d: block "
> -			"%llu (0x%llx)\n", strerror(errno),
> -			caller, line, (unsigned long long)num,
> -			(unsigned long long)num);
> +				"%llu (0x%llx)\n", strerror(errno),
> +				caller, line, (unsigned long long)block,
> +				(unsigned long long)block);
>   		exit(-1);
>   	}
> -	return bh;
> +
> +	return 0;
> +}
> +
> +struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> +				 const char *caller)
> +{
> +	struct gfs2_buffer_head *bh;
> +	int ret;
> +
> +	ret = __breadm(sdp, &bh, 1, num, line, caller);
> +	if (ret >= 0)
> +		return bh;
> +	return NULL;
>   }
>
>   int bwrite(struct gfs2_buffer_head *bh)
>   {
>   	struct gfs2_sbd *sdp = bh->sdp;
>
> -	if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
> -	    bh->b_blocknr * sdp->bsize) {
> -		return -1;
> -	}
> -	if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len)
> +	if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len)
>   		return -1;
>   	sdp->writes++;
>   	bh->b_modified = 0;
> diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
> index 2b109fb..46d4d67 100644
> --- a/gfs2/libgfs2/libgfs2.h
> +++ b/gfs2/libgfs2/libgfs2.h
> @@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist,
>   extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num);
>   extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
>   					int line, const char *caller);
> +extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller);
>   extern int bwrite(struct gfs2_buffer_head *bh);
>   extern int brelse(struct gfs2_buffer_head *bh);
>   extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
> @@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
>   #define bmodified(bh) do { bh->b_modified = 1; } while(0)
>
>   #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__)
> +#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__)
>
>   /* device_geometry.c */
>   extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i);
> diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
> index cbab2a3..f7dc01e 100644
> --- a/gfs2/libgfs2/rgrp.c
> +++ b/gfs2/libgfs2/rgrp.c
> @@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
>   		return -1;
>   	if (gfs2_check_range(sdp, rgd->ri.ri_addr))
>   		return -1;
> +	if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr))
> +		return -1;
>   	for (x = 0; x < length; x++){
> -		rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
> -		if(gfs2_check_meta(rgd->bh[x],
> -				   (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
> +		if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
>   		{
>   			uint64_t error;
>
> diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
> index 8317862..21c9f7b 100644
> --- a/gfs2/libgfs2/super.c
> +++ b/gfs2/libgfs2/super.c
> @@ -7,6 +7,7 @@
>   #include <stdlib.h>
>   #include <string.h>
>   #include <errno.h>
> +#include <fcntl.h>
>
>   #include "libgfs2.h"
>   #include "osi_list.h"
> @@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
>   	return 0;
>   }
>
> +#define RA_WINDOW 32
> +
> +static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window,
> +				struct osi_node *n)
> +{
> +	struct rgrp_tree *rgd;
> +	unsigned i;
> +	off_t start, len;
> +
> +	for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
> +		if (n == NULL)
> +			return i;
> +		if (i < cur_window)
> +			continue;
> +		rgd = (struct rgrp_tree *)n;
> +		start = rgd->ri.ri_addr * sdp->bsize;
> +		len = rgd->ri.ri_length * sdp->bsize;
> +		posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
> +	}
> +
> +	return i;
> +}
> +
>   /**
>    * ri_update - attach rgrps to the super block
>    * @sdp: incore superblock data
> @@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
>   	uint64_t errblock = 0;
>   	uint64_t rmax = 0;
>   	struct osi_node *n, *next = NULL;
> +	unsigned ra_window = 0;
> +
> +	/* Turn off generic readahead */
> +	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
>
>   	if (rindex_read(sdp, fd, &count1, sane))
>   		goto fail;
>   	for (n = osi_first(&sdp->rgtree); n; n = next) {
>   		next = osi_next(n);
>   		rgd = (struct rgrp_tree *)n;
> +		/* Readahead resource group headers */
> +		if (ra_window < RA_WINDOW/2)
> +			ra_window = gfs2_rgrp_reada(sdp, ra_window, n);
> +		/* Read resource group header */
>   		errblock = gfs2_rgrp_read(sdp, rgd);
>   		if (errblock)
>   			return errblock;
> +		ra_window--;
>   		count2++;
>   		if (!quiet && count2 % 100 == 0) {
>   			printf(".");
> @@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
>   	if (count1 != count2)
>   		goto fail;
>
> +	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
>   	return 0;
>
>    fail:
> +	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
>   	gfs2_rgrp_free(&sdp->rgtree);
>   	return -1;
>   }
>
>




More information about the Cluster-devel mailing list