[Cluster-devel] libgfs2: Add readahead for rgrp headers
Andrew Price
anprice at redhat.com
Mon Feb 18 10:55:59 UTC 2013
Hi,
On 18/02/13 10:27, Steven Whitehouse wrote:
>
> This adds readahead to rgrp headers, greatly improving the speed with
> which they can be read in during fsck. Also, the multiple reads which
> were used before are replaced with a single read per resource group.
>
> This is an example of the kinds of speed up which may well be possible
> elsewhere in the code. I started with this example simply because it was
> the easiest one to do.
>
> An alternative implementation might use O_DIRECT and aio, but I'm not sure
> that there would be much benefit compared with this method. A further
> thought would be to use drop-behind in places where we know that we will
> not be looking at the data again.
>
> Taking timings for just the rgrp reading section of fsck, I see almost a
> 10x speed up for that section of code using this patch on a 500G
> filesystem.
Looks good to me,
Andy
>
> Signed-off-by: Steven Whitehouse <swhiteho at redhat.com>
>
> diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
> index 5bc1a4e..68f0731 100644
> --- a/gfs2/libgfs2/buf.c
> +++ b/gfs2/libgfs2/buf.c
> @@ -7,6 +7,7 @@
> #include <inttypes.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> +#include <sys/time.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <errno.h>
> @@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num)
> return bh;
> }
>
> -struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> - const char *caller)
> +int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n,
> + uint64_t block, int line, const char *caller)
> {
> - struct gfs2_buffer_head *bh = bget(sdp, num);
> - if (bh == NULL)
> - return bh;
> - if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
> - num * sdp->bsize) {
> - fprintf(stderr, "bad seek: %s from %s:%d: block "
> - "%llu (0x%llx)\n", strerror(errno),
> - caller, line, (unsigned long long)num,
> - (unsigned long long)num);
> - exit(-1);
> + struct iovec *iov = alloca(n * sizeof(struct iovec));
> + struct iovec *iovbase = iov;
> + uint64_t b = block;
> + size_t size = 0;
> + size_t i;
> + int ret;
> +
> + for (i = 0; i < n; i++) {
> + bhs[i] = bget(sdp, b++);
> + if (bhs[i] == NULL)
> + return -1;
> + *iov++ = bhs[i]->iov;
> + size += bhs[i]->iov.iov_len;
> }
> - if (readv(sdp->device_fd, &bh->iov, 1) < 0) {
> +
> + ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize);
> +
> + if (ret != size) {
> fprintf(stderr, "bad read: %s from %s:%d: block "
> - "%llu (0x%llx)\n", strerror(errno),
> - caller, line, (unsigned long long)num,
> - (unsigned long long)num);
> + "%llu (0x%llx)\n", strerror(errno),
> + caller, line, (unsigned long long)block,
> + (unsigned long long)block);
> exit(-1);
> }
> - return bh;
> +
> + return 0;
> +}
> +
> +struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> + const char *caller)
> +{
> + struct gfs2_buffer_head *bh;
> + int ret;
> +
> + ret = __breadm(sdp, &bh, 1, num, line, caller);
> + if (ret >= 0)
> + return bh;
> + return NULL;
> }
>
> int bwrite(struct gfs2_buffer_head *bh)
> {
> struct gfs2_sbd *sdp = bh->sdp;
>
> - if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
> - bh->b_blocknr * sdp->bsize) {
> - return -1;
> - }
> - if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len)
> + if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len)
> return -1;
> sdp->writes++;
> bh->b_modified = 0;
> diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
> index 2b109fb..46d4d67 100644
> --- a/gfs2/libgfs2/libgfs2.h
> +++ b/gfs2/libgfs2/libgfs2.h
> @@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist,
> extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num);
> extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
> int line, const char *caller);
> +extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller);
> extern int bwrite(struct gfs2_buffer_head *bh);
> extern int brelse(struct gfs2_buffer_head *bh);
> extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
> @@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
> #define bmodified(bh) do { bh->b_modified = 1; } while(0)
>
> #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__)
> +#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__)
>
> /* device_geometry.c */
> extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i);
> diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
> index cbab2a3..f7dc01e 100644
> --- a/gfs2/libgfs2/rgrp.c
> +++ b/gfs2/libgfs2/rgrp.c
> @@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
> return -1;
> if (gfs2_check_range(sdp, rgd->ri.ri_addr))
> return -1;
> + if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr))
> + return -1;
> for (x = 0; x < length; x++){
> - rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
> - if(gfs2_check_meta(rgd->bh[x],
> - (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
> + if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
> {
> uint64_t error;
>
> diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
> index 8317862..21c9f7b 100644
> --- a/gfs2/libgfs2/super.c
> +++ b/gfs2/libgfs2/super.c
> @@ -7,6 +7,7 @@
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> +#include <fcntl.h>
>
> #include "libgfs2.h"
> #include "osi_list.h"
> @@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
> return 0;
> }
>
> +#define RA_WINDOW 32
> +
> +static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window,
> + struct osi_node *n)
> +{
> + struct rgrp_tree *rgd;
> + unsigned i;
> + off_t start, len;
> +
> + for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
> + if (n == NULL)
> + return i;
> + if (i < cur_window)
> + continue;
> + rgd = (struct rgrp_tree *)n;
> + start = rgd->ri.ri_addr * sdp->bsize;
> + len = rgd->ri.ri_length * sdp->bsize;
> + posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
> + }
> +
> + return i;
> +}
> +
> /**
> * ri_update - attach rgrps to the super block
> * @sdp: incore superblock data
> @@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
> uint64_t errblock = 0;
> uint64_t rmax = 0;
> struct osi_node *n, *next = NULL;
> + unsigned ra_window = 0;
> +
> + /* Turn off generic readahead */
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
>
> if (rindex_read(sdp, fd, &count1, sane))
> goto fail;
> for (n = osi_first(&sdp->rgtree); n; n = next) {
> next = osi_next(n);
> rgd = (struct rgrp_tree *)n;
> + /* Readahead resource group headers */
> + if (ra_window < RA_WINDOW/2)
> + ra_window = gfs2_rgrp_reada(sdp, ra_window, n);
> + /* Read resource group header */
> errblock = gfs2_rgrp_read(sdp, rgd);
> if (errblock)
> return errblock;
> + ra_window--;
> count2++;
> if (!quiet && count2 % 100 == 0) {
> printf(".");
> @@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
> if (count1 != count2)
> goto fail;
>
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
> return 0;
>
> fail:
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
> gfs2_rgrp_free(&sdp->rgtree);
> return -1;
> }
>
>
More information about the Cluster-devel
mailing list