[Cluster-devel] [GFS2] Faster gfs2_bitfit algorithm
Steven Whitehouse
swhiteho at redhat.com
Tue Mar 11 10:33:50 UTC 2008
Hi,
That's a worthwhile improvement. Now in the -nmw git tree. Thanks,
Steve.
On Mon, 2008-03-10 at 18:17 -0500, Bob Peterson wrote:
> Hi,
>
> This version of the gfs2_bitfit algorithm includes the latest
> suggestions from Steve Whitehouse. It is typically eight to
> ten times faster than the version we're using today. If there
> is a lot of metadata mixed in (lots of small files) the
> algorithm is often 15 times faster, and given the right
> conditions, I've seen peaks of 20 times faster.
>
> Regards,
>
> Bob Peterson
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> --
> fs/gfs2/rgrp.c | 93 ++++++++++++++++++++++++++++++++++++-------------------
> 1 files changed, 61 insertions(+), 32 deletions(-)
>
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 4291375..7e8f0b1 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -14,6 +14,7 @@
> #include <linux/fs.h>
> #include <linux/gfs2_ondisk.h>
> #include <linux/lm_interface.h>
> +#include <linux/prefetch.h>
>
> #include "gfs2.h"
> #include "incore.h"
> @@ -33,6 +34,16 @@
> #define BFITNOENT ((u32)~0)
> #define NO_BLOCK ((u64)~0)
>
> +#if BITS_PER_LONG == 32
> +#define LBITMASK (0x55555555UL)
> +#define LBITSKIP55 (0x55555555UL)
> +#define LBITSKIP00 (0x00000000UL)
> +#else
> +#define LBITMASK (0x5555555555555555UL)
> +#define LBITSKIP55 (0x5555555555555555UL)
> +#define LBITSKIP00 (0x0000000000000000UL)
> +#endif
> +
> /*
> * These routines are used by the resource group routines (rgrp.c)
> * to keep track of block allocation. Each block is represented by two
> @@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
> static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
> u8 old_state)
> {
> - const u8 *byte;
> - u32 blk = goal;
> - unsigned int bit, bitlong;
> - const unsigned long *plong;
> -#if BITS_PER_LONG == 32
> - const unsigned long plong55 = 0x55555555;
> -#else
> - const unsigned long plong55 = 0x5555555555555555;
> -#endif
> -
> - byte = buffer + (goal / GFS2_NBBY);
> - plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
> - bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> - bitlong = bit;
> -
> - while (byte < buffer + buflen) {
> -
> - if (bitlong == 0 && old_state == 0 && *plong == plong55) {
> - plong++;
> - byte += sizeof(unsigned long);
> - blk += sizeof(unsigned long) * GFS2_NBBY;
> - continue;
> + const u8 *byte, *start, *end;
> + int bit, startbit;
> + u32 g1, g2, misaligned;
> + unsigned long *plong;
> + unsigned long lskipval;
> +
> + lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
> + g1 = (goal / GFS2_NBBY);
> + start = buffer + g1;
> + byte = start;
> + end = buffer + buflen;
> + g2 = ALIGN(g1, sizeof(unsigned long));
> + plong = (unsigned long *)(buffer + g2);
> + startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> + misaligned = g2 - g1;
> + if (!misaligned)
> + goto ulong_aligned;
> +/* parse the bitmap a byte at a time */
> +misaligned:
> + while (byte < end) {
> + if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
> + return goal +
> + (((byte - start) * GFS2_NBBY) +
> + ((bit - startbit) >> 1));
> }
> - if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
> - return blk;
> bit += GFS2_BIT_SIZE;
> - if (bit >= 8) {
> + if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
> bit = 0;
> byte++;
> + misaligned--;
> + if (!misaligned) {
> + plong = (unsigned long *)byte;
> + goto ulong_aligned;
> + }
> }
> - bitlong += GFS2_BIT_SIZE;
> - if (bitlong >= sizeof(unsigned long) * 8) {
> - bitlong = 0;
> - plong++;
> - }
> -
> - blk++;
> }
> + return BFITNOENT;
>
> +/* parse the bitmap an unsigned long at a time */
> +ulong_aligned:
> + /* Stop at "end - 1" or else the prefetch can go past the end and
> + fault. We could guard it with an "if", but we'd lose some of the
> + performance gained. This way only slows down searching the very
> + last 4/8 bytes, depending on the architecture. I've experimented
> + with several ways of writing this section, such as using an else
> + before the goto, but this one seems to be the fastest. */
> + while ((unsigned char *)plong < end - 1) {
> + prefetch(plong + 1);
> + if (((*plong) & LBITMASK) != lskipval)
> + break;
> + plong++;
> + }
> + if ((unsigned char *)plong < end) {
> + byte = (const u8 *)plong;
> + misaligned += sizeof(unsigned long) - 1;
> + goto misaligned;
> + }
> return BFITNOENT;
> }
>
>
>
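
For readers who want to see the core trick in isolation, below is a minimal
user-space sketch of the word-skip idea the patch introduces. It is an
illustration under stated assumptions, not the kernel code: bitfit(),
STATE_FREE, STATE_USED and NBBY here are simplified stand-ins for the GFS2
equivalents, the goal/alignment bookkeeping and the prefetch are omitted,
and memcpy() is used for an alignment-safe word load. The observation the
patch exploits is this: GFS2 packs four 2-bit block states into each byte,
the low bit of every pair is the GFS2_BLKST_USED bit, and masking a whole
word with 0x55...55 exposes all of those low bits at once, so a word that
cannot possibly contain the wanted state is rejected with a single compare
instead of up to 16 or 32 per-entry tests.

/* bitfit_sketch.c -- user-space illustration of the patch's word-skip
 * idea; hypothetical names, not the kernel interfaces. */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NBBY       4          /* 2-bit block states per byte        */
#define BIT_MASK   0x3        /* one 2-bit state                    */
#define STATE_FREE 0          /* stands in for GFS2_BLKST_FREE (00) */
#define STATE_USED 1          /* stands in for GFS2_BLKST_USED (01) */

#if ULONG_MAX == 0xffffffffUL
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/* Return the index of the first 2-bit entry equal to old_state, or ~0u. */
static uint32_t bitfit(const uint8_t *buf, unsigned int len, uint8_t old_state)
{
	/* Searching for an "odd" state (USED bit set)?  Then a word whose
	   low bits are all 0 cannot match; otherwise a word whose low bits
	   are all 1 cannot match.  Same selection the patch makes. */
	unsigned long skip = (old_state & STATE_USED) ? LBITSKIP00 : LBITSKIP55;
	unsigned int i = 0;

	/* Fast path: reject a whole word per compare. */
	while (len - i >= sizeof(unsigned long)) {
		unsigned long w;
		memcpy(&w, buf + i, sizeof(w));	/* alignment-safe load */
		if ((w & LBITMASK) != skip)
			break;			/* word may hold a match */
		i += sizeof(unsigned long);
	}

	/* Slow path: examine the remaining entries two bits at a time. */
	for (; i < len; i++) {
		unsigned int bit;
		for (bit = 0; bit < NBBY; bit++)
			if (((buf[i] >> (bit * 2)) & BIT_MASK) == old_state)
				return i * NBBY + bit;
	}
	return ~0u;
}

int main(void)
{
	uint8_t bitmap[16];

	memset(bitmap, 0x55, sizeof(bitmap));	/* every block USED (01) */
	bitmap[9] = 0x51;	/* entries, low bits first: 01 00 01 01,
				   so entry 1 of byte 9 is FREE */

	/* 9 bytes * 4 entries + 1 = 37 */
	printf("first free block: %u\n",
	       bitfit(bitmap, sizeof(bitmap), STATE_FREE));
	return 0;
}

Compiled with something like "cc -O2 bitfit_sketch.c", this prints
"first free block: 37". The kernel version additionally issues
prefetch(plong + 1) inside the fast loop, which is why the patch stops
that loop at end - 1 rather than end, as its comment explains.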