[Cluster-devel] [GFS2 PATCH] GFS2: If requested is too large, use the largest extent in the rgrp
Steven Whitehouse
swhiteho at redhat.com
Tue Nov 5 11:00:04 UTC 2013
Hi,
On Mon, 2013-11-04 at 11:52 -0500, Bob Peterson wrote:
> Hi,
>
> Before this patch, GFS2 would keep searching through all the rgrps
> until it found one that had a chunk of free blocks big enough to
> satisfy the size hint, which is based on the file write size,
> regardless of whether the chunk was big enough to perform the write.
> However, when doing big writes there may not be a large enough
> chunk of free blocks in any rgrp, due to file system fragmentation.
> The largest chunk may be big enough to satisfy the write request,
> but it may not meet the ideal reservation size from the "size hint".
> The writes would slow to a crawl because every write would search
> every rgrp, then finally give up and default to a single-block write.
> In my case, performance would drop from 425MB/s to 18KB/s, or 24000
> times slower.
>
> This patch basically makes it so that if we can't find a contiguous
> chunk of blocks big enough to satisfy the sizehint, we'll use the
> largest chunk of blocks we found that will still contain the write.
> It does so by keeping track of the largest run of blocks within the
> rgrp.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 4d83abd..711e835 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -57,6 +57,11 @@
> * 3 = Used (metadata)
> */
>
> +struct gfs2_extent {
> + struct gfs2_rbm ex_rbm;
> + u32 ex_len;
> +};
> +
> static const char valid_change[16] = {
> /* current */
> /* n */ 0, 1, 1, 1,
> @@ -65,8 +70,9 @@ static const char valid_change[16] = {
> 1, 0, 0, 0
> };
>
> -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
> - const struct gfs2_inode *ip, bool nowrap);
> +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
> + const struct gfs2_inode *ip, bool nowrap,
> + const struct gfs2_alloc_parms *ap);
>
>
> /**
> @@ -1455,7 +1461,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
> if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
> return;
>
> - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
> + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
> if (ret == 0) {
> rs->rs_rbm = rbm;
> rs->rs_free = extlen;
> @@ -1520,6 +1526,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
> * @rbm: The current position in the resource group
> * @ip: The inode for which we are searching for blocks
> * @minext: The minimum extent length
> + * @maxext: A pointer to the maximum extent structure
> *
> * This checks the current position in the rgrp to see whether there is
> * a reservation covering this block. If not then this function is a
> @@ -1532,7 +1539,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
>
> static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
> const struct gfs2_inode *ip,
> - u32 minext)
> + u32 minext,
> + struct gfs2_extent *maxext)
> {
> u64 block = gfs2_rbm_to_block(rbm);
> u32 extlen = 1;
> @@ -1545,8 +1553,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
> */
> if (minext) {
> extlen = gfs2_free_extlen(rbm, minext);
> - nblock = block + extlen;
> - if (extlen < minext)
> + if (extlen <= maxext->ex_len)
> goto fail;
> }
>
> @@ -1555,9 +1562,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
> * and skip if parts of it are already reserved
> */
> nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
> - if (nblock == block)
> - return 0;
> + if (nblock == block) {
> + if (!minext || extlen >= minext)
> + return 0;
> +
> + if (extlen > maxext->ex_len) {
> + maxext->ex_len = extlen;
> + maxext->ex_rbm = *rbm;
> + }
> fail:
> + nblock = block + extlen;
> + }
> ret = gfs2_rbm_from_block(rbm, nblock);
> if (ret < 0)
> return ret;
> @@ -1568,10 +1583,12 @@ fail:
> * gfs2_rbm_find - Look for blocks of a particular state
> * @rbm: Value/result starting position and final position
> * @state: The state which we want to find
> - * @minext: The requested extent length (0 for a single block)
> + * @minext: Pointer to the requested extent length (NULL for a single block)
> + * This is updated to be the actual reservation size.
> * @ip: If set, check for reservations
> * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
> * around until we've reached the starting point.
> + * @ap: the allocation parameters
> *
> * Side effects:
> * - If looking for free blocks, we set GBF_FULL on each bitmap which
> @@ -1580,8 +1597,9 @@ fail:
> * Returns: 0 on success, -ENOSPC if there is no block of the requested state
> */
>
> -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
> - const struct gfs2_inode *ip, bool nowrap)
> +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
> + const struct gfs2_inode *ip, bool nowrap,
> + const struct gfs2_alloc_parms *ap)
> {
> struct buffer_head *bh;
> int initial_bii;
> @@ -1592,6 +1610,12 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
> int iters = rbm->rgd->rd_length;
> int ret;
> struct gfs2_bitmap *bi;
> + struct gfs2_extent maxext;
> +
> + maxext.ex_len = 0;
> + maxext.ex_rbm.rgd = rbm->rgd;
> + maxext.ex_rbm.bii = 0;
> + maxext.ex_rbm.offset = 0;
This can be shortened to:
struct gfs2_extent maxext = { .ex_rbm.rgd = rbm->rgd, };
to save zeroing the fields explicitly. It might be a bit more readable
without the ex_ prefix too, possibly?
Steve.
>
> /* If we are not starting at the beginning of a bitmap, then we
> * need to add one to the bitmap count to ensure that we search
> @@ -1620,7 +1644,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
> return 0;
>
> initial_bii = rbm->bii;
> - ret = gfs2_reservation_check_and_update(rbm, ip, minext);
> + ret = gfs2_reservation_check_and_update(rbm, ip,
> + minext ? *minext : 0,
> + &maxext);
> if (ret == 0)
> return 0;
> if (ret > 0) {
> @@ -1655,6 +1681,17 @@ next_iter:
> break;
> }
>
> + if (minext == NULL || state != GFS2_BLKST_FREE)
> + return -ENOSPC;
> +
> + /* If the maximum extent we found is big enough to fulfill the
> + minimum requirements, use it anyway. */
> + if (maxext.ex_len) {
> + *rbm = maxext.ex_rbm;
> + *minext = maxext.ex_len;
> + return 0;
> + }
> +
> return -ENOSPC;
> }
>
> @@ -1680,7 +1717,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
>
> while (1) {
> down_write(&sdp->sd_log_flush_lock);
> - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
> + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
> + true, NULL);
> up_write(&sdp->sd_log_flush_lock);
> if (error == -ENOSPC)
> break;
> @@ -2184,11 +2222,12 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
> int error;
>
> gfs2_set_alloc_start(&rbm, ip, dinode);
> - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
> + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
>
> if (error == -ENOSPC) {
> gfs2_set_alloc_start(&rbm, ip, dinode);
> - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
> + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
> + NULL);
> }
>
> /* Since all blocks are reserved in advance, this shouldn't happen */
>
More information about the Cluster-devel
mailing list