[Cluster-devel] [GFS2 PATCH] [preliminary] gfs2: non-recursive delete

Wed Jan 11 11:36:21 UTC 2017

Hi,


On 10/01/17 21:22, Bob Peterson wrote:
> Hi,
>
> This patch implements non-recursive delete for GFS2.
> It's kind of big, but that's because it deletes the old algorithm
> based on recursive_scan, and replaces it with the new version.
>
> The idea here is to process the lowest height of metadata, the inode,
> and build a linked list of metadata buffers. Then, traverse each buffer
> finding the first rgrp pointed to by that buffer. We lock the glock for
> that rgrp. Then, for all buffers in the list, we look for references to
> that rgrp, and remove them in contiguous spans. If a buffer references
> *ONLY* that rgrp, it's removed from the list. Then the process repeats
> starting with the first buffer that hasn't been completely cleaned of
> all rgrp references.
>
> This way, we can keep only a single rgrp locked at one time, thus
> allowing other processes to allocate and use the others. We also need
> to allocate fewer blocks in the journal each iteration, allowing us
> to delete very large files in smaller chunks, which turns out to be
> more efficient and requires a lot less journal space.
>
> It also resolves a nasty problem that's plagued GFS2 for years, and
> that is: before this patch, when deleting a very large file, it needed
> to allocate a somewhat big array of rlist pointers. If you have 128MB
> resource groups (rgrps) in order to delete a file that's 500GB in size,
> you need an array of 4000 rgrp pointers. This often results in warnings
> like this:
>
> kernel: WARNING: at mm/page_alloc.c:1550 get_page_from_freelist+0x997/0x9b0()
>
> Since this patch processes the delete one rgrp at a time, this is no
> longer an issue.
>
> This patch has NOT been thoroughly tested, so it needs some serious
> vetting and review before I give it to Linus. I just wanted to post
> it early so people can spot any errors.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index cbad0ef..e17e59c 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -19,6 +19,7 @@
>   #include "bmap.h"
>   #include "glock.h"
>   #include "inode.h"
> +#include "log.h"
>   #include "meta_io.h"
>   #include "quota.h"
>   #include "rgrp.h"
> @@ -29,6 +30,8 @@
>   #include "util.h"
>   #include "trace_gfs2.h"
>   
> +#define MAX_BH_RGRP_MISSES 3
> +
>   /* This doesn't need to be that large as max 64 bit pointers in a 4k
>    * block is 512, so __u16 is fine for that. It saves stack space to
>    * keep it small.
> @@ -38,9 +41,13 @@ struct metapath {
>   	__u16 mp_list[GFS2_MAX_META_HEIGHT];
>   };
>   
> -struct strip_mine {
> -	int sm_first;
> -	unsigned int sm_height;
> +struct gfs2_buflist {
> +	struct list_head bl_list;
> +	struct buffer_head *bl_bh;
> +	sector_t bl_blocknr;
> +	int bl_h;
> +	unsigned int bl_metablocks;
> +	bool first;
>   };
>   
>   /**
> @@ -701,252 +708,358 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
>   	return ret;
>   }
>   
> -/**
> - * do_strip - Look for a layer a particular layer of the file and strip it off
> - * @ip: the inode
> - * @dibh: the dinode buffer
> - * @bh: A buffer of pointers
> - * @top: The first pointer in the buffer
> - * @bottom: One more than the last pointer
> - * @height: the height this buffer is at
> - * @sm: a pointer to a struct strip_mine
> +static inline void gfs2_sub_inode_blocks(struct inode *inode, s64 change)
> +{
> +	gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks > -change));
> +	change *= (GFS2_SB(inode)->sd_sb.sb_bsize/GFS2_BASIC_BLOCK);
> +	inode->i_blocks -= change;
> +}
> +
See gfs2_add_inode_blocks(); there is no need for two functions.

> +/*
> + * find_buflist_rgrp - find any rgrp referenced by the buflist's buffer
>    *
> - * Returns: errno
> + * Returns: any rgrp referenced by the buflist
>    */
> -
> -static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
> -		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
> -		    unsigned int height, struct strip_mine *sm)
> +static struct gfs2_rgrpd *find_buflist_rgrp(struct gfs2_inode *ip,
> +					    struct metapath *mp,
> +					    struct gfs2_buflist *cur)
>   {
>   	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> -	struct gfs2_rgrp_list rlist;
> -	struct gfs2_trans *tr;
> -	u64 bn, bstart;
> -	u32 blen, btotal;
> -	__be64 *p;
> -	unsigned int rg_blocks = 0;
> -	int metadata;
> -	unsigned int revokes = 0;
> -	int x;
> -	int error;
> -	int jblocks_rqsted;
> -
> -	error = gfs2_rindex_update(sdp);
> -	if (error)
> -		return error;
> -
> -	if (!*top)
> -		sm->sm_first = 0;
> -
> -	if (height != sm->sm_height)
> -		return 0;
> +	struct gfs2_rgrpd *rgd;
> +	__be64 *p, *top, *bottom;
> +	u64 bn;
>   
> -	if (sm->sm_first) {
> -		top++;
> -		sm->sm_first = 0;
> +	if (!cur->bl_h) {
> +		top = (__be64 *)(cur->bl_bh->b_data + sizeof(struct gfs2_dinode));
> +		bottom = top + sdp->sd_diptrs;
> +		top += mp->mp_list[0];
> +	} else {
> +		top = (__be64 *)(cur->bl_bh->b_data + sizeof(struct gfs2_meta_header));
> +		bottom = top + sdp->sd_inptrs;
> +		if (cur->first)
> +			top += mp->mp_list[cur->bl_h];
>   	}
> -
> -	metadata = (height != ip->i_height - 1);
> -	if (metadata)
> -		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
> -	else if (ip->i_depth)
> -		revokes = sdp->sd_inptrs;
> -
> -	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
> -	bstart = 0;
> -	blen = 0;
> -
>   	for (p = top; p < bottom; p++) {
>   		if (!*p)
>   			continue;
> -
>   		bn = be64_to_cpu(*p);
> -
> -		if (bstart + blen == bn)
> -			blen++;
> -		else {
> -			if (bstart)
> -				gfs2_rlist_add(ip, &rlist, bstart);
> -
> -			bstart = bn;
> -			blen = 1;
> -		}
> +		rgd = gfs2_blk2rgrpd(sdp, bn, false);
> +		return rgd;
>   	}
> +	return NULL;
> +}
>   
> -	if (bstart)
> -		gfs2_rlist_add(ip, &rlist, bstart);
> -	else
> -		goto out; /* Nothing to do */
> -
> -	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
> -
> -	for (x = 0; x < rlist.rl_rgrps; x++) {
> -		struct gfs2_rgrpd *rgd;
> -		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
> -		rg_blocks += rgd->rd_length;
> -	}
> +/*
> + * free_all_for_rgrp - run the buffer list and free all blocks inside the rgrp
> + *
> + * Returns: the number of blocks found within the "fbh" buffer that are still
> + *          outside the given rgrp
> + */
> +static int free_all_for_rgrp(struct gfs2_inode *ip, struct metapath *mp,
> +			     int del_all, struct list_head *buflist,
> +			     struct gfs2_rgrpd *rgd, u32 *btotal,
> +			     int jblocks_rqsted, int *buflist_frees)
> +{
> +	struct gfs2_buflist *cur, *next;
> +	struct buffer_head *bh;
> +	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> +	int blks_outside_rgrp;
> +	__be64 *p, *top, *bottom;
> +	int meta, metacount = RES_DINODE + RES_INDIRECT;
> +	u64 bn, bstart = 0;
> +	u32 blen = 0;
> +	bool bh_added_to_trans = false;
> +	int miss_count = 0;
> +
> +	/* Process and free all buffers corresponding to this rgrp. */
> +	list_for_each_entry_safe(cur, next, buflist, bl_list) {
> +		bh = cur->bl_bh;
> +		bh_added_to_trans = false;
> +		if (!cur->bl_h) {
> +			top = (__be64 *)(bh->b_data +
> +					 sizeof(struct gfs2_dinode));
> +			bottom = top + sdp->sd_diptrs;
> +			top += mp->mp_list[0];
> +		} else {
> +			top = (__be64 *)(bh->b_data +
> +					 sizeof(struct gfs2_meta_header));
> +			bottom = top + sdp->sd_inptrs;
> +			if (cur->first)
> +				top += mp->mp_list[cur->bl_h];
> +		}
> +		meta = (cur->bl_h != ip->i_height - 1);
> +		bstart = 0;
> +		blen = 0;
> +		blks_outside_rgrp = 0;
>   
> -	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
> -	if (error)
> -		goto out_rlist;
> -
> -	if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
> -		gfs2_rs_deltree(&ip->i_res);
> -
> -restart:
> -	jblocks_rqsted = rg_blocks + RES_DINODE +
> -		RES_INDIRECT + RES_STATFS + RES_QUOTA +
> -		gfs2_struct2blk(sdp, revokes, sizeof(u64));
> -	if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
> -		jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
> -	error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
> -	if (error)
> -		goto out_rg_gunlock;
> +		for (p = top; p < bottom; p++) {
> +			if (!*p)
> +				continue;
>   
> -	tr = current->journal_info;
> -	down_write(&ip->i_rw_mutex);
> +			bn = be64_to_cpu(*p);
>   
> -	gfs2_trans_add_meta(ip->i_gl, dibh);
> -	gfs2_trans_add_meta(ip->i_gl, bh);
> +			if (!rgrp_contains_block(rgd, bn)) {
> +				blks_outside_rgrp++;
> +				continue;
> +			}
>   
> -	bstart = 0;
> -	blen = 0;
> -	btotal = 0;
> +			if (!bh_added_to_trans) {
> +				metacount++;
> +				/* If too many blocks for the transaction,
> +				   return -EAGAIN to make it end the current
> +				   transaction and start a new one. */
> +				if (metacount >= jblocks_rqsted)
> +					return -EAGAIN;
>   
> -	for (p = top; p < bottom; p++) {
> -		if (!*p)
> -			continue;
> +				gfs2_trans_add_meta(ip->i_gl, bh);
> +				bh_added_to_trans = true;
> +			}
> +			*p = 0;
>   
> -		/* check for max reasonable journal transaction blocks */
> -		if (tr->tr_num_buf_new + RES_STATFS +
> -		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
> -			if (rg_blocks >= tr->tr_num_buf_new)
> -				rg_blocks -= tr->tr_num_buf_new;
> -			else
> -				rg_blocks = 0;
> -			break;
> -		}
> +			if (meta)
> +				cur->bl_metablocks--;
>   
> -		bn = be64_to_cpu(*p);
> +			if (bstart + blen == bn) {
> +				blen++;
> +				continue;
> +			}
>   
> -		if (bstart + blen == bn)
> -			blen++;
> -		else {
>   			if (bstart) {
> -				__gfs2_free_blocks(ip, bstart, blen, metadata);
> -				btotal += blen;
> +				/* NOTE: It's important that we send "false"
> +				   to function __gfs2_free_blocks. If we send
> +				   the true meta status, it would call
> +				   meta_wipe, which will try to re-read the
> +				   metadata buffer, and result in !uptodate,
> +				   which manifests as an io error. */
That doesn't sound correct. How are you removing these blocks from the 
journal, assuming that these really are meta blocks?

> +				__gfs2_free_blocks(ip, bstart, blen, false);
> +				gfs2_sub_inode_blocks(&ip->i_inode, blen);
> +				(*btotal) += blen;
>   			}
>   
>   			bstart = bn;
>   			blen = 1;
>   		}
> -
> -		*p = 0;
> -		gfs2_add_inode_blocks(&ip->i_inode, -1);
> -	}
> -	if (p == bottom)
> -		rg_blocks = 0;
> -
> -	if (bstart) {
> -		__gfs2_free_blocks(ip, bstart, blen, metadata);
> -		btotal += blen;
> +		if (bstart) {
> +			__gfs2_free_blocks(ip, bstart, blen, false);
> +			gfs2_sub_inode_blocks(&ip->i_inode, blen);
> +			(*btotal) += blen;
> +		}
> +		if (blks_outside_rgrp == 0) { /* Done with this buffer */
> +			if (cur->bl_h) /* Not the inode bh but all others */
> +				brelse(bh);
> +			list_del(&cur->bl_list);
> +			kfree(cur);
> +			(*buflist_frees)++;
> +			continue;
> +		}
> +		/*
> +		 * Optimization: if we've encountered X blocks that don't
> +		 * reference this rgrp at all, skip looking through the rest.
> +		 * If we missed a couple references, they'll be removed later
> +		 * at the expense of an extra transaction. But if we
> +		 * meticulously sweep all buffers for this rgrp, we may
> +		 * waste tons of time checking hundreds of thousands of buffers
> +		 * for every rgrp referenced: a huge time waster. The time to
> +		 * delete a 500GB file goes from 2m35s to 26s.
> +		 */
> +		if (!bh_added_to_trans) { /* no hits? */
> +			miss_count++;
> +			if (miss_count >= MAX_BH_RGRP_MISSES)
> +				break;
> +		}
>   	}
> -
> -	gfs2_statfs_change(sdp, 0, +btotal, 0);
> -	gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
> -			  ip->i_inode.i_gid);
> -
> -	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
> -
> -	gfs2_dinode_out(ip, dibh->b_data);
> -
> -	up_write(&ip->i_rw_mutex);
> -
> -	gfs2_trans_end(sdp);
> -
> -	if (rg_blocks)
> -		goto restart;
> -
> -out_rg_gunlock:
> -	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
> -out_rlist:
> -	gfs2_rlist_free(&rlist);
> -out:
> -	return error;
> +	return 0;
>   }
>   
>   /**
> - * recursive_scan - recursively scan through the end of a file
> + * trunc_buflist - delete all of a file's metadata and data blocks
>    * @ip: the inode
> - * @dibh: the dinode buffer
> - * @mp: the path through the metadata to the point to start
> - * @height: the height the recursion is at
> - * @block: the indirect block to look at
> - * @first: 1 if this is the first block
> - * @sm: data opaque to this function to pass to @bc
> - *
> - * When this is first called @height and @block should be zero and
> - * @first should be 1.
> + * @mp: a metapath corresponding to the truncation point
> + * @del_all: true if we're supposed to delete everything, false for truncate
>    *
>    * Returns: errno
>    */
> -
> -static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
> -			  struct metapath *mp, unsigned int height,
> -			  u64 block, int first, struct strip_mine *sm)
> +static int trunc_buflist(struct gfs2_inode *ip, struct metapath *mp,
> +			 int del_all)
>   {
>   	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> -	struct buffer_head *bh = NULL;
> -	__be64 *top, *bottom;
> +	struct gfs2_buflist *cur, *next;
> +	struct buffer_head *bh, *dibh = NULL;
> +	struct gfs2_holder gh;
> +	struct gfs2_rgrpd *rgd;
> +	LIST_HEAD(buflist);
>   	u64 bn;
> -	int error;
> -	int mh_size = sizeof(struct gfs2_meta_header);
> +	u32 btotal = 0;
> +	__be64 *p, *top, *bottom;
> +	int error, prev_hgt = 0;
> +	int buflist_mallocs = 1, buflist_frees = 0;
> +	int jblocks_rqsted, max_metablks;
> +
> +	cur = kmalloc(sizeof(struct gfs2_buflist), GFP_NOFS);
> +	if (!cur)
> +		return -ENOMEM;
>   
> -	if (!height) {
> -		error = gfs2_meta_inode_buffer(ip, &bh);
> -		if (error)
> -			return error;
> -		dibh = bh;
> +	/* build the list of metadata buffers, and rgrp list and count
> +	   the revokes. */
> +	error = gfs2_meta_inode_buffer(ip, &dibh);
> +	if (error)
> +		return -ENOMEM;
>   
> -		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
> -		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
> -	} else {
> -		error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
> -		if (error)
> -			return error;
> +	cur->bl_bh = dibh;
> +	cur->bl_h = 0;
> +	cur->bl_metablocks = 1;
> +	cur->bl_blocknr = ip->i_no_addr;
> +	list_add_tail(&cur->bl_list, &buflist);
> +
> +	list_for_each_entry(cur, &buflist, bl_list) {
> +		bh = cur->bl_bh;
> +		if (cur->bl_h == 0) {
> +			top = (__be64 *)(bh->b_data +
> +					 sizeof(struct gfs2_dinode));
> +			bottom = (__be64 *)top + sdp->sd_diptrs;
> +			top += mp->mp_list[0];
> +		} else {
> +			top = (__be64 *)(bh->b_data +
> +					 sizeof(struct gfs2_meta_header));
> +			bottom = top + sdp->sd_inptrs;
> +			/* If we changed heights, we must start where the
> +			   metapath tells us to. */
> +			if (cur->bl_h > prev_hgt) {
> +				cur->first = true;
> +				prev_hgt = cur->bl_h;
> +				top += mp->mp_list[cur->bl_h];
> +			}
> +		}
> +		if (cur->bl_h >= ip->i_height - 1)
> +			continue; /* skip data blocks */
>   
> -		top = (__be64 *)(bh->b_data + mh_size) +
> -				  (first ? mp->mp_list[height] : 0);
> +		gfs2_metapath_ra(ip->i_gl, bh, top);
> +		for (p = top; p < bottom; p++) {
> +			if (!*p)
> +				continue;
>   
> -		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
> -	}
> +			bn = be64_to_cpu(*p);
> +			cur->bl_metablocks++;
> +			/* This is a metadata block, so we have to queue it
> +			   up to have its indirect blocks processed as well. */
> +			next = kmalloc(sizeof(struct gfs2_buflist), GFP_NOFS);
We really don't want to be creating long lists of buffers to free. Just 
use the indirect buffers to keep the list. I'm not quite sure how you 
are updating the indirect buffers during the deletion.

> +			if (!next) {
> +				error = -ENOMEM;
> +				goto out;
> +			}
>   
> -	error = do_strip(ip, dibh, bh, top, bottom, height, sm);
> -	if (error)
> -		goto out;
> +			buflist_mallocs++;
> +			next->bl_h = cur->bl_h + 1;
> +			next->bl_metablocks = 1;
> +			next->first = false;
> +			next->bl_blocknr = bn;
> +			error = gfs2_meta_indirect_buffer(ip, next->bl_h, bn,
> +							  &next->bl_bh);
> +			if (error)
> +				goto out;
>   
> -	if (height < ip->i_height - 1) {
> +			BUG_ON(!buffer_uptodate(next->bl_bh));
> +			list_add_tail(&next->bl_list, &buflist);
> +		}
> +	}
>   
> -		gfs2_metapath_ra(ip->i_gl, bh, top);
> +	down_write(&ip->i_rw_mutex);
> +	prev_hgt = -1;
>   
> -		for (; top < bottom; top++, first = 0) {
> -			if (!*top)
> -				continue;
> +	while (!error && !list_empty(&buflist)) {
> +		cur = list_entry(buflist.next, struct gfs2_buflist, bl_list);
>   
> -			bn = be64_to_cpu(*top);
> +		rgd = find_buflist_rgrp(ip, mp, cur);
> +		if (IS_ERR(rgd)) {
> +			error = PTR_ERR(rgd);
> +			goto out;
> +		}
>   
> -			error = recursive_scan(ip, dibh, mp, height + 1, bn,
> -					       first, sm);
> -			if (error)
> -				break;
> +		BUG_ON(rgd == NULL);
> +		error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
> +					   &gh);
> +		if (error)
> +			goto out;
> +
> +		/* needs to be done with the proper rgrp glock held: */
> +		if (rgd == ip->i_rgd && gfs2_rs_active(&ip->i_res))
> +			gfs2_rs_deltree(&ip->i_res);
> +
> +		/* Calculate the maximum number of metadata blocks we can
> +		   add to the log. Every metablock potentially could be
> +		   revoked. At this point we know we're working with one
> +		   single rgrp, but there may be tens of thousands of
> +		   buffers referencing blocks therein. There's no good way
> +		   to know without traversing them. The most we could have
> +		   is the number of blocks in the rgrp, ri_data. It seems
> +		   wrong to request the max in the rgrp. It also seems wrong
> +		   to request the most we can fit in a transaction. For now,
> +		   let's estimate it to be: the number of metablocks
> +		   referenced by _this_ buffer TIMES the number of buffers.
> +		   If we guess too many, it will be curtailed by the max.
> +		   If we guess too few, it will be broken into smaller
> +		   transactions regardless. */
> +		jblocks_rqsted = rgd->rd_length +
> +			(cur->bl_metablocks * buflist_mallocs) +
> +			RES_DINODE + RES_INDIRECT +
> +			gfs2_struct2blk(sdp, cur->bl_metablocks, sizeof(u64));
> +		if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2)) {
> +			max_metablks = atomic_read(&sdp->sd_log_thresh2) -
> +				(RES_DINODE + RES_INDIRECT);
> +			max_metablks -= gfs2_struct2blk(sdp, max_metablks,
> +							sizeof(u64));
> +
> +			jblocks_rqsted = max_metablks;
>   		}
> +		error = gfs2_trans_begin(sdp, jblocks_rqsted, jblocks_rqsted);
> +		if (error) {
> +			gfs2_glock_dq_uninit(&gh);
> +			goto out;
> +		}
> +
> +		error = free_all_for_rgrp(ip, mp, del_all, &buflist, rgd,
> +					  &btotal, jblocks_rqsted,
> +					  &buflist_frees);
> +		if (error == -EAGAIN)
> +			error = 0;
> +
> +		gfs2_trans_end(sdp);
> +
> +		gfs2_glock_dq_uninit(&gh);
> +	}
> +
> +	if (btotal) {
> +		error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
> +					 RES_QUOTA, 0);
> +		if (error)
> +			goto out;
> +		gfs2_statfs_change(sdp, 0, +btotal, 0);
> +		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
> +				  ip->i_inode.i_gid);
> +		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
> +		gfs2_trans_add_meta(ip->i_gl, dibh);
> +		gfs2_dinode_out(ip, dibh->b_data);
> +		brelse(dibh);
>   	}
> +
>   out:
> -	brelse(bh);
> +	if (current->journal_info)
> +		gfs2_trans_end(sdp);
> +
> +	up_write(&ip->i_rw_mutex);
> +
> +	/* In the normal non-error code path, the list is already empty. */
> +	while (!list_empty(&buflist)) {
> +		cur = list_entry(buflist.next, struct gfs2_buflist, bl_list);
> +		list_del(&cur->bl_list);
> +		brelse(cur->bl_bh);
> +		kfree(cur);
> +		buflist_frees++;
> +	}
> +	BUG_ON(buflist_mallocs != buflist_frees);
>   	return error;
>   }
>   
> -
>   /**
>    * gfs2_block_truncate_page - Deal with zeroing out data for truncate
>    *
> @@ -1101,6 +1214,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
>   
>   out_brelse:
>   	brelse(dibh);
> +
>   out:
>   	gfs2_trans_end(sdp);
>   	return error;
> @@ -1109,7 +1223,6 @@ out:
>   static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
>   {
>   	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> -	unsigned int height = ip->i_height;
>   	u64 lblock;
>   	struct metapath mp;
>   	int error;
> @@ -1120,6 +1233,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
>   		lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
>   
>   	find_metapath(sdp, lblock, &mp, ip->i_height);
> +
>   	error = gfs2_rindex_update(sdp);
>   	if (error)
>   		return error;
> @@ -1128,16 +1242,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
>   	if (error)
>   		return error;
>   
> -	while (height--) {
> -		struct strip_mine sm;
> -		sm.sm_first = !!size;
> -		sm.sm_height = height;
> -
> -		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
> -		if (error)
> -			break;
> -	}
> -
> +	error = trunc_buflist(ip, &mp, size == 0);
>   	gfs2_quota_unhold(ip);
>   
>   	return error;
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 86ccc015..83c9909 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -483,13 +483,6 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
>   	}
>   }
>   
> -static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
> -{
> -	u64 first = rgd->rd_data0;
> -	u64 last = first + rgd->rd_data;
> -	return first <= block && block < last;
> -}
> -
>   /**
>    * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
>    * @sdp: The GFS2 superblock
> diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
> index 66b51cf..e90478e 100644
> --- a/fs/gfs2/rgrp.h
> +++ b/fs/gfs2/rgrp.h
> @@ -83,5 +83,12 @@ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
>   	return rs && !RB_EMPTY_NODE(&rs->rs_node);
>   }
>   
> +static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
> +{
> +	u64 first = rgd->rd_data0;
> +	u64 last = first + rgd->rd_data;
> +	return first <= block && block < last;
> +}
> +
>   extern void check_and_update_goal(struct gfs2_inode *ip);
>   #endif /* __RGRP_DOT_H__ */
>