[Cluster-devel] [GFS2 PATCH] GFS2: Eliminate bitmap clones

Bob Peterson rpeterso at redhat.com
Mon Jul 2 17:58:04 UTC 2018


Hi,

Do we really still need "clone bitmaps" in gfs2? If so, why?
I think maybe we can get rid of them. Can someone (Steve Whitehouse
perhaps?) think of a scenario in which they're still needed? If so,
please elaborate and give an example.

Regards,

Bob Peterson
---
Before this patch, gfs2 kept track of "clone" bitmaps. The theory
is that while a file is kept open on one node, if it's unlinked
on another node, no other node should be able to re-use the blocks
assigned to that file until the last guy out closes the file.
So all bitmap "sets" are maintained in the normal bitmap and the
clone bitmap, but bitmap "clears" are maintained only in the real
bitmap. That way those blocks cannot be found in the clone and
reassigned while the file is open.

However, that should all be unnecessary. If a file is unlinked,
its blocks will remain in the "data" state until it is transitioned
from unlinked to free, and its inode is evicted from the file system.
At that time, the blocks are actually deleted and the inode is
changed from unlinked to free. So in theory, the blocks won't be
free until they're truly free, and therefore nobody can reassign them
anyway.

So this patch eliminates the whole effort of maintaining clone
bitmaps.

Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
 fs/gfs2/glops.c      |  6 ------
 fs/gfs2/incore.h     |  2 --
 fs/gfs2/lops.c       |  5 -----
 fs/gfs2/rgrp.c       | 55 +++++++++++-----------------------------------------
 fs/gfs2/rgrp.h       |  1 -
 fs/gfs2/trace_gfs2.h | 12 ++++++------
 6 files changed, 17 insertions(+), 64 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index c63bee9adb6a..5ee8ec6663b0 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -165,12 +165,6 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
 	error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 	mapping_set_error(mapping, error);
 	gfs2_ail_empty_gl(gl);
-
-	spin_lock(&gl->gl_lockref.lock);
-	rgd = gl->gl_object;
-	if (rgd)
-		gfs2_free_clones(rgd);
-	spin_unlock(&gl->gl_lockref.lock);
 }
 
 /**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b50908211b69..eb5fdce85e82 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -67,7 +67,6 @@ struct gfs2_log_operations {
 
 struct gfs2_bitmap {
 	struct buffer_head *bi_bh;
-	char *bi_clone;
 	unsigned long bi_flags;
 	u32 bi_offset;
 	u32 bi_start;
@@ -85,7 +84,6 @@ struct gfs2_rgrpd {
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	u32 rd_free;
 	u32 rd_reserved;                /* number of blocks reserved */
-	u32 rd_free_clone;
 	u32 rd_dinodes;
 	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f2567f958d00..b20b4a6f2e9a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,14 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
 	struct gfs2_bitmap *bi = rgd->rd_bits + index;
 
-	if (bi->bi_clone == NULL)
-		return;
 	if (sdp->sd_args.ar_discard)
 		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
-	memcpy(bi->bi_clone + bi->bi_offset,
-	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
 	clear_bit(GBF_FULL, &bi->bi_flags);
-	rgd->rd_free_clone = rgd->rd_free;
 	rgd->rd_extfail_pt = rgd->rd_free;
 }
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2ce364a05f70..2bc8a74f7601 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -85,10 +85,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  *
  */
 
-static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
+static inline void gfs2_setbit(const struct gfs2_rbm *rbm,
 			       unsigned char new_state)
 {
-	unsigned char *byte1, *byte2, *end, cur_state;
+	unsigned char *byte1, *end, cur_state;
 	struct gfs2_bitmap *bi = rbm_bi(rbm);
 	unsigned int buflen = bi->bi_len;
 	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
@@ -112,12 +112,6 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
 		return;
 	}
 	*byte1 ^= (cur_state ^ new_state) << bit;
-
-	if (do_clone && bi->bi_clone) {
-		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
-		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
-		*byte2 ^= (cur_state ^ new_state) << bit;
-	}
 }
 
 /**
@@ -370,8 +364,6 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 	while (len > 3) {
 		bi = rbm_bi(&rbm);
 		start = bi->bi_bh->b_data;
-		if (bi->bi_clone)
-			start = bi->bi_clone;
 		start += bi->bi_offset;
 		end = start + bi->bi_len;
 		BUG_ON(rbm.offset & 3);
@@ -584,17 +576,6 @@ void check_and_update_goal(struct gfs2_inode *ip)
 		ip->i_goal = ip->i_no_addr;
 }
 
-void gfs2_free_clones(struct gfs2_rgrpd *rgd)
-{
-	int x;
-
-	for (x = 0; x < rgd->rd_length; x++) {
-		struct gfs2_bitmap *bi = rgd->rd_bits + x;
-		kfree(bi->bi_clone);
-		bi->bi_clone = NULL;
-	}
-}
-
 /**
  * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
  *                 plus a quota allocations data structure, if necessary
@@ -719,7 +700,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 			gfs2_glock_put(gl);
 		}
 
-		gfs2_free_clones(rgd);
 		kfree(rgd->rd_bits);
 		rgd->rd_bits = NULL;
 		return_all_reservations(rgd);
@@ -1179,7 +1159,6 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
-		rgd->rd_free_clone = rgd->rd_free;
 		/* max out the rgrp allocation failure point */
 		rgd->rd_extfail_pt = rgd->rd_free;
 	}
@@ -1204,7 +1183,6 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 		bi = rgd->rd_bits + x;
 		brelse(bi->bi_bh);
 		bi->bi_bh = NULL;
-		gfs2_assert_warn(sdp, !bi->bi_clone);
 	}
 
 	return error;
@@ -1227,7 +1205,6 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
 	if (rgd->rd_rgl->rl_unlinked == 0)
 		rgd->rd_flags &= ~GFS2_RDF_CHECK;
 	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
-	rgd->rd_free_clone = rgd->rd_free;
 	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
 	return 0;
@@ -1293,7 +1270,7 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 	u8 diff;
 
 	for (x = 0; x < bi->bi_len; x++) {
-		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
+		const u8 *clone = bi->bi_bh->b_data;
 		clone += bi->bi_offset;
 		clone += x;
 		if (bh) {
@@ -1508,8 +1485,8 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
 	BUG_ON(rgd->rd_reserved < rs->rs_free);
 	tot_reserved = rgd->rd_reserved - rs->rs_free;
 
-	BUG_ON(rgd->rd_free_clone < tot_reserved);
-	tot_free = rgd->rd_free_clone - tot_reserved;
+	BUG_ON(rgd->rd_free < tot_reserved);
+	tot_free = rgd->rd_free - tot_reserved;
 
 	return tot_free;
 }
@@ -1539,7 +1516,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
 		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
 	}
-	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
+	if ((rgd->rd_free < rgd->rd_reserved) || (free_blocks < extlen))
 		return;
 
 	/* Find bitmap block that contains bits for goal block */
@@ -1720,8 +1697,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 		bh = bi->bi_bh;
 		buffer = bh->b_data + bi->bi_offset;
 		WARN_ON(!buffer_uptodate(bh));
-		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
-			buffer = bi->bi_clone + bi->bi_offset;
 		initial_offset = rbm->offset;
 		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
 		if (offset == BFITNOENT)
@@ -2181,14 +2156,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 	*n = 1;
 	block = gfs2_rbm_to_block(rbm);
 	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
-	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+	gfs2_setbit(rbm, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	block++;
 	while (*n < elen) {
 		ret = gfs2_rbm_from_block(&pos, block);
 		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
 			break;
 		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
-		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
+		gfs2_setbit(&pos, GFS2_BLKST_USED);
 		(*n)++;
 		block++;
 	}
@@ -2221,17 +2196,10 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 	while (blen--) {
 		bi = rbm_bi(&rbm);
 		if (bi != bi_prev) {
-			if (!bi->bi_clone) {
-				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
-						      GFP_NOFS | __GFP_NOFAIL);
-				memcpy(bi->bi_clone + bi->bi_offset,
-				       bi->bi_bh->b_data + bi->bi_offset,
-				       bi->bi_len);
-			}
 			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
 			bi_prev = bi;
 		}
-		gfs2_setbit(&rbm, false, new_state);
+		gfs2_setbit(&rbm, new_state);
 		gfs2_rbm_incr(&rbm);
 	}
 
@@ -2253,9 +2221,9 @@ void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 
 	if (rgd == NULL)
 		return;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u i:%u r:%u e:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+		       rgd->rd_free, rgd->rd_dinodes,
 		       rgd->rd_reserved, rgd->rd_extfail_pt);
 	spin_lock(&rgd->rd_rsspin);
 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
@@ -2427,7 +2395,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
-	rbm.rgd->rd_free_clone -= *nblocks;
 	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	*bn = block;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index e90478e2f545..4528e33e9504 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -34,7 +34,6 @@ extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
 
 extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
 extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
-extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index e0025258107a..4729c4dfd656 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -558,7 +558,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		__field(	u32,	len			)
 		__field(	u8,	block_state		)
 		__field(        u64,	rd_addr			)
-		__field(        u32,	rd_free_clone		)
+		__field(        u32,	rd_free			)
 		__field(	u32,	rd_reserved		)
 	),
 
@@ -569,7 +569,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		__entry->len		= len;
 		__entry->block_state	= block_state;
 		__entry->rd_addr	= rgd->rd_addr;
-		__entry->rd_free_clone	= rgd->rd_free_clone;
+		__entry->rd_free	= rgd->rd_free;
 		__entry->rd_reserved	= rgd->rd_reserved;
 	),
 
@@ -580,7 +580,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		  (unsigned long)__entry->len,
 		  block_state_name(__entry->block_state),
 		  (unsigned long long)__entry->rd_addr,
-		  __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+		  __entry->rd_free, (unsigned long)__entry->rd_reserved)
 );
 
 /* Keep track of multi-block reservations as they are allocated/freed */
@@ -593,7 +593,7 @@ TRACE_EVENT(gfs2_rs,
 	TP_STRUCT__entry(
 		__field(        dev_t,  dev                     )
 		__field(	u64,	rd_addr			)
-		__field(	u32,	rd_free_clone		)
+		__field(	u32,	rd_free			)
 		__field(	u32,	rd_reserved		)
 		__field(	u64,	inum			)
 		__field(	u64,	start			)
@@ -604,7 +604,7 @@ TRACE_EVENT(gfs2_rs,
 	TP_fast_assign(
 		__entry->dev		= rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
 		__entry->rd_addr	= rs->rs_rbm.rgd->rd_addr;
-		__entry->rd_free_clone	= rs->rs_rbm.rgd->rd_free_clone;
+		__entry->rd_free	= rs->rs_rbm.rgd->rd_free;
 		__entry->rd_reserved	= rs->rs_rbm.rgd->rd_reserved;
 		__entry->inum		= container_of(rs, struct gfs2_inode,
 						       i_res)->i_no_addr;
@@ -618,7 +618,7 @@ TRACE_EVENT(gfs2_rs,
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->rd_addr,
-		  (unsigned long)__entry->rd_free_clone,
+		  (unsigned long)__entry->rd_free,
 		  (unsigned long)__entry->rd_reserved,
 		  rs_func_name(__entry->func), (unsigned long)__entry->free)
 );




More information about the Cluster-devel mailing list