[Cluster-devel] [GFS2 PATCH 3/3] GFS2: Reduce contention on gfs2_log_lock

Steven Whitehouse swhiteho at redhat.com
Thu Jan 26 11:39:51 UTC 2017


Hi,


On 25/01/17 19:58, Bob Peterson wrote:
> This patch reworks some of the log locks to provide less contention.
This is a very complicated patch, with not a lot of explanation (I know 
there is more detail on the cover letter, but it should be on the patch 
itself, since the cover letter will not go into the git tree). I'm not 
sure I understand the reason for switching the locking order here. The 
original intent was simply to avoid taking the log lock in cases where 
the bh was already in the log.

I would expect gfs2_trans_add_meta to land up looking something like this:

     lock_buffer(bh);
     if (buffer_pinned(bh)) {
         /* If the buffer is pinned, then we know it must have a bd 
attached and be in the log already */
         set_bit(TR_TOUCHED, &tr->tr_flags);
         goto out;
     }
     gfs2_log_lock(sdp);

     .... rest of function unchanged here

     gfs2_log_unlock(sdp);
out:
     unlock_buffer(bh);
}

That is unless the test in meta_lo_add() "if 
(!list_empty(&bd->bd_list))" is not equivalent to testing 
buffer_pinned() for some reason, but even if that is the case, all we 
need to do is find a better test than "if (buffer_pinned(bh))" I think, 
which at worst case might mean adding an additional flag to the bh which 
would follow the on/off the bd->bd_list state,

Steve.

> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
>   fs/gfs2/aops.c    | 23 ++++++-------------
>   fs/gfs2/glops.c   |  4 ++--
>   fs/gfs2/log.c     |  6 ++---
>   fs/gfs2/lops.c    |  3 ++-
>   fs/gfs2/meta_io.c | 11 +++++----
>   fs/gfs2/trans.c   | 67 +++++++++++++++++++++++++------------------------------
>   6 files changed, 51 insertions(+), 63 deletions(-)
>
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index 5a6f52e..d3cd30d 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -986,23 +986,13 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
>   
>   static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
>   {
> -	struct gfs2_bufdata *bd;
> -
>   	lock_buffer(bh);
> -	gfs2_log_lock(sdp);
>   	clear_buffer_dirty(bh);
> -	bd = bh->b_private;
> -	if (bd) {
> -		if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
> -			list_del_init(&bd->bd_list);
> -		else
> -			gfs2_remove_from_journal(bh, REMOVE_JDATA);
> -	}
> +	gfs2_remove_from_journal(bh, REMOVE_JDATA);
>   	bh->b_bdev = NULL;
>   	clear_buffer_mapped(bh);
>   	clear_buffer_req(bh);
>   	clear_buffer_new(bh);
> -	gfs2_log_unlock(sdp);
>   	unlock_buffer(bh);
>   }
>   
> @@ -1157,7 +1147,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
>   	 * on dirty buffers like we used to here again.
>   	 */
>   
> -	gfs2_log_lock(sdp);
>   	spin_lock(&sdp->sd_ail_lock);
>   	head = bh = page_buffers(page);
>   	do {
> @@ -1174,25 +1163,27 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
>   
>   	head = bh = page_buffers(page);
>   	do {
> +		lock_buffer(bh);
>   		bd = bh->b_private;
>   		if (bd) {
>   			gfs2_assert_warn(sdp, bd->bd_bh == bh);
> +			gfs2_log_lock(sdp);
>   			if (!list_empty(&bd->bd_list))
>   				list_del_init(&bd->bd_list);
> +			gfs2_log_unlock(sdp);
>   			bd->bd_bh = NULL;
>   			bh->b_private = NULL;
> -			kmem_cache_free(gfs2_bufdata_cachep, bd);
>   		}
> -
> +		unlock_buffer(bh);
> +		if (bd)
> +			kmem_cache_free(gfs2_bufdata_cachep, bd);
>   		bh = bh->b_this_page;
>   	} while (bh != head);
> -	gfs2_log_unlock(sdp);
>   
>   	return try_to_free_buffers(page);
>   
>   cannot_release:
>   	spin_unlock(&sdp->sd_ail_lock);
> -	gfs2_log_unlock(sdp);
>   	return 0;
>   }
>   
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index 5db59d4..fced99b 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -61,7 +61,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
>   	struct buffer_head *bh;
>   	const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
>   
> -	gfs2_log_lock(sdp);
>   	spin_lock(&sdp->sd_ail_lock);
>   	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
>   		if (nr_revokes == 0)
> @@ -72,12 +71,13 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
>   				continue;
>   			gfs2_ail_error(gl, bh);
>   		}
> +		gfs2_log_lock(sdp);
>   		gfs2_trans_add_revoke(sdp, bd);
> +		gfs2_log_unlock(sdp);
>   		nr_revokes--;
>   	}
>   	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
>   	spin_unlock(&sdp->sd_ail_lock);
> -	gfs2_log_unlock(sdp);
>   }
>   
>   
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 4fb76c0..4d5f3a1 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -624,8 +624,8 @@ done:
>   		if (!sdp->sd_log_blks_reserved)
>   			atomic_dec(&sdp->sd_log_blks_free);
>   	}
> -	gfs2_log_lock(sdp);
>   	spin_lock(&sdp->sd_ail_lock);
> +	gfs2_log_lock(sdp);
>   	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
>   		list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
>   			if (max_revokes == 0)
> @@ -637,8 +637,8 @@ done:
>   		}
>   	}
>   out_of_blocks:
> -	spin_unlock(&sdp->sd_ail_lock);
>   	gfs2_log_unlock(sdp);
> +	spin_unlock(&sdp->sd_ail_lock);
>   
>   	if (!sdp->sd_log_num_revoke) {
>   		atomic_inc(&sdp->sd_log_blks_free);
> @@ -756,6 +756,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
>   	sdp->sd_log_head = sdp->sd_log_flush_head;
>   	sdp->sd_log_blks_reserved = 0;
>   	sdp->sd_log_commited_revoke = 0;
> +	gfs2_log_unlock(sdp);
>   
>   	spin_lock(&sdp->sd_ail_lock);
>   	if (tr && !list_empty(&tr->tr_ail1_list)) {
> @@ -763,7 +764,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
>   		tr = NULL;
>   	}
>   	spin_unlock(&sdp->sd_ail_lock);
> -	gfs2_log_unlock(sdp);
>   
>   	if (type != NORMAL_FLUSH) {
>   		if (!sdp->sd_log_idle) {
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 49d5a1b..d6a3bdb 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -98,12 +98,13 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
>   static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
>   		       struct gfs2_trans *tr)
>   {
> -	struct gfs2_bufdata *bd = bh->b_private;
> +	struct gfs2_bufdata *bd;
>   
>   	BUG_ON(!buffer_uptodate(bh));
>   	BUG_ON(!buffer_pinned(bh));
>   
>   	lock_buffer(bh);
> +	bd = bh->b_private;
>   	mark_buffer_dirty(bh);
>   	clear_buffer_pinned(bh);
>   
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index a88a347..d870b15 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -341,18 +341,24 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
>   	if (test_clear_buffer_pinned(bh)) {
>   		trace_gfs2_pin(bd, 0);
>   		atomic_dec(&sdp->sd_log_pinned);
> +		gfs2_log_lock(sdp);
>   		list_del_init(&bd->bd_list);
>   		if (meta == REMOVE_META)
>   			tr->tr_num_buf_rm++;
>   		else
>   			tr->tr_num_databuf_rm++;
>   		set_bit(TR_TOUCHED, &tr->tr_flags);
> +		gfs2_log_unlock(sdp);
>   		was_pinned = 1;
>   		brelse(bh);
>   	}
>   	if (bd) {
>   		spin_lock(&sdp->sd_ail_lock);
> -		if (bd->bd_tr) {
> +		if (!meta && !was_pinned && !list_empty(&bd->bd_list)) {
> +			gfs2_log_lock(sdp);
> +			list_del_init(&bd->bd_list);
> +			gfs2_log_unlock(sdp);
> +		} else if (bd->bd_tr) {
>   			gfs2_trans_add_revoke(sdp, bd);
>   		} else if (was_pinned) {
>   			bh->b_private = NULL;
> @@ -374,16 +380,13 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
>   
>   void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
>   {
> -	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
>   	struct buffer_head *bh;
>   
>   	while (blen) {
>   		bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
>   		if (bh) {
>   			lock_buffer(bh);
> -			gfs2_log_lock(sdp);
>   			gfs2_remove_from_journal(bh, REMOVE_META);
> -			gfs2_log_unlock(sdp);
>   			unlock_buffer(bh);
>   			brelse(bh);
>   		}
> diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
> index 483c5a7..f7addc5 100644
> --- a/fs/gfs2/trans.c
> +++ b/fs/gfs2/trans.c
> @@ -123,18 +123,37 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
>   		sb_end_intwrite(sdp->sd_vfs);
>   }
>   
> -static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
> -					       struct buffer_head *bh,
> -					       const struct gfs2_log_operations *lops)
> +/* lock_bh_get_bd - lock buffer_head and return its bd
> + *
> + * This function locks the buffer_head and returns its bd.
> + * If there's no bd, a new one is created, protected by the bh lock.
> + */
> +static struct gfs2_bufdata *lock_bh_get_bd(struct gfs2_glock *gl,
> +					   struct buffer_head *bh,
> +					   const struct gfs2_log_operations *lops)
>   {
>   	struct gfs2_bufdata *bd;
>   
> +	lock_buffer(bh);
> +	bd = bh->b_private;
> +	if (bd)
> +		goto out;
> +
> +	unlock_buffer(bh);
>   	bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
>   	bd->bd_bh = bh;
>   	bd->bd_gl = gl;
>   	bd->bd_ops = lops;
>   	INIT_LIST_HEAD(&bd->bd_list);
> +	lock_buffer(bh);
> +	if (bh->b_private) { /* someone prempted us */
> +		kmem_cache_free(gfs2_bufdata_cachep, bd);
> +		bd = bh->b_private;
> +		goto out;
> +	}
> +	gfs2_assert(gl->gl_name.ln_sbd, bd->bd_gl == gl);
>   	bh->b_private = bd;
> +out:
>   	return bd;
>   }
>   
> @@ -169,29 +188,17 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
>   		return;
>   	}
>   
> -	lock_buffer(bh);
> -	gfs2_log_lock(sdp);
> -	bd = bh->b_private;
> -	if (bd == NULL) {
> -		gfs2_log_unlock(sdp);
> -		unlock_buffer(bh);
> -		if (bh->b_private == NULL)
> -			bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
> -		else
> -			bd = bh->b_private;
> -		lock_buffer(bh);
> -		gfs2_log_lock(sdp);
> -	}
> -	gfs2_assert(sdp, bd->bd_gl == gl);
> +	bd = lock_bh_get_bd(gl, bh, &gfs2_databuf_lops);
>   	set_bit(TR_TOUCHED, &tr->tr_flags);
>   	if (list_empty(&bd->bd_list)) {
> +		gfs2_pin(sdp, bd->bd_bh);
>   		set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
>   		set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
> -		gfs2_pin(sdp, bd->bd_bh);
>   		tr->tr_num_databuf_new++;
> +		gfs2_log_lock(sdp);
>   		list_add_tail(&bd->bd_list, &tr->tr_databuf);
> +		gfs2_log_unlock(sdp);
>   	}
> -	gfs2_log_unlock(sdp);
>   	unlock_buffer(bh);
>   }
>   
> @@ -204,26 +211,12 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
>   	struct gfs2_trans *tr;
>   	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
>   
> -	lock_buffer(bh);
> -	gfs2_log_lock(sdp);
> -	bd = bh->b_private;
> -	if (bd == NULL) {
> -		gfs2_log_unlock(sdp);
> -		unlock_buffer(bh);
> -		lock_page(bh->b_page);
> -		if (bh->b_private == NULL)
> -			bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
> -		else
> -			bd = bh->b_private;
> -		unlock_page(bh->b_page);
> -		lock_buffer(bh);
> -		gfs2_log_lock(sdp);
> -	}
> -	gfs2_assert(sdp, bd->bd_gl == gl);
> +	bd = lock_bh_get_bd(gl, bh, &gfs2_buf_lops);
>   	tr = current->journal_info;
>   	set_bit(TR_TOUCHED, &tr->tr_flags);
>   	if (!list_empty(&bd->bd_list))
>   		goto out_unlock;
> +	gfs2_pin(sdp, bd->bd_bh);
>   	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
>   	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
>   	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
> @@ -236,13 +229,13 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
>   		printk(KERN_INFO "GFS2:adding buf while frozen\n");
>   		gfs2_assert_withdraw(sdp, 0);
>   	}
> -	gfs2_pin(sdp, bd->bd_bh);
>   	mh->__pad0 = cpu_to_be64(0);
>   	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
> +	gfs2_log_lock(sdp);
>   	list_add(&bd->bd_list, &tr->tr_buf);
> +	gfs2_log_unlock(sdp);
>   	tr->tr_num_buf_new++;
>   out_unlock:
> -	gfs2_log_unlock(sdp);
>   	unlock_buffer(bh);
>   }
>   




More information about the Cluster-devel mailing list