[Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2

Steven Whitehouse swhiteho at redhat.com
Wed Jan 26 20:54:43 UTC 2011


Hi,

You shouldn't need to do the dual workqueue trick upstream, since the
workqueues will already start as many threads as required (which is even
better than just using the two here). If that isn't happening, we should
ask Tejun about it.

So I think we only need the minimum-hold-time part of this patch here,

Steve.

On Wed, 2011-01-26 at 15:22 -0500, Bob Peterson wrote:
> Hi,
> 
> This patch is a performance improvement for GFS2 in a clustered
> environment.  It makes the glock hold time self-adjusting.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems
> 
> Signed-off-by: Bob Peterson <rpeterso at redhat.com> 
> 
> Bouncing locks in a cluster is slow in GFS2
> --
>  fs/gfs2/glock.c  |   89 ++++++++++++++++++++++++++++++++++++++++--------------
>  fs/gfs2/glock.h  |    6 ++++
>  fs/gfs2/glops.c  |    2 -
>  fs/gfs2/incore.h |    2 +-
>  4 files changed, 73 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index c75d499..117d8e2 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -58,7 +58,6 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
>  static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
>  
>  static struct dentry *gfs2_root;
> -static struct workqueue_struct *glock_workqueue;
>  struct workqueue_struct *gfs2_delete_workqueue;
>  static LIST_HEAD(lru_list);
>  static atomic_t lru_count = ATOMIC_INIT(0);
> @@ -67,9 +66,23 @@ static DEFINE_SPINLOCK(lru_lock);
>  #define GFS2_GL_HASH_SHIFT      15
>  #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
>  #define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
> +#define GL_WORKQUEUES            0x2
> +#define GL_WQ_MASK               0x1
>  
>  static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
>  static struct dentry *gfs2_root;
> +static struct workqueue_struct *glock_workqueue[GL_WORKQUEUES];
> +
> +static inline int qwork(struct gfs2_glock *gl, unsigned long delay)
> +{
> +	struct workqueue_struct *wq;
> +
> +	wq = glock_workqueue[gl->gl_name.ln_type & GL_WQ_MASK];
> +
> +	if (gl->gl_name.ln_type != LM_TYPE_INODE)
> +		delay = 0;
> +	return queue_delayed_work(wq, &gl->gl_work, delay);
> +}
>  
>  /**
>   * gl_hash() - Turn glock number into hash bucket number
> @@ -407,6 +420,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
>  	if (held1 && held2 && list_empty(&gl->gl_holders))
>  		clear_bit(GLF_QUEUED, &gl->gl_flags);
>  
> +	if (new_state != gl->gl_target)
> +		/* shorten our minimum hold time */
> +		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
> +				       GL_GLOCK_MIN_HOLD);
>  	gl->gl_state = new_state;
>  	gl->gl_tchange = jiffies;
>  }
> @@ -550,7 +567,7 @@ __acquires(&gl->gl_spin)
>  		GLOCK_BUG_ON(gl, ret);
>  	} else { /* lock_nolock */
>  		finish_xmote(gl, target);
> -		if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +		if (qwork(gl, 0) == 0)
>  			gfs2_glock_put(gl);
>  	}
>  
> @@ -623,7 +640,7 @@ out_sched:
>  	clear_bit(GLF_LOCK, &gl->gl_flags);
>  	smp_mb__after_clear_bit();
>  	gfs2_glock_hold(gl);
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +	if (qwork(gl, 0) == 0)
>  		gfs2_glock_put_nolock(gl);
>  	return;
>  
> @@ -670,15 +687,14 @@ static void glock_work_func(struct work_struct *work)
>  	    gl->gl_state != LM_ST_UNLOCKED &&
>  	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
>  		unsigned long holdtime, now = jiffies;
> -		holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> +		holdtime = gl->gl_tchange + gl->gl_hold_time;
>  		if (time_before(now, holdtime))
>  			delay = holdtime - now;
>  		set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
>  	}
>  	run_queue(gl, 0);
>  	spin_unlock(&gl->gl_spin);
> -	if (!delay ||
> -	    queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> +	if (!delay || qwork(gl, delay) == 0)
>  		gfs2_glock_put(gl);
>  	if (drop_ref)
>  		gfs2_glock_put(gl);
> @@ -741,6 +757,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>  	gl->gl_tchange = jiffies;
>  	gl->gl_object = NULL;
>  	gl->gl_sbd = sdp;
> +	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
>  	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
>  	INIT_WORK(&gl->gl_delete, delete_work_func);
>  
> @@ -852,8 +869,15 @@ static int gfs2_glock_demote_wait(void *word)
>  
>  static void wait_on_holder(struct gfs2_holder *gh)
>  {
> +	unsigned long time1 = jiffies;
> +
>  	might_sleep();
>  	wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
> +	if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
> +		/* Lengthen the minimum hold time. */
> +		gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
> +					      GL_GLOCK_HOLD_INCR,
> +					      GL_GLOCK_MAX_HOLD);
>  }
>  
>  static void wait_on_demote(struct gfs2_glock *gl)
> @@ -1087,8 +1111,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
>  	gfs2_glock_hold(gl);
>  	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
>  	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
> -		delay = gl->gl_ops->go_min_hold_time;
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> +		delay = gl->gl_hold_time;
> +	if (qwork(gl, delay) == 0)
>  		gfs2_glock_put(gl);
>  }
>  
> @@ -1270,18 +1294,18 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
>  	unsigned long now = jiffies;
>  
>  	gfs2_glock_hold(gl);
> -	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> +	holdtime = gl->gl_tchange + gl->gl_hold_time;
>  	if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
>  		if (time_before(now, holdtime))
>  			delay = holdtime - now;
>  		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
> -			delay = gl->gl_ops->go_min_hold_time;
> +			delay = gl->gl_hold_time;
>  	}
>  
>  	spin_lock(&gl->gl_spin);
>  	handle_callback(gl, state, delay);
>  	spin_unlock(&gl->gl_spin);
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> +	if (qwork(gl, delay) == 0)
>  		gfs2_glock_put(gl);
>  }
>  
> @@ -1343,7 +1367,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
>  	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
>  	smp_wmb();
>  	gfs2_glock_hold(gl);
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +	if (qwork(gl, 0) == 0)
>  		gfs2_glock_put(gl);
>  }
>  
> @@ -1379,7 +1403,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
>  			}
>  			clear_bit(GLF_LOCK, &gl->gl_flags);
>  			smp_mb__after_clear_bit();
> -			if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +			if (qwork(gl, 0) == 0)
>  				gfs2_glock_put_nolock(gl);
>  			spin_unlock(&gl->gl_spin);
>  			spin_lock(&lru_lock);
> @@ -1447,7 +1471,7 @@ static void thaw_glock(struct gfs2_glock *gl)
>  		return;
>  	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
>  	gfs2_glock_hold(gl);
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +	if (qwork(gl, 0) == 0)
>  		gfs2_glock_put(gl);
>  }
>  
> @@ -1471,7 +1495,7 @@ static void clear_glock(struct gfs2_glock *gl)
>  		handle_callback(gl, LM_ST_UNLOCKED, 0);
>  	spin_unlock(&gl->gl_spin);
>  	gfs2_glock_hold(gl);
> -	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
> +	if (qwork(gl, 0) == 0)
>  		gfs2_glock_put(gl);
>  }
>  
> @@ -1510,8 +1534,11 @@ static void dump_glock_func(struct gfs2_glock *gl)
>  
>  void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
>  {
> +	unsigned int x;
> +
>  	glock_hash_walk(clear_glock, sdp);
> -	flush_workqueue(glock_workqueue);
> +	for (x = 0; x < GL_WORKQUEUES; x++)
> +		flush_workqueue(glock_workqueue[x]);
>  	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
>  	glock_hash_walk(dump_glock_func, sdp);
>  }
> @@ -1658,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
>  	dtime *= 1000000/HZ; /* demote time in uSec */
>  	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
>  		dtime = 0;
> -	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
> +	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n",
>  		  state2str(gl->gl_state),
>  		  gl->gl_name.ln_type,
>  		  (unsigned long long)gl->gl_name.ln_number,
> @@ -1666,7 +1693,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
>  		  state2str(gl->gl_target),
>  		  state2str(gl->gl_demote_state), dtime,
>  		  atomic_read(&gl->gl_ail_count),
> -		  atomic_read(&gl->gl_ref));
> +		  atomic_read(&gl->gl_ref), gl->gl_hold_time);
>  
>  	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
>  		error = dump_holder(seq, gh);
> @@ -1685,19 +1712,32 @@ out:
>  int __init gfs2_glock_init(void)
>  {
>  	unsigned i;
> +	char qn[32];
> +
>  	for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
>  		INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
>  	}
>  
> -	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
> +	for (i = 0; i < GL_WORKQUEUES; i++) {
> +		sprintf(qn, "gfs2workq%d", i);
> +		glock_workqueue[i] = alloc_workqueue(qn, WQ_MEM_RECLAIM |
>  					  WQ_HIGHPRI | WQ_FREEZEABLE, 0);
> -	if (IS_ERR(glock_workqueue))
> -		return PTR_ERR(glock_workqueue);
> +		if (IS_ERR(glock_workqueue[i])) {
> +			int error = PTR_ERR(glock_workqueue[i]);
> +
> +			while (i > 0) {
> +				i--;
> +				destroy_workqueue(glock_workqueue[i]);
> +			}
> +			return error;
> +		}
> +	}
>  	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
>  						WQ_MEM_RECLAIM | WQ_FREEZEABLE,
>  						0);
>  	if (IS_ERR(gfs2_delete_workqueue)) {
> -		destroy_workqueue(glock_workqueue);
> +		for (i = 0; i < GL_WORKQUEUES; i++)
> +			destroy_workqueue(glock_workqueue[i]);
>  		return PTR_ERR(gfs2_delete_workqueue);
>  	}
>  
> @@ -1708,9 +1748,12 @@ int __init gfs2_glock_init(void)
>  
>  void gfs2_glock_exit(void)
>  {
> +	int i;
> +
>  	unregister_shrinker(&glock_shrinker);
> -	destroy_workqueue(glock_workqueue);
>  	destroy_workqueue(gfs2_delete_workqueue);
> +	for (i = 0; i < GL_WORKQUEUES; i++)
> +		destroy_workqueue(glock_workqueue[i]);
>  }
>  
>  static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index afa8bfe..3233add 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -113,6 +113,12 @@ enum {
>  
>  #define GLR_TRYFAILED		13
>  
> +#define GL_GLOCK_MAX_HOLD        (long)(HZ / 5)
> +#define GL_GLOCK_DFT_HOLD        (long)(HZ / 5)
> +#define GL_GLOCK_MIN_HOLD        (long)(0)
> +#define GL_GLOCK_HOLD_INCR       (long)(HZ / 20)
> +#define GL_GLOCK_HOLD_DECR       (long)(HZ / 40)
> +
>  struct lm_lockops {
>  	const char *lm_proto_name;
>  	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index ac5fac9..bba125e 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
>  	.go_lock = inode_go_lock,
>  	.go_dump = inode_go_dump,
>  	.go_type = LM_TYPE_INODE,
> -	.go_min_hold_time = HZ / 5,
>  	.go_flags = GLOF_ASPACE,
>  };
>  
> @@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
>  	.go_unlock = rgrp_go_unlock,
>  	.go_dump = gfs2_rgrp_dump,
>  	.go_type = LM_TYPE_RGRP,
> -	.go_min_hold_time = HZ / 5,
>  	.go_flags = GLOF_ASPACE,
>  };
>  
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 720c1e6..f21f075 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -163,7 +163,6 @@ struct gfs2_glock_operations {
>  	int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
>  	void (*go_callback) (struct gfs2_glock *gl);
>  	const int go_type;
> -	const unsigned long go_min_hold_time;
>  	const unsigned long go_flags;
>  #define GLOF_ASPACE 1
>  };
> @@ -237,6 +236,7 @@ struct gfs2_glock {
>  	struct delayed_work gl_work;
>  	struct work_struct gl_delete;
>  	struct rcu_head gl_rcu;
> +	long gl_hold_time;
>  };
>  
>  #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
> 





More information about the Cluster-devel mailing list