[Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2
Bob Peterson
rpeterso at redhat.com
Wed Jan 26 20:22:18 UTC 2011
Hi,
This patch is a performance improvement for GFS2 in a clustered
environment. It makes the glock minimum hold time self-adjusting:
the hold time is lengthened when a holder has to wait more than a
second for the lock, and shortened each time the glock is demoted,
rather than using the fixed per-type go_min_hold_time value.
Regards,
Bob Peterson
Red Hat File Systems
Signed-off-by: Bob Peterson <rpeterso at redhat.com>
Bouncing locks in a cluster is slow in GFS2
--
fs/gfs2/glock.c | 89 ++++++++++++++++++++++++++++++++++++++++--------------
fs/gfs2/glock.h | 6 ++++
fs/gfs2/glops.c | 2 --
fs/gfs2/incore.h | 2 +-
4 files changed, 73 insertions(+), 26 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c75d499..117d8e2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -58,7 +58,6 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static struct dentry *gfs2_root;
-static struct workqueue_struct *glock_workqueue;
struct workqueue_struct *gfs2_delete_workqueue;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
@@ -67,9 +66,23 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
+#define GL_WORKQUEUES 0x2
+#define GL_WQ_MASK 0x1
static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
static struct dentry *gfs2_root;
+static struct workqueue_struct *glock_workqueue[GL_WORKQUEUES];
+
+static inline int qwork(struct gfs2_glock *gl, unsigned long delay)
+{
+ struct workqueue_struct *wq;
+
+ wq = glock_workqueue[gl->gl_name.ln_type & GL_WQ_MASK];
+
+ if (gl->gl_name.ln_type != LM_TYPE_INODE)
+ delay = 0;
+ return queue_delayed_work(wq, &gl->gl_work, delay);
+}
/**
* gl_hash() - Turn glock number into hash bucket number
@@ -407,6 +420,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
if (held1 && held2 && list_empty(&gl->gl_holders))
clear_bit(GLF_QUEUED, &gl->gl_flags);
+ if (new_state != gl->gl_target)
+ /* shorten our minimum hold time */
+ gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
+ GL_GLOCK_MIN_HOLD);
gl->gl_state = new_state;
gl->gl_tchange = jiffies;
}
@@ -550,7 +567,7 @@ __acquires(&gl->gl_spin)
GLOCK_BUG_ON(gl, ret);
} else { /* lock_nolock */
finish_xmote(gl, target);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put(gl);
}
@@ -623,7 +640,7 @@ out_sched:
clear_bit(GLF_LOCK, &gl->gl_flags);
smp_mb__after_clear_bit();
gfs2_glock_hold(gl);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put_nolock(gl);
return;
@@ -670,15 +687,14 @@ static void glock_work_func(struct work_struct *work)
gl->gl_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != LM_ST_EXCLUSIVE) {
unsigned long holdtime, now = jiffies;
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
- if (!delay ||
- queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ if (!delay || qwork(gl, delay) == 0)
gfs2_glock_put(gl);
if (drop_ref)
gfs2_glock_put(gl);
@@ -741,6 +757,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
+ gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -852,8 +869,15 @@ static int gfs2_glock_demote_wait(void *word)
static void wait_on_holder(struct gfs2_holder *gh)
{
+ unsigned long time1 = jiffies;
+
might_sleep();
wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+ if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
+ /* Lengthen the minimum hold time. */
+ gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
+ GL_GLOCK_HOLD_INCR,
+ GL_GLOCK_MAX_HOLD);
}
static void wait_on_demote(struct gfs2_glock *gl)
@@ -1087,8 +1111,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
gfs2_glock_hold(gl);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ delay = gl->gl_hold_time;
+ if (qwork(gl, delay) == 0)
gfs2_glock_put(gl);
}
@@ -1270,18 +1294,18 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
unsigned long now = jiffies;
gfs2_glock_hold(gl);
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
+ delay = gl->gl_hold_time;
}
spin_lock(&gl->gl_spin);
handle_callback(gl, state, delay);
spin_unlock(&gl->gl_spin);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ if (qwork(gl, delay) == 0)
gfs2_glock_put(gl);
}
@@ -1343,7 +1367,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
smp_wmb();
gfs2_glock_hold(gl);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put(gl);
}
@@ -1379,7 +1403,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
}
clear_bit(GLF_LOCK, &gl->gl_flags);
smp_mb__after_clear_bit();
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put_nolock(gl);
spin_unlock(&gl->gl_spin);
spin_lock(&lru_lock);
@@ -1447,7 +1471,7 @@ static void thaw_glock(struct gfs2_glock *gl)
return;
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
gfs2_glock_hold(gl);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put(gl);
}
@@ -1471,7 +1495,7 @@ static void clear_glock(struct gfs2_glock *gl)
handle_callback(gl, LM_ST_UNLOCKED, 0);
spin_unlock(&gl->gl_spin);
gfs2_glock_hold(gl);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ if (qwork(gl, 0) == 0)
gfs2_glock_put(gl);
}
@@ -1510,8 +1534,11 @@ static void dump_glock_func(struct gfs2_glock *gl)
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
+ unsigned int x;
+
glock_hash_walk(clear_glock, sdp);
- flush_workqueue(glock_workqueue);
+ for (x = 0; x < GL_WORKQUEUES; x++)
+ flush_workqueue(glock_workqueue[x]);
wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
glock_hash_walk(dump_glock_func, sdp);
}
@@ -1658,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
@@ -1666,7 +1693,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
state2str(gl->gl_target),
state2str(gl->gl_demote_state), dtime,
atomic_read(&gl->gl_ail_count),
- atomic_read(&gl->gl_ref));
+ atomic_read(&gl->gl_ref), gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
error = dump_holder(seq, gh);
@@ -1685,19 +1712,32 @@ out:
int __init gfs2_glock_init(void)
{
unsigned i;
+ char qn[32];
+
for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
}
- glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
+ for (i = 0; i < GL_WORKQUEUES; i++) {
+ sprintf(qn, "gfs2workq%d", i);
+ glock_workqueue[i] = alloc_workqueue(qn, WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZEABLE, 0);
- if (IS_ERR(glock_workqueue))
- return PTR_ERR(glock_workqueue);
+ if (IS_ERR(glock_workqueue[i])) {
+ int error = PTR_ERR(glock_workqueue[i]);
+
+ while (i > 0) {
+ i--;
+ destroy_workqueue(glock_workqueue[i]);
+ }
+ return error;
+ }
+ }
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
WQ_MEM_RECLAIM | WQ_FREEZEABLE,
0);
if (IS_ERR(gfs2_delete_workqueue)) {
- destroy_workqueue(glock_workqueue);
+ for (i = 0; i < GL_WORKQUEUES; i++)
+ destroy_workqueue(glock_workqueue[i]);
return PTR_ERR(gfs2_delete_workqueue);
}
@@ -1708,9 +1748,12 @@ int __init gfs2_glock_init(void)
void gfs2_glock_exit(void)
{
+ int i;
+
unregister_shrinker(&glock_shrinker);
- destroy_workqueue(glock_workqueue);
destroy_workqueue(gfs2_delete_workqueue);
+ for (i = 0; i < GL_WORKQUEUES; i++)
+ destroy_workqueue(glock_workqueue[i]);
}
static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index afa8bfe..3233add 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -113,6 +113,12 @@ enum {
#define GLR_TRYFAILED 13
+#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
+#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
+#define GL_GLOCK_MIN_HOLD (long)(0)
+#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
+#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
+
struct lm_lockops {
const char *lm_proto_name;
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index ac5fac9..bba125e 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
@@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_unlock = rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 720c1e6..f21f075 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -163,7 +163,6 @@ struct gfs2_glock_operations {
int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
void (*go_callback) (struct gfs2_glock *gl);
const int go_type;
- const unsigned long go_min_hold_time;
const unsigned long go_flags;
#define GLOF_ASPACE 1
};
@@ -237,6 +236,7 @@ struct gfs2_glock {
struct delayed_work gl_work;
struct work_struct gl_delete;
struct rcu_head gl_rcu;
+ long gl_hold_time;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
More information about the Cluster-devel
mailing list