[Cluster-devel] [GFS2 PATCH 2/2] gfs2: Use async glocks for rename
Bob Peterson
rpeterso at redhat.com
Thu Aug 15 13:27:22 UTC 2019
Because s_vfs_rename_mutex is not cluster-wide, multiple nodes can
reverse the roles of which directories are "old" and which are "new"
for the purposes of rename. This can cause deadlocks where two nodes
can lock old-then-new but since their roles are reversed, they wait
for each other.
This patch fixes the problem by acquiring all gfs2_rename's inode
glocks asychronously and waits for all glocks to be acquired.
That way all inodes are locked regardless of the order.
Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
fs/gfs2/glock.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/gfs2/glock.h | 1 +
fs/gfs2/incore.h | 1 +
fs/gfs2/inode.c | 13 +++++++++----
fs/gfs2/ops_fstype.c | 1 +
5 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a27dbd3dec01..3334101c9921 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -305,6 +305,11 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
clear_bit(HIF_WAIT, &gh->gh_iflags);
smp_mb__after_atomic();
wake_up_bit(&gh->gh_iflags, HIF_WAIT);
+ if (gh->gh_flags & GL_ASYNC) {
+ struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
+
+ wake_up(&sdp->sd_async_glock_wait);
+ }
}
/**
@@ -952,6 +957,41 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
return gh->gh_error;
}
+static int all_glocks_held(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+ struct gfs2_glock *gl = ghs[0].gh_gl;
+ int i;
+
+ for (i = 0; i < num_gh; i++) {
+ if (test_bit(HIF_WAIT, &ghs[i].gh_iflags) ||
+ !test_bit(HIF_HOLDER, &ghs[i].gh_iflags) ||
+ gl->gl_state != ghs[i].gh_state)
+ return 0;
+ if (ghs[i].gh_error)
+ return ghs[i].gh_error;
+ }
+ return 1;
+}
+
+/**
+ * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
+ * @gh: the glock holder
+ *
+ * Returns: 0 on success
+ */
+
+int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
+{
+ struct gfs2_glock *gl = ghs[0].gh_gl;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ might_sleep();
+ if (!wait_event_timeout(sdp->sd_async_glock_wait,
+ all_glocks_held(num_gh, ghs), HZ * 2))
+ return -ESTALE;
+ return 0;
+}
+
/**
* handle_callback - process a demote request
* @gl: the glock
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index e4e0bed5257c..a997c42726a4 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -190,6 +190,7 @@ extern void gfs2_holder_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq(struct gfs2_holder *gh);
extern int gfs2_glock_poll(struct gfs2_holder *gh);
extern int gfs2_glock_wait(struct gfs2_holder *gh);
+extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq(struct gfs2_holder *gh);
extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a993d7c022e..6b450065b9d5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -725,6 +725,7 @@ struct gfs2_sbd {
struct gfs2_glock *sd_freeze_gl;
struct work_struct sd_freeze_work;
wait_queue_head_t sd_glock_wait;
+ wait_queue_head_t sd_async_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
struct completion sd_wdack;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 1ced4d0a3b04..cb184d6bed9b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1388,16 +1388,18 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
num_gh = 1;
- gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+ gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs);
if (odip != ndip) {
- gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+ gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE,GL_ASYNC,
+ ghs + num_gh);
num_gh++;
}
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
num_gh++;
if (nip) {
- gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+ gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC,
+ ghs + num_gh);
num_gh++;
/* grab the resource lock for unlink flag twiddling
* this is the case of the target file already existing
@@ -1414,6 +1416,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (error)
goto out_gunlock;
}
+ error = gfs2_glock_async_wait(num_gh, ghs);
+ if (error)
+ goto out_gunlock;
if (gfs2_holder_initialized(&rd_gh)) {
error = gfs2_glock_nq(&rd_gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4a8e5a7310f0..f3fd5cd9d43f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -87,6 +87,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
gfs2_tune_init(&sdp->sd_tune);
init_waitqueue_head(&sdp->sd_glock_wait);
+ init_waitqueue_head(&sdp->sd_async_glock_wait);
atomic_set(&sdp->sd_glock_disposal, 0);
init_completion(&sdp->sd_locking_init);
init_completion(&sdp->sd_wdack);
--
2.21.0
More information about the Cluster-devel
mailing list