[Cluster-devel] [PATCH 17/20] gfs2: Smarter iopen glock waiting

Andreas Gruenbacher agruenba at redhat.com
Mon Jun 8 16:00:36 UTC 2020


When trying to upgrade the iopen glock from a shared to an exclusive lock in
gfs2_evict_inode, abort the wait if there is contention on the corresponding
inode glock: in that case, the inode must still be in active use on another
node, and we're not guaranteed to get the iopen glock anytime soon.

To make this work even better, when we notice contention on the iopen glock
and we can't evict the corresponding inode and release the iopen glock
immediately, poke the inode glock.  The other node(s) trying to acquire the
iopen glock can then abort instead of timing out.
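
Poking is just a no-op locking cycle on the inode glock, cheap enough to do
from the delete work: a try-lock that is dropped again immediately if
granted.  Its only effect is to make the lock manager issue a callback to
nodes holding the glock in a conflicting mode, so the node waiting in
gfs2_upgrade_iopen_glock sees GLF_DEMOTE and can back off.  In outline (this
is gfs2_glock_poke from the glock.c hunk below):

	/*
	 * Try-lock the glock and drop it again right away.  LM_FLAG_TRY_1CB
	 * makes the lock manager send one callback to conflicting holders,
	 * LM_FLAG_ANY accepts any lock state compatible with the request,
	 * and GL_SKIP avoids reading in the glock's backing object.
	 */
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh);
	if (!error)
		gfs2_glock_dq(&gh);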

Thanks to Heinz Mauelshagen for pointing out a locking bug in a previous
version of this patch.

Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
---
 fs/gfs2/glock.c | 34 ++++++++++++++++++++++++++++++++--
 fs/gfs2/super.c | 11 ++++++++---
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7ad06dd49352..71091e35f83d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -783,6 +783,17 @@ bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
 	return generation <= be64_to_cpu(ri->ri_generation_deleted);
 }
 
+static void gfs2_glock_poke(struct gfs2_glock *gl)
+{
+	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
+	struct gfs2_holder gh;
+	int error;
+
+	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh);
+	if (!error)
+		gfs2_glock_dq(&gh);
+}
+
 static bool gfs2_try_evict(struct gfs2_glock *gl)
 {
 	struct gfs2_inode *ip;
@@ -804,6 +815,8 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		ip = NULL;
 	spin_unlock(&gl->gl_lockref.lock);
 	if (ip) {
+		struct gfs2_glock *inode_gl = NULL;
+
 		gl->gl_no_formal_ino = ip->i_no_formal_ino;
 		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
 		d_prune_aliases(&ip->i_inode);
@@ -812,9 +825,16 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		/* If the inode was evicted, gl->gl_object will now be NULL. */
 		spin_lock(&gl->gl_lockref.lock);
 		ip = gl->gl_object;
-		if (ip)
+		if (ip) {
+			inode_gl = ip->i_gl;
+			lockref_get(&inode_gl->gl_lockref);
 			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+		}
 		spin_unlock(&gl->gl_lockref.lock);
+		if (inode_gl) {
+			gfs2_glock_poke(inode_gl);
+			gfs2_glock_put(inode_gl);
+		}
 		evicted = !ip;
 	}
 	return evicted;
@@ -845,12 +865,22 @@ static void delete_work_func(struct work_struct *work)
 		 * has happened.  Otherwise, if we cause contention on the inode glock
 		 * immediately, the remote node will think that we still have
 		 * the inode in use, and so it will give up waiting.
+		 *
+		 * If we can't evict the inode, signal to the remote node that
+		 * the inode is still in use.  We'll later try to delete the
+		 * inode locally in gfs2_evict_inode.
+		 *
+		 * FIXME: We only need to verify that the remote node has
+		 * deleted the inode because nodes before this remote delete
+		 * rework won't cooperate.  At a later time, when we no longer
+		 * care about compatibility with such nodes, we can skip this
+		 * step entirely.
 		 */
 		if (gfs2_try_evict(gl)) {
 			if (gfs2_queue_delete_work(gl, 5 * HZ))
 				return;
-			goto out;
 		}
+		goto out;
 	}
 
 	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bcb56f13f50a..32d8d26126a1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1273,8 +1273,12 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
 	 * If there are no other lock holders, we'll get the lock immediately.
 	 * Otherwise, the other nodes holding the lock will be notified about
 	 * our locking request.  If they don't have the inode open, they'll
-	 * evict the cached inode and release the lock.  As a last resort,
-	 * we'll eventually time out.
+	 * evict the cached inode and release the lock.  Otherwise, if they
+	 * poke the inode glock, we'll take this as an indication that they
+	 * still need the iopen glock and that they'll take care of deleting
+	 * the inode when they're done.  As a last resort, if another node
+	 * keeps holding the iopen glock without showing any activity on the
+	 * inode glock, we'll eventually time out.
 	 *
 	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
 	 * locking request as an optimization to notify lock holders as soon as
@@ -1293,7 +1297,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
 		return false;
 
 	timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
-		!test_bit(HIF_WAIT, &gh->gh_iflags),
+		!test_bit(HIF_WAIT, &gh->gh_iflags) ||
+		test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
 		timeout);
 	if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
 		gfs2_glock_dq(gh);
-- 
2.26.2