[Cluster-devel] [PATCH 7/8] gfs2: gfs2_evict_inode: Put glocks asynchronously

Andreas Gruenbacher agruenba at redhat.com
Wed May 31 15:03:11 UTC 2017


gfs2_evict_inode is called to free inodes under memory pressure.  The
function calls into DLM when an inode's last cluster-wide reference goes
away (remote unlink) and to release the glock and associated DLM lock
before finally destroying the inode.  However, if DLM is itself blocked
waiting for memory to become available, calling into DLM again will
deadlock.

Avoid that by decoupling releasing glocks from destroying inodes in
that case: with gfs2_glock_queue_put, glocks are instead put
asynchronously in work queue context, by which time the associated
inodes have most likely already been destroyed.
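
As a rough sketch of the pattern used in the super.c hunks below (gl
is the glock whose last local reference is being dropped):

	if (current->flags & PF_MEMALLOC)
		gfs2_glock_queue_put(gl);	/* final put runs in glock workqueue context */
	else
		gfs2_glock_put(gl);		/* may call into DLM directly */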

With this change, it appears that an inode can end up being unlinked,
a remote unlink can be triggered, and the inode can then be reallocated
before all remote-unlink callbacks are processed.  Revalidate the link
count in gfs2_evict_inode to make sure we're not destroying an
allocated, referenced inode.
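
In code, the revalidation is a simple recheck once the inode glock has
been reacquired (see the second super.c hunk below):

	if (inode->i_nlink)
		goto out_truncate;	/* inode was reallocated; don't deallocate it */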

In addition, skip remote unlinks under memory pressure; the next inode
allocation in the same resource group will take care of destroying
unlinked inodes.
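
That is the same PF_MEMALLOC test again, placed before the inode's
block type is even verified:

	if (current->flags & PF_MEMALLOC)
		goto out;	/* leave deallocation to the next allocation in this rgrp */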

Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
---
 fs/gfs2/glock.c | 10 +++++++++-
 fs/gfs2/glock.h |  2 ++
 fs/gfs2/super.c | 30 ++++++++++++++++++++++++++++--
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6d32d2..4ba53e9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -170,7 +170,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
  *
  */
 
-static void gfs2_glock_hold(struct gfs2_glock *gl)
+void gfs2_glock_hold(struct gfs2_glock *gl)
 {
 	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
 	lockref_get(&gl->gl_lockref);
@@ -269,6 +269,14 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 }
 
+/*
+ * Cause the glock to be put in work queue context.
+ */
+void gfs2_glock_queue_put(struct gfs2_glock *gl)
+{
+	gfs2_glock_queue_work(gl, 0);
+}
+
 /**
  * may_grant - check if its ok to grant a new lock
  * @gl: The glock
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9ad4a6a..33e0511 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -181,7 +181,9 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
 extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 			  const struct gfs2_glock_operations *glops,
 			  int create, struct gfs2_glock **glp);
+extern void gfs2_glock_hold(struct gfs2_glock *gl);
 extern void gfs2_glock_put(struct gfs2_glock *gl);
+extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
 extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
 			     u16 flags, struct gfs2_holder *gh);
 extern void gfs2_holder_reinit(unsigned int state, u16 flags,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c651983..ace4814 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1541,6 +1541,16 @@ static void gfs2_evict_inode(struct inode *inode)
 	if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
 		goto out;
 
+	/*
+	 * If we are in shrinker context, DLM may depend on us to make
+	 * progress.  In that case, calling into DLM again would deadlock.  To
+	 * prevent that from happening, skip deallocating the inode here; it
+	 * will be deallocated when another inode is allocated in the same
+	 * resource group.
+	 */
+	if (current->flags & PF_MEMALLOC)
+		goto out;
+
 	/* Must not read inode block until block type has been verified */
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
 	if (unlikely(error)) {
@@ -1561,6 +1571,12 @@ static void gfs2_evict_inode(struct inode *inode)
 			goto out_truncate;
 	}
 
+	/*
+	 * The inode may have been recreated in the meantime.
+	 */
+	if (inode->i_nlink)
+		goto out_truncate;
+
 	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
 	    test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
 		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
@@ -1640,12 +1656,22 @@ static void gfs2_evict_inode(struct inode *inode)
 	glock_set_object(ip->i_gl, NULL);
 	wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
 	gfs2_glock_add_to_lru(ip->i_gl);
-	gfs2_glock_put(ip->i_gl);
+	if (current->flags & PF_MEMALLOC)
+		gfs2_glock_queue_put(ip->i_gl);
+	else
+		gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
 	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
-		glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
+		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+		glock_set_object(gl, NULL);
 		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+		gfs2_glock_hold(gl);
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		if (current->flags & PF_MEMALLOC)
+			gfs2_glock_queue_put(gl);
+		else
+			gfs2_glock_put(gl);
 	}
 }
 
-- 
2.7.4