[Cluster-devel] [GFS2 PATCH 09/10] gfs2: fix deadlock in gfs2_ail1_empty withdraw

Bob Peterson rpeterso at redhat.com
Tue Jul 13 18:09:57 UTC 2021


Before this patch, function gfs2_ail1_empty could issue a file system
withdraw when I/O errors were discovered. However, it has several
callers, including gfs2_flush_revokes(), which takes the gfs2_log_lock
before calling gfs2_ail1_empty. If gfs2_ail1_empty needed to withdraw,
it would do so with the gfs2_log_lock still held, which resulted in a
deadlock with other processes that needed the gfs2_log_lock.

This patch moves the withdraw out of function gfs2_ail1_empty and
makes each of its callers check for a pending withdraw by calling the
new function check_ail1_withdraw. Function gfs2_flush_revokes now does
this check after releasing the gfs2_log_lock, which avoids the deadlock.

Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
 fs/gfs2/log.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index bd2ff5ef4b91..7e0ac87f7d71 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -379,11 +379,6 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
 	ret = list_empty(&sdp->sd_ail1_list);
 	spin_unlock(&sdp->sd_ail_lock);
 
-	if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
-		gfs2_lm(sdp, "fatal: I/O error(s)\n");
-		gfs2_withdraw(sdp);
-	}
-
 	return ret;
 }
 
@@ -801,6 +796,15 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
 	}
 }
 
+static void check_ail1_withdraw(struct gfs2_sbd *sdp)
+{
+	if (!test_bit(SDF_WITHDRAWING, &sdp->sd_flags))
+		return;
+
+	gfs2_lm(sdp, "fatal: I/O error(s)\n");
+	gfs2_withdraw(sdp);
+}
+
 /**
  * gfs2_flush_revokes - Add as many revokes to the system transaction as we can
  * @sdp: The GFS2 superblock
@@ -821,6 +825,7 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp)
 	gfs2_log_lock(sdp);
 	gfs2_ail1_empty(sdp, max_revokes);
 	gfs2_log_unlock(sdp);
+	check_ail1_withdraw(sdp);
 }
 
 /**
@@ -982,6 +987,7 @@ void gfs2_ail_drain(struct gfs2_sbd *sdp)
 static void empty_ail1_list(struct gfs2_sbd *sdp)
 {
 	unsigned long start = jiffies;
+	int empty;
 
 	for (;;) {
 		if (time_after(jiffies, start + (HZ * 600))) {
@@ -992,7 +998,9 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
 		}
 		gfs2_ail1_start(sdp);
 		gfs2_ail1_wait(sdp);
-		if (gfs2_ail1_empty(sdp, 0))
+		empty = gfs2_ail1_empty(sdp, 0);
+		check_ail1_withdraw(sdp);
+		if (empty)
 			return;
 	}
 }
@@ -1364,6 +1372,7 @@ int gfs2_logd(void *data)
 
 		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
 			gfs2_ail1_empty(sdp, 0);
+			check_ail1_withdraw(sdp);
 			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 						  GFS2_LFC_LOGD_JFLUSH_REQD);
 		}
@@ -1372,6 +1381,7 @@ int gfs2_logd(void *data)
 			gfs2_ail1_start(sdp);
 			gfs2_ail1_wait(sdp);
 			gfs2_ail1_empty(sdp, 0);
+			check_ail1_withdraw(sdp);
 			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 						  GFS2_LFC_LOGD_AIL_FLUSH_REQD);
 		}
-- 
2.31.1




More information about the Cluster-devel mailing list