[Cluster-devel] [GFS2 PATCH 09/10] gfs2: fix deadlock in gfs2_ail1_empty withdraw
Bob Peterson
rpeterso at redhat.com
Tue Jul 13 18:09:57 UTC 2021
Before this patch, function gfs2_ail1_empty could issue a file system
withdraw when IO errors were discovered. However, it has several
callers, including gfs2_flush_revokes(), which takes the gfs2_log_lock
before calling gfs2_ail1_empty. If gfs2_ail1_empty needed to withdraw,
it would do so with the gfs2_log_lock still held, which resulted in a
deadlock with other processes that needed the log_lock.
This patch moves the withdraw out of function gfs2_ail1_empty and
makes each of the callers check for a withdraw by calling the new
function check_ail1_withdraw. Function gfs2_flush_revokes now does this
check after releasing the gfs2_log_lock, to avoid the deadlock.
Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
fs/gfs2/log.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index bd2ff5ef4b91..7e0ac87f7d71 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -379,11 +379,6 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
- if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
- gfs2_lm(sdp, "fatal: I/O error(s)\n");
- gfs2_withdraw(sdp);
- }
-
return ret;
}
@@ -801,6 +796,15 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
}
}
+static void check_ail1_withdraw(struct gfs2_sbd *sdp)
+{
+ if (!test_bit(SDF_WITHDRAWING, &sdp->sd_flags))
+ return;
+
+ gfs2_lm(sdp, "fatal: I/O error(s)\n");
+ gfs2_withdraw(sdp);
+}
+
/**
* gfs2_flush_revokes - Add as many revokes to the system transaction as we can
* @sdp: The GFS2 superblock
@@ -821,6 +825,7 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp)
gfs2_log_lock(sdp);
gfs2_ail1_empty(sdp, max_revokes);
gfs2_log_unlock(sdp);
+ check_ail1_withdraw(sdp);
}
/**
@@ -982,6 +987,7 @@ void gfs2_ail_drain(struct gfs2_sbd *sdp)
static void empty_ail1_list(struct gfs2_sbd *sdp)
{
unsigned long start = jiffies;
+ int empty;
for (;;) {
if (time_after(jiffies, start + (HZ * 600))) {
@@ -992,7 +998,9 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
}
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
- if (gfs2_ail1_empty(sdp, 0))
+ empty = gfs2_ail1_empty(sdp, 0);
+ check_ail1_withdraw(sdp);
+ if (empty)
return;
}
}
@@ -1364,6 +1372,7 @@ int gfs2_logd(void *data)
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp, 0);
+ check_ail1_withdraw(sdp);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_LOGD_JFLUSH_REQD);
}
@@ -1372,6 +1381,7 @@ int gfs2_logd(void *data)
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp, 0);
+ check_ail1_withdraw(sdp);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_LOGD_AIL_FLUSH_REQD);
}
--
2.31.1
More information about the Cluster-devel
mailing list