[Cluster-devel] [GFS2 PATCH 08/10] gfs2: New log flush watchdog

Bob Peterson rpeterso at redhat.com
Tue Jul 13 18:09:56 UTC 2021


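This patch adds a new delayed-work watchdog whose sole purpose is to log an
error when a gfs2_log_flush operation takes longer than its time budget,
which is scaled at roughly 1/10 second per page on the ordered writes list.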

Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
 fs/gfs2/incore.h     |  6 ++++++
 fs/gfs2/log.c        | 47 ++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/log.h        |  1 +
 fs/gfs2/main.c       |  8 ++++++++
 fs/gfs2/ops_fstype.c |  2 ++
 fs/gfs2/sys.c        |  6 ++++--
 6 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6f31a067a5f2..566c0053b7c5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -683,6 +683,8 @@ struct local_statfs_inode {
 	unsigned int si_jid; /* journal id this statfs inode corresponds to */
 };
 
+#define GFS2_LOG_FLUSH_TIMEOUT (HZ / 10) /* arbitrary: 1/10 second per page */
+
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
 	struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
@@ -849,6 +851,10 @@ struct gfs2_sbd {
 	unsigned long sd_last_warning;
 	struct dentry *debugfs_dir;    /* debugfs directory */
 	unsigned long sd_glock_dqs_held;
+
+	struct delayed_work sd_log_flush_watchdog;
+	unsigned long sd_dirty_pages;
+	unsigned long sd_log_flush_start;
 };
 
 static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f0ee3ff6f9a8..bd2ff5ef4b91 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -19,6 +19,7 @@
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
 #include <linux/list_sort.h>
+#include <linux/sched/debug.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -32,8 +33,22 @@
 #include "trace_gfs2.h"
 #include "trans.h"
 
+extern struct workqueue_struct *gfs2_log_flush_wq;
+
 static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
 
+/* Watchdog work item: report a log flush that has outlived its time budget. */
+void gfs2_log_flush_watchdog_func(struct work_struct *work)
+{
+	struct gfs2_sbd *sdp = container_of(to_delayed_work(work),
+				struct gfs2_sbd, sd_log_flush_watchdog);
+	struct task_struct *logd = sdp->sd_logd_process; /* may be NULL */
+
+	fs_err(sdp, "log flush pid %d took > %lu secs to write %lu pages.\n",
+	       logd ? pid_nr(task_pid(logd)) : 0,
+	       (jiffies - sdp->sd_log_flush_start) / HZ, sdp->sd_dirty_pages);
+}
+
 /**
  * gfs2_struct2blk - compute stuff
  * @sdp: the filesystem
@@ -1016,6 +1031,26 @@ static void trans_drain(struct gfs2_trans *tr)
 	}
 }
 
+/**
+ * count_dirty_pages - estimate the pages queued on the ordered writes list
+ * @sdp: the filesystem
+ *
+ * Walks sd_log_ordered under sd_ordered_lock and sums each inode mapping's
+ * nrpages as a stand-in for its dirty pages. The total is only an estimate:
+ * the lock is not held during the log flush, so the real number may change.
+ */
+static unsigned long count_dirty_pages(struct gfs2_sbd *sdp)
+{
+	unsigned long total = 0;
+	struct gfs2_inode *inode;
+
+	spin_lock(&sdp->sd_ordered_lock);
+	list_for_each_entry(inode, &sdp->sd_log_ordered, i_ordered)
+		total += inode->i_inode.i_mapping->nrpages;
+	spin_unlock(&sdp->sd_ordered_lock);
+	return total;
+}
+
 /**
  * gfs2_log_flush - flush incore transaction(s)
  * @sdp: The filesystem
@@ -1031,8 +1066,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	unsigned int first_log_head;
 	unsigned int reserved_revokes = 0;
+	unsigned long dpages;
+
+	dpages = count_dirty_pages(sdp);
 
 	down_write(&sdp->sd_log_flush_lock);
+	if (dpages)
+		if (queue_delayed_work(gfs2_log_flush_wq,
+				       &sdp->sd_log_flush_watchdog,
+				       round_up(dpages *
+						GFS2_LOG_FLUSH_TIMEOUT, HZ))) {
+			sdp->sd_dirty_pages = dpages;
+			sdp->sd_log_flush_start = jiffies;
+		}
 	trace_gfs2_log_flush(sdp, 1, flags);
 
 repeat:
@@ -1144,6 +1190,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 		gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
 		gfs2_log_release(sdp, reserved_blocks - used_blocks);
 	}
+	cancel_delayed_work(&sdp->sd_log_flush_watchdog);
 	up_write(&sdp->sd_log_flush_lock);
 	gfs2_trans_free(sdp, tr);
 	if (gfs2_withdrawing(sdp))
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index fc905c2af53c..962044fba53a 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -94,5 +94,6 @@ extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
 extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
 extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
 extern void gfs2_ail_drain(struct gfs2_sbd *sdp);
+extern void gfs2_log_flush_watchdog_func(struct work_struct *work);
 
 #endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 28d0eb23e18e..55a7f29742b3 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -30,6 +30,7 @@
 #include "glops.h"
 
 struct workqueue_struct *gfs2_control_wq;
+struct workqueue_struct *gfs2_log_flush_wq;
 
 static void gfs2_init_inode_once(void *foo)
 {
@@ -178,6 +179,10 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_freeze_wq)
 		goto fail_wq3;
 
+	gfs2_log_flush_wq = alloc_workqueue("gfs2_log_flush_wq", 0, 0);
+	if (!gfs2_log_flush_wq)
+		goto fail_wq4;
+
 	gfs2_page_pool = mempool_create_page_pool(64, 0);
 	if (!gfs2_page_pool)
 		goto fail_mempool;
@@ -189,6 +194,8 @@ static int __init init_gfs2_fs(void)
 	return 0;
 
 fail_mempool:
+	destroy_workqueue(gfs2_log_flush_wq);
+fail_wq4:
 	destroy_workqueue(gfs2_freeze_wq);
 fail_wq3:
 	destroy_workqueue(gfs2_control_wq);
@@ -240,6 +247,7 @@ static void __exit exit_gfs2_fs(void)
 	destroy_workqueue(gfs_recovery_wq);
 	destroy_workqueue(gfs2_control_wq);
 	destroy_workqueue(gfs2_freeze_wq);
+	destroy_workqueue(gfs2_log_flush_wq);
 	list_lru_destroy(&gfs2_qd_lru);
 
 	rcu_barrier();
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6a950c4a61e9..b09e61457b23 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -139,6 +139,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	init_waitqueue_head(&sdp->sd_log_flush_wait);
 	atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
 	mutex_init(&sdp->sd_freeze_mutex);
+	INIT_DELAYED_WORK(&sdp->sd_log_flush_watchdog,
+			  gfs2_log_flush_watchdog_func);
 
 	return sdp;
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c0a34d9ddee4..c90d9f48571a 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -96,7 +96,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
 		     "sd_log_flush_head:        %d\n"
 		     "sd_log_flush_tail:        %d\n"
 		     "sd_log_blks_reserved:     %d\n"
-		     "sd_log_revokes_available: %d\n",
+		     "sd_log_revokes_available: %d\n"
+		     "sd_dirty_pages:           %lu\n",
 		     test_bit(SDF_JOURNAL_CHECKED, &f),
 		     test_bit(SDF_JOURNAL_LIVE, &f),
 		     (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
@@ -124,7 +125,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
 		     sdp->sd_log_flush_head,
 		     sdp->sd_log_flush_tail,
 		     sdp->sd_log_blks_reserved,
-		     atomic_read(&sdp->sd_log_revokes_available));
+		     atomic_read(&sdp->sd_log_revokes_available),
+		     sdp->sd_dirty_pages);
 	return s;
 }
 
-- 
2.31.1




More information about the Cluster-devel mailing list