[dm-devel] [PATCH V2 3/6] multipathd: Occasionally allow waiters to interrupt checking paths

Benjamin Marzinski bmarzins at redhat.com
Wed Aug 17 20:48:33 UTC 2022


If there are a very large number of paths that all get checked at the
same time, it is possible for the path checkers to starve out other
vecs->lock users, like uevent processing. To avoid this, if the path
checkers are taking a long time, checkerloop now occasionally unlocks
and allows other vecs->lock waiters to run.

In order to make sure that path checking is always making real progress,
checkerloop() only checks if it should unlock every 128 path checks. To
check, it sees if there are waiters and more than a second has passed
since it acquired the vecs->lock. If both of these are true, it drops
the lock and calls nanosleep() to schedule.

When checkerloop() reacquires the lock, it may no longer be at the
correct place in the vector. While paths can be deleted from any point
of the pathvec, they can only be added at the end. This means that the
next path to check must be between its previous location and the start
of the vector. To help determine the correct starting place,
checkerloop() marks each path as not checked at the start of each
checker loop. New paths start in the unchecked state. As paths are
checked, they are marked as such. If the checker loop is interrupted,
when it reacquires the lock, it will iterate backwards from the last
location in checked to the start of the vector. The first path it finds
that is marked as checked must be the last path it checked.

Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
---
 libmultipath/structs.h |  1 +
 multipathd/main.c      | 82 +++++++++++++++++++++++++++++++++---------
 2 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index a6a09441..9d4fb9c8 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -351,6 +351,7 @@ struct path {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
+	bool is_checked;
 	/* configlet pointers */
 	vector hwe;
 	struct gen_path generic_path;
diff --git a/multipathd/main.c b/multipathd/main.c
index 71079113..37c04018 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2548,6 +2548,11 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	}
 	return 1;
 }
+enum checker_state {
+	CHECKER_STARTING,
+	CHECKER_RUNNING,
+	CHECKER_FINISHED,
+};
 
 static void *
 checkerloop (void *ap)
@@ -2555,7 +2560,6 @@ checkerloop (void *ap)
 	struct vectors *vecs;
 	struct path *pp;
 	int count = 0;
-	unsigned int i;
 	struct timespec last_time;
 	struct config *conf;
 	int foreign_tick = 0;
@@ -2581,8 +2585,9 @@ checkerloop (void *ap)
 
 	while (1) {
 		struct timespec diff_time, start_time, end_time;
-		int num_paths = 0, strict_timing, rc = 0;
+		int num_paths = 0, strict_timing, rc = 0, i = 0;
 		unsigned int ticks = 0;
+		enum checker_state checker_state = CHECKER_STARTING;
 
 		if (set_config_state(DAEMON_RUNNING) != DAEMON_RUNNING)
 			/* daemon shutdown */
@@ -2603,22 +2608,67 @@ checkerloop (void *ap)
 		if (use_watchdog)
 			sd_notify(0, "WATCHDOG=1");
 #endif
+		while (checker_state != CHECKER_FINISHED) {
+			unsigned int paths_checked = 0;
+			struct timespec chk_start_time;
 
-		pthread_cleanup_push(cleanup_lock, &vecs->lock);
-		lock(&vecs->lock);
-		pthread_testcancel();
-		vector_foreach_slot (vecs->pathvec, pp, i) {
-			rc = check_path(vecs, pp, ticks);
-			if (rc < 0) {
-				condlog(1, "%s: check_path() failed, removing",
-					pp->dev);
-				vector_del_slot(vecs->pathvec, i);
-				free_path(pp);
-				i--;
-			} else
-				num_paths += rc;
+			pthread_cleanup_push(cleanup_lock, &vecs->lock);
+			lock(&vecs->lock);
+			pthread_testcancel();
+			get_monotonic_time(&chk_start_time);
+			if (checker_state == CHECKER_STARTING) {
+				vector_foreach_slot(vecs->pathvec, pp, i)
+					pp->is_checked = false;
+				i = 0;
+				checker_state = CHECKER_RUNNING;
+			} else {
+				/*
+				 * Paths could have been removed since we
+				 * dropped the lock. Find the path to continue
+				 * checking at. Since paths can be removed from
+				 * anywhere in the vector, but can only be added
+				 * at the end, the last checked path must be
+				 * between its old location, and the start or
+				 * the vector.
+				 */
+				if (i >= VECTOR_SIZE(vecs->pathvec))
+					i = VECTOR_SIZE(vecs->pathvec) - 1;
+				while ((pp = VECTOR_SLOT(vecs->pathvec, i))) {
+					if (pp->is_checked == true)
+						break;
+					i--;
+				}
+				i++;
+			}
+			vector_foreach_slot_after (vecs->pathvec, pp, i) {
+				pp->is_checked = true;
+				rc = check_path(vecs, pp, ticks);
+				if (rc < 0) {
+					condlog(1, "%s: check_path() failed, removing",
+						pp->dev);
+					vector_del_slot(vecs->pathvec, i);
+					free_path(pp);
+					i--;
+				} else
+					num_paths += rc;
+				if (++paths_checked % 128 == 0 &&
+				    lock_has_waiters(&vecs->lock)) {
+					get_monotonic_time(&end_time);
+					timespecsub(&end_time, &chk_start_time,
+						    &diff_time);
+					if (diff_time.tv_sec > 0)
+						goto unlock;
+				}
+			}
+			checker_state = CHECKER_FINISHED;
+unlock:
+			lock_cleanup_pop(vecs->lock);
+			if (checker_state != CHECKER_FINISHED) {
+				/* Yield to waiters */
+				struct timespec wait = { .tv_nsec = 10000, };
+				nanosleep(&wait, NULL);
+			}
 		}
-		lock_cleanup_pop(vecs->lock);
 
 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
 		lock(&vecs->lock);
-- 
2.17.2



More information about the dm-devel mailing list