[dm-devel] [PATCH 4/5] multipathd: reload map if the path groups are out of order

Benjamin Marzinski bmarzins at redhat.com
Wed May 24 23:21:11 UTC 2023


need_switch_pathgroup() only checks if the currently used pathgroup is
not the highest priority pathgroup. If it isn't, all multipathd does is
instruct the kernel to switch to the correct pathgroup.  However, the
kernel treats the pathgroups as if they were ordered by priority. When
the kernel runs out of paths to use in the currently selected pathgroup,
it will start checking the pathgroups in order until it finds one with
usable paths.

need_switch_pathgroup() should also check if the pathgroups are out of
order, and if so, multipathd should reload the map to reorder them
correctly.

Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
---
 libmultipath/libmultipath.version |  5 ++++
 libmultipath/switchgroup.c        | 27 ++++++++++++++++++++++
 libmultipath/switchgroup.h        |  1 +
 multipathd/main.c                 | 38 +++++++++++++++++++++----------
 4 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/libmultipath/libmultipath.version b/libmultipath/libmultipath.version
index 8f72c452..38074699 100644
--- a/libmultipath/libmultipath.version
+++ b/libmultipath/libmultipath.version
@@ -237,3 +237,8 @@ global:
 local:
 	*;
 };
+
+LIBMULTIPATH_19.1.0 {
+global:
+	path_groups_in_order;
+} LIBMULTIPATH_19.0.0;
diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c
index b1e1f39b..b1180839 100644
--- a/libmultipath/switchgroup.c
+++ b/libmultipath/switchgroup.c
@@ -7,6 +7,33 @@
 #include "structs.h"
 #include "switchgroup.h"
 
+bool path_groups_in_order(struct multipath *mpp)
+{
+	int i;
+	struct pathgroup *pgp;
+	bool seen_marginal_pg = false;
+	int last_prio = INT_MAX;
+
+	if (VECTOR_SIZE(mpp->pg) < 2)
+		return true;
+
+	vector_foreach_slot(mpp->pg, pgp, i) {
+		/* skip pgs with PRIO_UNDEF, since this is likely temporary */
+		if (!pgp->paths || pgp->priority == PRIO_UNDEF)
+			continue;
+		if (pgp->marginal && !seen_marginal_pg) {
+			last_prio = INT_MAX;
+			continue;
+		}
+		if (seen_marginal_pg && !pgp->marginal)
+			return false;
+		if (pgp->priority > last_prio)
+			return false;
+		last_prio = pgp->priority;
+	}
+	return true;
+}
+
 void path_group_prio_update(struct pathgroup *pgp)
 {
 	int i;
diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h
index 9365e2e2..43dbb6c9 100644
--- a/libmultipath/switchgroup.h
+++ b/libmultipath/switchgroup.h
@@ -1,2 +1,3 @@
 void path_group_prio_update (struct pathgroup * pgp);
 int select_path_group (struct multipath * mpp);
+bool path_groups_in_order(struct multipath *mpp);
diff --git a/multipathd/main.c b/multipathd/main.c
index e7c272ad..2ea7c76b 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -396,7 +396,7 @@ void put_multipath_config(__attribute__((unused)) void *arg)
 }
 
 static int
-need_switch_pathgroup (struct multipath * mpp, int refresh)
+need_switch_pathgroup (struct multipath * mpp, int refresh, bool *need_reload)
 {
 	struct pathgroup * pgp;
 	struct path * pp;
@@ -404,6 +404,7 @@ need_switch_pathgroup (struct multipath * mpp, int refresh)
 	struct config *conf;
 	int bestpg;
 
+	*need_reload = false;
 	if (!mpp)
 		return 0;
 
@@ -430,10 +431,9 @@ need_switch_pathgroup (struct multipath * mpp, int refresh)
 		return 0;
 
 	mpp->bestpg = bestpg;
-	if (mpp->bestpg != mpp->nextpg)
-		return 1;
+	*need_reload = !path_groups_in_order(mpp);
 
-	return 0;
+	return (*need_reload || mpp->bestpg != mpp->nextpg);
 }
 
 static void
@@ -1982,20 +1982,26 @@ ghost_delay_tick(struct vectors *vecs)
 }
 
 static void
-deferred_failback_tick (vector mpvec)
+deferred_failback_tick (struct vectors *vecs)
 {
 	struct multipath * mpp;
 	unsigned int i;
+	bool need_reload;
 
-	vector_foreach_slot (mpvec, mpp, i) {
+	vector_foreach_slot (vecs->mpvec, mpp, i) {
 		/*
 		 * deferred failback getting sooner
 		 */
 		if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
 			mpp->failback_tick--;
 
-			if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
-				switch_pathgroup(mpp);
+			if (!mpp->failback_tick &&
+			    need_switch_pathgroup(mpp, 1, &need_reload)) {
+				if (need_reload)
+					reload_and_sync_map(mpp, vecs, 0);
+				else
+					switch_pathgroup(mpp);
+			}
 		}
 	}
 }
@@ -2579,6 +2585,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		int prio_changed = update_prio(pp, new_path_up);
 		bool need_refresh = (!new_path_up && prio_changed &&
 				     pp->priority != PRIO_UNDEF);
+		bool need_reload;
 
 		if (prio_changed &&
 		    pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio &&
@@ -2586,15 +2593,22 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 			condlog(2, "%s: path priorities changed. reloading",
 				pp->mpp->alias);
 			reload_and_sync_map(pp->mpp, vecs, !new_path_up);
-		} else if (need_switch_pathgroup(pp->mpp, need_refresh)) {
+		} else if (need_switch_pathgroup(pp->mpp, need_refresh,
+			                         &need_reload)) {
 			if (pp->mpp->pgfailback > 0 &&
 			    (new_path_up || pp->mpp->failback_tick <= 0))
 				pp->mpp->failback_tick =
 					pp->mpp->pgfailback + 1;
 			else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
 				 (chkr_new_path_up &&
-				  followover_should_failback(pp)))
-				switch_pathgroup(pp->mpp);
+				  followover_should_failback(pp))) {
+				if (need_reload)
+					reload_and_sync_map(pp->mpp, vecs,
+							    !need_refresh &&
+							    !new_path_up);
+				else
+					switch_pathgroup(pp->mpp);
+			}
 		}
 	}
 	return 1;
@@ -2720,7 +2734,7 @@ unlock:
 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
 		lock(&vecs->lock);
 		pthread_testcancel();
-		deferred_failback_tick(vecs->mpvec);
+		deferred_failback_tick(vecs);
 		retry_count_tick(vecs->mpvec);
 		missing_uev_wait_tick(vecs);
 		ghost_delay_tick(vecs);
-- 
2.17.2



More information about the dm-devel mailing list