[dm-devel] [PATCH 13/15] multipathd: Add delayed path reintegration

Benjamin Marzinski bmarzins at redhat.com
Sun Mar 8 03:31:44 UTC 2015


This patch adds two configuration parameters, "delay_watch_checks" and
"delay_wait_checks". delay_watch_checks sets the number of checks that
a path will be watched for, after coming back from a failure. If the
path fails again within this number of checks, when it comes back up
the next time, it will not be used until it has remained up for
delay_wait_checks checks, assuming that there are other paths to the
device.  If it is the only available path, it will immediately be
reintegrated.

This helps setups where a path either won't stay up, or takes some time
to stabilize before it should be used.

Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
---
 libmultipath/checkers.c    |  1 +
 libmultipath/checkers.h    |  9 ++++++
 libmultipath/config.c      |  2 ++
 libmultipath/config.h      |  6 ++++
 libmultipath/configure.c   |  2 ++
 libmultipath/defaults.h    |  1 +
 libmultipath/dict.c        | 60 +++++++++++++++++++++++++++++++++++++++
 libmultipath/dict.h        |  1 +
 libmultipath/print.c       |  2 ++
 libmultipath/propsel.c     | 32 +++++++++++++++++++++
 libmultipath/propsel.h     |  2 ++
 libmultipath/structs.h     |  9 ++++++
 multipath.conf.annotated   | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 multipath.conf.defaults    |  2 ++
 multipath/multipath.conf.5 | 31 ++++++++++++++++++++
 multipathd/main.c          | 34 ++++++++++++++++++----
 16 files changed, 258 insertions(+), 6 deletions(-)

diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 4a4cd7c..1dd5525 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -19,6 +19,7 @@ char *checker_state_names[] = {
       "pending",
       "timeout",
       "removed",
+      "delayed",
 };
 
 static LIST_HEAD(checkers);
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index e62b52f..a935b3f 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -54,6 +54,14 @@
  * PATH REMOVED:
  * - Use: All checkers
  * - Description: Device has been removed from the system
+ *
+ * PATH_DELAYED:
+ * - Use: None of the checkers (returned if the path is being delayed before
+ *   reintegration).
+ * - Description: If a path fails after being up for less than
+ *   delay_watch_checks checks, when it comes back up again, it will not
+ *   be marked as up until it has been up for delay_wait_checks checks.
+ *   During this time, it is marked as "delayed".
  */
 enum path_check_state {
 	PATH_WILD,
@@ -65,6 +73,7 @@ enum path_check_state {
 	PATH_PENDING,
 	PATH_TIMEOUT,
 	PATH_REMOVED,
+	PATH_DELAYED,
 	PATH_MAX_STATE
 };
 
diff --git a/libmultipath/config.c b/libmultipath/config.c
index c36e9db..e88bae0 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -344,6 +344,8 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
 	merge_num(retain_hwhandler);
 	merge_num(detect_prio);
 	merge_num(deferred_remove);
+	merge_num(delay_watch_checks);
+	merge_num(delay_wait_checks);
 
 	/*
 	 * Make sure features is consistent with
diff --git a/libmultipath/config.h b/libmultipath/config.h
index cb3be62..9b1d9a1 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -60,6 +60,8 @@ struct hwentry {
 	int retain_hwhandler;
 	int detect_prio;
 	int deferred_remove;
+	int delay_watch_checks;
+	int delay_wait_checks;
 	char * bl_product;
 };
 
@@ -84,6 +86,8 @@ struct mpentry {
 	int attribute_flags;
 	int user_friendly_names;
 	int deferred_remove;
+	int delay_watch_checks;
+	int delay_wait_checks;
 	uid_t uid;
 	gid_t gid;
 	mode_t mode;
@@ -128,6 +132,8 @@ struct config {
 	int force_sync;
 	int deferred_remove;
 	int processed_main_config;
+	int delay_watch_checks;
+	int delay_wait_checks;
 	unsigned int version[3];
 
 	char * dev;
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index a22d16a..6c96633 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -290,6 +290,8 @@ setup_map (struct multipath * mpp, char * params, int params_size)
 	select_reservation_key(mpp);
 	select_retain_hwhandler(mpp);
 	select_deferred_remove(mpp);
+	select_delay_watch_checks(mpp);
+	select_delay_wait_checks(mpp);
 
 	sysfs_set_scsi_tmo(mpp);
 	/*
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index a7f1c11..23a0871 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -17,6 +17,7 @@
 #define DEFAULT_RETAIN_HWHANDLER RETAIN_HWHANDLER_OFF
 #define DEFAULT_DETECT_PRIO DETECT_PRIO_OFF
 #define DEFAULT_DEFERRED_REMOVE DEFERRED_REMOVE_OFF
+#define DEFAULT_DELAY_CHECKS DELAY_CHECKS_OFF
 
 #define DEFAULT_CHECKINT	5
 #define MAX_CHECKINT(a)		(a << 2)
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 7350231..4a79445 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -979,6 +979,58 @@ declare_def_snprint(reservation_key, print_reservation_key)
 declare_mp_handler(reservation_key, set_reservation_key)
 declare_mp_snprint(reservation_key, print_reservation_key)
 
+static int
+set_delay_checks(vector strvec, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+	char * buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (!strcmp(buff, "no") || !strcmp(buff, "0"))
+		*int_ptr = DELAY_CHECKS_OFF;
+	else if ((*int_ptr = atoi(buff)) < 1)
+		*int_ptr = DELAY_CHECKS_UNDEF;
+
+	FREE(buff);
+	return 0;
+}
+
+int
+print_delay_checks(char * buff, int len, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+
+	switch(*int_ptr) {
+	case DELAY_CHECKS_UNDEF:
+		return 0;
+	case DELAY_CHECKS_OFF:
+		return snprintf(buff, len, "\"off\"");
+	default:
+		return snprintf(buff, len, "%i", *int_ptr);
+	}
+}
+
+declare_def_handler(delay_watch_checks, set_delay_checks)
+declare_def_snprint(delay_watch_checks, print_delay_checks)
+declare_ovr_handler(delay_watch_checks, set_delay_checks)
+declare_ovr_snprint(delay_watch_checks, print_delay_checks)
+declare_hw_handler(delay_watch_checks, set_delay_checks)
+declare_hw_snprint(delay_watch_checks, print_delay_checks)
+declare_mp_handler(delay_watch_checks, set_delay_checks)
+declare_mp_snprint(delay_watch_checks, print_delay_checks)
+
+declare_def_handler(delay_wait_checks, set_delay_checks)
+declare_def_snprint(delay_wait_checks, print_delay_checks)
+declare_ovr_handler(delay_wait_checks, set_delay_checks)
+declare_ovr_snprint(delay_wait_checks, print_delay_checks)
+declare_hw_handler(delay_wait_checks, set_delay_checks)
+declare_hw_snprint(delay_wait_checks, print_delay_checks)
+declare_mp_handler(delay_wait_checks, set_delay_checks)
+declare_mp_snprint(delay_wait_checks, print_delay_checks)
+
 /*
  * blacklist block handlers
  */
@@ -1277,6 +1329,8 @@ init_keywords(void)
 	install_keyword("deferred_remove", &def_deferred_remove_handler, &snprint_def_deferred_remove);
 	install_keyword("partition_delimiter", &def_partition_delim_handler, &snprint_def_partition_delim);
 	install_keyword("config_dir", &def_config_dir_handler, &snprint_def_config_dir);
+	install_keyword("delay_watch_checks", &def_delay_watch_checks_handler, &snprint_def_delay_watch_checks);
+	install_keyword("delay_wait_checks", &def_delay_wait_checks_handler, &snprint_def_delay_wait_checks);
 	__deprecated install_keyword("default_selector", &def_selector_handler, NULL);
 	__deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
 	__deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL);
@@ -1345,6 +1399,8 @@ init_keywords(void)
 	install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
 	install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
 	install_keyword("deferred_remove", &hw_deferred_remove_handler, &snprint_hw_deferred_remove);
+	install_keyword("delay_watch_checks", &hw_delay_watch_checks_handler, &snprint_hw_delay_watch_checks);
+	install_keyword("delay_wait_checks", &hw_delay_wait_checks_handler, &snprint_hw_delay_wait_checks);
 	install_sublevel_end();
 
 	install_keyword_root("overrides", &overrides_handler);
@@ -1370,6 +1426,8 @@ init_keywords(void)
 	install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
 	install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
 	install_keyword("deferred_remove", &ovr_deferred_remove_handler, &snprint_ovr_deferred_remove);
+	install_keyword("delay_watch_checks", &ovr_delay_watch_checks_handler, &snprint_ovr_delay_watch_checks);
+	install_keyword("delay_wait_checks", &ovr_delay_wait_checks_handler, &snprint_ovr_delay_wait_checks);
 
 	install_keyword_root("multipaths", &multipaths_handler);
 	install_keyword_multi("multipath", &multipath_handler, NULL);
@@ -1394,5 +1452,7 @@ init_keywords(void)
 	install_keyword("reservation_key", &mp_reservation_key_handler, &snprint_mp_reservation_key);
 	install_keyword("user_friendly_names", &mp_user_friendly_names_handler, &snprint_mp_user_friendly_names);
 	install_keyword("deferred_remove", &mp_deferred_remove_handler, &snprint_mp_deferred_remove);
+	install_keyword("delay_watch_checks", &mp_delay_watch_checks_handler, &snprint_mp_delay_watch_checks);
+	install_keyword("delay_wait_checks", &mp_delay_wait_checks_handler, &snprint_mp_delay_wait_checks);
 	install_sublevel_end();
 }
diff --git a/libmultipath/dict.h b/libmultipath/dict.h
index 84b6180..4fdd576 100644
--- a/libmultipath/dict.h
+++ b/libmultipath/dict.h
@@ -14,5 +14,6 @@ int print_no_path_retry(char * buff, int len, void *ptr);
 int print_fast_io_fail(char * buff, int len, void *ptr);
 int print_dev_loss(char * buff, int len, void *ptr);
 int print_reservation_key(char * buff, int len, void * ptr);
+int print_delay_checks(char * buff, int len, void *ptr);
 
 #endif /* _DICT_H */
diff --git a/libmultipath/print.c b/libmultipath/print.c
index 9762f1c..130a9af 100644
--- a/libmultipath/print.c
+++ b/libmultipath/print.c
@@ -340,6 +340,8 @@ snprint_chk_state (char * buff, size_t len, struct path * pp)
 		return snprintf(buff, len, "i/o pending");
 	case PATH_TIMEOUT:
 		return snprintf(buff, len, "i/o timeout");
+	case PATH_DELAYED:
+		return snprintf(buff, len, "delayed");
 	default:
 		return snprintf(buff, len, "undef");
 	}
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index f5c158b..46f8f63 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -616,3 +616,35 @@ out:
 		origin);
 	return 0;
 }
+
+extern int
+select_delay_watch_checks(struct multipath *mp)
+{
+	char *origin, buff[12];
+
+	mp_set_mpe(delay_watch_checks);
+	mp_set_ovr(delay_watch_checks);
+	mp_set_hwe(delay_watch_checks);
+	mp_set_conf(delay_watch_checks);
+	mp_set_default(delay_watch_checks, DEFAULT_DELAY_CHECKS);
+out:
+	print_delay_checks(buff, 12, &mp->delay_watch_checks);
+	condlog(3, "%s: delay_watch_checks = %s %s", mp->alias, buff, origin);
+	return 0;
+}
+
+extern int
+select_delay_wait_checks(struct multipath *mp)
+{
+	char *origin, buff[12];
+
+	mp_set_mpe(delay_wait_checks);
+	mp_set_ovr(delay_wait_checks);
+	mp_set_hwe(delay_wait_checks);
+	mp_set_conf(delay_wait_checks);
+	mp_set_default(delay_wait_checks, DEFAULT_DELAY_CHECKS);
+out:
+	print_delay_checks(buff, 12, &mp->delay_wait_checks);
+	condlog(3, "%s: delay_wait_checks = %s %s", mp->alias, buff, origin);
+	return 0;
+}
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
index ffb58a5..f9598e7 100644
--- a/libmultipath/propsel.h
+++ b/libmultipath/propsel.h
@@ -20,3 +20,5 @@ int select_reservation_key(struct multipath *mp);
 int select_retain_hwhandler (struct multipath * mp);
 int select_detect_prio(struct path * pp);
 int select_deferred_remove(struct multipath *mp);
+int select_delay_watch_checks (struct multipath * mp);
+int select_delay_wait_checks (struct multipath * mp);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index b6cfff8..c02c76d 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -140,6 +140,11 @@ enum scsi_protocol {
 	SCSI_PROTOCOL_UNSPEC = 0xf, /* No specific protocol */
 };
 
+enum delay_checks_states {
+	DELAY_CHECKS_OFF = -1,
+	DELAY_CHECKS_UNDEF = 0,
+};
+
 struct sg_id {
 	int host_no;
 	int channel;
@@ -186,6 +191,8 @@ struct path {
 	int priority;
 	int pgindex;
 	int detect_prio;
+	int watch_checks;
+	int wait_checks;
 	char * uid_attribute;
 	char * getuid;
 	struct prio prio;
@@ -221,6 +228,8 @@ struct multipath {
 	int fast_io_fail;
 	int retain_hwhandler;
 	int deferred_remove;
+	int delay_watch_checks;
+	int delay_wait_checks;
 	unsigned int dev_loss;
 	uid_t uid;
 	gid_t gid;
diff --git a/multipath.conf.annotated b/multipath.conf.annotated
index 2b148ac..0be034d 100644
--- a/multipath.conf.annotated
+++ b/multipath.conf.annotated
@@ -314,6 +314,30 @@
 #	#           files, just as if it was in /etc/multipath.conf
 #	# values  : "" or a fully qualified pathname
 #	# default : "/etc/multipath/conf.d"
+#
+#	#
+#	# name    : delay_watch_checks
+#	# scope   : multipathd
+#	# desc    : If set to a value greater than 0, multipathd will watch
+#	#           paths that have recently become valid for this many
+#	#           checks.  If they fail again while they are being watched,
+#	#           when they next become valid, they will not be used until
+#	#           they have stayed up for delay_wait_checks checks.
+#	# values  : no|<n> > 0
+#	# default : no
+#	delay_watch_checks 12
+#
+#	#
+#	# name    : delay_wait_checks
+#	# scope   : multipathd
+#	# desc    : If set to a value greater than 0, when a device that has
+#	#           recently come back online fails again within
+#	#           delay_watch_checks checks, the next time it comes back
+#	#           online, it will be marked as delayed, and not used until
+#	#           it has passed delay_wait_checks checks.
+#	# values  : no|<n> > 0
+#	# default : no
+#	delay_wait_checks 12
 #}
 #	
 ##
@@ -482,6 +506,28 @@
 #		# default : determined by the process
 #		gid 0
 #
+#		#
+#		# name    : delay_watch_checks
+#		# scope   : multipathd
+#		# desc    : If set to a value greater than 0, multipathd will
+#		#           watch paths that have recently become valid for
+#		#           this many checks.  If they fail again while they
+#		#           are being watched, when they next become valid,
+#		#           they will not be used until they have stayed up for
+#		#           delay_wait_checks checks.
+#		# values  : no|<n> > 0
+#		delay_watch_checks 12
+#
+#		#
+#		# name    : delay_wait_checks
+#		# scope   : multipathd
+#		# desc    : If set to a value greater than 0, when a device
+#		#           that has recently come back online fails again
+#		#           within delay_watch_checks checks, the next time it
+#		#           comes online, it will be marked as delayed, and not
+#		#           used until it has passed delay_wait_checks checks.
+#		# values  : no|<n> > 0
+#		delay_wait_checks 12
 #	}
 #	multipath {
 #		wwid	1DEC_____321816758474
@@ -653,6 +699,30 @@
 #		#           before removing it from the system.
 #		# values  : n > 0
 #		dev_loss_tmo 600
+#
+#		#
+#		# name    : delay_watch_checks
+#		# scope   : multipathd
+#		# desc    : If set to a value greater than 0, multipathd will
+#		#           watch paths that have recently become valid for
+#		#           this many checks.  If they fail again while they
+#		#           are being watched, when they next become valid,
+#		#           they will not be used until they have stayed up for
+#		#           delay_wait_checks checks.
+#		# values  : no|<n> > 0
+#		delay_watch_checks 12
+#
+#		#
+#		# name    : delay_wait_checks
+#		# scope   : multipathd
+#		# desc    : If set to a value greater than 0, when a device
+#		#           that has recently come back online fails again
+#		#           within delay_watch_checks checks, the next time it
+#		#           comes online, it will be marked as delayed, and not
+#		#           used until it has passed delay_wait_checks checks.
+#		# values  : no|<n> > 0
+#		delay_wait_checks 12
+#
 #	}
 #	device {
 #		vendor			"COMPAQ  "
diff --git a/multipath.conf.defaults b/multipath.conf.defaults
index 9244f71..5f43c57 100644
--- a/multipath.conf.defaults
+++ b/multipath.conf.defaults
@@ -27,6 +27,8 @@
 #	retain_attached_hw_handler no
 #	detect_prio no
 #	config_dir "/etc/multipath/conf.d"
+#	delay_watch_checks no
+#	delay_wait_checks no
 #}
 #blacklist {
 #	devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 4eb238d..3fe56bc 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -439,6 +439,25 @@ alphabetically for file ending in ".conf" and it will read configuration
 information from them, just as if it was in /etc/multipath.conf.  config_dir
 must either be "" or a fully qualified directory name. Default is
 .I "/etc/multipath/conf.d"
+.TP
+.B delay_watch_checks
+If set to a value greater than 0, multipathd will watch paths that have
+recently become valid for this many checks.  If they fail again while they are
+being watched, when they next become valid, they will not be used until they
+have stayed up for
+.I delay_wait_checks
+checks. Default is
+.I no
+.TP
+.B delay_wait_checks
+If set to a value greater than 0, when a device that has recently come back
+online fails again within
+.I delay_watch_checks
+checks, the next time it comes back online, it will be marked as delayed, and not
+used until it has passed
+.I delay_wait_checks
+checks. Default is
+.I no
 .
 .SH "blacklist section"
 The
@@ -559,6 +578,10 @@ section:
 .B reservation_key
 .TP
 .B deferred_remove
+.TP
+.B delay_watch_checks
+.TP
+.B delay_wait_checks
 .RE
 .PD
 .LP
@@ -651,6 +674,10 @@ section:
 .B detect_prio
 .TP
 .B deferred_remove
+.TP
+.B delay_watch_checks
+.TP
+.B delay_wait_checks
 .RE
 .PD
 .LP
@@ -706,6 +733,10 @@ sections:
 .B detect_prio
 .TP
 .B deferred_remove
+.TP
+.B delay_watch_checks
+.TP
+.B delay_wait_checks
 .RE
 .PD
 .LP
diff --git a/multipathd/main.c b/multipathd/main.c
index 7429f66..aac8a19 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -192,7 +192,8 @@ sync_map_state(struct multipath *mpp)
 	vector_foreach_slot (mpp->pg, pgp, i){
 		vector_foreach_slot (pgp->paths, pp, j){
 			if (pp->state == PATH_UNCHECKED || 
-			    pp->state == PATH_WILD)
+			    pp->state == PATH_WILD ||
+			    pp->state == PATH_DELAYED)
 				continue;
 			if ((pp->dmstate == PSTATE_FAILED ||
 			     pp->dmstate == PSTATE_UNDEF) &&
@@ -1184,6 +1185,16 @@ check_path (struct vectors * vecs, struct path * pp)
 	if (!pp->mpp)
 		return 0;
 
+	if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
+	     pp->wait_checks > 0) {
+		if (pp->mpp && pp->mpp->nr_active > 0) {
+			pp->state = PATH_DELAYED;
+			pp->wait_checks--;
+			return 1;
+		} else
+			pp->wait_checks = 0;
+	}
+
 	pp->chkrstate = newstate;
 	if (newstate != pp->state) {
 		int oldstate = pp->state;
@@ -1203,9 +1214,14 @@ check_path (struct vectors * vecs, struct path * pp)
 			 * proactively fail path in the DM
 			 */
 			if (oldstate == PATH_UP ||
-			    oldstate == PATH_GHOST)
+			    oldstate == PATH_GHOST) {
 				fail_path(pp, 1);
-			else
+				if (pp->mpp->delay_wait_checks > 0 &&
+				    pp->watch_checks > 0) {
+					pp->wait_checks = pp->mpp->delay_wait_checks;
+					pp->watch_checks = 0;
+				}
+			}else
 				fail_path(pp, 0);
 
 			/*
@@ -1232,11 +1248,15 @@ check_path (struct vectors * vecs, struct path * pp)
 		 * reinstate this path
 		 */
 		if (oldstate != PATH_UP &&
-		    oldstate != PATH_GHOST)
+		    oldstate != PATH_GHOST) {
+			if (pp->mpp->delay_watch_checks > 0)
+				pp->watch_checks = pp->mpp->delay_watch_checks;
 			reinstate_path(pp, 1);
-		else
+		} else {
+			if (pp->watch_checks > 0)
+				pp->watch_checks--;
 			reinstate_path(pp, 0);
-
+		}
 		new_path_up = 1;
 
 		if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
@@ -1269,6 +1289,8 @@ check_path (struct vectors * vecs, struct path * pp)
 				condlog(4, "%s: delay next check %is",
 					pp->dev_t, pp->checkint);
 			}
+			if (pp->watch_checks > 0)
+				pp->watch_checks--;
 			pp->tick = pp->checkint;
 		}
 	}
-- 
1.8.3.1




More information about the dm-devel mailing list