[dm-devel] [PATCH V2 0/6] allowing path checking to be interrupted.

Wu Guanghao wuguanghao3 at huawei.com
Mon Oct 17 11:31:07 UTC 2022



在 2022/9/20 17:20, Wu Guanghao 写道:
> 
> 
> 在 2022/9/16 2:17, Benjamin Marzinski 写道:
>> On Thu, Sep 15, 2022 at 02:56:36PM +0800, Wu Guanghao wrote:
>>> Sorry for the late feedback.
>>>
>>> The version we are currently testing is 0.8.4, so we only merge the
>>> first 3 patches in this series of patches. Then after the actual test,
>>> it was found that the effect improvement is not very obvious.
>>>
>>> Test environment: 1000 multipath devices, 16 paths per device
>>> Test command:  Aggregate multipath devices using multipathd add path
>>> Time consuming (time for 16 paths to aggregate 1 multipath device):
>>> 1. Original:
>>> 	< 4s:   76.9%;
>>> 	4s~10s: 18.4%;
>>> 	>10s:	 4.7%;
>>> 2. After using the patches:
>>> 	< 4s:	79.1%;
>>> 	4s~10s: 18.2%;
>>> 	>10s:	 2.7%;
>>>
>>> >From the results, the time-consuming improvement of the patch is not obvious,
>>> so we made a few changes to the patch and it worked as expected. The modification
>>> of the patch is as follows:
>>>
>>> Paths_checked is changed to configurable, current setting n = 2.
>>> Removed judgment on diff_time.
>>> Sleep change from 10us to 5ms
>>
>> I worry that this is giving too much deference to the uevents. If there
>> is a uevent storm, checking will stop for 5ms every 2 paths checked. I'm
>> worried that this will make it take significantly longer for the the
>> path checker to complete a cycle.  Making paths_checked configurable, so
>> that this doesn't trigger too often does help to avoid the issue that
>> checking the time from chk_start_time was dealing with, and makes the
>> mechanics of this simpler.  But 5ms seems like a long time to have to
>> wait, just to make sure that another process had the time to grab the
>> vecs lock.  Perhaps it would be better to sleep for a shorter length of
>> time, but in a loop, where we can check to see if progress has been
>> made, perhaps by checking some counter of events and user commands
>> serviced. This way, we aren't sleeping too long for no good reason.
>> I agree with this, we are also going to adjust the sleep time, and then
> continue the test.

We have tested the delay times of 10us, 100us and 1ms, but the results
weren't very good. More than 30% of the luns aggregation time is greater
than 4s. Whether the delay time can also be used as a configurable item
and configured according to the situation.

The following is the patch content after we have modified checker_loop().

Signed-off-by: wuguanghao <wuguanghao3 at huawei.com>
---
 libmultipath/config.c   |  3 ++
 libmultipath/config.h   |  3 ++
 libmultipath/defaults.h |  3 ++
 libmultipath/dict.c     | 58 +++++++++++++++++++++++++++
 libmultipath/structs.h  |  1 +
 multipathd/main.c       | 87 +++++++++++++++++++++++++++++++++--------
 6 files changed, 138 insertions(+), 17 deletions(-)

diff --git a/libmultipath/config.c b/libmultipath/config.c
index 7d0d711..7658626 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -752,6 +752,9 @@ load_config (char * file)
 	conf->remove_retries = 0;
 	conf->ghost_delay = DEFAULT_GHOST_DELAY;
 	conf->all_tg_pt = DEFAULT_ALL_TG_PT;
+	conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
+	conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
+
 	/*
 	 * preload default hwtable
 	 */
diff --git a/libmultipath/config.h b/libmultipath/config.h
index ceecff2..a26dd99 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -229,6 +229,9 @@ struct config {
 	vector elist_property;
 	vector elist_protocol;
 	char *enable_foreign;
+
+	int check_delay_time;
+	int check_path_num_per_splice;
 };

 extern struct udev * udev;
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index 52fe05b..b3fb1bc 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -50,6 +50,9 @@
 #define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10
 #define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1
 #define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
+#define DEFAULT_CHECK_DELAY_TIME 10
+#define DEFAULT_CHECK_PATH_NUM_PER_SPLICE 1
+
 /* Enable all foreign libraries by default */
 #define DEFAULT_ENABLE_FOREIGN ""

diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 2c8ea10..2262caa 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1407,6 +1407,46 @@ def_uxsock_timeout_handler(struct config *conf, vector strvec)
 	free(buff);
 	return 0;
 }
+static int
+def_check_delay_time_handler(struct config *conf, vector strvec)
+{
+	int local_check_delay_time;
+	char *buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (sscanf(buff, "%u", &local_check_delay_time) == 1 &&
+	    local_check_delay_time > DEFAULT_CHECK_DELAY_TIME)
+		conf->check_delay_time = local_check_delay_time;
+	else
+		conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
+
+	free(buff);
+	return 0;
+}
+
+static int
+def_check_path_num_per_splice_handler(struct config *conf, vector strvec)
+{
+	int local_check_path_num_per_splice;
+	char *buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (sscanf(buff, "%u", &local_check_path_num_per_splice) == 1 &&
+	    local_check_path_num_per_splice > DEFAULT_CHECK_PATH_NUM_PER_SPLICE)
+		conf->check_path_num_per_splice = local_check_path_num_per_splice;
+	else
+		conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
+
+	free(buff);
+	return 0;
+}
+

 static int
 hw_vpd_vendor_handler(struct config *conf, vector strvec)
@@ -1546,6 +1586,21 @@ snprint_def_uxsock_timeout(struct config *conf, char * buff, int len,
 	return snprintf(buff, len, "%u", conf->uxsock_timeout);
 }

+static int
+snprint_def_check_delay_time(struct config *conf, char * buff, int len,
+			   const void *data)
+{
+	return snprintf(buff, len, "%u", conf->check_delay_time);
+}
+
+static int
+snprint_def_check_path_num_per_splice(struct config *conf, char * buff, int len,
+			   const void *data)
+{
+	return snprintf(buff, len, "%u", conf->check_path_num_per_splice);
+}
+
+
 static int
 snprint_ble_simple (struct config *conf, char * buff, int len,
 		    const void * data)
@@ -1804,6 +1859,9 @@ init_keywords(vector keywords)
 	install_keyword("enable_foreign", &def_enable_foreign_handler,
 			&snprint_def_enable_foreign);
 	install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups);
+	install_keyword("check_delay_time", &def_check_delay_time_handler, &snprint_def_check_delay_time);
+	install_keyword("check_path_num_per_splice", &def_check_path_num_per_splice_handler, &snprint_def_check_path_num_per_splice);
+
 	__deprecated install_keyword("default_selector", &def_selector_handler, NULL);
 	__deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
 	__deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 11e516a..391de96 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -288,6 +288,7 @@ struct path {
 	int find_multipaths_timeout;
 	int marginal;
 	int vpd_vendor_id;
+	bool is_checked;
 	/* configlet pointers */
 	vector hwe;
 	struct gen_path generic_path;
diff --git a/multipathd/main.c b/multipathd/main.c
index 5035d5b..865b7b5 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2388,6 +2388,11 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	}
 	return 1;
 }
+enum checker_state {
+	CHECKER_STARTING,
+	CHECKER_RUNNING,
+	CHECKER_FINISHED,
+};

 static void *
 checkerloop (void *ap)
@@ -2395,11 +2400,11 @@ checkerloop (void *ap)
 	struct vectors *vecs;
 	struct path *pp;
 	int count = 0;
-	unsigned int i;
 	struct timespec last_time;
 	struct config *conf;
 	int foreign_tick = 0;
 	bool use_watchdog;
+	int check_delay_time, check_path_num_per_splice;

 	pthread_cleanup_push(rcu_unregister, NULL);
 	rcu_register_thread();
@@ -2414,12 +2419,15 @@ checkerloop (void *ap)
 	/* use_watchdog is set from process environment and never changes */
 	conf = get_multipath_config();
 	use_watchdog = conf->use_watchdog;
+	check_delay_time = conf->check_delay_time;
+	check_path_num_per_splice = conf->check_path_num_per_splice;
 	put_multipath_config(conf);

 	while (1) {
 		struct timespec diff_time, start_time, end_time;
-		int num_paths = 0, strict_timing, rc = 0;
+		int num_paths = 0, strict_timing, rc = 0, i = 0;
 		unsigned int ticks = 0;
+		enum checker_state checker_state = CHECKER_STARTING;

 		get_monotonic_time(&start_time);
 		if (start_time.tv_sec && last_time.tv_sec) {
@@ -2443,21 +2451,66 @@ checkerloop (void *ap)
 		} else if (rc == EINVAL)
 			/* daemon shutdown */
 			break;
-
-		pthread_cleanup_push(cleanup_lock, &vecs->lock);
-		lock(&vecs->lock);
-		pthread_testcancel();
-		vector_foreach_slot (vecs->pathvec, pp, i) {
-			rc = check_path(vecs, pp, ticks);
-			if (rc < 0) {
-				vector_del_slot(vecs->pathvec, i);
-				free_path(pp);
-				i--;
-			} else
-				num_paths += rc;
-		}
-		lock_cleanup_pop(vecs->lock);
-
+		condlog(4, "check_delay_time is %u, check_path_num_per_splice is %u", check_delay_time, check_path_num_per_splice);
+
+		while (checker_state != CHECKER_FINISHED) {
+			unsigned int paths_checked = 0;
+			struct timespec chk_start_time;
+			pthread_cleanup_push(cleanup_lock, &vecs->lock);
+			lock(&vecs->lock);
+			pthread_testcancel();
+			get_monotonic_time(&chk_start_time);
+			if (checker_state == CHECKER_STARTING) {
+				vector_foreach_slot(vecs->pathvec, pp, i)
+					pp->is_checked = false;
+				i = 0;
+				checker_state = CHECKER_RUNNING;
+			} else {
+				/*
+				 * Paths could have been removed since we
+				 * dropped the lock. Find the path to continue
+				 * checking at. Since paths can be removed from
+				 * anywhere in the vector, but can only be added
+				 * at the end, the last checked path must be
+				 * between its old location, and the start or
+				 * the vector.
+				 */
+				if (i >= VECTOR_SIZE(vecs->pathvec))
+					i = VECTOR_SIZE(vecs->pathvec) - 1;
+				while ((pp = VECTOR_SLOT(vecs->pathvec, i))) {
+					if (pp->is_checked == true)
+						break;
+					i--;
+				}
+				i++;
+			}
+			vector_foreach_slot_after (vecs->pathvec, pp, i) {
+				pp->is_checked = true;
+				rc = check_path(vecs, pp, ticks);
+				if (rc < 0) {
+					condlog(1, "%s: check_path() failed, removing",
+						pp->dev);
+					vector_del_slot(vecs->pathvec, i);
+					free_path(pp);
+					i--;
+				} else
+					num_paths += rc;
+				if (++paths_checked % check_path_num_per_splice == 0 &&
+				    lock_has_waiters(&vecs->lock)) {
+					get_monotonic_time(&end_time);
+					timespecsub(&end_time, &chk_start_time,
+						    &diff_time);
+					goto unlock;
+				}
+			}
+			checker_state = CHECKER_FINISHED;
+unlock:
+			lock_cleanup_pop(vecs->lock);
+			if (checker_state != CHECKER_FINISHED) {
+				/* Yield to waiters */
+				usleep(check_delay_time);
+			}
+		}
 		pthread_cleanup_push(cleanup_lock, &vecs->lock);
 		lock(&vecs->lock);
 		pthread_testcancel();
-- 
2.27.0



More information about the dm-devel mailing list