[dm-devel] [PATCH V2 0/6] allowing path checking to be interrupted.

Wu Guanghao wuguanghao3 at huawei.com
Mon Oct 24 02:22:26 UTC 2022


Friendly ping ...
Is the series of patches for vecs->lock ready to be merged into mainline?
Thanks

在 2022/10/17 19:31, Wu Guanghao 写道:
> 
> 
> 在 2022/9/20 17:20, Wu Guanghao 写道:
>>
>>
>> 在 2022/9/16 2:17, Benjamin Marzinski 写道:
>>> On Thu, Sep 15, 2022 at 02:56:36PM +0800, Wu Guanghao wrote:
>>>> Sorry for the late feedback.
>>>>
>>>> The version we are currently testing is 0.8.4, so we only merge the
>>>> first 3 patches in this series of patches. Then after the actual test,
>>>> it was found that the effect improvement is not very obvious.
>>>>
>>>> Test environment: 1000 multipath devices, 16 paths per device
>>>> Test command:  Aggregate multipath devices using multipathd add path
>>>> Time consuming (time for 16 paths to aggregate 1 multipath device):
>>>> 1. Original:
>>>> 	< 4s:   76.9%;
>>>> 	4s~10s: 18.4%;
>>>> 	>10s:	 4.7%;
>>>> 2. After using the patches:
>>>> 	< 4s:	79.1%;
>>>> 	4s~10s: 18.2%;
>>>> 	>10s:	 2.7%;
>>>>
>>>> >From the results, the time-consuming improvement of the patch is not obvious,
>>>> so we made a few changes to the patch and it worked as expected. The modification
>>>> of the patch is as follows:
>>>>
>>>> Paths_checked is changed to configurable, current setting n = 2.
>>>> Removed judgment on diff_time.
>>>> Sleep change from 10us to 5ms
>>>
>>> I worry that this is giving too much deference to the uevents. If there
>>> is a uevent storm, checking will stop for 5ms every 2 paths checked. I'm
>>> worried that this will make it take significantly longer for the the
>>> path checker to complete a cycle.  Making paths_checked configurable, so
>>> that this doesn't trigger too often does help to avoid the issue that
>>> checking the time from chk_start_time was dealing with, and makes the
>>> mechanics of this simpler.  But 5ms seems like a long time to have to
>>> wait, just to make sure that another process had the time to grab the
>>> vecs lock.  Perhaps it would be better to sleep for a shorter length of
>>> time, but in a loop, where we can check to see if progress has been
>>> made, perhaps by checking some counter of events and user commands
>>> serviced. This way, we aren't sleeping too long for no good reason.
>>> I agree with this, we are also going to adjust the sleep time, and then
>> continue the test.
> 
> We have tested the delay times of 10us, 100us and 1ms, but the results
> weren't very good. More than 30% of the luns aggregation time is greater
> than 4s. Whether the delay time can also be used as a configurable item
> and configured according to the situation.
> 
> The following is the patch content after we have modified checker_loop().
> 
> Signed-off-by: wuguanghao <wuguanghao3 at huawei.com>
> ---
>  libmultipath/config.c   |  3 ++
>  libmultipath/config.h   |  3 ++
>  libmultipath/defaults.h |  3 ++
>  libmultipath/dict.c     | 58 +++++++++++++++++++++++++++
>  libmultipath/structs.h  |  1 +
>  multipathd/main.c       | 87 +++++++++++++++++++++++++++++++++--------
>  6 files changed, 138 insertions(+), 17 deletions(-)
> 
> diff --git a/libmultipath/config.c b/libmultipath/config.c
> index 7d0d711..7658626 100644
> --- a/libmultipath/config.c
> +++ b/libmultipath/config.c
> @@ -752,6 +752,9 @@ load_config (char * file)
>  	conf->remove_retries = 0;
>  	conf->ghost_delay = DEFAULT_GHOST_DELAY;
>  	conf->all_tg_pt = DEFAULT_ALL_TG_PT;
> +	conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
> +	conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
> +
>  	/*
>  	 * preload default hwtable
>  	 */
> diff --git a/libmultipath/config.h b/libmultipath/config.h
> index ceecff2..a26dd99 100644
> --- a/libmultipath/config.h
> +++ b/libmultipath/config.h
> @@ -229,6 +229,9 @@ struct config {
>  	vector elist_property;
>  	vector elist_protocol;
>  	char *enable_foreign;
> +
> +	int check_delay_time;
> +	int check_path_num_per_splice;
>  };
> 
>  extern struct udev * udev;
> diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
> index 52fe05b..b3fb1bc 100644
> --- a/libmultipath/defaults.h
> +++ b/libmultipath/defaults.h
> @@ -50,6 +50,9 @@
>  #define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10
>  #define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1
>  #define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
> +#define DEFAULT_CHECK_DELAY_TIME 10
> +#define DEFAULT_CHECK_PATH_NUM_PER_SPLICE 1
> +
>  /* Enable all foreign libraries by default */
>  #define DEFAULT_ENABLE_FOREIGN ""
> 
> diff --git a/libmultipath/dict.c b/libmultipath/dict.c
> index 2c8ea10..2262caa 100644
> --- a/libmultipath/dict.c
> +++ b/libmultipath/dict.c
> @@ -1407,6 +1407,46 @@ def_uxsock_timeout_handler(struct config *conf, vector strvec)
>  	free(buff);
>  	return 0;
>  }
> +static int
> +def_check_delay_time_handler(struct config *conf, vector strvec)
> +{
> +	int local_check_delay_time;
> +	char *buff;
> +
> +	buff = set_value(strvec);
> +	if (!buff)
> +		return 1;
> +
> +	if (sscanf(buff, "%u", &local_check_delay_time) == 1 &&
> +	    local_check_delay_time > DEFAULT_CHECK_DELAY_TIME)
> +		conf->check_delay_time = local_check_delay_time;
> +	else
> +		conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
> +
> +	free(buff);
> +	return 0;
> +}
> +
> +static int
> +def_check_path_num_per_splice_handler(struct config *conf, vector strvec)
> +{
> +	int local_check_path_num_per_splice;
> +	char *buff;
> +
> +	buff = set_value(strvec);
> +	if (!buff)
> +		return 1;
> +
> +	if (sscanf(buff, "%u", &local_check_path_num_per_splice) == 1 &&
> +	    local_check_path_num_per_splice > DEFAULT_CHECK_PATH_NUM_PER_SPLICE)
> +		conf->check_path_num_per_splice = local_check_path_num_per_splice;
> +	else
> +		conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
> +
> +	free(buff);
> +	return 0;
> +}
> +
> 
>  static int
>  hw_vpd_vendor_handler(struct config *conf, vector strvec)
> @@ -1546,6 +1586,21 @@ snprint_def_uxsock_timeout(struct config *conf, char * buff, int len,
>  	return snprintf(buff, len, "%u", conf->uxsock_timeout);
>  }
> 
> +static int
> +snprint_def_check_delay_time(struct config *conf, char * buff, int len,
> +			   const void *data)
> +{
> +	return snprintf(buff, len, "%u", conf->check_delay_time);
> +}
> +
> +static int
> +snprint_def_check_path_num_per_splice(struct config *conf, char * buff, int len,
> +			   const void *data)
> +{
> +	return snprintf(buff, len, "%u", conf->check_path_num_per_splice);
> +}
> +
> +
>  static int
>  snprint_ble_simple (struct config *conf, char * buff, int len,
>  		    const void * data)
> @@ -1804,6 +1859,9 @@ init_keywords(vector keywords)
>  	install_keyword("enable_foreign", &def_enable_foreign_handler,
>  			&snprint_def_enable_foreign);
>  	install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups);
> +	install_keyword("check_delay_time", &def_check_delay_time_handler, &snprint_def_check_delay_time);
> +	install_keyword("check_path_num_per_splice", &def_check_path_num_per_splice_handler, &snprint_def_check_path_num_per_splice);
> +
>  	__deprecated install_keyword("default_selector", &def_selector_handler, NULL);
>  	__deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
>  	__deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL);
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 11e516a..391de96 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -288,6 +288,7 @@ struct path {
>  	int find_multipaths_timeout;
>  	int marginal;
>  	int vpd_vendor_id;
> +	bool is_checked;
>  	/* configlet pointers */
>  	vector hwe;
>  	struct gen_path generic_path;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 5035d5b..865b7b5 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2388,6 +2388,11 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  	}
>  	return 1;
>  }
> +enum checker_state {
> +	CHECKER_STARTING,
> +	CHECKER_RUNNING,
> +	CHECKER_FINISHED,
> +};
> 
>  static void *
>  checkerloop (void *ap)
> @@ -2395,11 +2400,11 @@ checkerloop (void *ap)
>  	struct vectors *vecs;
>  	struct path *pp;
>  	int count = 0;
> -	unsigned int i;
>  	struct timespec last_time;
>  	struct config *conf;
>  	int foreign_tick = 0;
>  	bool use_watchdog;
> +	int check_delay_time, check_path_num_per_splice;
> 
>  	pthread_cleanup_push(rcu_unregister, NULL);
>  	rcu_register_thread();
> @@ -2414,12 +2419,15 @@ checkerloop (void *ap)
>  	/* use_watchdog is set from process environment and never changes */
>  	conf = get_multipath_config();
>  	use_watchdog = conf->use_watchdog;
> +	check_delay_time = conf->check_delay_time;
> +	check_path_num_per_splice = conf->check_path_num_per_splice;
>  	put_multipath_config(conf);
> 
>  	while (1) {
>  		struct timespec diff_time, start_time, end_time;
> -		int num_paths = 0, strict_timing, rc = 0;
> +		int num_paths = 0, strict_timing, rc = 0, i = 0;
>  		unsigned int ticks = 0;
> +		enum checker_state checker_state = CHECKER_STARTING;
> 
>  		get_monotonic_time(&start_time);
>  		if (start_time.tv_sec && last_time.tv_sec) {
> @@ -2443,21 +2451,66 @@ checkerloop (void *ap)
>  		} else if (rc == EINVAL)
>  			/* daemon shutdown */
>  			break;
> -
> -		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> -		lock(&vecs->lock);
> -		pthread_testcancel();
> -		vector_foreach_slot (vecs->pathvec, pp, i) {
> -			rc = check_path(vecs, pp, ticks);
> -			if (rc < 0) {
> -				vector_del_slot(vecs->pathvec, i);
> -				free_path(pp);
> -				i--;
> -			} else
> -				num_paths += rc;
> -		}
> -		lock_cleanup_pop(vecs->lock);
> -
> +		condlog(4, "check_delay_time is %u, check_path_num_per_splice is %u", check_delay_time, check_path_num_per_splice);
> +
> +		while (checker_state != CHECKER_FINISHED) {
> +			unsigned int paths_checked = 0;
> +			struct timespec chk_start_time;
> +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +			lock(&vecs->lock);
> +			pthread_testcancel();
> +			get_monotonic_time(&chk_start_time);
> +			if (checker_state == CHECKER_STARTING) {
> +				vector_foreach_slot(vecs->pathvec, pp, i)
> +					pp->is_checked = false;
> +				i = 0;
> +				checker_state = CHECKER_RUNNING;
> +			} else {
> +				/*
> +				 * Paths could have been removed since we
> +				 * dropped the lock. Find the path to continue
> +				 * checking at. Since paths can be removed from
> +				 * anywhere in the vector, but can only be added
> +				 * at the end, the last checked path must be
> +				 * between its old location, and the start or
> +				 * the vector.
> +				 */
> +				if (i >= VECTOR_SIZE(vecs->pathvec))
> +					i = VECTOR_SIZE(vecs->pathvec) - 1;
> +				while ((pp = VECTOR_SLOT(vecs->pathvec, i))) {
> +					if (pp->is_checked == true)
> +						break;
> +					i--;
> +				}
> +				i++;
> +			}
> +			vector_foreach_slot_after (vecs->pathvec, pp, i) {
> +				pp->is_checked = true;
> +				rc = check_path(vecs, pp, ticks);
> +				if (rc < 0) {
> +					condlog(1, "%s: check_path() failed, removing",
> +						pp->dev);
> +					vector_del_slot(vecs->pathvec, i);
> +					free_path(pp);
> +					i--;
> +				} else
> +					num_paths += rc;
> +				if (++paths_checked % check_path_num_per_splice == 0 &&
> +				    lock_has_waiters(&vecs->lock)) {
> +					get_monotonic_time(&end_time);
> +					timespecsub(&end_time, &chk_start_time,
> +						    &diff_time);
> +					goto unlock;
> +				}
> +			}
> +			checker_state = CHECKER_FINISHED;
> +unlock:
> +			lock_cleanup_pop(vecs->lock);
> +			if (checker_state != CHECKER_FINISHED) {
> +				/* Yield to waiters */
> +				usleep(check_delay_time);
> +			}
> +		}
>  		pthread_cleanup_push(cleanup_lock, &vecs->lock);
>  		lock(&vecs->lock);
>  		pthread_testcancel();
> 



More information about the dm-devel mailing list