[dm-devel] [PATCH V2 0/6] allowing path checking to be interrupted.
Wu Guanghao
wuguanghao3 at huawei.com
Mon Oct 17 11:31:07 UTC 2022
在 2022/9/20 17:20, Wu Guanghao 写道:
>
>
> 在 2022/9/16 2:17, Benjamin Marzinski 写道:
>> On Thu, Sep 15, 2022 at 02:56:36PM +0800, Wu Guanghao wrote:
>>> Sorry for the late feedback.
>>>
>>> The version we are currently testing is 0.8.4, so we only merge the
>>> first 3 patches in this series of patches. Then after the actual test,
>>> it was found that the effect improvement is not very obvious.
>>>
>>> Test environment: 1000 multipath devices, 16 paths per device
>>> Test command: Aggregate multipath devices using multipathd add path
>>> Time consuming (time for 16 paths to aggregate 1 multipath device):
>>> 1. Original:
>>> < 4s: 76.9%;
>>> 4s~10s: 18.4%;
>>> >10s: 4.7%;
>>> 2. After using the patches:
>>> < 4s: 79.1%;
>>> 4s~10s: 18.2%;
>>> >10s: 2.7%;
>>>
>>> >From the results, the time-consuming improvement of the patch is not obvious,
>>> so we made a few changes to the patch and it worked as expected. The modification
>>> of the patch is as follows:
>>>
>>> Paths_checked is changed to configurable, current setting n = 2.
>>> Removed judgment on diff_time.
>>> Sleep change from 10us to 5ms
>>
>> I worry that this is giving too much deference to the uevents. If there
>> is a uevent storm, checking will stop for 5ms every 2 paths checked. I'm
>> worried that this will make it take significantly longer for the the
>> path checker to complete a cycle. Making paths_checked configurable, so
>> that this doesn't trigger too often does help to avoid the issue that
>> checking the time from chk_start_time was dealing with, and makes the
>> mechanics of this simpler. But 5ms seems like a long time to have to
>> wait, just to make sure that another process had the time to grab the
>> vecs lock. Perhaps it would be better to sleep for a shorter length of
>> time, but in a loop, where we can check to see if progress has been
>> made, perhaps by checking some counter of events and user commands
>> serviced. This way, we aren't sleeping too long for no good reason.
>> I agree with this, we are also going to adjust the sleep time, and then
> continue the test.
We have tested the delay times of 10us, 100us and 1ms, but the results
weren't very good. More than 30% of the luns aggregation time is greater
than 4s. Whether the delay time can also be used as a configurable item
and configured according to the situation.
The following is the patch content after we have modified checker_loop().
Signed-off-by: wuguanghao <wuguanghao3 at huawei.com>
---
libmultipath/config.c | 3 ++
libmultipath/config.h | 3 ++
libmultipath/defaults.h | 3 ++
libmultipath/dict.c | 58 +++++++++++++++++++++++++++
libmultipath/structs.h | 1 +
multipathd/main.c | 87 +++++++++++++++++++++++++++++++++--------
6 files changed, 138 insertions(+), 17 deletions(-)
diff --git a/libmultipath/config.c b/libmultipath/config.c
index 7d0d711..7658626 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -752,6 +752,9 @@ load_config (char * file)
conf->remove_retries = 0;
conf->ghost_delay = DEFAULT_GHOST_DELAY;
conf->all_tg_pt = DEFAULT_ALL_TG_PT;
+ conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
+ conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
+
/*
* preload default hwtable
*/
diff --git a/libmultipath/config.h b/libmultipath/config.h
index ceecff2..a26dd99 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -229,6 +229,9 @@ struct config {
vector elist_property;
vector elist_protocol;
char *enable_foreign;
+
+ int check_delay_time;
+ int check_path_num_per_splice;
};
extern struct udev * udev;
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index 52fe05b..b3fb1bc 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -50,6 +50,9 @@
#define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10
#define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1
#define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
+#define DEFAULT_CHECK_DELAY_TIME 10
+#define DEFAULT_CHECK_PATH_NUM_PER_SPLICE 1
+
/* Enable all foreign libraries by default */
#define DEFAULT_ENABLE_FOREIGN ""
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 2c8ea10..2262caa 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1407,6 +1407,46 @@ def_uxsock_timeout_handler(struct config *conf, vector strvec)
free(buff);
return 0;
}
+static int
+def_check_delay_time_handler(struct config *conf, vector strvec)
+{
+ int local_check_delay_time;
+ char *buff;
+
+ buff = set_value(strvec);
+ if (!buff)
+ return 1;
+
+ if (sscanf(buff, "%u", &local_check_delay_time) == 1 &&
+ local_check_delay_time > DEFAULT_CHECK_DELAY_TIME)
+ conf->check_delay_time = local_check_delay_time;
+ else
+ conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME;
+
+ free(buff);
+ return 0;
+}
+
+static int
+def_check_path_num_per_splice_handler(struct config *conf, vector strvec)
+{
+ int local_check_path_num_per_splice;
+ char *buff;
+
+ buff = set_value(strvec);
+ if (!buff)
+ return 1;
+
+ if (sscanf(buff, "%u", &local_check_path_num_per_splice) == 1 &&
+ local_check_path_num_per_splice > DEFAULT_CHECK_PATH_NUM_PER_SPLICE)
+ conf->check_path_num_per_splice = local_check_path_num_per_splice;
+ else
+ conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE;
+
+ free(buff);
+ return 0;
+}
+
static int
hw_vpd_vendor_handler(struct config *conf, vector strvec)
@@ -1546,6 +1586,21 @@ snprint_def_uxsock_timeout(struct config *conf, char * buff, int len,
return snprintf(buff, len, "%u", conf->uxsock_timeout);
}
+static int
+snprint_def_check_delay_time(struct config *conf, char * buff, int len,
+ const void *data)
+{
+ return snprintf(buff, len, "%u", conf->check_delay_time);
+}
+
+static int
+snprint_def_check_path_num_per_splice(struct config *conf, char * buff, int len,
+ const void *data)
+{
+ return snprintf(buff, len, "%u", conf->check_path_num_per_splice);
+}
+
+
static int
snprint_ble_simple (struct config *conf, char * buff, int len,
const void * data)
@@ -1804,6 +1859,9 @@ init_keywords(vector keywords)
install_keyword("enable_foreign", &def_enable_foreign_handler,
&snprint_def_enable_foreign);
install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups);
+ install_keyword("check_delay_time", &def_check_delay_time_handler, &snprint_def_check_delay_time);
+ install_keyword("check_path_num_per_splice", &def_check_path_num_per_splice_handler, &snprint_def_check_path_num_per_splice);
+
__deprecated install_keyword("default_selector", &def_selector_handler, NULL);
__deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
__deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 11e516a..391de96 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -288,6 +288,7 @@ struct path {
int find_multipaths_timeout;
int marginal;
int vpd_vendor_id;
+ bool is_checked;
/* configlet pointers */
vector hwe;
struct gen_path generic_path;
diff --git a/multipathd/main.c b/multipathd/main.c
index 5035d5b..865b7b5 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2388,6 +2388,11 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
}
return 1;
}
+enum checker_state {
+ CHECKER_STARTING,
+ CHECKER_RUNNING,
+ CHECKER_FINISHED,
+};
static void *
checkerloop (void *ap)
@@ -2395,11 +2400,11 @@ checkerloop (void *ap)
struct vectors *vecs;
struct path *pp;
int count = 0;
- unsigned int i;
struct timespec last_time;
struct config *conf;
int foreign_tick = 0;
bool use_watchdog;
+ int check_delay_time, check_path_num_per_splice;
pthread_cleanup_push(rcu_unregister, NULL);
rcu_register_thread();
@@ -2414,12 +2419,15 @@ checkerloop (void *ap)
/* use_watchdog is set from process environment and never changes */
conf = get_multipath_config();
use_watchdog = conf->use_watchdog;
+ check_delay_time = conf->check_delay_time;
+ check_path_num_per_splice = conf->check_path_num_per_splice;
put_multipath_config(conf);
while (1) {
struct timespec diff_time, start_time, end_time;
- int num_paths = 0, strict_timing, rc = 0;
+ int num_paths = 0, strict_timing, rc = 0, i = 0;
unsigned int ticks = 0;
+ enum checker_state checker_state = CHECKER_STARTING;
get_monotonic_time(&start_time);
if (start_time.tv_sec && last_time.tv_sec) {
@@ -2443,21 +2451,66 @@ checkerloop (void *ap)
} else if (rc == EINVAL)
/* daemon shutdown */
break;
-
- pthread_cleanup_push(cleanup_lock, &vecs->lock);
- lock(&vecs->lock);
- pthread_testcancel();
- vector_foreach_slot (vecs->pathvec, pp, i) {
- rc = check_path(vecs, pp, ticks);
- if (rc < 0) {
- vector_del_slot(vecs->pathvec, i);
- free_path(pp);
- i--;
- } else
- num_paths += rc;
- }
- lock_cleanup_pop(vecs->lock);
-
+ condlog(4, "check_delay_time is %u, check_path_num_per_splice is %u", check_delay_time, check_path_num_per_splice);
+
+ while (checker_state != CHECKER_FINISHED) {
+ unsigned int paths_checked = 0;
+ struct timespec chk_start_time;
+ pthread_cleanup_push(cleanup_lock, &vecs->lock);
+ lock(&vecs->lock);
+ pthread_testcancel();
+ get_monotonic_time(&chk_start_time);
+ if (checker_state == CHECKER_STARTING) {
+ vector_foreach_slot(vecs->pathvec, pp, i)
+ pp->is_checked = false;
+ i = 0;
+ checker_state = CHECKER_RUNNING;
+ } else {
+ /*
+ * Paths could have been removed since we
+ * dropped the lock. Find the path to continue
+ * checking at. Since paths can be removed from
+ * anywhere in the vector, but can only be added
+ * at the end, the last checked path must be
+ * between its old location, and the start or
+ * the vector.
+ */
+ if (i >= VECTOR_SIZE(vecs->pathvec))
+ i = VECTOR_SIZE(vecs->pathvec) - 1;
+ while ((pp = VECTOR_SLOT(vecs->pathvec, i))) {
+ if (pp->is_checked == true)
+ break;
+ i--;
+ }
+ i++;
+ }
+ vector_foreach_slot_after (vecs->pathvec, pp, i) {
+ pp->is_checked = true;
+ rc = check_path(vecs, pp, ticks);
+ if (rc < 0) {
+ condlog(1, "%s: check_path() failed, removing",
+ pp->dev);
+ vector_del_slot(vecs->pathvec, i);
+ free_path(pp);
+ i--;
+ } else
+ num_paths += rc;
+ if (++paths_checked % check_path_num_per_splice == 0 &&
+ lock_has_waiters(&vecs->lock)) {
+ get_monotonic_time(&end_time);
+ timespecsub(&end_time, &chk_start_time,
+ &diff_time);
+ goto unlock;
+ }
+ }
+ checker_state = CHECKER_FINISHED;
+unlock:
+ lock_cleanup_pop(vecs->lock);
+ if (checker_state != CHECKER_FINISHED) {
+ /* Yield to waiters */
+ usleep(check_delay_time);
+ }
+ }
pthread_cleanup_push(cleanup_lock, &vecs->lock);
lock(&vecs->lock);
pthread_testcancel();
--
2.27.0
More information about the dm-devel
mailing list