[dm-devel] [RFC PATCH] libmultipath: drop mpp->nr_active field
Benjamin Marzinski
bmarzins at redhat.com
Thu Nov 14 19:20:29 UTC 2019
On Wed, Nov 13, 2019 at 10:23:16PM +0000, Martin Wilck wrote:
> From: Martin Wilck <mwilck at suse.com>
>
> The tracking of nr_active has turned out to be error prone and hard
> to verify. Calculating it on the fly is a quick operation, so
> do this rather than trying to track nr_active. Use a boolean
> field instead to track whether a map is currently in recovery mode.
>
> Signed-off-by: Martin Wilck <mwilck at suse.com>
> ---
> libmultipath/configure.c | 5 ++---
> libmultipath/devmapper.c | 2 +-
> libmultipath/io_err_stat.c | 4 ++--
> libmultipath/print.c | 5 +++--
> libmultipath/structs.c | 19 +++++++++++++++++++
> libmultipath/structs.h | 3 ++-
> libmultipath/structs_vec.c | 14 ++++++++++----
> multipathd/cli_handlers.c | 8 ++++----
> multipathd/main.c | 24 ++++++++----------------
> 9 files changed, 51 insertions(+), 33 deletions(-)
>
> diff --git a/libmultipath/configure.c b/libmultipath/configure.c
> index 5ac7d903..c95848a0 100644
> --- a/libmultipath/configure.c
> +++ b/libmultipath/configure.c
> @@ -401,7 +401,6 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
> condlog(2, "%s: setting up map with %d/%d path checkers pending",
> mpp->alias, n_pending, n_paths);
> }
> - mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
>
> /*
> * ponders each path group and determine highest prio pg
> @@ -934,8 +933,8 @@ int domap(struct multipath *mpp, char *params, int is_daemon)
> }
>
> sysfs_set_max_sectors_kb(mpp, 0);
> - if (is_daemon && mpp->ghost_delay > 0 && mpp->nr_active &&
> - pathcount(mpp, PATH_GHOST) == mpp->nr_active)
> + if (is_daemon && mpp->ghost_delay > 0 && count_active_paths(mpp) &&
> + pathcount(mpp, PATH_UP) == 0)
> mpp->ghost_delay_tick = mpp->ghost_delay;
> r = dm_addmap_create(mpp, params);
>
> diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c
> index 0f0c3a34..641dc9b6 100644
> --- a/libmultipath/devmapper.c
> +++ b/libmultipath/devmapper.c
> @@ -403,7 +403,7 @@ static uint16_t build_udev_flags(const struct multipath *mpp, int reload)
> /* DM_UDEV_DISABLE_LIBRARY_FALLBACK is added in dm_addmap */
> return (mpp->skip_kpartx == SKIP_KPARTX_ON ?
> MPATH_UDEV_NO_KPARTX_FLAG : 0) |
> - ((mpp->nr_active == 0 || mpp->ghost_delay_tick > 0)?
> + ((count_active_paths(mpp) == 0 || mpp->ghost_delay_tick > 0) ?
> MPATH_UDEV_NO_PATHS_FLAG : 0) |
> (reload && !mpp->force_udev_reload ?
> MPATH_UDEV_RELOAD_FLAG : 0);
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index 554b7778..7d662f1a 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -383,7 +383,7 @@ int need_io_err_check(struct path *pp)
>
> if (uatomic_read(&io_err_thread_running) == 0)
> return 0;
> - if (pp->mpp->nr_active <= 0) {
> + if (count_active_paths(pp->mpp) <= 0) {
> io_err_stat_log(2, "%s: recover path early", pp->dev);
> goto recover;
> }
> @@ -481,7 +481,7 @@ static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp)
> */
> path->tick = 1;
>
> - } else if (path->mpp && path->mpp->nr_active > 0) {
> + } else if (path->mpp && count_active_paths(path->mpp) > 0) {
> io_err_stat_log(3, "%s: keep failing the dm path %s",
> path->mpp->alias, path->dev);
> path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK;
> diff --git a/libmultipath/print.c b/libmultipath/print.c
> index 907469ad..05e8f385 100644
> --- a/libmultipath/print.c
> +++ b/libmultipath/print.c
> @@ -181,9 +181,10 @@ snprint_queueing (char * buff, size_t len, const struct multipath * mpp)
> return snprintf(buff, len, "-");
> else if (mpp->no_path_retry > 0) {
> if (mpp->retry_tick > 0)
> +
> return snprintf(buff, len, "%i sec",
> mpp->retry_tick);
> - else if (mpp->retry_tick == 0 && mpp->nr_active > 0)
> + else if (mpp->retry_tick == 0 && count_active_paths(mpp) > 0)
> return snprintf(buff, len, "%i chk",
> mpp->no_path_retry);
> else
> @@ -195,7 +196,7 @@ snprint_queueing (char * buff, size_t len, const struct multipath * mpp)
> static int
> snprint_nb_paths (char * buff, size_t len, const struct multipath * mpp)
> {
> - return snprint_int(buff, len, mpp->nr_active);
> + return snprint_int(buff, len, count_active_paths(mpp));
> }
>
> static int
> diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> index bf7fdd73..f63bae53 100644
> --- a/libmultipath/structs.c
> +++ b/libmultipath/structs.c
> @@ -478,6 +478,25 @@ int pathcount(const struct multipath *mpp, int state)
> return count;
> }
>
> +int count_active_paths(const struct multipath *mpp)
> +{
> + struct pathgroup *pgp;
> + struct path *pp;
> + int count = 0;
> + int i, j;
> +
> + if (!mpp->pg)
> + return 0;
> +
> + vector_foreach_slot (mpp->pg, pgp, i) {
> + vector_foreach_slot (pgp->paths, pp, j) {
> + if (pp->state == PATH_UP || pp->state == PATH_GHOST)
> + count++;
> + }
> + }
> + return count;
> +}
> +
> int pathcmp(const struct pathgroup *pgp, const struct pathgroup *cpgp)
> {
> int i, j;
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index a3adf906..3a4d8f46 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -309,7 +309,6 @@ struct multipath {
> int pgfailback;
> int failback_tick;
> int rr_weight;
> - int nr_active; /* current available(= not known as failed) paths */
> int no_path_retry; /* number of retries after all paths are down */
> int retry_tick; /* remaining times for retries */
> int disable_queueing;
> @@ -319,6 +318,7 @@ struct multipath {
> int fast_io_fail;
> int retain_hwhandler;
> int deferred_remove;
> + int in_recovery;
> int san_path_err_threshold;
> int san_path_err_forget_rate;
> int san_path_err_recovery_time;
> @@ -448,6 +448,7 @@ struct path * first_path (const struct multipath *mpp);
>
> int pathcountgr (const struct pathgroup *, int);
> int pathcount (const struct multipath *, int);
> +int count_active_paths(const struct multipath *);
> int pathcmp (const struct pathgroup *, const struct pathgroup *);
> int add_feature (char **, const char *);
> int remove_feature (char **, const char *);
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index c43b58fb..ff124e10 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -302,6 +302,7 @@ void enter_recovery_mode(struct multipath *mpp)
> * meaning of +1: retry_tick may be decremented in checkerloop before
> * starting retry.
> */
> + mpp->in_recovery = 1;
> mpp->stat_queueing_timeouts++;
> mpp->retry_tick = mpp->no_path_retry * checkint + 1;
> condlog(1, "%s: Entering recovery mode: max_retries=%d",
> @@ -450,25 +451,30 @@ int verify_paths(struct multipath *mpp, struct vectors *vecs)
> */
> void update_queue_mode_del_path(struct multipath *mpp)
> {
> - if (--mpp->nr_active == 0) {
> + int active = count_active_paths(mpp);
> +
> + if (!mpp->in_recovery && active == 0) {
> if (mpp->no_path_retry > 0)
> enter_recovery_mode(mpp);
> else if (mpp->no_path_retry != NO_PATH_RETRY_QUEUE)
> mpp->stat_map_failures++;
> }
> - condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> + condlog(2, "%s: remaining active paths: %d", mpp->alias, active);
> }
I think that this patch will end up having cases where multipathd leaves
recovery mode twice, which isn't a big deal, but can be avoided. The
issue is that before this patch, we exit recovery mode by setting
mpp->retry_tick to zero, when an active path exists and no_path_retry is
something other than NO_PATH_RETRY_FAIL. With this patch
update_queue_mode_add_path() also sets in_recovery to 0 when we exit
recovery mode, but we don't clear in_recovery in set_no_path_retry(). We
could possibly have an exit_recovery_mode() function that clears both of
these and sets queue_if_no_path, and use that just like we do with
enter_recovery_mode().
> void update_queue_mode_add_path(struct multipath *mpp)
> {
> - if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
> + int active = count_active_paths(mpp);
> +
> + if (mpp->in_recovery && active > 0 && mpp->no_path_retry > 0) {
> /* come back to normal mode from retry mode */
> + mpp->in_recovery = 0;
> mpp->retry_tick = 0;
> dm_queue_if_no_path(mpp->alias, 1);
> condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
> condlog(1, "%s: Recovered to normal mode", mpp->alias);
> }
> - condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> + condlog(2, "%s: remaining active paths: %d", mpp->alias, active);
> }
>
> vector get_used_hwes(const struct _vector *pathvec)
> diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
> index 8a899049..06f32e30 100644
> --- a/multipathd/cli_handlers.c
> +++ b/multipathd/cli_handlers.c
> @@ -1028,7 +1028,7 @@ cli_restore_queueing(void *v, char **reply, int *len, void *data)
> mpp->no_path_retry != NO_PATH_RETRY_FAIL) {
> dm_queue_if_no_path(mpp->alias, 1);
> if (mpp->no_path_retry > 0) {
> - if (mpp->nr_active > 0)
> + if (count_active_paths(mpp) > 0)
I don't think we should get here when we're in recovery mode, since
there are active paths. However, it certainly wouldn't hurt to clear
in_recovery, since we've enabled queue_if_no_path, and cleared
retry_tick. Obviously, it would take some refactoring to use an
exit_recovery_mode function here while avoiding calling
dm_queue_if_no_path(mpp->alias, 1) twice.
> mpp->retry_tick = 0;
> else
> enter_recovery_mode(mpp);
> @@ -1055,7 +1055,7 @@ cli_restore_all_queueing(void *v, char **reply, int *len, void *data)
> mpp->no_path_retry != NO_PATH_RETRY_FAIL) {
> dm_queue_if_no_path(mpp->alias, 1);
> if (mpp->no_path_retry > 0) {
> - if (mpp->nr_active > 0)
> + if (count_active_paths(mpp) > 0)
Same here.
> mpp->retry_tick = 0;
> else
> enter_recovery_mode(mpp);
> @@ -1085,7 +1085,7 @@ cli_disable_queueing(void *v, char **reply, int *len, void *data)
> return 1;
> }
>
> - if (mpp->nr_active == 0)
> + if (count_active_paths(mpp) == 0)
> mpp->stat_map_failures++;
> mpp->retry_tick = 0;
> mpp->no_path_retry = NO_PATH_RETRY_FAIL;
> @@ -1103,7 +1103,7 @@ cli_disable_all_queueing(void *v, char **reply, int *len, void *data)
>
> condlog(2, "disable queueing (operator)");
> vector_foreach_slot(vecs->mpvec, mpp, i) {
> - if (mpp->nr_active == 0)
> + if (count_active_paths(mpp) == 0)
> mpp->stat_map_failures++;
> mpp->retry_tick = 0;
> mpp->no_path_retry = NO_PATH_RETRY_FAIL;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 34a57689..540fe5f3 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -413,7 +413,6 @@ static void set_no_path_retry(struct multipath *mpp)
> {
> char is_queueing = 0;
>
> - mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
> if (mpp->features && strstr(mpp->features, "queue_if_no_path"))
> is_queueing = 1;
>
> @@ -429,11 +428,11 @@ static void set_no_path_retry(struct multipath *mpp)
> dm_queue_if_no_path(mpp->alias, 1);
> break;
> default:
> - if (mpp->nr_active > 0) {
> + if (count_active_paths(mpp) > 0) {
It is possible to be in recovery mode here. If we got here from
ev_add_path -> __setup_multipath -> set_no_path_retry
Then the active path is a new active path, that is just getting added
now, so in_recovery may be set, and we should clear it.
> mpp->retry_tick = 0;
> if (!is_queueing)
> dm_queue_if_no_path(mpp->alias, 1);
> - } else if (is_queueing && mpp->retry_tick == 0)
> + } else if (is_queueing && !mpp->in_recovery)
> enter_recovery_mode(mpp);
> break;
> }
> @@ -1646,7 +1645,7 @@ fail_path (struct path * pp, int del_active)
> * caller must have locked the path list before calling that function
> */
> static int
> -reinstate_path (struct path * pp, int add_active)
> +reinstate_path (struct path * pp)
> {
> int ret = 0;
>
> @@ -1658,8 +1657,7 @@ reinstate_path (struct path * pp, int add_active)
> ret = 1;
> } else {
> condlog(2, "%s: reinstated", pp->dev_t);
> - if (add_active)
> - update_queue_mode_add_path(pp->mpp);
> + update_queue_mode_add_path(pp->mpp);
> }
> return ret;
> }
> @@ -1891,7 +1889,7 @@ static int check_path_reinstate_state(struct path * pp) {
>
> if (pp->disable_reinstate) {
> /* If there are no other usable paths, reinstate the path */
> - if (pp->mpp->nr_active == 0) {
> + if (count_active_paths(pp->mpp) == 0) {
> condlog(2, "%s : reinstating path early", pp->dev);
> goto reinstate_path;
> }
> @@ -1989,7 +1987,6 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
> int newstate;
> int new_path_up = 0;
> int chkr_new_path_up = 0;
> - int add_active;
> int disable_reinstate = 0;
> int oldchkrstate = pp->chkrstate;
> int retrigger_tries, checkint, max_checkint, verbosity;
> @@ -2162,7 +2159,7 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
> * paths if there are no other active paths in map.
> */
> disable_reinstate = (newstate == PATH_GHOST &&
> - pp->mpp->nr_active == 0 &&
> + count_active_paths(pp->mpp) == 0 &&
> path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;
>
> pp->chkrstate = newstate;
> @@ -2213,12 +2210,7 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
> /*
> * reinstate this path
> */
> - if (oldstate != PATH_UP &&
> - oldstate != PATH_GHOST)
> - add_active = 1;
> - else
> - add_active = 0;
> - if (!disable_reinstate && reinstate_path(pp, add_active)) {
> + if (!disable_reinstate && reinstate_path(pp)) {
> condlog(3, "%s: reload map", pp->dev);
> ev_add_path(pp, vecs, 1);
> pp->tick = 1;
> @@ -2241,7 +2233,7 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
> pp->dmstate == PSTATE_UNDEF) &&
> !disable_reinstate) {
> /* Clear IO errors */
> - if (reinstate_path(pp, 0)) {
> + if (reinstate_path(pp)) {
> condlog(3, "%s: reload map", pp->dev);
> ev_add_path(pp, vecs, 1);
> pp->tick = 1;
> --
> 2.24.0
More information about the dm-devel
mailing list