[dm-devel] [PATCH V2 10/11] multipathd: reload map if the path groups are out of order

Martin Wilck martin.wilck at suse.com
Wed Jun 7 18:59:21 UTC 2023


On Tue, 2023-06-06 at 15:13 -0500, Benjamin Marzinski wrote:
> need_switch_pathgroup() only checks if the currently used pathgroup
> is
> not the highest priority pathgroup. If it isn't, all multipathd does
> is
> instruct the kernel to switch to the correct pathgroup.  However, the
> kernel treats the pathgroups as if they were ordered by priority.
> When
> the kernel runs out of paths to use in the currently selected
> pathgroup,
> it will start checking the pathgroups in order until it finds one
> with
> usable paths.
> 
> need_switch_pathgroup() should also check if the pathgroups are out
> of
> order, and if so, multipathd should reload the map to reorder them
> correctly.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
> ---
>  multipathd/main.c | 69 ++++++++++++++++++++++++++++++++++++++-------
> --
>  1 file changed, 57 insertions(+), 12 deletions(-)
> 
> diff --git a/multipathd/main.c b/multipathd/main.c
> index f603d143..05c74e9e 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -395,11 +395,46 @@ void
> put_multipath_config(__attribute__((unused)) void *arg)
>         rcu_read_unlock();
>  }
>  
> +/*
> + * The path group orderings that this function finds acceptible are
> different
> + * from now select_path_group determines the best pathgroup. The
> idea here is
> + * to only trigger a kernel reload when it is obvious that the
> pathgroups would
> + * be out of order, even if all the paths were usable. Thus
> pathgroups with
> + * PRIO_UNDEF are skipped, and the number of enabled paths doesn't
> matter here.
> + */
> +bool path_groups_in_order(struct multipath *mpp)
> +{
> +       int i;
> +       struct pathgroup *pgp;
> +       bool seen_marginal_pg = false;
> +       int last_prio = INT_MAX;
> +
> +       if (VECTOR_SIZE(mpp->pg) < 2)
> +               return true;
> +
> +       vector_foreach_slot(mpp->pg, pgp, i) {
> +               /* skip pgs with PRIO_UNDEF, since this is likely
> temporary */
> +               if (!pgp->paths || pgp->priority == PRIO_UNDEF)
> +                       continue;
> +               if (pgp->marginal && !seen_marginal_pg) {
> +                       last_prio = INT_MAX;
> +                       continue;
> +               }
> +               if (seen_marginal_pg && !pgp->marginal)
> +                       return false;
> +               if (pgp->priority > last_prio)
> +                       return false;
> +               last_prio = pgp->priority;
> +       }
> +       return true;
> +}

I still don't get the logic here. What's the point of using
seen_marginal_pg if it is always false? See my previous reply to v1 of
this patch.

Regards
Martin



> +
>  static int
> -need_switch_pathgroup (struct multipath * mpp)
> +need_switch_pathgroup (struct multipath * mpp, bool *need_reload)
>  {
>         int bestpg;
>  
> +       *need_reload = false;
>         if (!mpp)
>                 return 0;
>  
> @@ -411,10 +446,9 @@ need_switch_pathgroup (struct multipath * mpp)
>                 return 0;
>  
>         mpp->bestpg = bestpg;
> -       if (mpp->bestpg != mpp->nextpg)
> -               return 1;
> +       *need_reload = !path_groups_in_order(mpp);
>  
> -       return 0;
> +       return (*need_reload || mpp->bestpg != mpp->nextpg);
>  }
>  
>  static void
> @@ -1963,20 +1997,26 @@ ghost_delay_tick(struct vectors *vecs)
>  }
>  
>  static void
> -deferred_failback_tick (vector mpvec)
> +deferred_failback_tick (struct vectors *vecs)
>  {
>         struct multipath * mpp;
>         unsigned int i;
> +       bool need_reload;
>  
> -       vector_foreach_slot (mpvec, mpp, i) {
> +       vector_foreach_slot (vecs->mpvec, mpp, i) {
>                 /*
>                  * deferred failback getting sooner
>                  */
>                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
>                         mpp->failback_tick--;
>  
> -                       if (!mpp->failback_tick &&
> need_switch_pathgroup(mpp))
> -                               switch_pathgroup(mpp);
> +                       if (!mpp->failback_tick &&
> +                           need_switch_pathgroup(mpp, &need_reload))
> {
> +                               if (need_reload)
> +                                       reload_and_sync_map(mpp,
> vecs);
> +                               else
> +                                       switch_pathgroup(mpp);
> +                       }
>                 }
>         }
>  }
> @@ -2219,6 +2259,7 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>         struct config *conf;
>         int marginal_pathgroups, marginal_changed = 0;
>         int ret;
> +       bool need_reload;
>  
>         if (((pp->initialized == INIT_OK || pp->initialized ==
> INIT_PARTIAL ||
>               pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) ||
> @@ -2548,13 +2589,17 @@ check_path (struct vectors * vecs, struct
> path * pp, unsigned int ticks)
>                 condlog(2, "%s: path priorities changed. reloading",
>                         pp->mpp->alias);
>                 reload_and_sync_map(pp->mpp, vecs);
> -       } else if (need_switch_pathgroup(pp->mpp)) {
> +       } else if (need_switch_pathgroup(pp->mpp, &need_reload)) {
>                 if (pp->mpp->pgfailback > 0 &&
>                     (new_path_up || pp->mpp->failback_tick <= 0))
>                         pp->mpp->failback_tick = pp->mpp->pgfailback
> + 1;
>                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE
> ||
> -                        (chkr_new_path_up &&
> followover_should_failback(pp)))
> -                       switch_pathgroup(pp->mpp);
> +                        (chkr_new_path_up &&
> followover_should_failback(pp))) {
> +                       if (need_reload)
> +                               reload_and_sync_map(pp->mpp, vecs);
> +                       else
> +                               switch_pathgroup(pp->mpp);
> +               }
>         }
>         return 1;
>  }
> @@ -2680,7 +2725,7 @@ unlock:
>                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
>                 lock(&vecs->lock);
>                 pthread_testcancel();
> -               deferred_failback_tick(vecs->mpvec);
> +               deferred_failback_tick(vecs);
>                 retry_count_tick(vecs->mpvec);
>                 missing_uev_wait_tick(vecs);
>                 ghost_delay_tick(vecs);





More information about the dm-devel mailing list