[dm-devel] [PATCH 4/5] multipathd: reload map if the path groups are out of order

Benjamin Marzinski bmarzins at redhat.com
Tue Jun 6 04:42:15 UTC 2023


On Mon, Jun 05, 2023 at 02:08:07PM -0500, Benjamin Marzinski wrote:
> On Wed, May 31, 2023 at 04:27:30PM +0000, Martin Wilck wrote:
> > On Wed, 2023-05-24 at 18:21 -0500, Benjamin Marzinski wrote:
> > > need_switch_pathgroup() only checks if the currently used pathgroup
> > > is
> > > not the highest priority pathgroup. If it isn't, all multipathd does
> > > is
> > > instruct the kernel to switch to the correct pathgroup.  However, the
> > > kernel treats the pathgroups as if they were ordered by priority.
> > > When
> > > the kernel runs out of paths to use in the currently selected
> > > pathgroup,
> > > it will start checking the pathgroups in order until it finds one
> > > with
> > > usable paths.
> > > 
> > > need_switch_pathgroup() should also check if the pathgroups are out
> > > of
> > > order, and if so, multipathd should reload the map to reorder them
> > > correctly.
> > > 
> > > Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
> > > ---
> > >  libmultipath/libmultipath.version |  5 ++++
> > >  libmultipath/switchgroup.c        | 27 ++++++++++++++++++++++
> > >  libmultipath/switchgroup.h        |  1 +
> > >  multipathd/main.c                 | 38 +++++++++++++++++++++--------
> > > --
> > >  4 files changed, 59 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/libmultipath/libmultipath.version
> > > b/libmultipath/libmultipath.version
> > > index 8f72c452..38074699 100644
> > > --- a/libmultipath/libmultipath.version
> > > +++ b/libmultipath/libmultipath.version
> > > @@ -237,3 +237,8 @@ global:
> > >  local:
> > >         *;
> > >  };
> > > +
> > > +LIBMULTIPATH_19.1.0 {
> > > +global:
> > > +       path_groups_in_order;
> > > +} LIBMULTIPATH_19.0.0;
> > > diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c
> > > index b1e1f39b..b1180839 100644
> > > --- a/libmultipath/switchgroup.c
> > > +++ b/libmultipath/switchgroup.c
> > > @@ -7,6 +7,33 @@
> > >  #include "structs.h"
> > >  #include "switchgroup.h"
> > >  
> > > +bool path_groups_in_order(struct multipath *mpp)
> > > +{
> > > +       int i;
> > > +       struct pathgroup *pgp;
> > > +       bool seen_marginal_pg = false;
> > > +       int last_prio = INT_MAX;
> > > +
> > > +       if (VECTOR_SIZE(mpp->pg) < 2)
> > > +               return true;
> > > +
> > > +       vector_foreach_slot(mpp->pg, pgp, i) {
> > > +               /* skip pgs with PRIO_UNDEF, since this is likely
> > > temporary */
> > > +               if (!pgp->paths || pgp->priority == PRIO_UNDEF)
> > > +                       continue;
> > > +               if (pgp->marginal && !seen_marginal_pg) {
> > > +                       last_prio = INT_MAX;
> > > +                       continue;
> > > +               }
> > > +               if (seen_marginal_pg && !pgp->marginal)
> > > +                       return false;
> > > +               if (pgp->priority > last_prio)
> > > +                       return false;
> > > +               last_prio = pgp->priority;
> > > +       }
> > > +       return true;
> > > +}
> > > +
> > >  void path_group_prio_update(struct pathgroup *pgp)
> > >  {
> > >         int i;
> > > diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h
> > > index 9365e2e2..43dbb6c9 100644
> > > --- a/libmultipath/switchgroup.h
> > > +++ b/libmultipath/switchgroup.h
> > > @@ -1,2 +1,3 @@
> > >  void path_group_prio_update (struct pathgroup * pgp);
> > >  int select_path_group (struct multipath * mpp);
> > > +bool path_groups_in_order(struct multipath *mpp);
> > > diff --git a/multipathd/main.c b/multipathd/main.c
> > > index e7c272ad..2ea7c76b 100644
> > > --- a/multipathd/main.c
> > > +++ b/multipathd/main.c
> > > @@ -396,7 +396,7 @@ void put_multipath_config(__attribute__((unused))
> > > void *arg)
> > >  }
> > >  
> > >  static int
> > > -need_switch_pathgroup (struct multipath * mpp, int refresh)
> > > +need_switch_pathgroup (struct multipath * mpp, int refresh, bool
> > > *need_reload)
> > >  {
> > >         struct pathgroup * pgp;
> > >         struct path * pp;
> > > @@ -404,6 +404,7 @@ need_switch_pathgroup (struct multipath * mpp,
> > > int refresh)
> > >         struct config *conf;
> > >         int bestpg;
> > >  
> > > +       *need_reload = false;
> > >         if (!mpp)
> > >                 return 0;
> > >  
> > > @@ -430,10 +431,9 @@ need_switch_pathgroup (struct multipath * mpp,
> > > int refresh)
> > >                 return 0;
> > >  
> > >         mpp->bestpg = bestpg;
> > > -       if (mpp->bestpg != mpp->nextpg)
> > > -               return 1;
> > > +       *need_reload = !path_groups_in_order(mpp);
> > 
> > This will start another loop over the path groups. Can we just
> > integrate the path_groups_in_order() logic into the loop right here?
> > 
> 
> Sure

Actually, after looking into this more, pushing those two functions
together makes the logic more confusing. Plus select_path_group() is
used by multiple other functions that don't need to check if the path
groups are out of order.

There's no reason for path_groups_in_order to be in libmultipath, since
it's only needed by multipathd. I'll fix that. But I would rather not
join it with select_path_group(). Since we just loop over the pathgroups
and not the paths within them, we will likely go through the loop a
couple of times, and we don't actually perform any costly actions during
the loops that would make combining them look more attractive. The
performance gains for need_switch_pathgroup() aren't worth making the
logic harder to follow (and the minor performance hits when we don't
need to check the order), IMHO. 

-Ben

> -Ben
> 
> > 
> > 
> > >  
> > > -       return 0;
> > > +       return (*need_reload || mpp->bestpg != mpp->nextpg);
> > >  }
> > >  
> > >  static void
> > > @@ -1982,20 +1982,26 @@ ghost_delay_tick(struct vectors *vecs)
> > >  }
> > >  
> > >  static void
> > > -deferred_failback_tick (vector mpvec)
> > > +deferred_failback_tick (struct vectors *vecs)
> > >  {
> > >         struct multipath * mpp;
> > >         unsigned int i;
> > > +       bool need_reload;
> > >  
> > > -       vector_foreach_slot (mpvec, mpp, i) {
> > > +       vector_foreach_slot (vecs->mpvec, mpp, i) {
> > >                 /*
> > >                  * deferred failback getting sooner
> > >                  */
> > >                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
> > >                         mpp->failback_tick--;
> > >  
> > > -                       if (!mpp->failback_tick &&
> > > need_switch_pathgroup(mpp, 1))
> > > -                               switch_pathgroup(mpp);
> > > +                       if (!mpp->failback_tick &&
> > > +                           need_switch_pathgroup(mpp, 1,
> > > &need_reload)) {
> > > +                               if (need_reload)
> > > +                                       reload_and_sync_map(mpp,
> > > vecs, 0);
> > > +                               else
> > > +                                       switch_pathgroup(mpp);
> > > +                       }
> > >                 }
> > >         }
> > >  }
> > > @@ -2579,6 +2585,7 @@ check_path (struct vectors * vecs, struct path
> > > * pp, unsigned int ticks)
> > >                 int prio_changed = update_prio(pp, new_path_up);
> > >                 bool need_refresh = (!new_path_up && prio_changed &&
> > >                                      pp->priority != PRIO_UNDEF);
> > > +               bool need_reload;
> > >  
> > >                 if (prio_changed &&
> > >                     pp->mpp->pgpolicyfn == (pgpolicyfn
> > > *)group_by_prio &&
> > > @@ -2586,15 +2593,22 @@ check_path (struct vectors * vecs, struct
> > > path * pp, unsigned int ticks)
> > >                         condlog(2, "%s: path priorities changed.
> > > reloading",
> > >                                 pp->mpp->alias);
> > >                         reload_and_sync_map(pp->mpp, vecs,
> > > !new_path_up);
> > > -               } else if (need_switch_pathgroup(pp->mpp,
> > > need_refresh)) {
> > > +               } else if (need_switch_pathgroup(pp->mpp,
> > > need_refresh,
> > > +                                                &need_reload)) {
> > >                         if (pp->mpp->pgfailback > 0 &&
> > >                             (new_path_up || pp->mpp->failback_tick <=
> > > 0))
> > >                                 pp->mpp->failback_tick =
> > >                                         pp->mpp->pgfailback + 1;
> > >                         else if (pp->mpp->pgfailback == -
> > > FAILBACK_IMMEDIATE ||
> > >                                  (chkr_new_path_up &&
> > > -                                 followover_should_failback(pp)))
> > > -                               switch_pathgroup(pp->mpp);
> > > +                                 followover_should_failback(pp))) {
> > > +                               if (need_reload)
> > > +                                       reload_and_sync_map(pp->mpp,
> > > vecs,
> > > +                                                          
> > > !need_refresh &&
> > > +                                                          
> > > !new_path_up);
> > > +                               else
> > > +                                       switch_pathgroup(pp->mpp);
> > > +                       }
> > >                 }
> > >         }
> > >         return 1;
> > > @@ -2720,7 +2734,7 @@ unlock:
> > >                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > >                 lock(&vecs->lock);
> > >                 pthread_testcancel();
> > > -               deferred_failback_tick(vecs->mpvec);
> > > +               deferred_failback_tick(vecs);
> > >                 retry_count_tick(vecs->mpvec);
> > >                 missing_uev_wait_tick(vecs);
> > >                 ghost_delay_tick(vecs);
> > 
> > 
> --
> dm-devel mailing list
> dm-devel at redhat.com
> https://listman.redhat.com/mailman/listinfo/dm-devel


More information about the dm-devel mailing list