[dm-devel] [PATCH v2 3/3] multipathd: add recheck_wwid option to verify the path wwid

Martin Wilck martin.wilck at suse.com
Thu Mar 11 20:30:51 UTC 2021


On Wed, 2021-02-24 at 00:33 -0600, Benjamin Marzinski wrote:
> There are cases where the wwid of a path changes due to LUN remapping
> without triggering uevent for the changed path. Multipathd has no
> method
> for trying to catch these cases, and corruption has resulted because
> of
> it.
> 
> In order to have a better chance at catching these cases, multipath
> now
> has a recheck_wwid option. If this is set to "yes", when a failed
> path
> has become active again, multipathd will recheck its wwid. If
> multipathd
> notices that a path's wwid has changed, it will remove and re-add the
> path, just like the existing wwid checking code for change events
> does.
> In cases where the no uevent occurs, both the udev database entry and
> sysfs will have the old wwid, so the only way to get a current wwid
> is
> to ask the device directly. Currently multipath only has code to
> directly get the wwid for scsi devices, so this option only effects
> scsi
> devices, and they must be configured to be able to use the
> uid_fallback
> methods. To make sure both the sysfs and udev database values are
> updated, multipathd triggers a both a rescan of the device and a udev
> add event.
> 
> Co-developed-by: Chongyun Wu <wucy11 at chinatelecom.cn>
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>

Two minor remarks below, but:

Reviewed-by: Martin Wilck <mwilck at suse.com>

>  
> +static void
> +rescan_path(struct udev_device *parent)
> +{
> +       while(parent) {
> +               const char *subsys =
> udev_device_get_subsystem(parent);
> +               if (subsys && !strncmp(subsys, "scsi", 4))
> +                       break;
> +               parent = udev_device_get_parent(parent);
> +       }

You could have used udev_device_get_parent_with_subsystem_devtype()
here. 

> +       if (parent)
> +               sysfs_attr_set_value(parent, "rescan", "1",
> strlen("1"));
> +}
> +
> +void
> +handle_path_wwid_change(struct path *pp, struct vectors *vecs)
> +{
> +       struct udev_device *udd;
> +
> +       if (!pp || !pp->udev)
> +               return;
> +
> +       udd = udev_device_ref(pp->udev);
> +       if (ev_remove_path(pp, vecs, 1) != 0 && pp->mpp) {
> +               pp->dmstate = PSTATE_FAILED;
> +               dm_fail_path(pp->mpp->alias, pp->dev_t);
> +       }
> +       rescan_path(udd);
> +       sysfs_attr_set_value(udd, "uevent", "add", strlen("add"));
> +       trigger_partitions_udev_change(udd, "add", strlen("add"));

Why do you need to do this for the partitions?

> +       udev_device_unref(udd);
> +}
> +
> +bool
> +check_path_wwid_change(struct path *pp)
> +{
> +       char wwid[WWID_SIZE];
> +       int len = 0;
> +       size_t i;
> +
> +       if (!strlen(pp->wwid))
> +               return false;
> +
> +       /* Get the real fresh device wwid by sgio. sysfs still has
> old
> +        * data, so only get_vpd_sgio will work to get the new wwid
> */
> +       len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
> +
> +       if (len <= 0) {
> +               condlog(2, "%s: failed to check wwid by sgio: len =
> %d",
> +                       pp->dev, len);
> +               return false;
> +       }
> +
> +       /*Strip any trailing blanks */
> +       for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i--
> );
> +               /* no-op */
> +       pp->wwid[i] = '\0';
> +       condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid);
> +
> +       if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
> +               condlog(0, "%s: wwid '%s' doesn't match wwid '%s'
> from device",
> +                       pp->dev, pp->wwid, wwid);
> +               return true;
> +       }
> +
> +       return false;
> +}
> +
>  static int
>  uev_add_path (struct uevent *uev, struct vectors * vecs, int
> need_do_map)
>  {
> @@ -1296,6 +1363,7 @@ uev_update_path (struct uevent *uev, struct
> vectors * vecs)
>                         condlog(0, "%s: path wwid changed from '%s'
> to '%s'",
>                                 uev->kernel, wwid, pp->wwid);
>                         ev_remove_path(pp, vecs, 1);
> +                       rescan_path(uev->udev);
>                         needs_reinit = 1;
>                         goto out;
>                 } else {
> @@ -2197,6 +2265,16 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>                 return 0;
>         set_no_path_retry(pp->mpp);
>  
> +       if (pp->recheck_wwid == RECHECK_WWID_ON &&
> +           (newstate == PATH_UP || newstate == PATH_GHOST) &&
> +           ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
> +            pp->dmstate == PSTATE_FAILED) &&
> +           check_path_wwid_change(pp)) {
> +               condlog(0, "%s: path wwid change detected. Removing",
> pp->dev);
> +               handle_path_wwid_change(pp, vecs);
> +               return 0;
> +       }
> +
>         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
>             (san_path_check_enabled(pp->mpp) ||
>              marginal_path_check_enabled(pp->mpp))) {
> diff --git a/multipathd/main.h b/multipathd/main.h
> index 5abbe97b..ddd953f9 100644
> --- a/multipathd/main.h
> +++ b/multipathd/main.h
> @@ -50,4 +50,6 @@ int update_multipath (struct vectors *vecs, char
> *mapname, int reset);
>  int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs,
>                         int refresh);
>  
> +void handle_path_wwid_change(struct path *pp, struct vectors *vecs);
> +bool check_path_wwid_change(struct path *pp);
>  #endif /* MAIN_H */

-- 
Dr. Martin Wilck <mwilck at suse.com>, Tel. +49 (0)911 74053 2107
SUSE Software Solutions Germany GmbH
HRB 36809, AG Nürnberg GF: Felix Imendörffer






More information about the dm-devel mailing list