[dm-devel] [PATCH v2 3/3] multipathd: add recheck_wwid option to verify the path wwid
Martin Wilck
martin.wilck at suse.com
Thu Mar 11 20:30:51 UTC 2021
On Wed, 2021-02-24 at 00:33 -0600, Benjamin Marzinski wrote:
> There are cases where the wwid of a path changes due to LUN remapping
> without triggering uevent for the changed path. Multipathd has no
> method
> for trying to catch these cases, and corruption has resulted because
> of
> it.
>
> In order to have a better chance at catching these cases, multipath
> now
> has a recheck_wwid option. If this is set to "yes", when a failed
> path
> has become active again, multipathd will recheck its wwid. If
> multipathd
> notices that a path's wwid has changed, it will remove and re-add the
> path, just like the existing wwid checking code for change events
> does.
> In cases where the no uevent occurs, both the udev database entry and
> sysfs will have the old wwid, so the only way to get a current wwid
> is
> to ask the device directly. Currently multipath only has code to
> directly get the wwid for scsi devices, so this option only effects
> scsi
> devices, and they must be configured to be able to use the
> uid_fallback
> methods. To make sure both the sysfs and udev database values are
> updated, multipathd triggers a both a rescan of the device and a udev
> add event.
>
> Co-developed-by: Chongyun Wu <wucy11 at chinatelecom.cn>
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
Two minor remarks below, but:
Reviewed-by: Martin Wilck <mwilck at suse.com>
>
> +static void
> +rescan_path(struct udev_device *parent)
> +{
> + while(parent) {
> + const char *subsys =
> udev_device_get_subsystem(parent);
> + if (subsys && !strncmp(subsys, "scsi", 4))
> + break;
> + parent = udev_device_get_parent(parent);
> + }
You could have used udev_device_get_parent_with_subsystem_devtype()
here.
> + if (parent)
> + sysfs_attr_set_value(parent, "rescan", "1",
> strlen("1"));
> +}
> +
> +void
> +handle_path_wwid_change(struct path *pp, struct vectors *vecs)
> +{
> + struct udev_device *udd;
> +
> + if (!pp || !pp->udev)
> + return;
> +
> + udd = udev_device_ref(pp->udev);
> + if (ev_remove_path(pp, vecs, 1) != 0 && pp->mpp) {
> + pp->dmstate = PSTATE_FAILED;
> + dm_fail_path(pp->mpp->alias, pp->dev_t);
> + }
> + rescan_path(udd);
> + sysfs_attr_set_value(udd, "uevent", "add", strlen("add"));
> + trigger_partitions_udev_change(udd, "add", strlen("add"));
Why do you need to do this for the partitions?
> + udev_device_unref(udd);
> +}
> +
> +bool
> +check_path_wwid_change(struct path *pp)
> +{
> + char wwid[WWID_SIZE];
> + int len = 0;
> + size_t i;
> +
> + if (!strlen(pp->wwid))
> + return false;
> +
> + /* Get the real fresh device wwid by sgio. sysfs still has
> old
> + * data, so only get_vpd_sgio will work to get the new wwid
> */
> + len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
> +
> + if (len <= 0) {
> + condlog(2, "%s: failed to check wwid by sgio: len =
> %d",
> + pp->dev, len);
> + return false;
> + }
> +
> + /*Strip any trailing blanks */
> + for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i--
> );
> + /* no-op */
> + pp->wwid[i] = '\0';
> + condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid);
> +
> + if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
> + condlog(0, "%s: wwid '%s' doesn't match wwid '%s'
> from device",
> + pp->dev, pp->wwid, wwid);
> + return true;
> + }
> +
> + return false;
> +}
> +
> static int
> uev_add_path (struct uevent *uev, struct vectors * vecs, int
> need_do_map)
> {
> @@ -1296,6 +1363,7 @@ uev_update_path (struct uevent *uev, struct
> vectors * vecs)
> condlog(0, "%s: path wwid changed from '%s'
> to '%s'",
> uev->kernel, wwid, pp->wwid);
> ev_remove_path(pp, vecs, 1);
> + rescan_path(uev->udev);
> needs_reinit = 1;
> goto out;
> } else {
> @@ -2197,6 +2265,16 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
> return 0;
> set_no_path_retry(pp->mpp);
>
> + if (pp->recheck_wwid == RECHECK_WWID_ON &&
> + (newstate == PATH_UP || newstate == PATH_GHOST) &&
> + ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
> + pp->dmstate == PSTATE_FAILED) &&
> + check_path_wwid_change(pp)) {
> + condlog(0, "%s: path wwid change detected. Removing",
> pp->dev);
> + handle_path_wwid_change(pp, vecs);
> + return 0;
> + }
> +
> if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
> (san_path_check_enabled(pp->mpp) ||
> marginal_path_check_enabled(pp->mpp))) {
> diff --git a/multipathd/main.h b/multipathd/main.h
> index 5abbe97b..ddd953f9 100644
> --- a/multipathd/main.h
> +++ b/multipathd/main.h
> @@ -50,4 +50,6 @@ int update_multipath (struct vectors *vecs, char
> *mapname, int reset);
> int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs,
> int refresh);
>
> +void handle_path_wwid_change(struct path *pp, struct vectors *vecs);
> +bool check_path_wwid_change(struct path *pp);
> #endif /* MAIN_H */
--
Dr. Martin Wilck <mwilck at suse.com>, Tel. +49 (0)911 74053 2107
SUSE Software Solutions Germany GmbH
HRB 36809, AG Nürnberg GF: Felix Imendörffer
More information about the dm-devel
mailing list