[dm-devel] [PATCH] multipathd: LUN protection by checking path's wwid change status

Chongyun Wu wucy11 at chinatelecom.cn
Mon Dec 28 03:34:26 UTC 2020


 From 37c74873acfc1587e79a6504ca3d42b8fa00d49e Mon Sep 17 00:00:00 2001

From: Chongyun Wu <wucy11 at chinatelecom.cn>
Date: Mon, 21 Dec 2020 09:51:20 +0800
Subject: [PATCH] multipathd: LUN data protection by checking path's wwid
  change status

Issue description:
A) Device sda and sdb correspend to LUN1 and LUN2 in storage backend and
the upper layer application uses those two devices.
B) Doing illegal operation: unmapping LUN1 and LUN2 in storage backend,
and export LUN2 and LUN1 to host with exchanged assignment relation
between sda and sdb.
C) The upper layer application run for a while and found that the data
in both LUN1 and LUN2 has been corrupted.

Protection method:
Before processing below process check the path wwid first, if wwid
changed just remove this path not to use it again:
1)process add path uevent;
2)process path reinstate;
3)process add path cli command;
Note: multipath already have chang uvevent wwid change check but
fellow above issue reproduce step, there is not change uevent but
have add path event and remove uevent(sometimes).

Test result:
After switching the assignment relation in storage backend, the related
device has been removed or keep failed state and LUN data not been
corrupted again.

Signed-off-by: Chongyun Wu <wucy11 at chinatelecom.cn>
---
  libmultipath/configure.c  | 13 ++++++++
  libmultipath/configure.h  |  1 +
  multipathd/cli_handlers.c |  7 +++++
  multipathd/main.c         | 63 +++++++++++++++++++++++++++++++++++++++
  multipathd/main.h         |  2 +-
  5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index 6fb477fc..6dc8245f 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -569,6 +569,19 @@ unref:
      udev_enumerate_unref(part_enum);
  }

+void
+trigger_path_udev_remove(struct path *pp)
+{
+    const char *action = "remove";
+
+    if (!pp || !pp->udev)
+        return;
+
+    condlog(0, "%s wwid changed, trigger %s uevent", pp->dev, action);
+    sysfs_attr_set_value(pp->udev, "uevent", action, strlen(action));
+    trigger_partitions_udev_change(pp->udev, action, strlen(action));
+}
+
  void
  trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
  {
diff --git a/libmultipath/configure.h b/libmultipath/configure.h
index 6b23ccbb..2c1697ef 100644
--- a/libmultipath/configure.h
+++ b/libmultipath/configure.h
@@ -58,3 +58,4 @@ int get_refwwid (enum mpath_cmds cmd, const char *dev, 
enum devtypes dev_type,
           vector pathvec, char **wwid);
  struct udev_device *get_udev_device(const char *dev, enum devtypes 
dev_type);
  void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
+void trigger_path_udev_remove(struct path *pp);
diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
index 235e2a2e..836f5465 100644
--- a/multipathd/cli_handlers.c
+++ b/multipathd/cli_handlers.c
@@ -715,6 +715,13 @@ cli_add_path (void * v, char ** reply, int * len, 
void * data)
      pp = find_path_by_dev(vecs->pathvec, param);
      if (pp && pp->initialized != INIT_REMOVED) {
          condlog(2, "%s: path already in pathvec", param);
+
+        /* Check if WWID has changed to protect data from being wirtten 
to the wrong device */
+        if (check_wwid_change(pp, vecs)) {
+            condlog(0, "cli add path failed for %s wwid changed", pp->dev);
+            return 1;
+        }
+
          if (pp->mpp)
              return 0;
      } else if (pp) {
diff --git a/multipathd/main.c b/multipathd/main.c
index a4abbb27..adedaf7a 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -813,6 +813,43 @@ ev_remove_map (char * devname, char * alias, int 
minor, struct vectors * vecs)
      return flush_map(mpp, vecs, 0);
  }

+bool check_wwid_change(struct path *pp, struct vectors *vecs)
+{
+    char wwid[WWID_SIZE];
+    int len =0;
+    char *c;
+
+    if (!strlen(pp->wwid))
+        return false;
+
+    /* Get the real fresh device wwid by sgio */
+    len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
+    condlog(3, "Get wwid(%s) of dev(%s) by sgio", wwid, pp->dev);
+
+    if (len <= 0) {
+        condlog(3, "Failed to get wwid for %s by sgio: len = %d", 
pp->dev, len);
+        return false;
+    } else {
+        /*Strip any trailing blanks */
+        c = strchr(wwid, '\0');
+        c--;
+        while (c && c >= wwid && *c == ' ') {
+            *c = '\0';
+            c--;
+        }
+    }
+
+    if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
+        condlog(0, "path %s wwid changed from '%s' to '%s', try to 
remove it",
+                pp->dev, pp->wwid, wwid);
+        trigger_path_udev_remove(pp);
+        ev_remove_path(pp, vecs, 1);
+        return true;
+    }
+
+    return false;
+}
+
  static int
  uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
  {
@@ -910,6 +947,19 @@ uev_add_path (struct uevent *uev, struct vectors * 
vecs, int need_do_map)
                      uev->kernel);
                  ret = 1;
              }
+        } else if (pp->mpp && strlen(pp->wwid)) {
+            /*
+             * Data protection: check path wwid and remove this path 
when wwid changed,
+             * cause of storage backend assignment might been changed
+             * by illegal operation make this path actually indicte to 
other LUN and kernel might
+             * not adapt scsi layer assginment automatic, so multipathd 
need to check it and
+             * make some protection by removing this path.
+             */
+            if (check_wwid_change(pp, vecs)) {
+                condlog(2, "Detect dev %s wwid changed when processing 
add uvent",
+                       pp->dev);
+                ret = 1;
+            }
          }
      }
      if (pp)
@@ -1700,6 +1750,19 @@ reinstate_path (struct path * pp)
      if (!pp->mpp)
          return;

+    /*
+     * Data protection before reinstate path: check path wwid and 
remove this path
+     * when wwid changed, cause of storage backend assignment might 
been changed
+     * by illegal operation make this path actually indicte to other 
LUN and kernel might
+     * not adapt scsi layer assginment automatic, so multipathd need to 
check it and
+     * make some protection by removing this path or at lest not to use 
this path.
+     */
+    if (check_wwid_change(pp, gvecs)) {
+        condlog(0, "Device %s wwid changed not to reinstate this path 
and try to remove it",
+                pp->dev);
+        return;
+    }
+
      if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
          condlog(0, "%s: reinstate failed", pp->dev_t);
      else {
diff --git a/multipathd/main.h b/multipathd/main.h
index 5abbe97b..1b4328e0 100644
--- a/multipathd/main.h
+++ b/multipathd/main.h
@@ -49,5 +49,5 @@ int __setup_multipath (struct vectors * vecs, struct 
multipath * mpp,
  int update_multipath (struct vectors *vecs, char *mapname, int reset);
  int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs,
              int refresh);
-
+bool check_wwid_change(struct path * pp, struct vectors * vecs);
  #endif /* MAIN_H */
-- 





More information about the dm-devel mailing list