[dm-devel] [PATCH 1/1] dm-mpath: Extend path selector interface for supporting Dell EqualLogic path selector

Narendran Ganapathy Narendran_Ganapathy at Dell.com
Mon Jun 28 13:53:20 UTC 2010


This patch extends the dm-path-selector interface to allow path selectors to use extra information from the IO request when selecting a path.

Dell EqualLogic and other iSCSI storage arrays use a distributed frameless architecture.  In this architecture, the storage group consists of a number of distinct storage arrays ("members"), each having independent controllers, disk storage and network adapters.  When a LUN is created, it is spread across multiple members.  The details of the distribution are hidden from initiators connected to this storage system.  The storage group exposes a single target discovery portal, no matter how many members are being used.  When iSCSI sessions are created, each session is connected to an Ethernet port on a single member.  Data to a LUN can be sent on any iSCSI session, and if the blocks being accessed are stored on another member, the IO will be forwarded as required.  This forwarding is invisible to the initiator.  The storage layout is also dynamic, and the blocks stored on disk may be moved from member to member as needed to balance the load.

This architecture simplifies the management and configuration of both the storage group and initiators.  In a multipathing configuration, it is possible to set up multiple iSCSI sessions to use multiple network interfaces on both the host and target to take advantage of the increased network bandwidth.  An initiator can use a simple round robin algorithm to send IO on all paths and let the storage array members forward it as necessary.  However, there is a performance advantage to sending data directly to the correct member.  At the same time, the existing technique of building a separate priority group for paths to each controller does not fit this model, because the block ranges may be moved at any time from member to member, and it is also acceptable to send IO to any member in the group when no direct path exists or there is a path error.

We propose to develop a new path selector to perform this location-based routing.  The basic idea is to use knowledge about the current location of data to prefer paths directly to the owning member, but fall back to use any available path when no direct path is available.  This submission includes the necessary changes to the dm-path-selector interface.  In the current interface, the only information passed to the select_path routine is the path_selector struct and the number of bytes in the request.  To do location-based routing, we need the address information of the request.

We propose to extend the path selector interface to pass the entire request pointer to the 'select_path' / 'start_io' /  'end_io' functions so that the path selector can use any information therein to route the I/O.

We also propose extending the dm_mpath_io structure, which holds information about each I/O, with extra fields in which the path selector can store I/O-specific flags and a timestamp, so that the path selector can determine the latency of I/Os on different paths.  This information is passed to the 'select_path' / 'start_io' / 'end_io' functions for path selector usage.  These additions to dm_mpath_io allow flexibility in developing future algorithms that route I/O based on other information from the request.


Signed-off-by: Narendran Ganapathy <Narendran_Ganapathy at dell.com>
Signed-off-by: Jason Shamberger <Jason_Shamberger at dell.com>
---
 drivers/md/dm-mpath.c         |   37 +++++++++++++++++--------------------
 drivers/md/dm-mpath.h         |   20 ++++++++++++++++++++
 drivers/md/dm-path-selector.h |    7 ++++---
 drivers/md/dm-queue-length.c  |    8 +++++---
 drivers/md/dm-round-robin.c   |    4 +++-
 drivers/md/dm-service-time.c  |   11 ++++++++---
 6 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 826bce7..cc45a99 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -98,14 +98,6 @@ struct multipath {
 	struct mutex work_mutex;
 };
 
-/*
- * Context information attached to each bio we process.
- */
-struct dm_mpath_io {
-	struct pgpath *pgpath;
-	size_t nr_bytes;
-};
-
 typedef int (*action_fn) (struct pgpath *pgpath);
 
 #define MIN_IOS 256	/* Mempool size */
@@ -267,11 +259,13 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 }
 
 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
-			       size_t nr_bytes)
+				union dm_mpath_ps_io *psio,
+				struct request *clone)
 {
 	struct dm_path *path;
 
-	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
+	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, psio,
+					clone);
 	if (!path)
 		return -ENXIO;
 
@@ -283,7 +277,8 @@ static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
 	return 0;
 }
 
-static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
+static void __choose_pgpath(struct multipath *m, union dm_mpath_ps_io *psio,
+				struct request *clone)
 {
 	struct priority_group *pg;
 	unsigned bypassed = 1;
@@ -295,12 +290,13 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
 	if (m->next_pg) {
 		pg = m->next_pg;
 		m->next_pg = NULL;
-		if (!__choose_path_in_pg(m, pg, nr_bytes))
+		if (!__choose_path_in_pg(m, pg, psio, clone))
 			return;
 	}
 
 	/* Don't change PG until it has no remaining paths */
-	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
+	if (m->current_pg &&
+		!__choose_path_in_pg(m, m->current_pg, psio, clone))
 		return;
 
 	/*
@@ -312,7 +308,7 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
 		list_for_each_entry(pg, &m->priority_groups, list) {
 			if (pg->bypassed == bypassed)
 				continue;
-			if (!__choose_path_in_pg(m, pg, nr_bytes))
+			if (!__choose_path_in_pg(m, pg, psio, clone))
 				return;
 		}
 	} while (bypassed--);
@@ -350,10 +346,12 @@ static int map_io(struct multipath *m, struct request *clone,
 
 	spin_lock_irqsave(&m->lock, flags);
 
+	mpio->u.nr_bytes = nr_bytes;
+
 	/* Do we need to select a new pgpath? */
 	if (!m->current_pgpath ||
 	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
-		__choose_pgpath(m, nr_bytes);
+		__choose_pgpath(m, &mpio->u, clone);
 
 	pgpath = m->current_pgpath;
 
@@ -380,11 +378,10 @@ static int map_io(struct multipath *m, struct request *clone,
 		r = -EIO;	/* Failed */
 
 	mpio->pgpath = pgpath;
-	mpio->nr_bytes = nr_bytes;
 
 	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
 		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
-					      nr_bytes);
+					      &mpio->u, clone);
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
@@ -464,7 +461,7 @@ static void process_queued_ios(struct work_struct *work)
 		goto out;
 
 	if (!m->current_pgpath)
-		__choose_pgpath(m, 0);
+		__choose_pgpath(m, NULL, NULL);
 
 	pgpath = m->current_pgpath;
 
@@ -1295,7 +1292,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	if (pgpath) {
 		ps = &pgpath->pg->ps;
 		if (ps->type->end_io)
-			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
+			ps->type->end_io(ps, &pgpath->path, &mpio->u, clone);
 	}
 	mempool_free(mpio, m->mpio_pool);
 
@@ -1533,7 +1530,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
 	spin_lock_irqsave(&m->lock, flags);
 
 	if (!m->current_pgpath)
-		__choose_pgpath(m, 0);
+		__choose_pgpath(m, NULL, NULL);
 
 	if (m->current_pgpath) {
 		bdev = m->current_pgpath->path.dev->bdev;
diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h
index e230f71..45e9c58 100644
--- a/drivers/md/dm-mpath.h
+++ b/drivers/md/dm-mpath.h
@@ -16,6 +16,26 @@ struct dm_path {
 	void *pscontext;	/* For path-selector use */
 };
 
+
+/*
+ * Context information attached to each bio we process.
+ */
+struct dm_ps_io_ctx {
+	uint32_t flags;
+	unsigned long iotime;
+};
+
+union dm_mpath_ps_io {
+	size_t nr_bytes;
+	struct dm_ps_io_ctx ps_ctx;
+};
+
+struct dm_mpath_io {
+	struct pgpath *pgpath;
+	union dm_mpath_ps_io u;
+};
+
+
 /* Callback for hwh_pg_init_fn to use when complete */
 void dm_pg_init_complete(struct dm_path *path, unsigned err_flags);
 
diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h
index e7d1fa8..cff8ca5 100644
--- a/drivers/md/dm-path-selector.h
+++ b/drivers/md/dm-path-selector.h
@@ -57,7 +57,8 @@ struct path_selector_type {
 	 */
 	struct dm_path *(*select_path) (struct path_selector *ps,
 					unsigned *repeat_count,
-					size_t nr_bytes);
+					union dm_mpath_ps_io *psio,
+					struct request *clone);
 
 	/*
 	 * Notify the selector that a path has failed.
@@ -77,9 +78,9 @@ struct path_selector_type {
 		       status_type_t type, char *result, unsigned int maxlen);
 
 	int (*start_io) (struct path_selector *ps, struct dm_path *path,
-			 size_t nr_bytes);
+			 union dm_mpath_ps_io *psio, struct request *clone);
 	int (*end_io) (struct path_selector *ps, struct dm_path *path,
-		       size_t nr_bytes);
+			 union dm_mpath_ps_io *psio, struct request *clone);
 };
 
 /* Register a path selector */
diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c
index f92b6ce..f4d9b47 100644
--- a/drivers/md/dm-queue-length.c
+++ b/drivers/md/dm-queue-length.c
@@ -168,7 +168,9 @@ static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path)
  * Select a path having the minimum number of in-flight I/Os
  */
 static struct dm_path *ql_select_path(struct path_selector *ps,
-				      unsigned *repeat_count, size_t nr_bytes)
+					unsigned *repeat_count,
+					union dm_mpath_ps_io *psio,
+					struct request *clone)
 {
 	struct selector *s = ps->context;
 	struct path_info *pi = NULL, *best = NULL;
@@ -197,7 +199,7 @@ static struct dm_path *ql_select_path(struct path_selector *ps,
 }
 
 static int ql_start_io(struct path_selector *ps, struct dm_path *path,
-		       size_t nr_bytes)
+		       union dm_mpath_ps_io *psio, struct request *clone)
 {
 	struct path_info *pi = path->pscontext;
 
@@ -207,7 +209,7 @@ static int ql_start_io(struct path_selector *ps, struct dm_path *path,
 }
 
 static int ql_end_io(struct path_selector *ps, struct dm_path *path,
-		     size_t nr_bytes)
+		     union dm_mpath_ps_io *psio, struct request *clone)
 {
 	struct path_info *pi = path->pscontext;
 
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c
index 24752f4..ecbde5f 100644
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-round-robin.c
@@ -161,7 +161,9 @@ static int rr_reinstate_path(struct path_selector *ps, struct dm_path *p)
 }
 
 static struct dm_path *rr_select_path(struct path_selector *ps,
-				      unsigned *repeat_count, size_t nr_bytes)
+					unsigned *repeat_count,
+					union dm_mpath_ps_io *psio,
+					struct request *clone)
 {
 	struct selector *s = (struct selector *) ps->context;
 	struct path_info *pi = NULL;
diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-service-time.c
index 9c6c2e4..83f7534 100644
--- a/drivers/md/dm-service-time.c
+++ b/drivers/md/dm-service-time.c
@@ -254,10 +254,13 @@ static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
 }
 
 static struct dm_path *st_select_path(struct path_selector *ps,
-				      unsigned *repeat_count, size_t nr_bytes)
+					unsigned *repeat_count,
+					union dm_mpath_ps_io *psio,
+					struct request *clone)
 {
 	struct selector *s = ps->context;
 	struct path_info *pi = NULL, *best = NULL;
+	size_t nr_bytes = psio ? psio->nr_bytes : 0 ;
 
 	if (list_empty(&s->valid_paths))
 		return NULL;
@@ -278,9 +281,10 @@ static struct dm_path *st_select_path(struct path_selector *ps,
 }
 
 static int st_start_io(struct path_selector *ps, struct dm_path *path,
-		       size_t nr_bytes)
+		       union dm_mpath_ps_io *psio, struct request *clone)
 {
 	struct path_info *pi = path->pscontext;
+	size_t nr_bytes = psio ? psio->nr_bytes : 0 ;
 
 	atomic_add(nr_bytes, &pi->in_flight_size);
 
@@ -288,9 +292,10 @@ static int st_start_io(struct path_selector *ps, struct dm_path *path,
 }
 
 static int st_end_io(struct path_selector *ps, struct dm_path *path,
-		     size_t nr_bytes)
+		       union dm_mpath_ps_io *psio, struct request *clone)
 {
 	struct path_info *pi = path->pscontext;
+	size_t nr_bytes = psio ? psio->nr_bytes : 0 ;
 
 	atomic_sub(nr_bytes, &pi->in_flight_size);
 
-- 
1.6.5.2

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/dm-devel/attachments/20100628/6eadaeef/attachment.htm>


More information about the dm-devel mailing list