[dm-devel] [PATCH]multipath-tools: Re-ordering of child paths in priority group for round-robin path selector

Christophe Varoqui christophe.varoqui at opensvc.com
Thu Feb 20 06:58:13 UTC 2014


Interesting work indeed.
With all this new information gathered, it would be nice to have new
'show' wildcards to report it.

best regards,
Christophe Varoqui
OpenSVC

On Thu, Feb 20, 2014 at 12:25 AM, Merla, ShivaKrishna
<ShivaKrishna.Merla at netapp.com> wrote:
> The default Linux bus scanning order is depth first. Due to this default
> ordering of paths in priority group is by host. With round robin path selector
> this will lead to i/o clumping on each host controller and starvation on other
> host controller for certain duration before routing i/o to alternate one.
> Even with multiple LUNs sharing same host controllers, it will not yield flat
> evenly distributed i/o pattern. This will impact performance when more than
> eight active paths are available per LUN. This will be even more worse when
> repeat count is more than one.
>
> This patch addresses this issue by re-ordering paths in priority group by
> alternate host ports and even PCI adapters. This re-ordering is only necessary
> for round-robin path selector. We have observed better IOPS and Throughput with
> this crafted ordering of paths.
>
> Paths belonging to same host are grouped into host groups and hosts from same
> adapter are grouped into adapter groups. Later paths are selected to yield
> crafted order to help route i/o to alternate host ports and adapters available
> within priority group of LUN.
>
> Signed-off-by: Shiva Krishna Merla<shivakrishna.merla at netapp.com>
> Reviewed-by: Krishnasamy Somasundaram<somasundaram.krishnasamy at netapp.com>
> ---
>  libmultipath/configure.c |  224 ++++++++++++++++++++++++++++++++++++++++++++++
>  libmultipath/discovery.c |   87 ++++++++++++++++++
>  libmultipath/structs.c   |   84 +++++++++++++++++
>  libmultipath/structs.h   |   25 +++++-
>  4 files changed, 419 insertions(+), 1 deletions(-)
>
> diff --git a/libmultipath/configure.c b/libmultipath/configure.c
> index 8c09791..74e5a0b 100644
> --- a/libmultipath/configure.c
> +++ b/libmultipath/configure.c
> @@ -39,6 +39,214 @@
>  #include "uxsock.h"
>  #include "wwids.h"
>
> +/* group paths in pg by host adapter
> + */
> +int group_by_host_adapter(struct pathgroup *pgp, vector adapters)
> +{
> +       struct adapter_group *agp;
> +       struct host_group *hgp;
> +       struct path *pp, *pp1;
> +       char adapter_name1[SLOT_NAME_SIZE];
> +       char adapter_name2[SLOT_NAME_SIZE];
> +       int i, j;
> +       int found_hostgroup = 0;
> +
> +       while (VECTOR_SIZE(pgp->paths) > 0) {
> +
> +               pp = VECTOR_SLOT(pgp->paths, 0);
> +
> +               if (sysfs_get_host_adapter_name(pp, adapter_name1))
> +                       return 1;
> +               /* create a new host adapter group
> +                */
> +               agp = alloc_adaptergroup();
> +               if (!agp)
> +                       goto out;
> +               agp->pgp = pgp;
> +
> +               strncpy(agp->adapter_name, adapter_name1, SLOT_NAME_SIZE);
> +               store_adaptergroup(adapters, agp);
> +
> +               /* create a new host port group
> +                */
> +               hgp = alloc_hostgroup();
> +               if (!hgp)
> +                       goto out;
> +               if (store_hostgroup(agp->host_groups, hgp))
> +                       goto out;
> +
> +               hgp->host_no = pp->sg_id.host_no;
> +               agp->num_hosts++;
> +               if (store_path(hgp->paths, pp))
> +                       goto out;
> +
> +               hgp->num_paths++;
> +               /* delete path from path group
> +                */
> +               vector_del_slot(pgp->paths, 0);
> +
> +               /* add all paths belonging to same host adapter
> +                */
> +               vector_foreach_slot(pgp->paths, pp1, i) {
> +                       if (sysfs_get_host_adapter_name(pp1, adapter_name2))
> +                               goto out;
> +                       if (strcmp(adapter_name1, adapter_name2) == 0) {
> +                               found_hostgroup = 0;
> +                               vector_foreach_slot(agp->host_groups, hgp, j) {
> +                                       if (hgp->host_no == pp1->sg_id.host_no) {
> +                                               if (store_path(hgp->paths, pp1))
> +                                                       goto out;
> +                                               hgp->num_paths++;
> +                                               found_hostgroup = 1;
> +                                               break;
> +                                       }
> +                               }
> +                               if (!found_hostgroup) {
> +                                       /* this path belongs to new host port
> +                                        * within this adapter
> +                                        */
> +                                       hgp = alloc_hostgroup();
> +                                       if (!hgp)
> +                                               goto out;
> +
> +                                       if (store_hostgroup(agp->host_groups, hgp))
> +                                               goto out;
> +
> +                                       agp->num_hosts++;
> +                                       if (store_path(hgp->paths, pp1))
> +                                               goto out;
> +
> +                                       hgp->host_no = pp1->sg_id.host_no;
> +                                       hgp->num_paths++;
> +                               }
> +                               /* delete paths from original path_group
> +                                * as they are added into adapter group now
> +                                */
> +                               vector_del_slot(pgp->paths, i);
> +                               i--;
> +                       }
> +               }
> +       }
> +       return 0;
> +
> +out:   /* add back paths into pg as re-ordering failed
> +        */
> +       vector_foreach_slot(adapters, agp, i) {
> +                       vector_foreach_slot(agp->host_groups, hgp, j) {
> +                               while (VECTOR_SIZE(hgp->paths) > 0) {
> +                                       pp = VECTOR_SLOT(hgp->paths, 0);
> +                                       if (store_path(pgp->paths, pp))
> +                                               condlog(3, "failed to restore "
> +                                               "path %s into path group",
> +                                                pp->dev);
> +                                       vector_del_slot(hgp->paths, 0);
> +                               }
> +                       }
> +               }
> +       free_adaptergroup(adapters);
> +       return 1;
> +}
> +
> +/* re-order paths in pg by alternating adapters and host ports
> + * for optimized selection
> + */
> +int order_paths_in_pg_by_alt_adapters(struct pathgroup *pgp, vector adapters,
> +                int total_paths)
> +{
> +       int next_adapter_index = 0;
> +       int num_adapters = 0;
> +       struct adapter_group *agp;
> +       struct host_group *hgp;
> +       struct path *pp;
> +
> +       num_adapters = VECTOR_SIZE(adapters);
> +
> +       while (total_paths > 0) {
> +               agp = VECTOR_SLOT(adapters, next_adapter_index);
> +
> +               hgp = VECTOR_SLOT(agp->host_groups, agp->next_host_index);
> +
> +               if (!hgp->num_paths) {
> +                       agp->next_host_index++;
> +                       agp->next_host_index %= agp->num_hosts;
> +                       next_adapter_index++;
> +                       next_adapter_index %= VECTOR_SIZE(adapters);
> +                       continue;
> +               }
> +
> +               pp  = VECTOR_SLOT(hgp->paths, 0);
> +
> +               if (store_path(pgp->paths, pp))
> +                       return 1;
> +
> +               total_paths--;
> +
> +               vector_del_slot(hgp->paths, 0);
> +
> +               hgp->num_paths--;
> +
> +               agp->next_host_index++;
> +               agp->next_host_index %= agp->num_hosts;
> +               next_adapter_index++;
> +               next_adapter_index %= VECTOR_SIZE(adapters);
> +       }
> +
> +       /* all paths are added into path_group
> +        * in crafted child order
> +        */
> +       return 0;
> +}
> +
> +/* round-robin: order paths in path group to alternate
> + * between all host adapters
> + */
> +int rr_optimize_path_order(struct pathgroup *pgp)
> +{
> +       vector adapters;
> +       struct path *pp;
> +       int total_paths;
> +       int i;
> +
> +       total_paths = VECTOR_SIZE(pgp->paths);
> +       vector_foreach_slot(pgp->paths, pp, i) {
> +               if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP ||
> +                       pp->sg_id.proto_id != SCSI_PROTOCOL_SAS ||
> +                       pp->sg_id.proto_id != SCSI_PROTOCOL_ISCSI ||
> +                       pp->sg_id.proto_id != SCSI_PROTOCOL_SRP) {
> +                       /* return success as default path order
> +                        * is maintained in path group
> +                        */
> +                       return 0;
> +               }
> +       }
> +       adapters = vector_alloc();
> +       if (!adapters)
> +               return 0;
> +
> +       /* group paths in path group by host adapters
> +        */
> +       if (group_by_host_adapter(pgp, adapters)) {
> +               condlog(3, "Failed to group paths by adapters");
> +               free_adaptergroup(adapters);
> +               return 0;
> +       }
> +
> +       /* re-order paths in pg to alternate between adapters and host ports
> +        */
> +       if (order_paths_in_pg_by_alt_adapters(pgp, adapters, total_paths)) {
> +               condlog(3, "Failed to re-order paths in pg by adapters "
> +                       "and host ports");
> +               free_adaptergroup(adapters);
> +               /* return failure as original paths are
> +                * removed form pgp
> +                */
> +               return 1;
> +       }
> +
> +       free_adaptergroup(adapters);
> +       return 0;
> +}
> +
>  extern int
>  setup_map (struct multipath * mpp, char * params, int params_size)
>  {
> @@ -100,6 +308,22 @@ setup_map (struct multipath * mpp, char * params, int params_size)
>          */
>         mpp->bestpg = select_path_group(mpp);
>
> +       /* re-order paths in all path groups in an optimized way
> +        * for round-robin path selectors to get maximum throughput.
> +        */
> +       if (!strncmp(mpp->selector, "round-robin", 11)) {
> +               vector_foreach_slot(mpp->pg, pgp, i) {
> +                       if (VECTOR_SIZE(pgp->paths) <= 2)
> +                               continue;
> +                       if (rr_optimize_path_order(pgp)) {
> +                               condlog(2, "cannot re-order paths for "
> +                                       "optimization: %s",
> +                                       mpp->alias);
> +                               return 1;
> +                       }
> +               }
> +       }
> +
>         /*
>          * transform the mp->pg vector of vectors of paths
>          * into a mp->params strings to feed the device-mapper
> diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
> index 228ffd3..b88edd5 100644
> --- a/libmultipath/discovery.c
> +++ b/libmultipath/discovery.c
> @@ -317,6 +317,93 @@ sysfs_get_tgt_nodename (struct path *pp, char * node)
>         return 1;
>  }
>
> +int sysfs_get_host_adapter_name(struct path *pp, char *adapter_name)
> +{
> +       int proto_id;
> +
> +       if (!pp || !adapter_name)
> +               return 1;
> +
> +       proto_id = pp->sg_id.proto_id;
> +
> +       if (proto_id != SCSI_PROTOCOL_FCP ||
> +           proto_id != SCSI_PROTOCOL_SAS ||
> +           proto_id != SCSI_PROTOCOL_ISCSI ||
> +           proto_id != SCSI_PROTOCOL_SRP) {
> +               return 1;
> +       }
> +       /* iscsi doesn't have adapter info in sysfs
> +        * get ip_address for grouping paths
> +        */
> +       if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
> +               return sysfs_get_iscsi_ip_address(pp, adapter_name);
> +
> +       /* fetch adapter pci name for other protocols
> +        */
> +       return sysfs_get_host_pci_name(pp, adapter_name);
> +}
> +
> +int sysfs_get_host_pci_name(struct path *pp, char *pci_name)
> +{
> +       struct udev_device *hostdev, *parent;
> +       char host_name[HOST_NAME_LEN];
> +       char *driver_name, *value;
> +
> +       if (!pp || !pci_name)
> +               return 1;
> +
> +       sprintf(host_name, "host%d", pp->sg_id.host_no);
> +       hostdev = udev_device_new_from_subsystem_sysname(conf->udev,
> +                       "scsi_host", host_name);
> +       if (!hostdev)
> +               return 1;
> +
> +       parent = udev_device_get_parent(hostdev);
> +       while (parent) {
> +               driver_name = udev_device_get_driver(parent);
> +               if (!driver_name) {
> +                       parent = udev_device_get_parent(parent);
> +                       continue;
> +               }
> +               if (!strcmp(driver_name, "pcieport"))
> +                       break;
> +               parent = udev_device_get_parent(parent);
> +       }
> +       if (parent) {
> +               /* pci_device found
> +                */
> +               value = udev_device_get_sysname(parent);
> +
> +               strncpy(pci_name, value, SLOT_NAME_SIZE);
> +               udev_device_unref(hostdev);
> +               return 0;
> +       }
> +       udev_device_unref(hostdev);
> +       return 1;
> +}
> +
> +int sysfs_get_iscsi_ip_address(struct path *pp, char *ip_address)
> +{
> +       struct udev_device *hostdev;
> +       char host_name[HOST_NAME_LEN];
> +       char *value;
> +
> +       sprintf(host_name, "host%d", pp->sg_id.host_no);
> +       hostdev = udev_device_new_from_subsystem_sysname(conf->udev,
> +                       "iscsi_host", host_name);
> +       if (hostdev) {
> +               value = udev_device_get_sysattr_value(hostdev,
> +                               "ip_address");
> +               if (value) {
> +                       strncpy(ip_address, value, SLOT_NAME_SIZE);
> +                       udev_device_unref(hostdev);
> +                       return 0;
> +               } else
> +                       udev_device_unref(hostdev);
> +       }
> +       return 1;
> +}
> +
>  static void
>  sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
>  {
> diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> index 049f17d..30d247d 100644
> --- a/libmultipath/structs.c
> +++ b/libmultipath/structs.c
> @@ -18,6 +18,70 @@
>  #include "blacklist.h"
>  #include "prio.h"
>
> +struct adapter_group *
> +alloc_adaptergroup(void)
> +{
> +       struct adapter_group *agp;
> +
> +       agp = (struct adapter_group *)MALLOC(sizeof(struct adapter_group));
> +
> +       if (!agp)
> +               return NULL;
> +
> +       agp->host_groups = vector_alloc();
> +       if (!agp->host_groups) {
> +               FREE(agp);
> +               agp = NULL;
> +       }
> +       return agp;
> +}
> +
> +void free_adaptergroup(vector adapters)
> +{
> +       int i;
> +       struct adapter_group *agp;
> +
> +       vector_foreach_slot(adapters, agp, i) {
> +               free_hostgroup(agp->host_groups);
> +               FREE(agp);
> +       }
> +       vector_free(adapters);
> +}
> +
> +void free_hostgroup(vector hostgroups)
> +{
> +       int i;
> +       struct host_group *hgp;
> +
> +       if (!hostgroups)
> +               return;
> +
> +       vector_foreach_slot(hostgroups, hgp, i) {
> +               vector_free(hgp->paths);
> +               FREE(hgp);
> +       }
> +       vector_free(hostgroups);
> +}
> +
> +struct host_group *
> +alloc_hostgroup(void)
> +{
> +       struct host_group *hgp;
> +
> +       hgp = (struct host_group *)MALLOC(sizeof(struct host_group));
> +
> +       if (!hgp)
> +               return NULL;
> +
> +       hgp->paths = vector_alloc();
> +
> +       if (!hgp->paths) {
> +               FREE(hgp);
> +               hgp = NULL;
> +       }
> +       return hgp;
> +}
> +
>  struct path *
>  alloc_path (void)
>  {
> @@ -242,6 +306,26 @@ store_pathgroup (vector pgvec, struct pathgroup * pgp)
>         return 0;
>  }
>
> +int
> +store_hostgroup(vector hostgroupvec, struct host_group * hgp)
> +{
> +       if (!vector_alloc_slot(hostgroupvec))
> +               return 1;
> +
> +       vector_set_slot(hostgroupvec, hgp);
> +       return 0;
> +}
> +
> +int
> +store_adaptergroup(vector adapters, struct adapter_group * agp)
> +{
> +       if (!vector_alloc_slot(adapters))
> +               return 1;
> +
> +       vector_set_slot(adapters, agp);
> +       return 0;
> +}
> +
>  struct multipath *
>  find_mp_by_minor (vector mpvec, int minor)
>  {
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 64de06e..772a7d7 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -15,7 +15,8 @@
>  #define BLK_DEV_SIZE           33
>  #define PATH_SIZE              512
>  #define NAME_SIZE              512
> -
> +#define HOST_NAME_LEN          8
> +#define SLOT_NAME_SIZE         32
>
>  #define SCSI_VENDOR_SIZE       9
>  #define SCSI_PRODUCT_SIZE      17
> @@ -251,6 +252,20 @@ struct pathgroup {
>         char * selector;
>  };
>
> +struct adapter_group {
> +       char adapter_name[SLOT_NAME_SIZE];
> +       struct pathgroup *pgp;
> +       int num_hosts;
> +       vector host_groups;
> +       int next_host_index;
> +};
> +
> +struct host_group {
> +       int host_no;
> +       int num_paths;
> +       vector paths;
> +};
> +
>  struct path * alloc_path (void);
>  struct pathgroup * alloc_pathgroup (void);
>  struct multipath * alloc_multipath (void);
> @@ -263,6 +278,14 @@ void free_multipath_attributes (struct multipath *);
>  void drop_multipath (vector mpvec, char * wwid, enum free_path_mode free_paths);
>  void free_multipathvec (vector mpvec, enum free_path_mode free_paths);
>
> +struct adapter_group * alloc_adaptergroup(void);
> +struct host_group * alloc_hostgroup(void);
> +void free_adaptergroup(vector adapters);
> +void free_hostgroup(vector hostgroups);
> +
> +int store_adaptergroup(vector adapters, struct adapter_group *agp);
> +int store_hostgroup(vector hostgroupvec, struct host_group *hgp);
> +
>  int store_path (vector pathvec, struct path * pp);
>  int store_pathgroup (vector pgvec, struct pathgroup * pgp);
> --




More information about the dm-devel mailing list