[dm-devel] [PATCH 15/19] libmultipath: ANA prioritzer: use nvme wrapper library

Benjamin Marzinski bmarzins at redhat.com
Thu Dec 20 22:58:59 UTC 2018


On Wed, Dec 19, 2018 at 12:19:27AM +0100, Martin Wilck wrote:
> Use the previously introduced NVME wrapper library for
> the passthrough commands from the ANA prioritizer. Discard
> code duplicated from nvme-cli from the ana code itself.
> 
> Furthermore, make additional cleanups in the ANA prioritizer:
> 
>  - don't use the same enum for priorities and error codes
>  - use char* arrays for error messages and state names
>  - return -1 prio to libmultipath for all error cases
>  - check if a device is NVMe before trying ioctl

It's not a big deal, but I do wonder what's the point with this.
Presumably the nvme prioritizer was configured for this device. Also,
the first thing we do is get the identify controller data structure via
a NVMe ioctl. This should fail if the device isn't an NVMe device. Doing
this check does give us more informational error messages, but we don't
do this kind of double-checking for the ALUA prioritizer, for instance.

>  - check for overflow in check_ana_state()
>  - get_ana_info(): improve readability with is_anagrpid_const
>  - priorities: PERSISTENT_LOSS state is worse than INACCESSIBLE
>  and CHANGE
> 
> Cc: lijie <lijie34 at huawei.com>
> Signed-off-by: Martin Wilck <mwilck at suse.com>
> ---
>  libmultipath/prioritizers/Makefile |   6 +-
>  libmultipath/prioritizers/ana.c    | 305 ++++++++++-------------------

Is there a reason why this patch doesn't remove ana.h?

Otherwise, this looks good.

-Ben

>  2 files changed, 113 insertions(+), 198 deletions(-)
> 
> diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile
> index 15afaba3..4d80c20c 100644
> --- a/libmultipath/prioritizers/Makefile
> +++ b/libmultipath/prioritizers/Makefile
> @@ -19,9 +19,13 @@ LIBS = \
>  	libpriordac.so \
>  	libprioweightedpath.so \
>  	libpriopath_latency.so \
> -	libprioana.so \
>  	libpriosysfs.so
>  
> +ifneq ($(call check_file,/usr/include/linux/nvme_ioctl.h),0)
> +	LIBS += libprioana.so
> +	CFLAGS += -I../nvme
> +endif
> +
>  all: $(LIBS)
>  
>  libprioalua.so: alua.o alua_rtpg.o
> diff --git a/libmultipath/prioritizers/ana.c b/libmultipath/prioritizers/ana.c
> index c5aaa5fb..88edb224 100644
> --- a/libmultipath/prioritizers/ana.c
> +++ b/libmultipath/prioritizers/ana.c
> @@ -17,155 +17,91 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <stdbool.h>
> +#include <libudev.h>
>  
>  #include "debug.h"
> +#include "nvme-lib.h"
>  #include "prio.h"
> +#include "util.h"
>  #include "structs.h"
> -#include "ana.h"
>  
>  enum {
> -	ANA_PRIO_OPTIMIZED		= 50,
> -	ANA_PRIO_NONOPTIMIZED		= 10,
> -	ANA_PRIO_INACCESSIBLE		= 5,
> -	ANA_PRIO_PERSISTENT_LOSS	= 1,
> -	ANA_PRIO_CHANGE			= 0,
> -	ANA_PRIO_RESERVED		= 0,
> -	ANA_PRIO_GETCTRL_FAILED		= -1,
> -	ANA_PRIO_NOT_SUPPORTED		= -2,
> -	ANA_PRIO_GETANAS_FAILED		= -3,
> -	ANA_PRIO_GETANALOG_FAILED	= -4,
> -	ANA_PRIO_GETNSID_FAILED		= -5,
> -	ANA_PRIO_GETNS_FAILED		= -6,
> -	ANA_PRIO_NO_MEMORY		= -7,
> -	ANA_PRIO_NO_INFORMATION		= -8,
> +	ANA_ERR_GETCTRL_FAILED		= 1,
> +	ANA_ERR_NOT_NVME,
> +	ANA_ERR_NOT_SUPPORTED,
> +	ANA_ERR_GETANAS_OVERFLOW,
> +	ANA_ERR_GETANAS_NOTFOUND,
> +	ANA_ERR_GETANALOG_FAILED,
> +	ANA_ERR_GETNSID_FAILED,
> +	ANA_ERR_GETNS_FAILED,
> +	ANA_ERR_NO_MEMORY,
> +	ANA_ERR_NO_INFORMATION,
>  };
>  
> -static const char * anas_string[] = {
> +static const char *ana_errmsg[] = {
> +	[ANA_ERR_GETCTRL_FAILED]	= "couldn't get ctrl info",
> +	[ANA_ERR_NOT_NVME]		= "not an NVMe device",
> +	[ANA_ERR_NOT_SUPPORTED]		= "ANA not supported",
> +	[ANA_ERR_GETANAS_OVERFLOW]	= "buffer overflow in ANA log",
> +	[ANA_ERR_GETANAS_NOTFOUND]	= "NSID or ANAGRPID not found",
> +	[ANA_ERR_GETANALOG_FAILED]	= "couldn't get ana log",
> +	[ANA_ERR_GETNSID_FAILED]	= "couldn't get NSID",
> +	[ANA_ERR_GETNS_FAILED]		= "couldn't get namespace info",
> +	[ANA_ERR_NO_MEMORY]		= "out of memory",
> +	[ANA_ERR_NO_INFORMATION]	= "invalid fd",
> +};
> +
> +/* Use the implicit initialization: value 0 is "invalid" */
> +static const int ana_prio [] = {
> +	[NVME_ANA_OPTIMIZED]		= 50,
> +	[NVME_ANA_NONOPTIMIZED]		= 10,
> +	[NVME_ANA_INACCESSIBLE]		=  5,
> +	[NVME_ANA_PERSISTENT_LOSS]	=  1,
> +	[NVME_ANA_CHANGE]		=  5,
> +};
> +
> +static const char *anas_string[] = {
>  	[NVME_ANA_OPTIMIZED]			= "ANA Optimized State",
>  	[NVME_ANA_NONOPTIMIZED]			= "ANA Non-Optimized State",
>  	[NVME_ANA_INACCESSIBLE]			= "ANA Inaccessible State",
>  	[NVME_ANA_PERSISTENT_LOSS]		= "ANA Persistent Loss State",
>  	[NVME_ANA_CHANGE]			= "ANA Change state",
> -	[NVME_ANA_RESERVED]			= "Invalid namespace group state!",
>  };
>  
>  static const char *aas_print_string(int rc)
>  {
>  	rc &= 0xff;
> -
> -	switch(rc) {
> -	case NVME_ANA_OPTIMIZED:
> -	case NVME_ANA_NONOPTIMIZED:
> -	case NVME_ANA_INACCESSIBLE:
> -	case NVME_ANA_PERSISTENT_LOSS:
> -	case NVME_ANA_CHANGE:
> +	if (rc >= 0 && rc < ARRAY_SIZE(anas_string) &&
> +	    anas_string[rc] != NULL)
>  		return anas_string[rc];
> -	default:
> -		return anas_string[NVME_ANA_RESERVED];
> -	}
> -
> -	return anas_string[NVME_ANA_RESERVED];
> -}
> -
> -static int nvme_get_nsid(int fd, unsigned *nsid)
> -{
> -	static struct stat nvme_stat;
> -	int err = fstat(fd, &nvme_stat);
> -	if (err < 0)
> -		return 1;
> -
> -	if (!S_ISBLK(nvme_stat.st_mode)) {
> -		condlog(0, "Error: requesting namespace-id from non-block device\n");
> -		return 1;
> -	}
> -
> -	*nsid = ioctl(fd, NVME_IOCTL_ID);
> -	return 0;
> -}
> -
> -static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
> -{
> -	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd);
> -}
> -
> -int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
> -                 __u16 lsi, bool rae, __u32 data_len, void *data)
> -{
> -	struct nvme_admin_cmd cmd = {
> -		.opcode		= nvme_admin_get_log_page,
> -		.nsid		= nsid,
> -		.addr		= (__u64)(uintptr_t) data,
> -		.data_len	= data_len,
> -	};
> -	__u32 numd = (data_len >> 2) - 1;
> -	__u16 numdu = numd >> 16, numdl = numd & 0xffff;
> -
> -	cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0);
> -	if (lsp)
> -		cmd.cdw10 |= lsp << 8;
> -
> -	cmd.cdw11 = numdu | (lsi << 16);
> -	cmd.cdw12 = lpo;
> -	cmd.cdw13 = (lpo >> 32);
> -
> -	return nvme_submit_admin_passthru(fd, &cmd);
> -
> -}
> -
> -int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
> -{
> -	struct nvme_admin_cmd cmd = {
> -		.opcode		= nvme_admin_identify,
> -		.nsid		= nsid,
> -		.addr		= (__u64)(uintptr_t) data,
> -		.data_len	= NVME_IDENTIFY_DATA_SIZE,
> -		.cdw10		= cdw10,
> -		.cdw11		= cdw11,
> -	};
> -
> -	return nvme_submit_admin_passthru(fd, &cmd);
> -}
> -
> -int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
> -{
> -	return nvme_identify13(fd, nsid, cdw10, 0, data);
> -}
>  
> -int nvme_identify_ctrl(int fd, void *data)
> -{
> -	return nvme_identify(fd, 0, NVME_ID_CNS_CTRL, data);
> -}
> -
> -int nvme_identify_ns(int fd, __u32 nsid, void *data)
> -{
> -	return nvme_identify(fd, nsid, NVME_ID_CNS_NS, data);
> -}
> -
> -int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
> -{
> -	__u64 lpo = 0;
> -
> -	return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, lpo, 0,
> -			true, ana_log_len, ana_log);
> +	return "invalid ANA state";
>  }
>  
> -static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log)
> +static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log,
> +			 size_t ana_log_len)
>  {
> -	int	rc = ANA_PRIO_GETANAS_FAILED;
>  	void *base = ana_log;
>  	struct nvme_ana_rsp_hdr *hdr = base;
>  	struct nvme_ana_group_desc *ana_desc;
> -	int offset = sizeof(struct nvme_ana_rsp_hdr);
> +	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
>  	__u32 nr_nsids;
>  	size_t nsid_buf_size;
>  	int i, j;
>  
>  	for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) {
>  		ana_desc = base + offset;
> +
> +		offset += sizeof(*ana_desc);
> +		if (offset > ana_log_len)
> +			return -ANA_ERR_GETANAS_OVERFLOW;
> +
>  		nr_nsids = le32_to_cpu(ana_desc->nnsids);
>  		nsid_buf_size = nr_nsids * sizeof(__le32);
>  
> -		offset += sizeof(*ana_desc);
> +		offset += nsid_buf_size;
> +		if (offset > ana_log_len)
> +			return -ANA_ERR_GETANAS_OVERFLOW;
>  
>  		for (j = 0; j < nr_nsids; j++) {
>  			if (nsid == le32_to_cpu(ana_desc->nsids[j]))
> @@ -173,12 +109,10 @@ static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log)
>  		}
>  
>  		if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid))
> -			rc = ana_desc->state;
> +			return ana_desc->state;
>  
> -		offset += nsid_buf_size;
>  	}
> -
> -	return rc;
> +	return -ANA_ERR_GETANAS_NOTFOUND;
>  }
>  
>  int get_ana_info(struct path * pp, unsigned int timeout)
> @@ -189,104 +123,81 @@ int get_ana_info(struct path * pp, unsigned int timeout)
>  	struct nvme_id_ns ns;
>  	void *ana_log;
>  	size_t ana_log_len;
> +	bool is_anagrpid_const;
>  
>  	rc = nvme_identify_ctrl(pp->fd, &ctrl);
> -	if (rc)
> -		return ANA_PRIO_GETCTRL_FAILED;
> +	if (rc < 0) {
> +		log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl");
> +		return -ANA_ERR_GETCTRL_FAILED;
> +	}
>  
>  	if(!(ctrl.cmic & (1 << 3)))
> -		return ANA_PRIO_NOT_SUPPORTED;
> -
> -	rc = nvme_get_nsid(pp->fd, &nsid);
> -	if (rc)
> -		return ANA_PRIO_GETNSID_FAILED;
> +		return -ANA_ERR_NOT_SUPPORTED;
>  
> -	rc = nvme_identify_ns(pp->fd, nsid, &ns);
> -	if (rc)
> -		return ANA_PRIO_GETNS_FAILED;
> +	nsid = nvme_get_nsid(pp->fd);
> +	if (nsid <= 0) {
> +		log_nvme_errcode(rc, pp->dev, "nvme_get_nsid");
> +		return -ANA_ERR_GETNSID_FAILED;
> +	}
> +	is_anagrpid_const = ctrl.anacap & (1 << 6);
>  
> +	/*
> +	 * Code copied from nvme-cli/nvme.c. We don't need to allocate an
> +	 * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most
> +	 * in one ANA group.
> +	 */
>  	ana_log_len = sizeof(struct nvme_ana_rsp_hdr) +
> -		le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc);
> -	if (!(ctrl.anacap & (1 << 6)))
> +		le32_to_cpu(ctrl.nanagrpid)
> +		* sizeof(struct nvme_ana_group_desc);
> +
> +	if (is_anagrpid_const) {
> +		rc = nvme_identify_ns(pp->fd, nsid, 0, &ns);
> +		if (rc) {
> +			log_nvme_errcode(rc, pp->dev, "nvme_identify_ns");
> +			return -ANA_ERR_GETNS_FAILED;
> +		}
> +	} else
>  		ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32);
>  
>  	ana_log = malloc(ana_log_len);
>  	if (!ana_log)
> -		return ANA_PRIO_NO_MEMORY;
> -
> +		return -ANA_ERR_NO_MEMORY;
> +	pthread_cleanup_push(free, ana_log);
>  	rc = nvme_ana_log(pp->fd, ana_log, ana_log_len,
> -		(ctrl.anacap & (1 << 6)) ? NVME_ANA_LOG_RGO : 0);
> +			  is_anagrpid_const ? NVME_ANA_LOG_RGO : 0);
>  	if (rc) {
> -		free(ana_log);
> -		return ANA_PRIO_GETANALOG_FAILED;
> -	}
> -
> -	rc = get_ana_state(nsid, le32_to_cpu(ns.anagrpid), ana_log);
> -	if (rc < 0){
> -		free(ana_log);
> -		return ANA_PRIO_GETANAS_FAILED;
> -	}
> -
> -	free(ana_log);
> -	condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc, aas_print_string(rc));
> -
> +		log_nvme_errcode(rc, pp->dev, "nvme_ana_log");
> +		rc = -ANA_ERR_GETANALOG_FAILED;
> +	} else
> +		rc = get_ana_state(nsid,
> +				   is_anagrpid_const ?
> +				   le32_to_cpu(ns.anagrpid) : 0,
> +				   ana_log, ana_log_len);
> +	pthread_cleanup_pop(1);
> +	if (rc >= 0)
> +		condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc,
> +			aas_print_string(rc));
>  	return rc;
>  }
>  
> -int getprio(struct path * pp, char * args, unsigned int timeout)
> +int getprio(struct path *pp, char *args, unsigned int timeout)
>  {
>  	int rc;
>  
>  	if (pp->fd < 0)
> -		return ANA_PRIO_NO_INFORMATION;
> -
> -	rc = get_ana_info(pp, timeout);
> -	if (rc >= 0) {
> -		rc &= 0x0f;
> -		switch(rc) {
> -		case NVME_ANA_OPTIMIZED:
> -			rc = ANA_PRIO_OPTIMIZED;
> -			break;
> -		case NVME_ANA_NONOPTIMIZED:
> -			rc = ANA_PRIO_NONOPTIMIZED;
> -			break;
> -		case NVME_ANA_INACCESSIBLE:
> -			rc = ANA_PRIO_INACCESSIBLE;
> -			break;
> -		case NVME_ANA_PERSISTENT_LOSS:
> -			rc = ANA_PRIO_PERSISTENT_LOSS;
> -			break;
> -		case NVME_ANA_CHANGE:
> -			rc = ANA_PRIO_CHANGE;
> -			break;
> -		default:
> -			rc = ANA_PRIO_RESERVED;
> -		}
> -	} else {
> -		switch(rc) {
> -		case ANA_PRIO_GETCTRL_FAILED:
> -			condlog(0, "%s: couldn't get ctrl info", pp->dev);
> -			break;
> -		case ANA_PRIO_NOT_SUPPORTED:
> -			condlog(0, "%s: ana not supported", pp->dev);
> -			break;
> -		case ANA_PRIO_GETANAS_FAILED:
> -			condlog(0, "%s: couldn't get ana state", pp->dev);
> -			break;
> -		case ANA_PRIO_GETANALOG_FAILED:
> -			condlog(0, "%s: couldn't get ana log", pp->dev);
> -			break;
> -		case ANA_PRIO_GETNS_FAILED:
> -			condlog(0, "%s: couldn't get namespace", pp->dev);
> -			break;
> -		case ANA_PRIO_GETNSID_FAILED:
> -			condlog(0, "%s: couldn't get namespace id", pp->dev);
> -			break;
> -		case ANA_PRIO_NO_MEMORY:
> -			condlog(0, "%s: couldn't alloc memory", pp->dev);
> -			break;
> -		}
> +		rc = -ANA_ERR_NO_INFORMATION;
> +	else if (udev_device_get_parent_with_subsystem_devtype(pp->udev,
> +							       "nvme", NULL)
> +		 == NULL)
> +		rc = -ANA_ERR_NOT_NVME;
> +	else {
> +		rc = get_ana_info(pp, timeout);
> +		if (rc >= 0 && rc < ARRAY_SIZE(ana_prio) && ana_prio[rc] != 0)
> +			return ana_prio[rc];
>  	}
> -	return rc;
> +	if (rc < 0 && -rc < ARRAY_SIZE(ana_errmsg))
> +		condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]);
> +	else
> +		condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc);
> +	return -1;
>  }
> -
> -- 
> 2.19.2




More information about the dm-devel mailing list