[dm-devel] [PATCH]prioritizers/path_latency: Fix failure of sg_read to a nvme device
Yang Feng
philip.yang at huawei.com
Fri Jul 14 01:41:04 UTC 2017
Closed; this is now tracked by another patch.
-Yang
On 2017/6/28 12:42, Yang Feng wrote:
> Fix failure of sg_read to an nvme device:
> 1. Sending the sg_read command to an nvme device fails
> because the block size used in user mode is 4096 bytes,
> which does not match the block size in kernel mode, which is
> 512 bytes. See the function nvme_trans_io in
> scsi.c under the directory "drivers/nvme/host". The relevant
> code fragment is as follows:
> /* If block count and actual data buffer size dont match, error out */
> if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
> res = -EINVAL;
> goto out;
> }
> 2. In addition, the SCSI-to-NVMe translations will be removed by
> the patch "nvme: Remove SCSI translations" in linux-nvme,
> so using the nvme_read command instead is a good idea.
>
> Signed-off-by: Yang Feng <philip.yang at huawei.com>
> ---
> libmultipath/prioritizers/path_latency.c | 78 +++++++++++++++++++++++++++-----
> 1 file changed, 66 insertions(+), 12 deletions(-)
>
> diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
> index 34b734b..8f633e0 100644
> --- a/libmultipath/prioritizers/path_latency.c
> +++ b/libmultipath/prioritizers/path_latency.c
> @@ -23,14 +23,32 @@
> #include <math.h>
> #include <ctype.h>
> #include <time.h>
> -
> #include "debug.h"
> #include "prio.h"
> #include "structs.h"
> +#include <linux/types.h>
> +#include <sys/ioctl.h>
> #include "../checkers/libsg.h"
>
> #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args)
>
> +struct nvme_user_io {
> + __u8 opcode;
> + __u8 flags;
> + __u16 control;
> + __u16 nblocks;
> + __u16 rsvd;
> + __u64 metadata;
> + __u64 addr;
> + __u64 slba;
> + __u32 dsmgmt;
> + __u32 reftag;
> + __u16 apptag;
> + __u16 appmask;
> +};
> +
> +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
> +
> #define MAX_IO_NUM 200
> #define MIN_IO_NUM 2
>
> @@ -51,19 +69,55 @@ static long long path_latency[MAX_IO_NUM];
>
> static inline long long timeval_to_us(const struct timespec *tv)
> {
> - return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC);
> + return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC);
> +}
> +
> +int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
> + __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
> +{
> + struct nvme_user_io io = {
> + .opcode = opcode,
> + .flags = 0,
> + .control = control,
> + .nblocks = nblocks,
> + .rsvd = 0,
> + .metadata = (__u64)(uintptr_t) metadata,
> + .addr = (__u64)(uintptr_t) data,
> + .slba = slba,
> + .dsmgmt = dsmgmt,
> + .reftag = reftag,
> + .appmask = apptag,
> + .apptag = appmask,
> + };
> +
> + return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
> +}
> +
> +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
> + __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
> +{
> + return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
> + reftag, apptag, appmask, data, metadata);
> }
>
> -static int do_readsector0(int fd, unsigned int timeout)
> +static int do_readsector0(struct path *pp, unsigned int timeout)
> {
> - unsigned char buf[4096];
> - unsigned char sbuf[SENSE_BUFF_LEN];
> - int ret;
> + unsigned char buf[4096];
> + unsigned char mbuf[512];
> + unsigned char sbuf[SENSE_BUFF_LEN];
>
> - ret = sg_read(fd, &buf[0], 4096, &sbuf[0],
> - SENSE_BUFF_LEN, timeout);
> + if (!strncmp(pp->dev, "nvme", 4))
> + {
> + if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) < 0)
> + return 0;
> + }
> + else
> + {
> + if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0],SENSE_BUFF_LEN, timeout) == 2)
> + return 0;
> + }
>
> - return ret;
> + return 1;
> }
>
> int check_args_valid(int io_num, int base_num)
> @@ -218,9 +272,9 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
> (void)clock_gettime(CLOCK_MONOTONIC, &tv);
> before = timeval_to_us(&tv);
>
> - if (do_readsector0(pp->fd, timeout) == 2)
> + if (do_readsector0(pp, timeout) == 0)
> {
> - pp_pl_log(0, "%s: path down", pp->dev);
> + pp_pl_log(0, "%s: send read sector0 command fail", pp->dev);
> return -1;
> }
>
> @@ -246,7 +300,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
> Warn the user if latency_interval is smaller than (2 * standard_deviation), or equal */
> standard_deviation = calc_standard_deviation(path_latency, index, avglatency);
> latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
> - if ((latency_interval!= 0)
> + if ((latency_interval != 0)
> && (latency_interval <= (2 * standard_deviation)))
> pp_pl_log(3, "%s: latency interval (%lld) according to average latency (%lld us) is smaller than "
> "2 * standard deviation (%lld us), or equal, args base_num (%d) needs to be set bigger value",
>
More information about the dm-devel
mailing list