[dm-devel] [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command.
Yang Feng
philip.yang at huawei.com
Thu Jul 13 07:51:27 UTC 2017
1. The SCSI-to-NVMe translation layer has been removed from the kernel by
the linux-nvme patch "nvme: Remove SCSI translations", so multipath-tools
has to issue native NVMe ioctl commands itself.
2. In prioritizers/path_latency.c, modify the function do_readsector0() so
that it sends a native NVMe Read ioctl command to NVMe devices and an SG
Read ioctl command to SCSI devices.
3. In the checkers, delete the file tur.c and create the new file ping.c.
ping.c sends the native NVMe Keep Alive ioctl command to NVMe devices and
the SG TUR (Test Unit Ready) ioctl command to SCSI devices.
Signed-off-by: Yang Feng <philip.yang at huawei.com>
---
libmultipath/checkers.c | 7 +
libmultipath/checkers.h | 6 +-
libmultipath/checkers/Makefile | 6 +-
libmultipath/checkers/emc_clariion.c | 4 +-
libmultipath/checkers/libsg.c | 94 -------
libmultipath/checkers/libsg.h | 9 -
libmultipath/checkers/ping.c | 453 +++++++++++++++++++++++++++++++
libmultipath/checkers/readsector0.c | 4 +-
libmultipath/checkers/tur.c | 427 -----------------------------
libmultipath/checkers/tur.h | 8 -
libmultipath/defaults.h | 2 +-
libmultipath/discovery.c | 1 +
libmultipath/hwtable.c | 2 +-
libmultipath/libnvme.c | 130 +++++++++
libmultipath/libnvme.h | 10 +
libmultipath/libsg.c | 113 ++++++++
libmultipath/libsg.h | 13 +
libmultipath/prioritizers/Makefile | 2 +-
libmultipath/prioritizers/path_latency.c | 58 +---
libmultipath/propsel.c | 2 +-
multipath/multipath.conf.5 | 4 +-
21 files changed, 754 insertions(+), 601 deletions(-)
delete mode 100644 libmultipath/checkers/libsg.c
delete mode 100644 libmultipath/checkers/libsg.h
create mode 100644 libmultipath/checkers/ping.c
delete mode 100644 libmultipath/checkers/tur.c
delete mode 100644 libmultipath/checkers/tur.h
create mode 100644 libmultipath/libnvme.c
create mode 100644 libmultipath/libnvme.h
create mode 100644 libmultipath/libsg.c
create mode 100644 libmultipath/libsg.h
diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 05e024f..00fbd6e 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -162,6 +162,13 @@ void checker_set_fd (struct checker * c, int fd)
c->fd = fd;
}
+void checker_set_dev(struct checker *c, char *dev)
+{
+	if (!c || !dev)	/* nothing to store the name in, or no name given */
+		return;
+	snprintf(c->dev, sizeof(c->dev), "%s", dev); /* bounded, NUL-terminated */
+}
+
void checker_set_sync (struct checker * c)
{
if (!c)
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 1d225de..506dd4c 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -79,7 +79,7 @@ enum path_check_state {
};
#define DIRECTIO "directio"
-#define TUR "tur"
+#define PING "ping"
#define HP_SW "hp_sw"
#define RDAC "rdac"
#define EMC_CLARIION "emc_clariion"
@@ -97,6 +97,8 @@ enum path_check_state {
#define CHECKER_DEV_LEN 256
#define LIB_CHECKER_NAMELEN 256
+#define FILE_NAME_SIZE 256
+
struct checker {
struct list_head node;
void *handle;
@@ -107,6 +109,7 @@ struct checker {
int disable;
char name[CHECKER_NAME_LEN];
char message[CHECKER_MSG_LEN]; /* comm with callers */
+ char dev[FILE_NAME_SIZE];
void * context; /* store for persistent data */
void ** mpcontext; /* store for persistent data shared
multipath-wide. Use MALLOC if
@@ -132,6 +135,7 @@ void checker_reset (struct checker *);
void checker_set_sync (struct checker *);
void checker_set_async (struct checker *);
void checker_set_fd (struct checker *, int);
+void checker_set_dev(struct checker *c, char *dev);
void checker_enable (struct checker *);
void checker_disable (struct checker *);
void checker_repair (struct checker *);
diff --git a/libmultipath/checkers/Makefile b/libmultipath/checkers/Makefile
index bce6b8b..3ab04ef 100644
--- a/libmultipath/checkers/Makefile
+++ b/libmultipath/checkers/Makefile
@@ -9,7 +9,7 @@ CFLAGS += $(LIB_CFLAGS) -I..
LIBS= \
libcheckcciss_tur.so \
libcheckreadsector0.so \
- libchecktur.so \
+ libcheckping.so \
libcheckdirectio.so \
libcheckemc_clariion.so \
libcheckhp_sw.so \
@@ -24,10 +24,10 @@ all: $(LIBS)
libcheckrbd.so: rbd.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lrados -ludev
-libcheckdirectio.so: libsg.o directio.o
+libcheckdirectio.so: ../libsg.o ../libnvme.o directio.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio
-libcheck%.so: libsg.o %.o
+libcheck%.so: ../libsg.o ../libnvme.o %.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
install:
diff --git a/libmultipath/checkers/emc_clariion.c b/libmultipath/checkers/emc_clariion.c
index 9c1ffed..12c1e3e 100644
--- a/libmultipath/checkers/emc_clariion.c
+++ b/libmultipath/checkers/emc_clariion.c
@@ -12,7 +12,7 @@
#include <errno.h>
#include "../libmultipath/sg_include.h"
-#include "libsg.h"
+#include "../libmultipath/libsg.h"
#include "checkers.h"
#include "debug.h"
#include "memory.h"
@@ -21,6 +21,8 @@
#define INQUIRY_CMDLEN 6
#define HEAVY_CHECK_COUNT 10
+#define SENSE_BUFF_LEN 32
+
/*
* Mechanism to track CLARiiON inactive snapshot LUs.
* This is done so that we can fail passive paths
diff --git a/libmultipath/checkers/libsg.c b/libmultipath/checkers/libsg.c
deleted file mode 100644
index 958ea92..0000000
--- a/libmultipath/checkers/libsg.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Christophe Varoqui
- */
-#include <string.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <sys/stat.h>
-
-#include "checkers.h"
-#include "libsg.h"
-#include "../libmultipath/sg_include.h"
-
-int
-sg_read (int sg_fd, unsigned char * buff, int buff_len,
- unsigned char * sense, int sense_len, unsigned int timeout)
-{
- /* defaults */
- int blocks;
- long long start_block = 0;
- int bs = 512;
- int cdbsz = 10;
-
- unsigned char rdCmd[cdbsz];
- unsigned char *sbb = sense;
- struct sg_io_hdr io_hdr;
- int res;
- int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
- int sz_ind;
- struct stat filestatus;
- int retry_count = 3;
-
- if (fstat(sg_fd, &filestatus) != 0)
- return PATH_DOWN;
- bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize;
- blocks = buff_len / bs;
- memset(rdCmd, 0, cdbsz);
- sz_ind = 1;
- rdCmd[0] = rd_opcode[sz_ind];
- rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff);
- rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff);
- rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff);
- rdCmd[5] = (unsigned char)(start_block & 0xff);
- rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff);
- rdCmd[8] = (unsigned char)(blocks & 0xff);
-
- memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
- io_hdr.interface_id = 'S';
- io_hdr.cmd_len = cdbsz;
- io_hdr.cmdp = rdCmd;
- io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- io_hdr.dxfer_len = bs * blocks;
- io_hdr.dxferp = buff;
- io_hdr.mx_sb_len = sense_len;
- io_hdr.sbp = sense;
- io_hdr.timeout = timeout * 1000;
- io_hdr.pack_id = (int)start_block;
-
-retry:
- memset(sense, 0, sense_len);
- while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno));
-
- if (res < 0) {
- if (ENOMEM == errno) {
- return PATH_UP;
- }
- return PATH_DOWN;
- }
-
- if ((0 == io_hdr.status) &&
- (0 == io_hdr.host_status) &&
- (0 == io_hdr.driver_status)) {
- return PATH_UP;
- } else {
- int key = 0;
-
- if (io_hdr.sb_len_wr > 3) {
- if (sbb[0] == 0x72 || sbb[0] == 0x73)
- key = sbb[1] & 0x0f;
- else if (io_hdr.sb_len_wr > 13 &&
- ((sbb[0] & 0x7f) == 0x70 ||
- (sbb[0] & 0x7f) == 0x71))
- key = sbb[2] & 0x0f;
- }
-
- /*
- * Retry if UNIT_ATTENTION check condition.
- */
- if (key == 0x6) {
- if (--retry_count)
- goto retry;
- }
- return PATH_DOWN;
- }
-}
diff --git a/libmultipath/checkers/libsg.h b/libmultipath/checkers/libsg.h
deleted file mode 100644
index 3994f45..0000000
--- a/libmultipath/checkers/libsg.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _LIBSG_H
-#define _LIBSG_H
-
-#define SENSE_BUFF_LEN 32
-
-int sg_read (int sg_fd, unsigned char * buff, int buff_len,
- unsigned char * sense, int sense_len, unsigned int timeout);
-
-#endif /* _LIBSG_H */
diff --git a/libmultipath/checkers/ping.c b/libmultipath/checkers/ping.c
new file mode 100644
index 0000000..3a87571
--- /dev/null
+++ b/libmultipath/checkers/ping.c
@@ -0,0 +1,453 @@
+/*
+ * Some code borrowed from sg-utils and
+ * NVM-Express command line utility,
+ * including using of a TUR command and
+ * a Keep Alive command.
+ *
+ * Copyright (c) 2004 Christophe Varoqui
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include "checkers.h"
+
+#include "../libmultipath/debug.h"
+#include "../libmultipath/sg_include.h"
+#include "../libmultipath/util.h"
+#include "../libmultipath/time-util.h"
+#include "../libmultipath/libsg.h"
+#include "../libmultipath/libnvme.h"
+
+#define SENSE_BUFF_LEN 32
+
+#define MSG_PING_UP "ping checker reports path is up"
+#define MSG_PING_DOWN "ping checker reports path is down"
+#define MSG_PING_GHOST "ping checker reports path is in standby state"
+#define MSG_PING_RUNNING "ping checker still running"
+#define MSG_PING_TIMEOUT "ping checker timed out"
+#define MSG_PING_FAILED "ping checker failed to initialize"
+
+struct ping_checker_context {
+ dev_t devt;
+ int state;
+ int running;
+ int fd;
+ char dev[FILE_NAME_SIZE];
+ unsigned int timeout;
+ time_t time;
+ pthread_t thread;
+ pthread_mutex_t lock;
+ pthread_cond_t active;
+ pthread_spinlock_t hldr_lock;
+ int holders;
+ char message[CHECKER_MSG_LEN];
+};
+
+static const char *ping_devt(char *devt_buf, int size,
+ struct ping_checker_context *ct)
+{
+ dev_t devt;
+
+ pthread_mutex_lock(&ct->lock);
+ devt = ct->devt;
+ pthread_mutex_unlock(&ct->lock);
+
+ snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
+ return devt_buf;
+}
+
+int libcheck_init (struct checker * c)
+{
+ struct ping_checker_context *ct;
+ pthread_mutexattr_t attr;
+
+ ct = malloc(sizeof(struct ping_checker_context));
+ if (!ct)
+ return 1;
+ memset(ct, 0, sizeof(struct ping_checker_context));
+
+ ct->state = PATH_UNCHECKED;
+ ct->fd = -1;
+ ct->holders = 1;
+ memset(ct->dev, 0, sizeof(ct->dev));
+ pthread_cond_init_mono(&ct->active);
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+ pthread_mutex_init(&ct->lock, &attr);
+ pthread_mutexattr_destroy(&attr);
+ pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
+ c->context = ct;
+
+ return 0;
+}
+
+static void cleanup_context(struct ping_checker_context *ct)
+{
+ pthread_mutex_destroy(&ct->lock);
+ pthread_cond_destroy(&ct->active);
+ pthread_spin_destroy(&ct->hldr_lock);
+ free(ct);
+}
+
+void libcheck_free (struct checker * c)
+{
+ if (c->context) {
+ struct ping_checker_context *ct = c->context;
+ int holders;
+ pthread_t thread;
+
+ pthread_spin_lock(&ct->hldr_lock);
+ ct->holders--;
+ holders = ct->holders;
+ thread = ct->thread;
+ pthread_spin_unlock(&ct->hldr_lock);
+ if (holders)
+ pthread_cancel(thread);
+ else
+ cleanup_context(ct);
+ c->context = NULL;
+ }
+ return;
+}
+
+void libcheck_repair (struct checker * c)
+{
+ return;
+}
+
+#define PING_MSG(fmt, args...) \
+ do { \
+ char msg[CHECKER_MSG_LEN]; \
+ \
+ snprintf(msg, sizeof(msg), fmt, ##args); \
+ copy_message(cb_arg, msg); \
+ } while (0)
+
+static int
+tur_check(int fd, unsigned int timeout,
+ void (*copy_message)(void *, const char *), void *cb_arg)
+{
+ struct sg_io_hdr io_hdr;
+ unsigned char sense_buffer[SENSE_BUFF_LEN];
+ int retry_tur = 5;
+
+retry:
+ if (sg_tur(fd, &io_hdr, sense_buffer,
+ sizeof(sense_buffer), timeout) < 0) {
+ PING_MSG(MSG_PING_DOWN);
+ return PATH_DOWN;
+ }
+
+ if ((io_hdr.status & 0x7e) == 0x18) {
+ /*
+ * SCSI-3 arrays might return
+ * reservation conflict on TUR
+ */
+ PING_MSG(MSG_PING_UP);
+ return PATH_UP;
+ }
+ if (io_hdr.info & SG_INFO_OK_MASK) {
+ int key = 0, asc, ascq;
+
+ switch (io_hdr.host_status) {
+ case DID_OK:
+ case DID_NO_CONNECT:
+ case DID_BAD_TARGET:
+ case DID_ABORT:
+ case DID_TRANSPORT_FAILFAST:
+ break;
+ default:
+ /* Driver error, retry */
+ if (--retry_tur)
+ goto retry;
+ break;
+ }
+ if (io_hdr.sb_len_wr > 3) {
+ if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
+ key = io_hdr.sbp[1] & 0x0f;
+ asc = io_hdr.sbp[2];
+ ascq = io_hdr.sbp[3];
+ } else if (io_hdr.sb_len_wr > 13 &&
+ ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
+ (io_hdr.sbp[0] & 0x7f) == 0x71)) {
+ key = io_hdr.sbp[2] & 0x0f;
+ asc = io_hdr.sbp[12];
+ ascq = io_hdr.sbp[13];
+ }
+ }
+ if (key == 0x6) {
+ /* Unit Attention, retry */
+ if (--retry_tur)
+ goto retry;
+ }
+ else if (key == 0x2) {
+ /* Not Ready */
+ /* Note: Other ALUA states are either UP or DOWN */
+ if( asc == 0x04 && ascq == 0x0b){
+ /*
+ * LOGICAL UNIT NOT ACCESSIBLE,
+ * TARGET PORT IN STANDBY STATE
+ */
+ PING_MSG(MSG_PING_GHOST);
+ return PATH_GHOST;
+ }
+ }
+ PING_MSG(MSG_PING_DOWN);
+ return PATH_DOWN;
+ }
+ PING_MSG(MSG_PING_UP);
+ return PATH_UP;
+}
+
+static int
+keep_alive_check(int fd, unsigned int timeout,
+ void (*copy_message)(void *, const char *), void *cb_arg)
+{
+ int err;
+
+ err = nvme_keep_alive(fd, timeout);
+ if (err == 0) {
+ PING_MSG(MSG_PING_UP);
+ return PATH_UP;
+ }
+
+ PING_MSG(MSG_PING_DOWN);
+ return PATH_DOWN;
+}
+
+static int
+ping_check(int fd, char *dev, unsigned int timeout,
+ void (*copy_message)(void *, const char *), void *cb_arg)
+{
+ if (!strncmp(dev, "nvme", 4))
+ {
+ return keep_alive_check(fd, timeout, copy_message, cb_arg);
+ }
+ else
+ {
+ return tur_check(fd, timeout, copy_message, cb_arg);
+ }
+}
+
+#define ping_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
+#define ping_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
+
+static void cleanup_func(void *data)
+{
+ int holders;
+ struct ping_checker_context *ct = data;
+ pthread_spin_lock(&ct->hldr_lock);
+ ct->holders--;
+ holders = ct->holders;
+ ct->thread = 0;
+ pthread_spin_unlock(&ct->hldr_lock);
+ if (!holders)
+ cleanup_context(ct);
+}
+
+static int ping_running(struct ping_checker_context *ct)
+{
+ pthread_t thread;
+
+ pthread_spin_lock(&ct->hldr_lock);
+ thread = ct->thread;
+ pthread_spin_unlock(&ct->hldr_lock);
+
+ return thread != 0;
+}
+
+static void copy_msg_to_tcc(void *ct_p, const char *msg)
+{
+ struct ping_checker_context *ct = ct_p;
+
+ pthread_mutex_lock(&ct->lock);
+ strlcpy(ct->message, msg, sizeof(ct->message));
+ pthread_mutex_unlock(&ct->lock);
+}
+
+static void *ping_thread(void *ctx)
+{
+	struct ping_checker_context *ct = ctx;
+	int state;
+	char devt[32];
+
+	condlog(3, "%s: ping checker starting up",
+		ping_devt(devt, sizeof(devt), ct));
+
+	/* This thread can be canceled, so setup clean up */
+	ping_thread_cleanup_push(ct);
+
+	/* PING checker start up */
+	pthread_mutex_lock(&ct->lock);
+	ct->state = PATH_PENDING;
+	ct->message[0] = '\0';
+	pthread_mutex_unlock(&ct->lock);
+	/* copy_msg_to_tcc() expects the context itself, not ct->message */
+	state = ping_check(ct->fd, ct->dev, ct->timeout, copy_msg_to_tcc, ct);
+	pthread_testcancel();
+	/* PING checker done: publish the result and wake the waiter */
+	pthread_mutex_lock(&ct->lock);
+	ct->state = state;
+	pthread_cond_signal(&ct->active);
+	pthread_mutex_unlock(&ct->lock);
+
+	condlog(3, "%s: ping checker finished, state %s",
+		ping_devt(devt, sizeof(devt), ct), checker_state_name(state));
+	ping_thread_cleanup_pop(ct);
+
+	return ((void *)0);
+}
+
+
+static void ping_timeout(struct timespec *tsp)
+{
+ clock_gettime(CLOCK_MONOTONIC, tsp);
+ tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
+ normalize_timespec(tsp);
+}
+
+static void ping_set_async_timeout(struct checker *c)
+{
+ struct ping_checker_context *ct = c->context;
+ struct timespec now;
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ ct->time = now.tv_sec + c->timeout;
+}
+
+static int ping_check_async_timeout(struct checker *c)
+{
+ struct ping_checker_context *ct = c->context;
+ struct timespec now;
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ return (now.tv_sec > ct->time);
+}
+
+static void copy_msg_to_checker(void *c_p, const char *msg)
+{
+ struct checker *c = c_p;
+
+ strlcpy(c->message, msg, sizeof(c->message));
+}
+
+int libcheck_check(struct checker * c)
+{
+	struct ping_checker_context *ct = c->context;
+	struct timespec tsp;
+	struct stat sb;
+	pthread_attr_t attr;
+	int ping_status, r;
+	char devt[32];
+
+
+	if (!ct)
+		return PATH_UNCHECKED;
+
+	if (fstat(c->fd, &sb) == 0) {
+		pthread_mutex_lock(&ct->lock);
+		ct->devt = sb.st_rdev;
+		pthread_mutex_unlock(&ct->lock);
+	}
+
+	if (c->sync)
+		return ping_check(c->fd, c->dev, c->timeout, copy_msg_to_checker, c);
+
+	/*
+	 * Async mode
+	 */
+	r = pthread_mutex_lock(&ct->lock);
+	if (r != 0) {
+		condlog(2, "%s: ping mutex lock failed with %d",
+			ping_devt(devt, sizeof(devt), ct), r);
+		MSG(c, MSG_PING_FAILED);
+		return PATH_WILD;
+	}
+
+	if (ct->running) {
+		/*
+		 * Check if PING checker is still running. Hold hldr_lock
+		 * around the pthread_cancel() call to avoid that
+		 * pthread_cancel() gets called after the (detached) PING
+		 * thread has exited.
+		 */
+		pthread_spin_lock(&ct->hldr_lock);
+		if (ct->thread) {
+			if (ping_check_async_timeout(c)) {
+				condlog(3, "%s: ping checker timeout",
+					ping_devt(devt, sizeof(devt), ct));
+				pthread_cancel(ct->thread);
+				ct->running = 0;
+				MSG(c, MSG_PING_TIMEOUT);
+				ping_status = PATH_TIMEOUT;
+			} else {
+				condlog(3, "%s: ping checker not finished",
+					ping_devt(devt, sizeof(devt), ct));
+				ct->running++;
+				ping_status = PATH_PENDING;
+			}
+		} else {
+			/* PING checker done */
+			ct->running = 0;
+			ping_status = ct->state;
+			strlcpy(c->message, ct->message, sizeof(c->message));
+		}
+		pthread_spin_unlock(&ct->hldr_lock);
+		pthread_mutex_unlock(&ct->lock);
+	} else {
+		if (ping_running(ct)) {
+			/* previous thread is still alive; report a timeout */
+			pthread_mutex_unlock(&ct->lock);
+			condlog(3, "%s: ping thread not responding",
+				ping_devt(devt, sizeof(devt), ct));
+			return PATH_TIMEOUT;
+		}
+		/* Start new PING checker */
+		ct->state = PATH_UNCHECKED;
+		ct->fd = c->fd;
+		strlcpy(ct->dev, c->dev, sizeof(ct->dev)); /* bounded by dest */
+		ct->timeout = c->timeout;
+		pthread_spin_lock(&ct->hldr_lock);
+		ct->holders++;
+		pthread_spin_unlock(&ct->hldr_lock);
+		ping_set_async_timeout(c);
+		setup_thread_attr(&attr, 32 * 1024, 1);
+		r = pthread_create(&ct->thread, &attr, ping_thread, ct);
+		pthread_attr_destroy(&attr);
+		if (r) {
+			pthread_spin_lock(&ct->hldr_lock);
+			ct->holders--;
+			ct->thread = 0;	/* reset under hldr_lock like all other accesses */
+			pthread_spin_unlock(&ct->hldr_lock);
+			pthread_mutex_unlock(&ct->lock);
+			condlog(3, "%s: failed to start ping thread, using"
+				" sync mode", ping_devt(devt, sizeof(devt), ct));
+			return ping_check(c->fd, c->dev, c->timeout,
+					  copy_msg_to_checker, c);
+		}
+		ping_timeout(&tsp);
+		r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
+		ping_status = ct->state;
+		strlcpy(c->message, ct->message, sizeof(c->message));
+		pthread_mutex_unlock(&ct->lock);
+		if (ping_running(ct) &&
+		    (ping_status == PATH_PENDING || ping_status == PATH_UNCHECKED)) {
+			condlog(3, "%s: ping checker still running",
+				ping_devt(devt, sizeof(devt), ct));
+			ct->running = 1;
+			ping_status = PATH_PENDING;
+		}
+	}
+
+	return ping_status;
+}
diff --git a/libmultipath/checkers/readsector0.c b/libmultipath/checkers/readsector0.c
index 8fccb46..e485810 100644
--- a/libmultipath/checkers/readsector0.c
+++ b/libmultipath/checkers/readsector0.c
@@ -4,11 +4,13 @@
#include <stdio.h>
#include "checkers.h"
-#include "libsg.h"
+#include "../libmultipath/libsg.h"
#define MSG_READSECTOR0_UP "readsector0 checker reports path is up"
#define MSG_READSECTOR0_DOWN "readsector0 checker reports path is down"
+#define SENSE_BUFF_LEN 32
+
struct readsector0_checker_context {
void * dummy;
};
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
deleted file mode 100644
index b4a5cb2..0000000
--- a/libmultipath/checkers/tur.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Some code borrowed from sg-utils.
- *
- * Copyright (c) 2004 Christophe Varoqui
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <sys/sysmacros.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <pthread.h>
-
-#include "checkers.h"
-
-#include "../libmultipath/debug.h"
-#include "../libmultipath/sg_include.h"
-#include "../libmultipath/util.h"
-#include "../libmultipath/time-util.h"
-#include "../libmultipath/util.h"
-
-#define TUR_CMD_LEN 6
-#define HEAVY_CHECK_COUNT 10
-
-#define MSG_TUR_UP "tur checker reports path is up"
-#define MSG_TUR_DOWN "tur checker reports path is down"
-#define MSG_TUR_GHOST "tur checker reports path is in standby state"
-#define MSG_TUR_RUNNING "tur checker still running"
-#define MSG_TUR_TIMEOUT "tur checker timed out"
-#define MSG_TUR_FAILED "tur checker failed to initialize"
-
-struct tur_checker_context {
- dev_t devt;
- int state;
- int running;
- int fd;
- unsigned int timeout;
- time_t time;
- pthread_t thread;
- pthread_mutex_t lock;
- pthread_cond_t active;
- pthread_spinlock_t hldr_lock;
- int holders;
- char message[CHECKER_MSG_LEN];
-};
-
-static const char *tur_devt(char *devt_buf, int size,
- struct tur_checker_context *ct)
-{
- dev_t devt;
-
- pthread_mutex_lock(&ct->lock);
- devt = ct->devt;
- pthread_mutex_unlock(&ct->lock);
-
- snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt));
- return devt_buf;
-}
-
-int libcheck_init (struct checker * c)
-{
- struct tur_checker_context *ct;
- pthread_mutexattr_t attr;
-
- ct = malloc(sizeof(struct tur_checker_context));
- if (!ct)
- return 1;
- memset(ct, 0, sizeof(struct tur_checker_context));
-
- ct->state = PATH_UNCHECKED;
- ct->fd = -1;
- ct->holders = 1;
- pthread_cond_init_mono(&ct->active);
- pthread_mutexattr_init(&attr);
- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
- pthread_mutex_init(&ct->lock, &attr);
- pthread_mutexattr_destroy(&attr);
- pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
- c->context = ct;
-
- return 0;
-}
-
-static void cleanup_context(struct tur_checker_context *ct)
-{
- pthread_mutex_destroy(&ct->lock);
- pthread_cond_destroy(&ct->active);
- pthread_spin_destroy(&ct->hldr_lock);
- free(ct);
-}
-
-void libcheck_free (struct checker * c)
-{
- if (c->context) {
- struct tur_checker_context *ct = c->context;
- int holders;
- pthread_t thread;
-
- pthread_spin_lock(&ct->hldr_lock);
- ct->holders--;
- holders = ct->holders;
- thread = ct->thread;
- pthread_spin_unlock(&ct->hldr_lock);
- if (holders)
- pthread_cancel(thread);
- else
- cleanup_context(ct);
- c->context = NULL;
- }
- return;
-}
-
-void libcheck_repair (struct checker * c)
-{
- return;
-}
-
-#define TUR_MSG(fmt, args...) \
- do { \
- char msg[CHECKER_MSG_LEN]; \
- \
- snprintf(msg, sizeof(msg), fmt, ##args); \
- copy_message(cb_arg, msg); \
- } while (0)
-
-static int
-tur_check(int fd, unsigned int timeout,
- void (*copy_message)(void *, const char *), void *cb_arg)
-{
- struct sg_io_hdr io_hdr;
- unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
- unsigned char sense_buffer[32];
- int retry_tur = 5;
-
-retry:
- memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
- memset(&sense_buffer, 0, 32);
- io_hdr.interface_id = 'S';
- io_hdr.cmd_len = sizeof (turCmdBlk);
- io_hdr.mx_sb_len = sizeof (sense_buffer);
- io_hdr.dxfer_direction = SG_DXFER_NONE;
- io_hdr.cmdp = turCmdBlk;
- io_hdr.sbp = sense_buffer;
- io_hdr.timeout = timeout * 1000;
- io_hdr.pack_id = 0;
- if (ioctl(fd, SG_IO, &io_hdr) < 0) {
- TUR_MSG(MSG_TUR_DOWN);
- return PATH_DOWN;
- }
- if ((io_hdr.status & 0x7e) == 0x18) {
- /*
- * SCSI-3 arrays might return
- * reservation conflict on TUR
- */
- TUR_MSG(MSG_TUR_UP);
- return PATH_UP;
- }
- if (io_hdr.info & SG_INFO_OK_MASK) {
- int key = 0, asc, ascq;
-
- switch (io_hdr.host_status) {
- case DID_OK:
- case DID_NO_CONNECT:
- case DID_BAD_TARGET:
- case DID_ABORT:
- case DID_TRANSPORT_FAILFAST:
- break;
- default:
- /* Driver error, retry */
- if (--retry_tur)
- goto retry;
- break;
- }
- if (io_hdr.sb_len_wr > 3) {
- if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
- key = io_hdr.sbp[1] & 0x0f;
- asc = io_hdr.sbp[2];
- ascq = io_hdr.sbp[3];
- } else if (io_hdr.sb_len_wr > 13 &&
- ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
- (io_hdr.sbp[0] & 0x7f) == 0x71)) {
- key = io_hdr.sbp[2] & 0x0f;
- asc = io_hdr.sbp[12];
- ascq = io_hdr.sbp[13];
- }
- }
- if (key == 0x6) {
- /* Unit Attention, retry */
- if (--retry_tur)
- goto retry;
- }
- else if (key == 0x2) {
- /* Not Ready */
- /* Note: Other ALUA states are either UP or DOWN */
- if( asc == 0x04 && ascq == 0x0b){
- /*
- * LOGICAL UNIT NOT ACCESSIBLE,
- * TARGET PORT IN STANDBY STATE
- */
- TUR_MSG(MSG_TUR_GHOST);
- return PATH_GHOST;
- }
- }
- TUR_MSG(MSG_TUR_DOWN);
- return PATH_DOWN;
- }
- TUR_MSG(MSG_TUR_UP);
- return PATH_UP;
-}
-
-#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
-#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
-
-static void cleanup_func(void *data)
-{
- int holders;
- struct tur_checker_context *ct = data;
- pthread_spin_lock(&ct->hldr_lock);
- ct->holders--;
- holders = ct->holders;
- ct->thread = 0;
- pthread_spin_unlock(&ct->hldr_lock);
- if (!holders)
- cleanup_context(ct);
-}
-
-static int tur_running(struct tur_checker_context *ct)
-{
- pthread_t thread;
-
- pthread_spin_lock(&ct->hldr_lock);
- thread = ct->thread;
- pthread_spin_unlock(&ct->hldr_lock);
-
- return thread != 0;
-}
-
-static void copy_msg_to_tcc(void *ct_p, const char *msg)
-{
- struct tur_checker_context *ct = ct_p;
-
- pthread_mutex_lock(&ct->lock);
- strlcpy(ct->message, msg, sizeof(ct->message));
- pthread_mutex_unlock(&ct->lock);
-}
-
-static void *tur_thread(void *ctx)
-{
- struct tur_checker_context *ct = ctx;
- int state;
- char devt[32];
-
- condlog(3, "%s: tur checker starting up",
- tur_devt(devt, sizeof(devt), ct));
-
- /* This thread can be canceled, so setup clean up */
- tur_thread_cleanup_push(ct);
-
- /* TUR checker start up */
- pthread_mutex_lock(&ct->lock);
- ct->state = PATH_PENDING;
- ct->message[0] = '\0';
- pthread_mutex_unlock(&ct->lock);
-
- state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
- pthread_testcancel();
-
- /* TUR checker done */
- pthread_mutex_lock(&ct->lock);
- ct->state = state;
- pthread_cond_signal(&ct->active);
- pthread_mutex_unlock(&ct->lock);
-
- condlog(3, "%s: tur checker finished, state %s",
- tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
- tur_thread_cleanup_pop(ct);
-
- return ((void *)0);
-}
-
-
-static void tur_timeout(struct timespec *tsp)
-{
- clock_gettime(CLOCK_MONOTONIC, tsp);
- tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
- normalize_timespec(tsp);
-}
-
-static void tur_set_async_timeout(struct checker *c)
-{
- struct tur_checker_context *ct = c->context;
- struct timespec now;
-
- clock_gettime(CLOCK_MONOTONIC, &now);
- ct->time = now.tv_sec + c->timeout;
-}
-
-static int tur_check_async_timeout(struct checker *c)
-{
- struct tur_checker_context *ct = c->context;
- struct timespec now;
-
- clock_gettime(CLOCK_MONOTONIC, &now);
- return (now.tv_sec > ct->time);
-}
-
-static void copy_msg_to_checker(void *c_p, const char *msg)
-{
- struct checker *c = c_p;
-
- strlcpy(c->message, msg, sizeof(c->message));
-}
-
-int libcheck_check(struct checker * c)
-{
- struct tur_checker_context *ct = c->context;
- struct timespec tsp;
- struct stat sb;
- pthread_attr_t attr;
- int tur_status, r;
- char devt[32];
-
-
- if (!ct)
- return PATH_UNCHECKED;
-
- if (fstat(c->fd, &sb) == 0) {
- pthread_mutex_lock(&ct->lock);
- ct->devt = sb.st_rdev;
- pthread_mutex_unlock(&ct->lock);
- }
-
- if (c->sync)
- return tur_check(c->fd, c->timeout, copy_msg_to_checker, c);
-
- /*
- * Async mode
- */
- r = pthread_mutex_lock(&ct->lock);
- if (r != 0) {
- condlog(2, "%s: tur mutex lock failed with %d",
- tur_devt(devt, sizeof(devt), ct), r);
- MSG(c, MSG_TUR_FAILED);
- return PATH_WILD;
- }
-
- if (ct->running) {
- /*
- * Check if TUR checker is still running. Hold hldr_lock
- * around the pthread_cancel() call to avoid that
- * pthread_cancel() gets called after the (detached) TUR
- * thread has exited.
- */
- pthread_spin_lock(&ct->hldr_lock);
- if (ct->thread) {
- if (tur_check_async_timeout(c)) {
- condlog(3, "%s: tur checker timeout",
- tur_devt(devt, sizeof(devt), ct));
- pthread_cancel(ct->thread);
- ct->running = 0;
- MSG(c, MSG_TUR_TIMEOUT);
- tur_status = PATH_TIMEOUT;
- } else {
- condlog(3, "%s: tur checker not finished",
- tur_devt(devt, sizeof(devt), ct));
- ct->running++;
- tur_status = PATH_PENDING;
- }
- } else {
- /* TUR checker done */
- ct->running = 0;
- tur_status = ct->state;
- strlcpy(c->message, ct->message, sizeof(c->message));
- }
- pthread_spin_unlock(&ct->hldr_lock);
- pthread_mutex_unlock(&ct->lock);
- } else {
- if (tur_running(ct)) {
- /* pthread cancel failed. continue in sync mode */
- pthread_mutex_unlock(&ct->lock);
- condlog(3, "%s: tur thread not responding",
- tur_devt(devt, sizeof(devt), ct));
- return PATH_TIMEOUT;
- }
- /* Start new TUR checker */
- ct->state = PATH_UNCHECKED;
- ct->fd = c->fd;
- ct->timeout = c->timeout;
- pthread_spin_lock(&ct->hldr_lock);
- ct->holders++;
- pthread_spin_unlock(&ct->hldr_lock);
- tur_set_async_timeout(c);
- setup_thread_attr(&attr, 32 * 1024, 1);
- r = pthread_create(&ct->thread, &attr, tur_thread, ct);
- pthread_attr_destroy(&attr);
- if (r) {
- pthread_spin_lock(&ct->hldr_lock);
- ct->holders--;
- pthread_spin_unlock(&ct->hldr_lock);
- pthread_mutex_unlock(&ct->lock);
- ct->thread = 0;
- condlog(3, "%s: failed to start tur thread, using"
- " sync mode", tur_devt(devt, sizeof(devt), ct));
- return tur_check(c->fd, c->timeout,
- copy_msg_to_checker, c);
- }
- tur_timeout(&tsp);
- r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp);
- tur_status = ct->state;
- strlcpy(c->message, ct->message, sizeof(c->message));
- pthread_mutex_unlock(&ct->lock);
- if (tur_running(ct) &&
- (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
- condlog(3, "%s: tur checker still running",
- tur_devt(devt, sizeof(devt), ct));
- ct->running = 1;
- tur_status = PATH_PENDING;
- }
- }
-
- return tur_status;
-}
diff --git a/libmultipath/checkers/tur.h b/libmultipath/checkers/tur.h
deleted file mode 100644
index a2e8c88..0000000
--- a/libmultipath/checkers/tur.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _TUR_H
-#define _TUR_H
-
-int tur (struct checker *);
-int tur_init (struct checker *);
-void tur_free (struct checker *);
-
-#endif /* _TUR_H */
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index db2b756..9a65cec 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -32,7 +32,7 @@
#define DEFAULT_UEV_WAIT_TIMEOUT 30
#define DEFAULT_PRIO PRIO_CONST
#define DEFAULT_PRIO_ARGS ""
-#define DEFAULT_CHECKER TUR
+#define DEFAULT_CHECKER PING
#define DEFAULT_FLUSH FLUSH_DISABLED
#define DEFAULT_USER_FRIENDLY_NAMES USER_FRIENDLY_NAMES_OFF
#define DEFAULT_FORCE_SYNC 0
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 663c8ea..bae5d24 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -1539,6 +1539,7 @@ get_state (struct path * pp, struct config *conf, int daemon)
return PATH_UNCHECKED;
}
checker_set_fd(c, pp->fd);
+ checker_set_dev(c, pp->dev);
if (checker_init(c, pp->mpp?&pp->mpp->mpcontext:NULL)) {
memset(c, 0x0, sizeof(struct checker));
condlog(3, "%s: checker init failed", pp->dev);
diff --git a/libmultipath/hwtable.c b/libmultipath/hwtable.c
index 390d143..9e8e9e3 100644
--- a/libmultipath/hwtable.c
+++ b/libmultipath/hwtable.c
@@ -1081,7 +1081,7 @@ static struct hwentry default_hw[] = {
.pgpolicy = FAILOVER,
.uid_attribute = "ID_SERIAL",
.selector = "service-time 0",
- .checker_name = TUR,
+ .checker_name = PING,
.alias_prefix = "mpath",
.features = "0",
.hwhandler = "0",
diff --git a/libmultipath/libnvme.c b/libmultipath/libnvme.c
new file mode 100644
index 0000000..97c9125
--- /dev/null
+++ b/libmultipath/libnvme.c
@@ -0,0 +1,130 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved.
+ *
+ * libnvme.c
+ *
+ * Some code borrowed from NVM-Express command line utility.
+ *
+ * Author(s): Yang Feng <philip.yang at huawei.com>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ *
+ */
+#include <linux/types.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+
+struct nvme_user_io { /* argument block for NVME_IOCTL_SUBMIT_IO; built by nvme_io() */
+ __u8 opcode;
+ __u8 flags; /* always 0 as built by nvme_io() */
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd; /* always 0 as built by nvme_io() */
+ __u64 metadata; /* metadata buffer pointer, widened to 64 bits */
+ __u64 addr; /* data buffer pointer, widened to 64 bits */
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+}; /* NOTE(review): presumably has to match the kernel's struct of the same name -- confirm */
+
+struct nvme_admin_cmd { /* argument block handed to ioctl() by nvme_submit_passthru() */
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1; /* filled from the rsvd parameter of nvme_passthru() */
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata; /* metadata buffer pointer, widened to 64 bits */
+ __u64 addr; /* data buffer pointer, widened to 64 bits */
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result; /* copied back to the caller by nvme_passthru() when ioctl() returns 0 */
+}; /* NOTE(review): presumably has to match the kernel's passthru command layout -- confirm */
+
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+
+/* Pack the arguments into a struct nvme_user_io and submit it with
+ * NVME_IOCTL_SUBMIT_IO on fd; returns whatever ioctl() returns. */
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+		__u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+		void *metadata)
+{
+	struct nvme_user_io io = {	/* .flags and .rsvd are omitted, hence zero */
+		.opcode		= opcode,
+		.control	= control,
+		.nblocks	= nblocks,
+		.metadata	= (__u64)(uintptr_t) metadata,
+		.addr		= (__u64)(uintptr_t) data,
+		.slba		= slba,
+		.dsmgmt		= dsmgmt,
+		.reftag		= reftag,
+		.apptag		= apptag,	/* apptag/appmask must not be crossed */
+		.appmask	= appmask,
+	};
+
+	return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{ /* thin wrapper: forwards every argument to nvme_io() with opcode 0x2 */
+ return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
+ reftag, apptag, appmask, data, metadata);
+} /* the return value is nvme_io()'s, i.e. the result of the ioctl() */
+
+static int nvme_submit_passthru(int fd, int ioctl_cmd, struct nvme_admin_cmd *cmd)
+{ /* issues ioctl(fd, ioctl_cmd, cmd) and hands back its return value unchanged */
+ return ioctl(fd, ioctl_cmd, cmd); /* NOTE(review): request numbers are usually unsigned long, ioctl_cmd is int -- confirm */
+}
+
+/* Build a struct nvme_admin_cmd from the arguments and submit it with
+ * ioctl_cmd; *result receives cmd.result only when the ioctl returns 0. */
+int nvme_passthru(int fd, int ioctl_cmd, __u8 opcode, __u8 flags, __u16 rsvd,
+		  __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+		  __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+		  __u32 data_len, void *data, __u32 metadata_len,
+		  void *metadata, __u32 timeout_ms, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {	/* .result is omitted, hence zero */
+		.opcode		= opcode,
+		.flags		= flags,
+		.rsvd1		= rsvd,
+		.nsid		= nsid,
+		.cdw2		= cdw2,
+		.cdw3		= cdw3,
+		.cdw10		= cdw10,
+		.cdw11		= cdw11,
+		.cdw12		= cdw12,
+		.cdw13		= cdw13,
+		.cdw14		= cdw14,
+		.cdw15		= cdw15,
+		.addr		= (__u64)(uintptr_t) data,
+		.data_len	= data_len,
+		.metadata	= (__u64)(uintptr_t) metadata,
+		.metadata_len	= metadata_len,
+		.timeout_ms	= timeout_ms,
+	};
+	int err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+
+	if (err == 0 && result)
+		*result = cmd.result;
+	return err;
+}
+
+/* Send admin opcode 0x18 with timeout_ms set and every other field zero. */
+int nvme_keep_alive(int fd, __u32 timeout_ms)
+{
+	/* Trailing 0: no result pointer, so nvme_passthru() skips the copy-back. */
+	return nvme_passthru(fd, NVME_IOCTL_ADMIN_CMD, 0x18, 0, 0, 0, 0, 0, 0, 0,
+			     0, 0, 0, 0, 0, 0, 0, 0, timeout_ms, 0);
+}
diff --git a/libmultipath/libnvme.h b/libmultipath/libnvme.h
new file mode 100644
index 0000000..a2b5460
--- /dev/null
+++ b/libmultipath/libnvme.h
@@ -0,0 +1,10 @@
+#ifndef _LIBNVME_H
+#define _LIBNVME_H
+
+#include <linux/types.h>
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata);
+int nvme_keep_alive(int fd, __u32 timeout_ms);
+
+#endif /* _LIBNVME_H */
diff --git a/libmultipath/libsg.c b/libmultipath/libsg.c
new file mode 100644
index 0000000..900103e
--- /dev/null
+++ b/libmultipath/libsg.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "checkers.h"
+#include "libsg.h"
+
+int
+sg_read (int sg_fd, unsigned char * buff, int buff_len,
+ unsigned char * sense, int sense_len, unsigned int timeout)
+{
+ /* Read from block 0 into buff through SG_IO; returns PATH_UP or PATH_DOWN. Defaults: */
+ int blocks;
+ long long start_block = 0;
+ int bs = 512;
+ int cdbsz = 10;
+
+ unsigned char rdCmd[cdbsz];
+ unsigned char *sbb = sense;
+ struct sg_io_hdr io_hdr;
+ int res;
+ int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
+ int sz_ind;
+ struct stat filestatus;
+ int retry_count = 3; /* total attempts allowed while the key keeps coming back as 0x6 */
+
+ if (fstat(sg_fd, &filestatus) != 0)
+ return PATH_DOWN;
+ bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize; /* st_blksize capped at 4096 */
+ blocks = buff_len / bs; /* NOTE(review): divides by bs; assumes st_blksize is never 0 -- confirm */
+ memset(rdCmd, 0, cdbsz);
+ sz_ind = 1; /* picks 0x28 out of rd_opcode[] */
+ rdCmd[0] = rd_opcode[sz_ind];
+ rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff); /* start_block in bytes 2-5, high byte first */
+ rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff);
+ rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff);
+ rdCmd[5] = (unsigned char)(start_block & 0xff);
+ rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff); /* block count in bytes 7-8, high byte first */
+ rdCmd[8] = (unsigned char)(blocks & 0xff);
+
+ memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
+ io_hdr.interface_id = 'S';
+ io_hdr.cmd_len = cdbsz;
+ io_hdr.cmdp = rdCmd;
+ io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ io_hdr.dxfer_len = bs * blocks; /* buff_len rounded down to whole blocks */
+ io_hdr.dxferp = buff;
+ io_hdr.mx_sb_len = sense_len;
+ io_hdr.sbp = sense;
+ io_hdr.timeout = timeout * 1000; /* presumably seconds -> milliseconds; confirm the unit of timeout */
+ io_hdr.pack_id = (int)start_block;
+
+retry:
+ memset(sense, 0, sense_len); /* clear stale sense data before every attempt */
+ while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno)); /* reissue while interrupted */
+
+ if (res < 0) {
+ if (ENOMEM == errno) { /* out-of-memory is reported as PATH_UP, not as a path failure */
+ return PATH_UP;
+ }
+ return PATH_DOWN;
+ }
+
+ if ((0 == io_hdr.status) &&
+ (0 == io_hdr.host_status) &&
+ (0 == io_hdr.driver_status)) {
+ return PATH_UP;
+ } else {
+ int key = 0; /* stays 0 unless one of the sense layouts below is recognised */
+
+ if (io_hdr.sb_len_wr > 3) {
+ if (sbb[0] == 0x72 || sbb[0] == 0x73)
+ key = sbb[1] & 0x0f; /* low nibble of byte 1 */
+ else if (io_hdr.sb_len_wr > 13 &&
+ ((sbb[0] & 0x7f) == 0x70 ||
+ (sbb[0] & 0x7f) == 0x71))
+ key = sbb[2] & 0x0f; /* low nibble of byte 2 */
+ }
+
+ /*
+ * Retry with the same io_hdr while key is 0x6 (UNIT_ATTENTION) and retry_count lasts.
+ */
+ if (key == 0x6) {
+ if (--retry_count)
+ goto retry;
+ }
+ return PATH_DOWN;
+ }
+}
+
+/* Set up *io_hdr for an all-zero CDB without data phase; submit it via SG_IO. */
+int
+sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+       int sense_len, unsigned int timeout)
+{
+	unsigned char cdb[TUR_CMD_LEN] = { 0 };
+
+	memset(io_hdr, 0, sizeof(*io_hdr));
+	memset(sense, 0, sense_len);
+	io_hdr->interface_id = 'S';
+	io_hdr->dxfer_direction = SG_DXFER_NONE;
+	io_hdr->cmdp = cdb;	/* stack buffer: cmdp is stale once we return */
+	io_hdr->cmd_len = sizeof(cdb);
+	io_hdr->sbp = sense;
+	io_hdr->mx_sb_len = sense_len;
+	io_hdr->timeout = timeout * 1000;
+	io_hdr->pack_id = 0;
+	return ioctl(fd, SG_IO, io_hdr);	/* raw ioctl() result */
+}
diff --git a/libmultipath/libsg.h b/libmultipath/libsg.h
new file mode 100644
index 0000000..70049a2
--- /dev/null
+++ b/libmultipath/libsg.h
@@ -0,0 +1,13 @@
+#ifndef _LIBSG_H
+#define _LIBSG_H
+
+#include "sg_include.h"
+
+#define TUR_CMD_LEN 6
+
+int sg_read (int sg_fd, unsigned char * buff, int buff_len,
+ unsigned char * sense, int sense_len, unsigned int timeout);
+int sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+ int sense_len, unsigned int timeout);
+
+#endif /* _LIBSG_H */
diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile
index 0c71e63..0c5c69b 100644
--- a/libmultipath/prioritizers/Makefile
+++ b/libmultipath/prioritizers/Makefile
@@ -26,7 +26,7 @@ all: $(LIBS)
libprioalua.so: alua.o alua_rtpg.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
-libpriopath_latency.so: path_latency.o ../checkers/libsg.o
+libpriopath_latency.so: path_latency.o ../libsg.o ../libnvme.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lm
libprio%.so: %.o
diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 8f633e0..21209ff 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -26,29 +26,11 @@
#include "debug.h"
#include "prio.h"
#include "structs.h"
-#include <linux/types.h>
-#include <sys/ioctl.h>
-#include "../checkers/libsg.h"
+#include "libsg.h"
+#include "libnvme.h"
#define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args)
-struct nvme_user_io {
- __u8 opcode;
- __u8 flags;
- __u16 control;
- __u16 nblocks;
- __u16 rsvd;
- __u64 metadata;
- __u64 addr;
- __u64 slba;
- __u32 dsmgmt;
- __u32 reftag;
- __u16 apptag;
- __u16 appmask;
-};
-
-#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
-
#define MAX_IO_NUM 200
#define MIN_IO_NUM 2
@@ -62,6 +44,8 @@ struct nvme_user_io {
#define MAX_CHAR_SIZE 30
+#define SENSE_BUFF_LEN 32
+
#define USEC_PER_SEC 1000000LL
#define NSEC_PER_USEC 1000LL
@@ -72,34 +56,6 @@ static inline long long timeval_to_us(const struct timespec *tv)
return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC);
}
-int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
- __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
-{
- struct nvme_user_io io = {
- .opcode = opcode,
- .flags = 0,
- .control = control,
- .nblocks = nblocks,
- .rsvd = 0,
- .metadata = (__u64)(uintptr_t) metadata,
- .addr = (__u64)(uintptr_t) data,
- .slba = slba,
- .dsmgmt = dsmgmt,
- .reftag = reftag,
- .appmask = apptag,
- .apptag = appmask,
- };
-
- return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
-}
-
-int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
- __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
-{
- return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
- reftag, apptag, appmask, data, metadata);
-}
-
static int do_readsector0(struct path *pp, unsigned int timeout)
{
unsigned char buf[4096];
@@ -108,12 +64,12 @@ static int do_readsector0(struct path *pp, unsigned int timeout)
if (!strncmp(pp->dev, "nvme", 4))
{
- if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) < 0)
+ if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) != 0)
return 0;
}
else
{
- if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0],SENSE_BUFF_LEN, timeout) == 2)
+ if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN, timeout) == 2)
return 0;
}
@@ -300,7 +256,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
Warn the user if latency_interval is smaller than (2 * standard_deviation), or equal */
standard_deviation = calc_standard_deviation(path_latency, index, avglatency);
latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
- if ((latency_interval != 0)
+ if ((latency_interval!= 0)
&& (latency_interval <= (2 * standard_deviation)))
pp_pl_log(3, "%s: latency interval (%lld) according to average latency (%lld us) is smaller than "
"2 * standard deviation (%lld us), or equal, args base_num (%d) needs to be set bigger value",
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 27f3951..d4c24de 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -316,7 +316,7 @@ int select_checker(struct config *conf, struct path *pp)
struct checker * c = &pp->checker;
if (pp->detect_checker == DETECT_CHECKER_ON && pp->tpgs > 0) {
- checker_name = TUR;
+ checker_name = PING;
origin = "(setting: array autodetected)";
goto out;
}
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 0049cba..915cc50 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -418,8 +418,8 @@ are:
(Deprecated) Read the first sector of the device. This checker is being
deprecated, please use \fItur\fR instead.
.TP
-.I tur
-Issue a \fITEST UNIT READY\fR command to the device.
+.I ping
+Issue a \fITEST UNIT READY\fR command to a SCSI device, or a \fIKEEP ALIVE\fR command to an NVMe device.
.TP
.I emc_clariion
(Hardware-dependent)
--
2.6.4.windows.1
More information about the dm-devel
mailing list