[dm-devel] [PATCH 1/3] multipath-tools: use direct IO for path latency prioritizer

Guan Junxiong guanjunxiong at huawei.com
Tue Sep 5 13:45:03 UTC 2017


The SCSI-to-NVMe translation layer, which was considered broken, has been
removed as of Linux kernel 4.13, so the SG_IO ioctl used for the read is
no longer supported. This patch therefore drops the sg_read method and
uses direct I/O reads for both NVMe and SCSI devices.

Signed-off-by: Junxiong Guan <guanjunxiong at huawei.com>
---
 libmultipath/prioritizers/path_latency.c | 85 +++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 7 deletions(-)

diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 9fc2dfc0..c75ae03f 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -19,15 +19,19 @@
  * This file is released under the GPL version 2, or any later version.
  */
 
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <math.h>
 #include <ctype.h>
 #include <time.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <unistd.h>
 
 #include "debug.h"
 #include "prio.h"
 #include "structs.h"
-#include "../checkers/libsg.h"
 
 #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args)
 
@@ -47,6 +51,8 @@
 #define USEC_PER_SEC		1000000LL
 #define NSEC_PER_USEC		1000LL
 
+#define DEF_BLK_SIZE		4096
+
 static long long path_latency[MAX_IO_NUM];
 
 static inline long long timeval_to_us(const struct timespec *tv)
@@ -55,15 +61,72 @@ static inline long long timeval_to_us(const struct timespec *tv)
 	    (tv->tv_nsec / NSEC_PER_USEC);
 }
 
-static int do_readsector0(int fd, unsigned int timeout)
+static int prepare_directio_read(int fd, int *blksz, char **pbuf,
+		int *restore_flags)
+{
+	unsigned long pgsize = getpagesize();
+	long flags;
+
+	if (ioctl(fd, BLKBSZGET, blksz) < 0) {
+		pp_pl_log(3,"catnnot get blocksize, set default");
+		*blksz = DEF_BLK_SIZE;
+	}
+	if (posix_memalign((void **)pbuf, pgsize, *blksz))
+		return -1;
+
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0)
+		goto free_out;
+	if (!(flags & O_DIRECT)) {
+		flags |= O_DIRECT;
+		if (fcntl(fd, F_SETFL, flags) < 0)
+			goto free_out;
+		*restore_flags = 1;
+	}
+
+	return 0;
+
+free_out:
+	free(*pbuf);
+
+	return -1;
+}
+
+static void cleanup_directio_read(int fd, char *buf, int restore_flags)
 {
-	unsigned char buf[4096];
-	unsigned char sbuf[SENSE_BUFF_LEN];
+	long flags;
+
+	free(buf);
+
+	if (!restore_flags)
+		return;
+	if ((flags = fcntl(fd, F_GETFL)) >= 0) {
+		int ret __attribute__ ((unused));
+		flags &= ~O_DIRECT;
+		/* No point in checking for errors */
+		ret = fcntl(fd, F_SETFL, flags);
+	}
+}
+
+static int do_directio_read(int fd, unsigned int timeout, char *buf, int sz)
+{
+	fd_set read_fds;
+	struct timeval tm = { .tv_sec = timeout };
 	int ret;
+	int num_read;
 
-	ret = sg_read(fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN, timeout);
+	if (lseek(fd, 0, SEEK_SET) == -1)
+		return -1;
+	FD_ZERO(&read_fds);
+	FD_SET(fd, &read_fds);
+	ret = select(fd+1, &read_fds, NULL, NULL, &tm);
+	if (ret <= 0)
+		return -1;
+	num_read = read(fd, buf, sz);
+	if (num_read != sz)
+		return -1;
 
-	return ret;
+	return 0;
 }
 
 int check_args_valid(int io_num, int base_num)
@@ -194,6 +257,9 @@ int getprio(struct path *pp, char *args, unsigned int timeout)
 	long long toldelay = 0;
 	long long before, after;
 	struct timespec tv;
+	int blksize;
+	char *buf;
+	int restore_flags = 0;
 
 	if (pp->fd < 0)
 		return -1;
@@ -205,13 +271,16 @@ int getprio(struct path *pp, char *args, unsigned int timeout)
 
 	memset(path_latency, 0, sizeof(path_latency));
 
+	prepare_directio_read(pp->fd, &blksize, &buf, &restore_flags);
+
 	temp = io_num;
 	while (temp-- > 0) {
 		(void)clock_gettime(CLOCK_MONOTONIC, &tv);
 		before = timeval_to_us(&tv);
 
-		if (do_readsector0(pp->fd, timeout) == 2) {
+		if (do_directio_read(pp->fd, timeout, buf, blksize)) {
 			pp_pl_log(0, "%s: path down", pp->dev);
+			cleanup_directio_read(pp->fd, buf, restore_flags);
 			return -1;
 		}
 
@@ -222,6 +291,8 @@ int getprio(struct path *pp, char *args, unsigned int timeout)
 		toldelay += path_latency[index++];
 	}
 
+	cleanup_directio_read(pp->fd, buf, restore_flags);
+
 	avglatency = toldelay / (long long)io_num;
 	pp_pl_log(4, "%s: average latency is (%lld us)", pp->dev, avglatency);
 
-- 
2.11.1





More information about the dm-devel mailing list