[dm-devel] [PATCH 9/21] [libcheckers] use async I/O for directio checker

Hannes Reinecke hare at suse.de
Mon May 21 09:23:49 UTC 2007


The directio checker currently issues a synchronous read and therefore
blocks until the request completes. Use async I/O instead, so that the
checker is guaranteed to return after a bounded time and a hanging path
cannot stall the entire daemon.

Signed-off-by: Stefan Bader <bader at de.ibm.com>
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 libcheckers/directio.c |  119 ++++++++++++++++++--------
 libcheckers/libaio.h   |  222 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 303 insertions(+), 38 deletions(-)

diff --git a/libcheckers/directio.c b/libcheckers/directio.c
index b53c1c3..2251515 100644
--- a/libcheckers/directio.c
+++ b/libcheckers/directio.c
@@ -12,28 +12,44 @@
 #include <sys/ioctl.h>
 #include <linux/fs.h>
 #include <errno.h>
+#include <linux/kdev_t.h>
+#include <asm/unistd.h>
 
+#include "libaio.h"
 #include "checkers.h"
+#include "../libmultipath/debug.h"
 
 #define MSG_DIRECTIO_UNKNOWN	"directio checker is not available"
 #define MSG_DIRECTIO_UP		"directio checker reports path is up"
 #define MSG_DIRECTIO_DOWN	"directio checker reports path is down"
 
 struct directio_context {
-	int blksize; 
-	unsigned char *buf;
-	unsigned char *ptr;
+	int		running;
+	int		reset_flags;
+	int		blksize;
+	unsigned char *	buf;
+	unsigned char * ptr;
+	io_context_t	ioctx;
+	struct iocb	io;
 };
 
+
 int directio_init (struct checker * c)
 {
 	unsigned long pgsize = getpagesize();
 	struct directio_context * ct;
+	long flags;
 
 	ct = malloc(sizeof(struct directio_context));
 	if (!ct)
 		return 1;
-	c->context = (void *)ct;
+	memset(ct, 0, sizeof(struct directio_context));
+
+	if (syscall(__NR_io_setup, 1, &ct->ioctx) != 0) {
+		condlog(1, "io_setup failed");
+		free(ct);
+		return 1;
+	}
 
 	if (ioctl(c->fd, BLKBSZGET, &ct->blksize) < 0) {
 		MSG(c, "cannot get blocksize, set default");
@@ -50,11 +66,28 @@ int directio_init (struct checker * c)
 	ct->buf = (unsigned char *)malloc(ct->blksize + pgsize);
 	if (!ct->buf)
 		goto out;
-	ct->ptr = (unsigned char *)(((unsigned long)ct->buf + pgsize - 1) &
-		  (~(pgsize - 1))); 
 
+	flags = fcntl(c->fd, F_GETFL);
+	if (flags < 0)
+		goto out;
+	if (!(flags & O_DIRECT)) {
+		flags |= O_DIRECT;
+		if (fcntl(c->fd, F_SETFL, flags) < 0)
+			goto out;
+		ct->reset_flags = 1;
+	}
+
+	ct->ptr = (unsigned char *) (((unsigned long)ct->buf + pgsize - 1) &
+		  (~(pgsize - 1)));
+
+	/* Successfully initialized, return the context. */
+	c->context = (void *) ct;
 	return 0;
+
 out:
+	if (ct->buf)
+		free(ct->buf);
+	syscall(__NR_io_destroy, ct->ioctx);
 	free(ct);
 	return 1;
 }
@@ -62,56 +95,63 @@ out:
 void directio_free (struct checker * c)
 {
 	struct directio_context * ct = (struct directio_context *)c->context;
+	long flags;
 
 	if (!ct)
 		return;
+
+	if (ct->reset_flags) {
+		if ((flags = fcntl(c->fd, F_GETFL)) >= 0) {
+			flags &= ~O_DIRECT;
+			/* No point in checking for errors */
+			fcntl(c->fd, F_SETFL, flags);
+		}
+	}
+
 	if (ct->buf)
 		free(ct->buf);
+	syscall(__NR_io_destroy, ct->ioctx);
 	free(ct);
 }
 
 static int
-direct_read (int fd, unsigned char * buff, int size)
+check_state(int fd, struct directio_context *ct)
 {
-	long flags;
-	int reset_flags = 0;
-	int res, retval;
-
-	flags = fcntl(fd,F_GETFL);
-
-	if (flags < 0) {
-		return PATH_UNCHECKED;
+	struct timespec	timeout = { .tv_sec = 2 };
+	struct io_event event;
+	struct stat	sb;
+	int		rc = PATH_UNCHECKED;
+	long		r;
+
+	if (fstat(fd, &sb) == 0) {
+		condlog(4, "directio: called for %x", (unsigned) sb.st_rdev);
 	}
 
-	if (!(flags & O_DIRECT)) {
-		flags |= O_DIRECT;
-		if (fcntl(fd,F_SETFL,flags) < 0) {
+	if (!ct->running) {
+		struct iocb *ios[1] = { &ct->io };
+
+		condlog(3, "directio: starting new request");
+		memset(&ct->io, 0, sizeof(struct iocb));
+		io_prep_pread(&ct->io, fd, ct->ptr, ct->blksize, 0);
+		if (syscall(__NR_io_submit, ct->ioctx, 1, ios) != 1) {
+			condlog(3, "directio: io_submit error %i", errno);
 			return PATH_UNCHECKED;
 		}
-		reset_flags = 1;
 	}
+	ct->running = 1;
 
-	while ( (res = read(fd,buff,size)) < 0 && errno == EINTR );
-	if (res < 0) {
-		if (errno == EINVAL) {
-			/* O_DIRECT is not available */
-			retval = PATH_UNCHECKED;
-		} else if (errno == ENOMEM) {
-			retval = PATH_UP;
-		} else {
-			retval = PATH_DOWN;
-		}
+	r = syscall(__NR_io_getevents, ct->ioctx, 1L, 1L, &event, &timeout);
+	if (r < 1L) {
+		condlog(3, "directio: timeout r=%li errno=%i", r, errno);
+		rc = PATH_DOWN;
 	} else {
-		retval = PATH_UP;
-	}
-	
-	if (reset_flags) {
-		flags &= ~O_DIRECT;
-		/* No point in checking for errors */
-		fcntl(fd,F_SETFL,flags);
+		condlog(3, "directio: io finished %lu/%lu", event.res,
+			event.res2);
+		ct->running = 0;
+		rc = (event.res == ct->blksize) ? PATH_UP : PATH_DOWN;
 	}
 
-	return retval;
+	return rc;
 }
 
 int directio (struct checker * c)
@@ -119,7 +159,10 @@ int directio (struct checker * c)
 	int ret;
 	struct directio_context * ct = (struct directio_context *)c->context;
 
-	ret = direct_read(c->fd, ct->ptr, ct->blksize);
+	if (!ct)
+		return PATH_UNCHECKED;
+
+	ret = check_state(c->fd, ct);
 
 	switch (ret)
 	{
diff --git a/libcheckers/libaio.h b/libcheckers/libaio.h
new file mode 100644
index 0000000..6574601
--- /dev/null
+++ b/libcheckers/libaio.h
@@ -0,0 +1,222 @@
+/* /usr/include/libaio.h
+ *
+ * Copyright 2000,2001,2002 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise <bcrl at redhat.com>
+ *
+ * libaio Linux async I/O interface
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+#ifndef __LIBAIO_H
+#define __LIBAIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <string.h>
+
+struct timespec;
+struct sockaddr;
+struct iovec;
+struct iocb;
+
+typedef struct io_context *io_context_t;
+
+typedef enum io_iocb_cmd {
+	IO_CMD_PREAD = 0,
+	IO_CMD_PWRITE = 1,
+
+	IO_CMD_FSYNC = 2,
+	IO_CMD_FDSYNC = 3,
+
+	IO_CMD_POLL = 5,
+	IO_CMD_NOOP = 6,
+} io_iocb_cmd_t;
+
+#if defined(__i386__) /* little endian, 32 bits */
+#define PADDED(x, y)	x; unsigned y
+#define PADDEDptr(x, y)	x; unsigned y
+#define PADDEDul(x, y)	unsigned long x; unsigned y
+#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__)
+#define PADDED(x, y)	x, y
+#define PADDEDptr(x, y)	x
+#define PADDEDul(x, y)	unsigned long x
+#elif defined(__powerpc64__) /* big endian, 64 bits */
+#define PADDED(x, y)	unsigned y; x
+#define PADDEDptr(x,y)	x
+#define PADDEDul(x, y)	unsigned long x
+#elif defined(__PPC__)  /* big endian, 32 bits */
+#define PADDED(x, y)	unsigned y; x
+#define PADDEDptr(x, y)	unsigned y; x
+#define PADDEDul(x, y)	unsigned y; unsigned long x
+#elif defined(__s390x__) /* big endian, 64 bits */
+#define PADDED(x, y)	unsigned y; x
+#define PADDEDptr(x,y)	x
+#define PADDEDul(x, y)	unsigned long x
+#elif defined(__s390__) /* big endian, 32 bits */
+#define PADDED(x, y)	unsigned y; x
+#define PADDEDptr(x, y) unsigned y; x
+#define PADDEDul(x, y)	unsigned y; unsigned long x
+#else
+#error	endian?
+#endif
+
+struct io_iocb_poll {
+	PADDED(int events, __pad1);
+};	/* result code is the set of result flags or -'ve errno */
+
+struct io_iocb_sockaddr {
+	struct sockaddr *addr;
+	int		len;
+};	/* result code is the length of the sockaddr, or -'ve errno */
+
+struct io_iocb_common {
+	PADDEDptr(void	*buf, __pad1);
+	PADDEDul(nbytes, __pad2);
+	long long	offset;
+	long long	__pad3, __pad4;
+};	/* result code is the amount read or -'ve errno */
+
+struct io_iocb_vector {
+	const struct iovec	*vec;
+	int			nr;
+	long long		offset;
+};	/* result code is the amount read or -'ve errno */
+
+struct iocb {
+	PADDEDptr(void *data, __pad1);	/* Return in the io completion event */
+	PADDED(unsigned key, __pad2);	/* For use in identifying io requests */
+
+	short		aio_lio_opcode;	
+	short		aio_reqprio;
+	int		aio_fildes;
+
+	union {
+		struct io_iocb_common		c;
+		struct io_iocb_vector		v;
+		struct io_iocb_poll		poll;
+		struct io_iocb_sockaddr	saddr;
+	} u;
+};
+
+struct io_event {
+	PADDEDptr(void *data, __pad1);
+	PADDEDptr(struct iocb *obj,  __pad2);
+	PADDEDul(res,  __pad3);
+	PADDEDul(res2, __pad4);
+};
+
+#undef PADDED
+#undef PADDEDptr
+#undef PADDEDul
+
+typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2);
+
+/* library wrappers */
+extern int io_queue_init(int maxevents, io_context_t *ctxp);
+/*extern int io_queue_grow(io_context_t ctx, int new_maxevents);*/
+extern int io_queue_release(io_context_t ctx);
+/*extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);*/
+extern int io_queue_run(io_context_t ctx);
+
+/* Actual syscalls */
+extern int io_setup(int maxevents, io_context_t *ctxp);
+extern int io_destroy(io_context_t ctx);
+extern int io_submit(io_context_t ctx, long nr, struct iocb *ios[]);
+extern int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt);
+extern int io_getevents(io_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout);
+
+
+static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)
+{
+	iocb->data = (void *)cb;
+}
+
+static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_PREAD;
+	iocb->aio_reqprio = 0;
+	iocb->u.c.buf = buf;
+	iocb->u.c.nbytes = count;
+	iocb->u.c.offset = offset;
+}
+
+static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_PWRITE;
+	iocb->aio_reqprio = 0;
+	iocb->u.c.buf = buf;
+	iocb->u.c.nbytes = count;
+	iocb->u.c.offset = offset;
+}
+
+static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_POLL;
+	iocb->aio_reqprio = 0;
+	iocb->u.poll.events = events;
+}
+
+static inline int io_poll(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd, int events)
+{
+	io_prep_poll(iocb, fd, events);
+	io_set_callback(iocb, cb);
+	return io_submit(ctx, 1, &iocb);
+}
+
+static inline void io_prep_fsync(struct iocb *iocb, int fd)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_FSYNC;
+	iocb->aio_reqprio = 0;
+}
+
+static inline int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
+{
+	io_prep_fsync(iocb, fd);
+	io_set_callback(iocb, cb);
+	return io_submit(ctx, 1, &iocb);
+}
+
+static inline void io_prep_fdsync(struct iocb *iocb, int fd)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_FDSYNC;
+	iocb->aio_reqprio = 0;
+}
+
+static inline int io_fdsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
+{
+	io_prep_fdsync(iocb, fd);
+	io_set_callback(iocb, cb);
+	return io_submit(ctx, 1, &iocb);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __LIBAIO_H */
-- 
1.4.3.4




More information about the dm-devel mailing list