[dm-devel] [PATCH 4/4] dm-userspace: use mmaped buffer instead of read/write system calls
fujita
tomof at dd.iij4u.or.jp
Sat Sep 30 10:44:36 UTC 2006
FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
Dcc: fujita.tomonori at lab.ntt.co.jp
Mime-Version: 1.0
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Use mmaped buffer instead of read/write system calls for kernel/user
communication. I've not tested this heavily, though it seems to
work. Hopefully, it's good enough for performance comparisons.
Here's a user-space example code:
http://www.kernel.org/pub/linux/kernel/people/tomo/dmu/example-rb.c
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
drivers/md/dm-user.h | 2
drivers/md/dm-userspace-chardev.c | 300 ++++++++++++++++++++++++++-----------
drivers/md/dm-userspace.c | 20 --
include/linux/dm-userspace.h | 6 +
4 files changed, 221 insertions(+), 107 deletions(-)
diff --git a/drivers/md/dm-user.h b/drivers/md/dm-user.h
index 06b251b..1f301f2 100644
--- a/drivers/md/dm-user.h
+++ b/drivers/md/dm-user.h
@@ -119,6 +119,8 @@ void cleanup_chardev_transport(void);
void write_chardev_transport_info(struct dmu_device *dev,
char *buf, unsigned int maxlen);
+extern void dmu_add_tx_request(struct dmu_device *dev, struct dmu_request *req);
+
/* Return the block number for @sector */
static inline u64 dmu_block(struct dmu_device *dev,
sector_t sector)
diff --git a/drivers/md/dm-userspace-chardev.c b/drivers/md/dm-userspace-chardev.c
index ee55ca8..e3f85c7 100644
--- a/drivers/md/dm-userspace-chardev.c
+++ b/drivers/md/dm-userspace-chardev.c
@@ -2,6 +2,8 @@
* Copyright (C) International Business Machines Corp., 2006
* Author: Dan Smith <danms at us.ibm.com>
*
+ * Copyright (C) 2006 FUJITA Tomonori <tomof at acm.org>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; under version 2 of the License.
@@ -36,6 +38,12 @@ #include "dm-user.h"
#define DM_MSG_PREFIX "dm-userspace"
+struct dmu_ring {
+ u32 r_idx;
+ unsigned long r_pages[DMU_RING_PAGES];
+ spinlock_t r_lock;
+};
+
/* This allows for a cleaner separation between the dm-userspace
* device-mapper target, and the userspace transport used. Right now,
* only a chardev transport exists, but it's possible that there could
@@ -45,8 +53,31 @@ struct chardev_transport {
struct cdev cdev;
dev_t ctl_dev;
struct dmu_device *parent;
+
+ struct dmu_ring tx;
+ struct dmu_ring rx;
+ wait_queue_head_t tx_poll_wait;
};
+static inline void dmu_ring_idx_inc(struct dmu_ring *r)
+{
+ if (r->r_idx == DMU_MAX_EVENTS - 1)
+ r->r_idx = 0;
+ else
+ r->r_idx++;
+}
+
+static struct dmu_msg *dmu_head_msg(struct dmu_ring *r, u32 idx)
+{
+ u32 pidx, off;
+
+ pidx = idx / DMU_EVENT_PER_PAGE;
+ off = idx % DMU_EVENT_PER_PAGE;
+
+ return (struct dmu_msg *)
+ (r->r_pages[pidx] + sizeof(struct dmu_msg) * off);
+}
+
static struct dmu_request *find_rx_request(struct dmu_device *dev,
uint64_t id)
{
@@ -66,34 +97,39 @@ static struct dmu_request *find_rx_reque
return match;
}
-static int have_pending_requests(struct dmu_device *dev)
-{
- return atomic_read(&dev->t_reqs) != 0;
-}
-
-static int send_userspace_message(uint8_t __user *buffer,
- struct dmu_request *req)
+static int send_userspace_message(struct dmu_device *dev, struct dmu_request *req)
{
+ struct chardev_transport *t = dev->transport_private;
int ret = 0;
- struct dmu_msg msg;
+ struct dmu_msg *msg;
+ struct dmu_ring *ring = &t->tx;
+
+ spin_lock(&ring->r_lock);
+ msg = dmu_head_msg(ring, ring->r_idx);
+ if (!msg->hdr.status)
+ dmu_ring_idx_inc(ring);
+ else
+ ret = -EBUSY;
+ spin_unlock(&ring->r_lock);
- memset(&msg, 0, sizeof(msg));
+ if (ret)
+ return ret;
- msg.hdr.id = req->id;
+ msg->hdr.id = req->id;
switch (req->type) {
case DM_USERSPACE_MAP_BLOCK_REQ:
- msg.hdr.msg_type = req->type;
- msg.payload.map_req.org_block = req->u.block;
- dmu_cpy_flag(&msg.payload.map_req.flags,
+ msg->hdr.msg_type = req->type;
+ msg->payload.map_req.org_block = req->u.block;
+ dmu_cpy_flag(&msg->payload.map_req.flags,
req->flags, DMU_FLAG_WR);
break;
case DM_USERSPACE_MAP_DONE:
- msg.hdr.msg_type = DM_USERSPACE_MAP_DONE;
- msg.payload.map_done.id_of_op = req->id;
- msg.payload.map_done.org_block = req->u.block;
- dmu_cpy_flag(&msg.payload.map_done.flags,
+ msg->hdr.msg_type = DM_USERSPACE_MAP_DONE;
+ msg->payload.map_done.id_of_op = req->id;
+ msg->payload.map_done.org_block = req->u.block;
+ dmu_cpy_flag(&msg->payload.map_done.flags,
req->flags, DMU_FLAG_WR);
break;
@@ -102,10 +138,9 @@ static int send_userspace_message(uint8_
ret = 0;
}
- if (copy_to_user(buffer, &msg, sizeof(msg)))
- return -EFAULT;
-
- ret = sizeof(msg);
+ msg->hdr.status = 1;
+ mb();
+ flush_dcache_page(virt_to_page(msg));
/* If this request is not on a list (the rx_requests list),
* then it needs to be freed after sending
@@ -113,10 +148,12 @@ static int send_userspace_message(uint8_
if (list_empty(&req->list))
mempool_free(req, request_pool);
- return ret;
+ wake_up_interruptible(&dev->wqueue);
+
+ return 0;
}
-struct dmu_request *pluck_next_request(struct dmu_device *dev)
+static struct dmu_request *pluck_next_request(struct dmu_device *dev)
{
struct dmu_request *req = NULL;
unsigned long flags;
@@ -142,56 +179,39 @@ struct dmu_request *pluck_next_request(s
return req;
}
-ssize_t dmu_ctl_read(struct file *file, char __user *buffer,
- size_t size, loff_t *offset)
+static void delay_tx_request(struct dmu_device *dev, struct dmu_request *req)
{
+ unsigned long flags;
- struct dmu_device *dev = (struct dmu_device *)file->private_data;
- struct dmu_request *req = NULL;
- int ret = 0, r;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- if (size < sizeof(struct dmu_msg)) {
- DMERR("Userspace buffer too small for a single message");
- return 0;
- }
-
- while (!have_pending_requests(dev)) {
- if (file->f_flags & O_NONBLOCK) {
- return 0;
- }
-
- if (wait_event_interruptible(dev->wqueue,
- have_pending_requests(dev)))
- return -ERESTARTSYS;
- }
-
- while (ret < size) {
- if ((size - ret) < sizeof(struct dmu_msg))
- break;
+ spin_lock(&dev->lock);
+ list_del_init(&req->list);
+ atomic_dec(&dev->r_reqs);
+ spin_unlock(&dev->lock);
- req = pluck_next_request(dev);
- if (!req)
- break;
+ spin_lock_irqsave(&dev->tx_lock, flags);
+ list_add_tail(&req->list, &dev->tx_requests);
+ atomic_inc(&dev->t_reqs);
+ spin_unlock_irqrestore(&dev->tx_lock, flags);
+}
- r = send_userspace_message((void *)(buffer + ret), req);
- if (r == 0)
- continue;
- else if (r < 0)
- return r;
+/* Add a request to a device's request queue */
+void dmu_add_tx_request(struct dmu_device *dev, struct dmu_request *req)
+{
+ int err;
- ret += r;
- }
+ BUG_ON(!list_empty(&req->list));
- if (ret < sizeof(struct dmu_msg)) {
- if (ret != 0)
- DMERR("Sending partial message!");
- DMINFO("Sent 0 requests to userspace");
+ if (req->type == DM_USERSPACE_MAP_BLOCK_REQ ||
+ req->type == DM_USERSPACE_MAP_DONE) {
+ spin_lock(&dev->lock);
+ list_add_tail(&req->list, &dev->rx_requests);
+ atomic_inc(&dev->r_reqs);
+ spin_unlock(&dev->lock);
}
- return ret;
+ err = send_userspace_message(dev, req);
+ if (err)
+ delay_tx_request(dev, req);
}
static struct dmu_request *pluck_dep_req(struct dmu_request *req)
@@ -402,54 +422,91 @@ ssize_t dmu_ctl_write(struct file *file,
size_t size, loff_t *offset)
{
struct dmu_device *dev = (struct dmu_device *)file->private_data;
- int ret = 0;
- struct dmu_msg msg;
+ struct chardev_transport *t = dev->transport_private;
+ struct dmu_ring *ring = &t->rx;
+ struct dmu_msg *msg;
+ struct dmu_request *req;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- while ((ret + sizeof(msg)) <= size) {
- if (copy_from_user(&msg, buffer+ret, sizeof(msg))) {
- DMERR("%s copy_from_user failed!", __FUNCTION__);
- ret = -EFAULT;
- goto out;
- }
+ while (1) {
+ msg = dmu_head_msg(ring, ring->r_idx);
- ret += sizeof(msg);
+ if (!msg->hdr.status)
+ break;
+
+ /* do we need this? */
+ flush_dcache_page(virt_to_page(msg));
+ dmu_ring_idx_inc(ring);
- switch (msg.hdr.msg_type) {
+ switch (msg->hdr.msg_type) {
case DM_USERSPACE_MAP_BLOCK_RESP:
- do_map_bio(dev, &msg.payload.map_rsp);
+ do_map_bio(dev, &msg->payload.map_rsp);
break;
case DM_USERSPACE_MAP_FAILED:
- do_map_failed(dev, msg.payload.map_rsp.id_of_req);
+ do_map_failed(dev, msg->payload.map_rsp.id_of_req);
break;
case DM_USERSPACE_MAP_DONE:
- do_map_done(dev, msg.payload.map_done.id_of_op, 0);
+ do_map_done(dev, msg->payload.map_done.id_of_op, 0);
break;
case DM_USERSPACE_MAP_DONE_FAILED:
- do_map_done(dev, msg.payload.map_done.id_of_op, 1);
+ do_map_done(dev, msg->payload.map_done.id_of_op, 1);
break;
default:
DMWARN("Unknown incoming request type: %i",
- msg.hdr.msg_type);
+ msg->hdr.msg_type);
}
+
+ msg->hdr.status = 0;
}
- out:
- if (ret < sizeof(msg))
- DMINFO("Received 0 responses from userspace");
- return ret;
+ while ((req = pluck_next_request(dev))) {
+ int err = send_userspace_message(dev, req);
+ if (err) {
+ delay_tx_request(dev, req);
+ break;
+ }
+ }
+
+ return size;
+}
+
+static void dmu_ring_free(struct dmu_ring *r)
+{
+ int i;
+ for (i = 0; i < DMU_RING_PAGES; i++) {
+ if (!r->r_pages[i])
+ break;
+ free_page(r->r_pages[i]);
+ r->r_pages[i] = 0;
+ }
+}
+
+static int dmu_ring_alloc(struct dmu_ring *r)
+{
+ int i;
+
+ r->r_idx = 0;
+ spin_lock_init(&r->r_lock);
+
+ for (i = 0; i < DMU_RING_PAGES; i++) {
+ r->r_pages[i] = get_zeroed_page(GFP_KERNEL);
+ if (!r->r_pages[i])
+ return -ENOMEM;
+ }
+ return 0;
}
int dmu_ctl_open(struct inode *inode, struct file *file)
{
struct chardev_transport *t;
struct dmu_device *dev;
+ int ret;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -457,19 +514,33 @@ int dmu_ctl_open(struct inode *inode, st
t = container_of(inode->i_cdev, struct chardev_transport, cdev);
dev = t->parent;
+ ret = dmu_ring_alloc(&t->tx);
+ if (ret)
+ goto free_tx;
+
+ ret = dmu_ring_alloc(&t->rx);
+ if (ret)
+ goto free_rx;
+
get_dev(dev);
file->private_data = dev;
return 0;
+free_rx:
+ dmu_ring_free(&t->rx);
+free_tx:
+ dmu_ring_free(&t->tx);
+ return ret;
}
int dmu_ctl_release(struct inode *inode, struct file *file)
{
- struct dmu_device *dev;
-
- dev = (struct dmu_device *)file->private_data;
+ struct dmu_device *dev = (struct dmu_device *)file->private_data;
+ struct chardev_transport *t = dev->transport_private;
+ dmu_ring_free(&t->rx);
+ dmu_ring_free(&t->tx);
put_dev(dev);
return 0;
@@ -478,21 +549,72 @@ int dmu_ctl_release(struct inode *inode,
unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
{
struct dmu_device *dev = (struct dmu_device *)file->private_data;
+ struct chardev_transport *t = dev->transport_private;
+ struct dmu_ring *ring = &t->tx;
+ struct dmu_msg *msg;
unsigned mask = 0;
+ u32 idx;
poll_wait(file, &dev->wqueue, wait);
- if (have_pending_requests(dev))
+ spin_lock(&ring->r_lock);
+
+ idx = ring->r_idx ? ring->r_idx - 1 : DMU_MAX_EVENTS - 1;
+ msg = dmu_head_msg(ring, idx);
+ if (msg->hdr.status)
mask |= POLLIN | POLLRDNORM;
+ spin_unlock(&ring->r_lock);
+
return mask;
}
+static int dmu_ring_map(struct vm_area_struct *vma, unsigned long addr,
+ struct dmu_ring *ring)
+{
+ int i, err;
+
+ for (i = 0; i < DMU_RING_PAGES; i++) {
+ struct page *page = virt_to_page(ring->r_pages[i]);
+ err = vm_insert_page(vma, addr, page);
+ if (err)
+ return err;
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+static int dmu_ctl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct dmu_device *dev = (struct dmu_device *)file->private_data;
+ struct chardev_transport *t = dev->transport_private;
+ unsigned long addr;
+ int err;
+
+ if (vma->vm_pgoff)
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != DMU_RING_SIZE * 2) {
+ DMERR("mmap size must be %lu, not %lu \n",
+ DMU_RING_SIZE * 2, vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ addr = vma->vm_start;
+ err = dmu_ring_map(vma, addr, &t->tx);
+ if (err)
+ return err;
+ err = dmu_ring_map(vma, addr + DMU_RING_SIZE, &t->rx);
+
+ return err;
+}
+
static struct file_operations ctl_fops = {
.open = dmu_ctl_open,
.release = dmu_ctl_release,
- .read = dmu_ctl_read,
.write = dmu_ctl_write,
+ .mmap = dmu_ctl_mmap,
.poll = dmu_ctl_poll,
.owner = THIS_MODULE,
};
diff --git a/drivers/md/dm-userspace.c b/drivers/md/dm-userspace.c
index 3f3d2ef..6074f6b 100644
--- a/drivers/md/dm-userspace.c
+++ b/drivers/md/dm-userspace.c
@@ -49,22 +49,6 @@ LIST_HEAD(devices);
/* Device number for the control device */
dev_t dmu_dev;
-/* Add a request to a device's request queue */
-static void add_tx_request(struct dmu_device *dev,
- struct dmu_request *req)
-{
- unsigned long flags;
-
- BUG_ON(!list_empty(&req->list));
-
- spin_lock_irqsave(&dev->tx_lock, flags);
- list_add_tail(&req->list, &dev->tx_requests);
- atomic_inc(&dev->t_reqs);
- spin_unlock_irqrestore(&dev->tx_lock, flags);
-
- wake_up(&dev->wqueue);
-}
-
static void endio_worker(void *data)
{
struct dmu_request *req = data;
@@ -431,7 +415,7 @@ static int dmu_map(struct dm_target *ti,
init_req(dev, bio, req);
- add_tx_request(dev, req);
+ dmu_add_tx_request(dev, req);
return 0;
}
@@ -480,7 +464,7 @@ static int dmu_end_io(struct dm_target *
if (dmu_get_flag(&req->flags, DMU_FLAG_SYNC)) {
req->type = DM_USERSPACE_MAP_DONE;
- add_tx_request(req->dev, req);
+ dmu_add_tx_request(req->dev, req);
ret = 1;
} else {
INIT_WORK(&req->task, endio_worker, req);
diff --git a/include/linux/dm-userspace.h b/include/linux/dm-userspace.h
index 698093a..0d7f59e 100644
--- a/include/linux/dm-userspace.h
+++ b/include/linux/dm-userspace.h
@@ -65,6 +65,7 @@ static inline void dmu_cpy_flag(uint32_t
*/
struct dmu_msg_header {
uint64_t id;
+ uint64_t status;
uint32_t msg_type;
uint32_t payload_len;
};
@@ -112,4 +113,9 @@ struct dmu_msg {
} payload;
};
+#define DMU_RING_SIZE (1UL << 18)
+#define DMU_RING_PAGES (DMU_RING_SIZE >> PAGE_SHIFT)
+#define DMU_EVENT_PER_PAGE (PAGE_SIZE / sizeof(struct dmu_msg))
+#define DMU_MAX_EVENTS (DMU_EVENT_PER_PAGE * DMU_RING_PAGES)
+
#endif
--
1.4.1
More information about the dm-devel
mailing list