[dm-devel] [PATCH 2/2] Use low pointer bits for dm io region
Mikulas Patocka
mpatocka at redhat.com
Wed Nov 11 02:05:04 UTC 2009
Use low pointer bits for dm io region
We need to store two things per bio: the pointer to the main io structure and
a region number, the index of the disk this bio belongs to (if there is a
simultaneous write to multiple disks). There can be at most BITS_PER_LONG
regions. BITS_PER_LONG is 32 on 32-bit machines and 64 on 64-bit machines.
A region number was stored in the last hidden bio vector and the pointer to
struct io was stored in bi_private.
This patch changes it so that "struct io" is always aligned on BITS_PER_LONG
bytes and region number is stored in the low BITS_PER_LONG bits of bi_private.
Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
---
drivers/md/dm-io.c | 84 +++++++++++++++++++++++++++++++----------------------
1 file changed, 50 insertions(+), 34 deletions(-)
Index: linux-2.6.31.6-fast/drivers/md/dm-io.c
===================================================================
--- linux-2.6.31.6-fast.orig/drivers/md/dm-io.c 2009-11-11 00:51:38.000000000 +0100
+++ linux-2.6.31.6-fast/drivers/md/dm-io.c 2009-11-11 02:13:32.000000000 +0100
@@ -14,12 +14,17 @@
#include <linux/slab.h>
#include <linux/dm-io.h>
+#define DM_IO_MAX_REGIONS BITS_PER_LONG
+
struct dm_io_client {
mempool_t *pool;
struct bio_set *bios;
};
-/* FIXME: can we shrink this ? */
+/*
+ * The alignment of "struct io" is required because region number is stored
+ * in the low bits of the pointer.
+ */
struct io {
unsigned long error_bits;
unsigned long eopnotsupp_bits;
@@ -28,7 +33,7 @@ struct io {
struct dm_io_client *client;
io_notify_fn callback;
void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
static struct kmem_cache *_io_cache = NULL;
@@ -90,18 +95,25 @@ EXPORT_SYMBOL(dm_io_client_destroy);
/*-----------------------------------------------------------------
* We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * In order to save a memory allocation we store this in the lowest
+ * bits of the pointer.
*---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static inline void bio_set_io_region(struct bio *bio, struct io *io,
+ unsigned region)
{
- bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+ if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+ printk(KERN_CRIT "dm-io: Unaligned pointer %p\n", io);
+ BUG();
+ }
+ bio->bi_private = (void *)((unsigned long)io | region);
}
-static inline unsigned bio_get_region(struct bio *bio)
+static inline void bio_get_io_region(struct bio *bio, struct io **io,
+ unsigned *region)
{
- return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+ unsigned long val = (unsigned long)bio->bi_private;
+ *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+ *region = val & (DM_IO_MAX_REGIONS - 1);
}
/*-----------------------------------------------------------------
@@ -142,10 +154,8 @@ static void endio(struct bio *bio, int e
/*
* The bio destructor in bio_put() may use the io object.
*/
- io = bio->bi_private;
- region = bio_get_region(bio);
+ bio_get_io_region(bio, &io, &region);
- bio->bi_max_vecs++;
bio_put(bio);
dec_count(io, region, error);
@@ -245,7 +255,10 @@ static void vm_dp_init(struct dpages *dp
static void dm_bio_destructor(struct bio *bio)
{
- struct io *io = bio->bi_private;
+ unsigned region;
+ struct io *io;
+
+ bio_get_io_region(bio, &io, &region);
bio_free(bio, io->client->bios);
}
@@ -294,24 +307,17 @@ static void do_region(int rw, unsigned r
*/
do {
/*
- * Allocate a suitably sized-bio: we add an extra
- * bvec for bio_get/set_region() and decrement bi_max_vecs
- * to hide it from bio_add_page().
+ * Allocate a suitably sized-bio.
*/
num_bvecs = dm_sector_div_up(remaining,
(PAGE_SIZE >> SECTOR_SHIFT));
- num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
- num_bvecs);
- if (unlikely(num_bvecs > BIO_MAX_PAGES))
- num_bvecs = BIO_MAX_PAGES;
+ num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
- bio->bi_private = io;
bio->bi_destructor = dm_bio_destructor;
- bio->bi_max_vecs--;
- bio_set_region(bio, region);
+ bio_set_io_region(bio, io, region);
/*
* Try and add as many pages as possible.
@@ -339,6 +345,8 @@ static void dispatch_io(int rw, unsigned
int i;
struct dpages old_pages = *dp;
+ BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
if (sync)
rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
@@ -363,7 +371,15 @@ static int sync_io(struct dm_io_client *
struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
- struct io io;
+ /*
+ * gcc <= 4.3 can't do the alignment for stack variables, so we must
+ * align it on our own.
+ * volatile is there to prevent the optimizer from removing or reusing
+ * "io_" field from the stack frame. (which would be allowed according
+ * to ANSI C rules).
+ */
+ volatile char io_[sizeof(struct io) * 2 - 1];
+ struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
@@ -371,33 +387,33 @@ static int sync_io(struct dm_io_client *
}
retry:
- io.error_bits = 0;
- io.eopnotsupp_bits = 0;
- atomic_set(&io.count, 1); /* see dispatch_io() */
- io.sleeper = current;
- io.client = client;
+ io->error_bits = 0;
+ io->eopnotsupp_bits = 0;
+ atomic_set(&io->count, 1); /* see dispatch_io() */
+ io->sleeper = current;
+ io->client = client;
- dispatch_io(rw, num_regions, where, dp, &io, 1);
+ dispatch_io(rw, num_regions, where, dp, io, 1);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&io.count))
+ if (!atomic_read(&io->count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
- if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+ if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
rw &= ~(1 << BIO_RW_BARRIER);
goto retry;
}
if (error_bits)
- *error_bits = io.error_bits;
+ *error_bits = io->error_bits;
- return io.error_bits ? -EIO : 0;
+ return io->error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
More information about the dm-devel
mailing list