[dm-devel] [PATCH 2/2] dm thin: support for non power of 2 pool blocksize
Mike Snitzer
snitzer at redhat.com
Sat Apr 28 04:44:29 UTC 2012
Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).
Use do_div wrappers to support non power of 2 blocksize for the pool's
data device. do_div provides comparable performance to the power of 2
math that was performed until now (as tested on modern x86_64 hardware).
Verify that the pool's blocksize is a multiple of 64K and that the size of
the pool's data device is a multiple of the blocksize.
Eliminate pool structure's 'block_shift', 'offset_mask' and
remaining 4 byte holes.
Signed-off-by: Mike Snitzer <snitzer at redhat.com>
---
drivers/md/dm-thin.c | 56 +++++++++++++++++++++++++++++++++----------------
1 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ce7dd80..91644b4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -507,10 +507,8 @@ struct pool {
struct block_device *md_dev;
struct dm_pool_metadata *pmd;
- uint32_t sectors_per_block;
- unsigned block_shift;
- dm_block_t offset_mask;
dm_block_t low_water_blocks;
+ uint32_t sectors_per_block;
struct pool_features pf;
unsigned low_water_triggered:1; /* A dm event has been sent */
@@ -523,8 +521,8 @@ struct pool {
struct work_struct worker;
struct delayed_work waker;
- unsigned ref_count;
unsigned long last_commit_jiffies;
+ unsigned ref_count;
spinlock_t lock;
struct bio_list deferred_bios;
@@ -673,9 +671,27 @@ static void requeue_io(struct thin_c *tc)
* target.
*/
+/*
+ * do_div wrappers that don't modify the dividend
+ */
+static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
+{
+ sector_t r = a;
+
+ do_div(r, b);
+ return r;
+}
+
+static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
+{
+ sector_t tmp = a;
+
+ return do_div(tmp, b);
+}
+
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
- return bio->bi_sector >> tc->pool->block_shift;
+ return dm_thin_do_div(bio->bi_sector, tc->pool->sectors_per_block);
}
static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
@@ -683,8 +699,8 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
struct pool *pool = tc->pool;
bio->bi_bdev = tc->pool_dev->bdev;
- bio->bi_sector = (block << pool->block_shift) +
- (bio->bi_sector & pool->offset_mask);
+ bio->bi_sector = (block * pool->sectors_per_block) +
+ dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block);
}
static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -929,9 +945,8 @@ static void process_prepared(struct pool *pool, struct list_head *head,
*/
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return !(bio->bi_sector & pool->offset_mask) &&
+ return !dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block) &&
(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
-
}
static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1234,8 +1249,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
* part of the discard that is in a subsequent
* block.
*/
- sector_t offset = bio->bi_sector - (block << pool->block_shift);
- unsigned remaining = (pool->sectors_per_block - offset) << 9;
+ sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+ unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
bio->bi_size = min(bio->bi_size, remaining);
cell_release_singleton(cell, bio);
@@ -1696,8 +1711,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
pool->pmd = pmd;
pool->sectors_per_block = block_size;
- pool->block_shift = ffs(block_size) - 1;
- pool->offset_mask = block_size - 1;
pool->low_water_blocks = 0;
pool_features_init(&pool->pf);
pool->prison = prison_create(PRISON_CELLS);
@@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
- !is_power_of_2(block_size)) {
+ dm_thin_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) {
ti->error = "Invalid block size";
r = -EINVAL;
goto out;
}
+ if (dm_thin_do_mod(ti->len, block_size)) {
+ ti->error = "Data device is not a multiple of block size";
+ r = -EINVAL;
+ goto out;
+ }
+
if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
ti->error = "Invalid low water mark";
r = -EINVAL;
@@ -2089,7 +2108,7 @@ static int pool_preresume(struct dm_target *ti)
if (r)
return r;
- data_size = ti->len >> pool->block_shift;
+ data_size = dm_thin_do_div(ti->len, pool->sectors_per_block);
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
@@ -2709,17 +2728,18 @@ static int thin_iterate_devices(struct dm_target *ti,
{
dm_block_t blocks;
struct thin_c *tc = ti->private;
+ struct pool *pool = tc->pool;
/*
* We can't call dm_pool_get_data_dev_size() since that blocks. So
* we follow a more convoluted path through to the pool's target.
*/
- if (!tc->pool->ti)
+ if (!pool->ti)
return 0; /* nothing is bound */
- blocks = tc->pool->ti->len >> tc->pool->block_shift;
+ blocks = dm_thin_do_div(pool->ti->len, pool->sectors_per_block);
if (blocks)
- return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+ return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
return 0;
}
--
1.7.1
More information about the dm-devel
mailing list