From a7047dcb4f19335089de39bcc5faee27e68f1b74 Mon Sep 17 00:00:00 2001
From: Robert Collins
Date: Fri, 8 Jul 2011 22:42:02 +1200
Subject: [PATCH 1/2] Load balance dm-raid1 reads by most recently dispatched sector.

---
Review notes (text in this section is ignored by git am):
 - choose_mirror() now makes a single pass over the mirrors and always
   records the read position when asked to; previously the update was
   skipped when mirror 0 was the first usable mirror found.
 - Call sites use bio_sectors() instead of open-coded bi_size >> 9 and
   pass true/false for the bool argument.
 - This copy was re-flowed from a whitespace-collapsed original.  Tabs and
   spacing in context lines are reconstructed, and the blob ids on the
   "index" line describe the original submission, so use
   "git apply --ignore-whitespace" if it does not apply cleanly.

 drivers/md/dm-raid1.c |  111 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 97 insertions(+), 14 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9bfd057..e16a030 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -26,6 +26,14 @@
 #define DM_RAID1_HANDLE_ERRORS 0x01
 #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
 
+#ifndef PRINTK
+#  if DEBUG > 0
+#    define PRINTK(x...) printk(KERN_DEBUG x)
+#  else
+#    define PRINTK(x...)
+#  endif
+#endif
+
 static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
 
 /*-----------------------------------------------------------------
@@ -44,6 +52,7 @@ struct mirror {
 	unsigned long error_type;
 	struct dm_dev *dev;
 	sector_t offset;
+	sector_t last_read_position; /* let us map sequential IO to one disk */
 };
 
 struct mirror_set {
@@ -403,19 +412,89 @@ static void do_recovery(struct mirror_set *ms)
 /*-----------------------------------------------------------------
  * Reads
  *---------------------------------------------------------------*/
-static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
+
+/*
+ * Distance, in sectors, between @sector and the position recorded for the
+ * last read dispatched to @m.
+ */
+static sector_t bio_distance(struct mirror *m, sector_t sector)
 {
-	struct mirror *m = get_default_mirror(ms);
-
-	do {
-		if (likely(!atomic_read(&m->error_count)))
-			return m;
-
-		if (m-- == ms->mirror)
-			m += ms->nr_mirrors;
-	} while (m != get_default_mirror(ms));
+	if (m->last_read_position > sector)
+		return m->last_read_position - sector;
 
-	return NULL;
+	return sector - m->last_read_position;
+}
+
+/*
+ * Choose the error-free mirror whose last dispatched read ended closest to
+ * @sector, so that sequential reads keep landing on the same device.
+ *
+ * @count is the length of the read in sectors.  When @update_read_pos is
+ * true the chosen mirror's position is advanced to @sector + @count; pass
+ * false when only probing whether a mirror is available.
+ *
+ * Returns NULL if every mirror has a non-zero error count.
+ */
+static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector,
+				    sector_t count, bool update_read_pos)
+{
+	struct mirror *m, *closest = NULL;
+	sector_t distance = 0;
+	sector_t temp_distance;
+	unsigned int i;
+
+	PRINTK("Choosing mirror: %llu %llu %d\n",
+	       (unsigned long long) sector,
+	       (unsigned long long) count,
+	       (int) update_read_pos);
+
+	/*
+	 * Scan from the last mirror down to mirror 0.  A candidate only
+	 * replaces the current choice when it is strictly closer, so ties
+	 * go to the higher-numbered mirror.
+	 */
+	for (i = ms->nr_mirrors; i-- > 0; ) {
+		m = &ms->mirror[i];
+
+		if (unlikely(atomic_read(&m->error_count))) {
+			PRINTK("Choosing mirror: %llu %llu %d: has-errors %d\n",
+			       (unsigned long long) sector,
+			       (unsigned long long) count,
+			       (int) update_read_pos,
+			       atomic_read(&m->error_count));
+			continue;
+		}
+
+		temp_distance = bio_distance(m, sector);
+		PRINTK("Choosing mirror: %llu %llu %d: closest=%p mirror=%p distance=%llu, temp_distance=%llu last-read %llu\n",
+		       (unsigned long long) sector,
+		       (unsigned long long) count,
+		       (int) update_read_pos,
+		       closest, ms->mirror,
+		       (unsigned long long) distance,
+		       (unsigned long long) temp_distance,
+		       (unsigned long long) m->last_read_position);
+
+		if (!closest || temp_distance < distance) {
+			distance = temp_distance;
+			closest = m;
+		}
+	}
+
+	/* Nothing usable */
+	if (unlikely(!closest))
+		return NULL;
+
+	/*
+	 * TODO: also track where I/Os complete.  Tracking submissions lets
+	 * us keep sequential requests on one disk, but the disks probably
+	 * reorder queued commands; knowing where a disk is after either a
+	 * read or a write would let us dispatch nearer to it.
+	 */
+	if (likely(update_read_pos))
+		closest->last_read_position = sector + count;
+
+	return closest;
 }
 
 static int default_ok(struct mirror *m)
@@ -431,7 +510,8 @@ static int mirror_available(struct mirror_set *ms, struct bio *bio)
 	region_t region = dm_rh_bio_to_region(ms->rh, bio);
 
 	if (log->type->in_sync(log, region, 0))
-		return choose_mirror(ms, bio->bi_sector) ? 1 : 0;
+		return choose_mirror(ms, bio->bi_sector, bio_sectors(bio),
+				     false) ? 1 : 0;
 
 	return 0;
 }
@@ -500,6 +580,7 @@ static void read_callback(unsigned long error, void *context)
 	bio_set_m(bio, NULL);
 
 	if (likely(!error)) {
+		/* Should update head position here */
 		bio_endio(bio, 0);
 		return;
 	}
@@ -558,7 +639,8 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
 		 * We can only read balance if the region is in sync.
 		 */
 		if (likely(region_in_sync(ms, region, 1)))
-			m = choose_mirror(ms, bio->bi_sector);
+			m = choose_mirror(ms, bio->bi_sector,
+					  bio_sectors(bio), true);
 		else if (m && atomic_read(&m->error_count))
 			m = NULL;
 
@@ -940,6 +1022,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 	atomic_set(&(ms->mirror[mirror].error_count), 0);
 	ms->mirror[mirror].error_type = 0;
 	ms->mirror[mirror].offset = offset;
+	ms->mirror[mirror].last_read_position = 0;
 
 	return 0;
 }
@@ -1181,7 +1264,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
 	 * The region is in-sync and we can perform reads directly.
 	 * Store enough information so we can retry if it fails.
 	 */
-	m = choose_mirror(ms, bio->bi_sector);
+	m = choose_mirror(ms, bio->bi_sector, bio_sectors(bio), true);
 	if (unlikely(!m))
 		return -EIO;
 
-- 
1.7.9.5