From 693806f872ff119aa526216ee45628e929c386f6 Mon Sep 17 00:00:00 2001 From: Robert Collins Date: Mon, 4 Jun 2012 15:01:14 +1200 Subject: [PATCH 2/2] Force switching drives after enough linear IO - allows readahead to exercise parallelism as well. --- drivers/md/dm-raid1.c | 59 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index e16a030..b9504d8 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,11 +22,19 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ +/* Maximum sector count of sequential IO sent to one mirror. This needs to be + * large enough not to saturate drive command queues but small enough that we + * can keep both drives active. + */ +#define DM_SEQUENTIAL_IO_MIRROR_LIMIT 131072 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) #ifndef PRINTK +# ifndef DEBUG +# define DEBUG 0 +# endif # if DEBUG > 0 # define PRINTK(x...) printk(KERN_DEBUG x) # else @@ -52,6 +60,7 @@ struct mirror { unsigned long error_type; struct dm_dev *dev; sector_t offset; + sector_t sequential_io_start_position; /* lets us detect long runs of sequential IO for load balancing */ sector_t last_read_position; /* let us map sequential IO to one disk */ }; @@ -417,11 +426,18 @@ static sector_t bio_distance(struct mirror *m, sector_t sector) return m->last_read_position > sector ? 
m->last_read_position - sector : sector - m->last_read_position; } +/* Return the number of sequential read IO in sectors undertaken so far by m */ +static sector_t sequential_ios(struct mirror *m) +{ + return m->last_read_position - m->sequential_io_start_position; +} + static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector, sector_t count, bool update_read_pos) { struct mirror *m, *closest = NULL; sector_t distance; sector_t temp_distance; + bool prefer_other_mirror; PRINTK("Choosing mirror: %llu %llu %d\n", (unsigned long long) sector, (unsigned long long) count, @@ -431,12 +447,15 @@ static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector, sect if (likely(!atomic_read(&(--m)->error_count))) { distance = bio_distance(m, sector); closest = m; - PRINTK("Choosing mirror: %llu %llu %d: closest=%p mirror=%p distance=%llu last-read %llu\n", + PRINTK("Choosing mirror: %llu %llu %d: closest=%p mirror=%p distance=%llu last-read %llu sequential %llu\n", (unsigned long long) sector, (unsigned long long) count, (int) update_read_pos, closest, ms->mirror, (unsigned long long)distance, - (unsigned long long)m->last_read_position); + (unsigned long long)m->last_read_position, + (unsigned long long)sequential_ios(m)); + /* Actively prefer a different mirror on sequential IO if we have exceeded the threshold */ + prefer_other_mirror = distance==0 && (sequential_ios(m) >= DM_SEQUENTIAL_IO_MIRROR_LIMIT); break; } else { PRINTK("Choosing mirror: %llu %llu %d: has-errors %d\n", @@ -449,9 +468,15 @@ static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector, sect /* Nothing usable */ if (unlikely(closest == NULL)) return NULL; + /* Only one drive available */ if (unlikely(closest == ms->mirror)) return closest; - /* Now see if there is a closer mirror */ + /* Now see if there is another usable mirror that is closer and has not + * had too much sequential read IO dispatched to it yet. 
+ * TODO: When N = or > nr_mirrors sets of sequential read IO are + * happening, allow each mirror to specialise, and avoid extraneous + * seeking. + */ for (m = closest; m != ms->mirror;) { if (unlikely(atomic_read(&(--m)->error_count))){ PRINTK("Choosing mirror: %llu %llu %d: has-errors %d\n", @@ -462,17 +487,31 @@ static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector, sect continue; } temp_distance = bio_distance(m, sector); - PRINTK("Choosing mirror: %llu %llu %d: closest=%p mirror=%p distance=%llu, temp_distance=%llu last-read %llu\n", + PRINTK("Choosing mirror: %llu %llu %d: closest=%p m=%p distance=%llu, temp_distance=%llu last-read %llu sequential %llu prefer-other %d\n", (unsigned long long) sector, (unsigned long long) count, (int) update_read_pos, - closest, ms->mirror, + closest, m, (unsigned long long) distance, (unsigned long long) temp_distance, - (unsigned long long)m->last_read_position); - if (temp_distance < distance) { + (unsigned long long)m->last_read_position, + (unsigned long long)sequential_ios(m), + (int)prefer_other_mirror); + /* Use this valid mirror if: + * - the first usable mirror has hit its sequential limit + * - or this mirror is closer and (on sequential IO) has not hit its limit + */ + if (prefer_other_mirror || + (temp_distance < distance && + (temp_distance != 0 || sequential_ios(m) < DM_SEQUENTIAL_IO_MIRROR_LIMIT))) { distance = temp_distance; closest = m; + prefer_other_mirror = false; + PRINTK("Choosing mirror: %llu %llu %d: selected %p\n", + (unsigned long long) sector, + (unsigned long long) count, + (int) update_read_pos, + m); } } /* TODO: also track where IO's have completed against: tracking submits @@ -480,8 +519,11 @@ static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector, sect * tagging - if we know where the disk is after either a write or a * read we can dispatch nearer it. 
*/ - if (likely(update_read_pos)) + if (likely(update_read_pos)) { + if (closest->last_read_position != sector) + closest->sequential_io_start_position = sector; closest->last_read_position = sector + count; + } return closest; } @@ -1009,6 +1051,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, ms->mirror[mirror].error_type = 0; ms->mirror[mirror].offset = offset; ms->mirror[mirror].last_read_position = 0; + ms->mirror[mirror].sequential_io_start_position = 0; return 0; } -- 1.7.9.5