[dm-devel] [PATCH] 1/1: dm-mpath: Use one list of paths in the priority_group
Kevin Corry
kevcorry at us.ibm.com
Fri Jan 23 11:35:02 UTC 2004
Here's the patch to combine the valid_paths and invalid_paths lists in the
priority_group into a single list. It really pains me to have to send in a
patch this huge, but I really couldn't find a meaningful way to break it up.
--
Kevin Corry
kevcorry at us.ibm.com
http://evms.sourceforge.net/
In the priority_group struct, combine the valid_paths and invalid_paths lists
into a single paths list. This is done to eliminate a deadlock condition
that will occur if a multipath device is suspended while one of its paths
is in the middle of being tested.
This has quite a rippling effect on the data structures and locking throughout
dm-mpath.c. In the path struct, we introduce a failed_lock spinlock to protect
has_failed, fail_count, and fail_total. In the multipath struct, the path_lock
spinlock is removed and replaced with a current_lock spinlock to protect the
current_path pointer and count of I/Os to the current_path. With this new
locking, some of the atomic_t's can become simple unsigned int's. The code
changes look very substantial, but it all relates to the elimination of the
path_lock and the addition of the failed_lock and current_lock.
Also in the multipath struct, we remove the test_ios bio list. This was
introduced a few revisions ago because we didn't want to submit the test
bio's while holding the path_lock. Now that the path_lock is gone, we can
go back to submitting the test bio's in test_path() instead of queueing them
to be submitted later in do_work().
--- diff/drivers/md/dm-mpath.c 2004-01-23 09:55:27.000000000 -0600
+++ source/drivers/md/dm-mpath.c 2004-01-23 10:10:55.000000000 -0600
@@ -28,9 +28,10 @@
struct dm_dev *dev;
struct priority_group *pg;
+ spinlock_t failed_lock;
int has_failed;
- atomic_t fail_count;
- atomic_t fail_total;
+ unsigned fail_count;
+ unsigned fail_total;
struct semaphore test_lock;
struct bio *test_bio;
@@ -43,8 +44,7 @@
unsigned priority;
struct multipath *m;
struct path_selector *ps;
- struct list_head valid_paths;
- struct list_head invalid_paths;
+ struct list_head paths;
};
/* Multipath context */
@@ -52,15 +52,15 @@
struct list_head list;
struct dm_target *ti;
- spinlock_t path_lock;
struct list_head priority_groups;
+
+ spinlock_t current_lock;
struct path *current_path;
- atomic_t count;
+ unsigned current_count;
unsigned min_io;
spinlock_t failed_lock;
struct bio_list failed_ios;
- struct bio_list test_ios;
unsigned test_interval;
atomic_t trigger_event;
@@ -72,8 +72,8 @@
if (path) {
memset(path, 0, sizeof(*path));
- atomic_set(&path->fail_count, MPATH_FAIL_COUNT);
- atomic_set(&path->fail_total, 0);
+ path->failed_lock = SPIN_LOCK_UNLOCKED;
+ path->fail_count = MPATH_FAIL_COUNT;
init_MUTEX_LOCKED(&path->test_lock); /* resume will unlock */
path->test_bio = bio_alloc(GFP_KERNEL, 1);
@@ -116,8 +116,7 @@
}
memset(pg->ps, 0, sizeof(*pg->ps));
- INIT_LIST_HEAD(&pg->valid_paths);
- INIT_LIST_HEAD(&pg->invalid_paths);
+ INIT_LIST_HEAD(&pg->paths);
return pg;
}
@@ -146,8 +145,7 @@
kfree(ps);
}
- free_paths(&pg->valid_paths, ti);
- free_paths(&pg->invalid_paths, ti);
+ free_paths(&pg->paths, ti);
kfree(pg);
}
@@ -158,8 +156,8 @@
m = kmalloc(sizeof(*m), GFP_KERNEL);
if (m) {
memset(m, 0, sizeof(*m));
- m->path_lock = SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&m->priority_groups);
+ m->current_lock = SPIN_LOCK_UNLOCKED;
m->failed_lock = SPIN_LOCK_UNLOCKED;
m->min_io = MPATH_MIN_IO;
}
@@ -182,48 +180,51 @@
/*-----------------------------------------------------------------
* All paths should be tested periodically.
*---------------------------------------------------------------*/
-static void __fail_path(struct path *path)
+static void fail_path(struct path *path)
{
- if (path->has_failed)
- return;
+ unsigned long flags;
- /* FIXME: this is brain dead */
- if (!atomic_dec_and_test(&path->fail_count))
- return;
+ spin_lock_irqsave(&path->failed_lock, flags);
- path->has_failed = 1;
-// path->fail_time = jiffies;
- atomic_inc(&path->fail_total);
- list_move(&path->list, &path->pg->invalid_paths);
- path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
+ if (!path->has_failed) {
+ /* FIXME: this is brain dead */
+ if (--path->fail_count == 0) {
+ path->has_failed = 1;
+ path->fail_total++;
+ path->pg->ps->type->set_path_state(path->pg->ps,
+ path, 0);
+ }
+ }
+
+ spin_unlock_irqrestore(&path->failed_lock, flags);
}
-static void __recover_path(struct path *path)
+static void recover_path(struct path *path)
{
- if (!path->has_failed)
- return;
+ unsigned long flags;
+
+ spin_lock_irqsave(&path->failed_lock, flags);
+
+ if (path->has_failed) {
+ path->has_failed = 0;
+ path->fail_count = MPATH_FAIL_COUNT;
+ path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
+ }
- atomic_set(&path->fail_count, MPATH_FAIL_COUNT);
- list_move(&path->list, &path->pg->valid_paths);
- path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
- path->has_failed = 0;
+ spin_unlock_irqrestore(&path->failed_lock, flags);
}
static int test_endio(struct bio *bio, unsigned int done, int error)
{
struct path *path = (struct path *) bio->bi_private;
- struct multipath *m = path->pg->m;
- unsigned long flags;
if (bio->bi_size)
return 1;
- spin_lock_irqsave(&m->path_lock, flags);
if (error)
- __fail_path(path);
+ fail_path(path);
else
- __recover_path(path);
- spin_unlock_irqrestore(&m->path_lock, flags);
+ recover_path(path);
up(&path->test_lock);
return 0;
@@ -232,14 +233,15 @@
static void test_path(struct path *p)
{
if (down_trylock(&p->test_lock))
- return; /* last test io still pending */
+ /* last test io still pending or device is suspended */
+ return;
p->test_bio->bi_sector = 0;
p->test_bio->bi_bdev = p->dev->bdev;
p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
p->test_bio->bi_idx = 0;
- bio_list_add(&p->pg->m->test_ios, p->test_bio);
+ generic_make_request(p->test_bio);
}
/*-----------------------------------------------------------------
@@ -250,44 +252,32 @@
static LIST_HEAD(_mpaths);
static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED;
-static void submit_ios(struct bio *bio)
-{
- struct bio *next;
- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
- generic_make_request(bio);
- bio = next;
- }
-}
-
static void dispatch_failed_ios(struct multipath *m)
{
unsigned long flags;
- struct bio *bio;
+ struct bio *bio, *next;
spin_lock_irqsave(&m->failed_lock, flags);
bio = bio_list_get(&m->failed_ios);
spin_unlock_irqrestore(&m->failed_lock, flags);
- submit_ios(bio);
+ while (bio) {
+ next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
}
static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
{
struct priority_group *pg;
struct path *p;
- unsigned long flags;
- spin_lock_irqsave(&m->path_lock, flags);
list_for_each_entry (pg, &m->priority_groups, list) {
- list_for_each_entry (p, &pg->valid_paths, list)
- fn(p);
-
- list_for_each_entry (p, &pg->invalid_paths, list)
+ list_for_each_entry (p, &pg->paths, list)
fn(p);
}
- spin_unlock_irqrestore(&m->path_lock, flags);
}
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
@@ -303,7 +293,6 @@
list_for_each_entry (m, &_mpaths, list) {
dispatch_failed_ios(m);
iterate_paths(m, test_path);
- submit_ios(bio_list_get(&m->test_ios));
if (atomic_dec_and_test(&m->trigger_event))
dm_table_event(m->ti->table);
@@ -495,7 +484,7 @@
goto bad;
path->pg = pg;
- list_add_tail(&path->list, &pg->valid_paths);
+ list_add_tail(&path->list, &pg->paths);
consume(as, nr_params);
}
@@ -592,7 +581,6 @@
{
struct multipath *m = (struct multipath *) ti->private;
-// wait_for_scrub_ios(m);
spin_lock(&_mpath_lock);
list_del(&m->list);
spin_unlock(&_mpath_lock);
@@ -600,36 +588,40 @@
free_multipath(m);
}
-static struct priority_group *__find_priority_group(struct list_head *pgs)
+/* Must hold m->current_lock */
+static int __choose_path(struct multipath *m)
{
struct priority_group *pg;
+ struct path *path = NULL;
- list_for_each_entry (pg, pgs, list) {
- if (!list_empty(&pg->valid_paths))
- return pg;
+ /* loop through the priority groups until we find a valid path. */
+ list_for_each_entry (pg, &m->priority_groups, list) {
+ path = pg->ps->type->select_path(pg->ps);
+ if (path)
+ break;
}
- return NULL;
+ m->current_path = path;
+ m->current_count = m->min_io;
+ return 0;
}
-static int __choose_path(struct multipath *m)
+static struct path *get_current_path(struct multipath *m)
{
- struct priority_group *pg;
struct path *path;
+ unsigned long flags;
- /* get the priority group */
- pg = __find_priority_group(&m->priority_groups);
- if (!pg)
- return -EIO;
+ spin_lock_irqsave(&m->current_lock, flags);
- /* select a path */
- path = pg->ps->type->select_path(pg->ps);
- if (!path)
- return -EIO; /* No valid path found */
+ /* Do we need to select a new path? */
+ if (!m->current_path || --m->current_count == 0) {
+ __choose_path(m);
+ }
+ path = m->current_path;
- m->current_path = path;
- atomic_set(&m->count, m->min_io);
- return 0;
+ spin_unlock_irqrestore(&m->current_lock, flags);
+
+ return path;
}
static int multipath_map(struct dm_target *ti, struct bio *bio,
@@ -637,23 +629,10 @@
{
struct multipath *m = (struct multipath *) ti->private;
struct path *path;
- unsigned long flags;
-
- spin_lock_irqsave(&m->path_lock, flags);
-
- /*
- * Do we need to choose a new path?
- */
- if (!m->current_path || atomic_dec_and_test(&m->count)) {
- if (__choose_path(m)) {
- /* no paths */
- spin_unlock_irqrestore(&m->path_lock, flags);
- return -EIO;
- }
- }
- path = m->current_path;
- spin_unlock_irqrestore(&m->path_lock, flags);
+ path = get_current_path(m);
+ if (!path)
+ return -EIO;
/* map */
bio->bi_rw |= (1 << BIO_RW_FAILFAST);
@@ -664,46 +643,39 @@
/*
* Only called on the error path.
*/
-static struct path *__lookup_path(struct list_head *head,
- struct block_device *bdev)
-{
- struct path *p;
-
- list_for_each_entry (p, head, list)
- if (p->dev->bdev == bdev)
- return p;
-
- return NULL;
-}
-
-static struct path *__find_path(struct multipath *m, struct block_device *bdev)
+static struct path *find_path(struct multipath *m, struct block_device *bdev)
{
struct path *p;
struct priority_group *pg;
list_for_each_entry (pg, &m->priority_groups, list) {
- p = __lookup_path(&pg->valid_paths, bdev);
- if (p)
- return p;
-
- p = __lookup_path(&pg->invalid_paths, bdev);
- if (p)
- return p;
+ list_for_each_entry (p, &pg->paths, list) {
+ if (p->dev->bdev == bdev) {
+ return p;
+ }
+ }
}
return NULL;
}
-static int __remap_io(struct multipath *m, struct bio *bio)
+static int remap_io(struct multipath *m, struct bio *bio)
{
- int r;
+ unsigned long flags;
+ struct path *path;
- r = __choose_path(m);
- if (r)
- return r;
+ /* Force a new path to be chosen in case it
+ * was the current_path that caused an error.
+ */
+ spin_lock_irqsave(&m->current_lock, flags);
+ __choose_path(m);
+ spin_unlock_irqrestore(&m->current_lock, flags);
- /* remap */
- bio->bi_bdev = m->current_path->dev->bdev;
+ path = get_current_path(m);
+ if (!path)
+ return -EIO;
+
+ bio->bi_bdev = path->dev->bdev;
return 0;
}
@@ -717,11 +689,9 @@
if (error) {
struct path *path;
- spin_lock_irqsave(&m->path_lock, flags);
- path = __find_path(m, bio->bi_bdev);
- __fail_path(path);
- r = __remap_io(m, bio);
- spin_unlock_irqrestore(&m->path_lock, flags);
+ path = find_path(m, bio->bi_bdev);
+ fail_path(path);
+ r = remap_io(m, bio);
if (!r) {
/* queue for the daemon to resubmit */
@@ -752,7 +722,7 @@
iterate_paths(m, unlock_path);
}
-static inline int __pg_count(struct multipath *m)
+static inline int pg_count(struct multipath *m)
{
int count = 0;
struct priority_group *pg;
@@ -760,12 +730,11 @@
return count;
}
-static inline int __path_count(struct priority_group *pg)
+static inline int path_count(struct priority_group *pg)
{
int count = 0;
struct path *p;
- list_for_each_entry(p, &pg->valid_paths, list) count++;
- list_for_each_entry(p, &pg->invalid_paths, list) count++;
+ list_for_each_entry(p, &pg->paths, list) count++;
return count;
}
@@ -786,38 +755,25 @@
struct path *p;
char buffer[32];
- spin_lock_irqsave(&m->path_lock, flags);
-
switch (type) {
case STATUSTYPE_INFO:
- sz += snprintf(result + sz, maxlen - sz, "%u ", __pg_count(m));
+ sz += snprintf(result + sz, maxlen - sz, "%u ", pg_count(m));
list_for_each_entry(pg, &m->priority_groups, list) {
sz += snprintf(result + sz, maxlen - sz, "%u %u ",
- __path_count(pg),
+ path_count(pg),
pg->ps->type->info_args);
- list_for_each_entry(p, &pg->valid_paths, list) {
+ list_for_each_entry(p, &pg->paths, list) {
format_dev_t(buffer, p->dev->bdev->bd_dev);
+ spin_lock_irqsave(&p->failed_lock, flags);
sz += snprintf(result + sz, maxlen - sz,
- "%s A %u %u ", buffer,
- atomic_read(&p->fail_count),
- atomic_read(&p->fail_total));
- pg->ps->type->status(pg->ps, p, type,
- result + sz, maxlen - sz);
-
- sz = strlen(result);
- if (sz >= maxlen)
- break;
- }
- list_for_each_entry(p, &pg->invalid_paths, list) {
- format_dev_t(buffer, p->dev->bdev->bd_dev);
- sz += snprintf(result + sz, maxlen - sz,
- "%s F %u %u ", buffer,
- atomic_read(&p->fail_count),
- atomic_read(&p->fail_total));
+ "%s %s %u %u ", buffer,
+ p->has_failed ? "F" : "A",
+ p->fail_count, p->fail_total);
pg->ps->type->status(pg->ps, p, type,
result + sz, maxlen - sz);
+ spin_unlock_irqrestore(&p->failed_lock, flags);
sz = strlen(result);
if (sz >= maxlen)
@@ -829,25 +785,14 @@
case STATUSTYPE_TABLE:
sz += snprintf(result + sz, maxlen - sz, "%u %u ",
- m->test_interval, __pg_count(m));
+ m->test_interval, pg_count(m));
list_for_each_entry(pg, &m->priority_groups, list) {
sz += snprintf(result + sz, maxlen - sz, "%u %s %u %u ",
pg->priority, pg->ps->type->name,
- __path_count(pg), pg->ps->type->table_args);
-
- list_for_each_entry(p, &pg->valid_paths, list) {
- format_dev_t(buffer, p->dev->bdev->bd_dev);
- sz += snprintf(result + sz, maxlen - sz,
- "%s ", buffer);
- pg->ps->type->status(pg->ps, p, type,
- result + sz, maxlen - sz);
+ path_count(pg), pg->ps->type->table_args);
- sz = strlen(result);
- if (sz >= maxlen)
- break;
- }
- list_for_each_entry(p, &pg->invalid_paths, list) {
+ list_for_each_entry(p, &pg->paths, list) {
format_dev_t(buffer, p->dev->bdev->bd_dev);
sz += snprintf(result + sz, maxlen - sz,
"%s ", buffer);
@@ -863,8 +808,6 @@
break;
}
- spin_unlock_irqrestore(&m->path_lock, flags);
-
return 0;
}
More information about the dm-devel
mailing list