[dm-devel] [PATCHES]: dm lock optimization
Mikulas Patocka
mpatocka at redhat.com
Fri May 18 06:37:46 UTC 2012
Hi
Thanks for reviewing this. I applied your patch, the new version is here:
http://people.redhat.com/mpatocka/patches/kernel/dm-lock-optimization/
Mikulas
On Thu, 10 May 2012, Jun'ichi Nomura wrote:
> Hi Mikulas,
>
> On 05/02/12 11:17, Mikulas Patocka wrote:
> > I placed the new code using srcu here:
> > http://people.redhat.com/mpatocka/patches/kernel/dm-lock-optimization/
> >
> > It removes io_lock, map_lock and holders and replaces them with srcu.
>
> I've reviewed the patches and here are some comments.
> The 1st one seems to be a bug.
> Others are minor comments about readability.
>
> - There are functions destroying inactive table
> without SRCU synchronization.
> * table_load
> * table_clear
> * do_resume
> * __hash_remove
> It could cause use-after-free by the user of
> dm_get_inactive_table().
> synchronization is needed, outside of exclusive hash_lock.
>
> - I couldn't see the reason why locking is different
> for request-based in dm_wq_work().
>
> - We could use dm_get_live_table() in the caller of
> __split_and_process_bio() and pass the result to it.
> Then, naked use of srcu_read_lock/unlock and
> rcu_dereference can be eliminated.
> I think it helps readability.
>
> - md->map is directly referenced in dm_suspend/dm_resume.
> It's ok because md->suspend_lock is taken and nobody
> can replace md->map. I think it's worth putting a comment
> in 'struct mapped_device' about the rule.
>
> Attached patch explains the above comments by code.
>
> >> synchronize_rcu could be put in dm_table_destroy() instead of __bind().
> >> I think it's safer place to wait.
> >
> > I think the code is more readable if synchronizing rcu is just after
> > assigning the pointer that is protected by rcu.
>
> OK. I don't object.
>
> Thanks,
> ---
> Jun'ichi Nomura, NEC Corporation
>
> * Added a comment about locking for reading md->map
> * dm_sync_table() to wait for other processes to exit from
> read-side critical section
> * __split_and_process_bio() takes map
> * __hash_remove() returns a table pointer to be destroyed later
> * Added dm_sync_table() in functions in dm-ioctl.c to synchronize with
> inactive table users
>
> Index: linux-3.3/drivers/md/dm.c
> ===================================================================
> --- linux-3.3.orig/drivers/md/dm.c 2012-05-10 12:19:10.977242964 +0900
> +++ linux-3.3/drivers/md/dm.c 2012-05-10 13:54:30.150853855 +0900
> @@ -141,6 +141,8 @@ struct mapped_device {
>
> /*
> * The current mapping.
> + * Use dm_get_live_table{_fast} or take suspend_lock for
> + * dereference.
> */
> struct dm_table *map;
>
> @@ -561,6 +563,12 @@ void dm_put_live_table(struct mapped_dev
> srcu_read_unlock(&md->io_barrier, srcu_idx);
> }
>
> +void dm_sync_table(struct mapped_device *md)
> +{
> + synchronize_srcu(&md->io_barrier);
> + synchronize_rcu_expedited();
> +}
> +
> /*
> * A fast alternative to dm_get_live_table/dm_put_live_table.
> * The caller must not block between these two functions.
> @@ -1316,17 +1324,18 @@ static int __clone_and_map(struct clone_
> /*
> * Split the bio into several clones and submit it to targets.
> */
> -static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> +static void __split_and_process_bio(struct mapped_device *md,
> + struct dm_table *map, struct bio *bio)
> {
> struct clone_info ci;
> int error = 0;
>
> - ci.map = srcu_dereference(md->map, &md->io_barrier);
> - if (unlikely(!ci.map)) {
> + if (unlikely(!map)) {
> bio_io_error(bio);
> return;
> }
>
> + ci.map = map;
> ci.md = md;
> ci.io = alloc_io(md);
> ci.io->error = 0;
> @@ -1422,8 +1431,9 @@ static void _dm_request(struct request_q
> struct mapped_device *md = q->queuedata;
> int cpu;
> int srcu_idx;
> + struct dm_table *map;
>
> - srcu_idx = srcu_read_lock(&md->io_barrier);
> + map = dm_get_live_table(md, &srcu_idx);
>
> cpu = part_stat_lock();
> part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
> @@ -1432,7 +1442,7 @@ static void _dm_request(struct request_q
>
> /* if we're suspended, we have to queue this io for later */
> if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
> - srcu_read_unlock(&md->io_barrier, srcu_idx);
> + dm_put_live_table(md, srcu_idx);
>
> if (bio_rw(bio) != READA)
> queue_io(md, bio);
> @@ -1441,8 +1451,8 @@ static void _dm_request(struct request_q
> return;
> }
>
> - __split_and_process_bio(md, bio);
> - srcu_read_unlock(&md->io_barrier, srcu_idx);
> + __split_and_process_bio(md, map, bio);
> + dm_put_live_table(md, srcu_idx);
> return;
> }
>
> @@ -2115,8 +2125,7 @@ static struct dm_table *__bind(struct ma
> set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
> else
> clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
> - synchronize_srcu(&md->io_barrier);
> - synchronize_rcu_expedited();
> + dm_sync_table(md);
>
> return old_map;
> }
> @@ -2133,8 +2142,7 @@ static struct dm_table *__unbind(struct
>
> dm_table_event_callback(map, NULL, NULL);
> rcu_assign_pointer(md->map, NULL);
> - synchronize_srcu(&md->io_barrier);
> - synchronize_rcu_expedited();
> + dm_sync_table(md);
>
> return map;
> }
> @@ -2375,8 +2383,9 @@ static void dm_wq_work(struct work_struc
> work);
> struct bio *c;
> int srcu_idx;
> + struct dm_table *map;
>
> - srcu_idx = srcu_read_lock(&md->io_barrier);
> + map = dm_get_live_table(md, &srcu_idx);
>
> while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
> spin_lock_irq(&md->deferred_lock);
> @@ -2386,15 +2395,13 @@ static void dm_wq_work(struct work_struc
> if (!c)
> break;
>
> - if (dm_request_based(md)) {
> - srcu_read_unlock(&md->io_barrier, srcu_idx);
> + if (dm_request_based(md))
> generic_make_request(c);
> - srcu_idx = srcu_read_lock(&md->io_barrier);
> - } else
> - __split_and_process_bio(md, c);
> + else
> + __split_and_process_bio(md, map, c);
> }
>
> - srcu_read_unlock(&md->io_barrier, srcu_idx);
> + dm_put_live_table(md, srcu_idx);
> }
>
> static void dm_queue_flush(struct mapped_device *md)
> Index: linux-3.3/drivers/md/dm-ioctl.c
> ===================================================================
> --- linux-3.3.orig/drivers/md/dm-ioctl.c 2012-05-10 12:19:10.995242964 +0900
> +++ linux-3.3/drivers/md/dm-ioctl.c 2012-05-10 12:54:30.312180811 +0900
> @@ -250,7 +250,7 @@ static int dm_hash_insert(const char *na
> return -EBUSY;
> }
>
> -static void __hash_remove(struct hash_cell *hc)
> +static struct dm_table *__hash_remove(struct hash_cell *hc)
> {
> struct dm_table *table;
> int srcu_idx;
> @@ -267,10 +267,13 @@ static void __hash_remove(struct hash_ce
> dm_table_event(table);
> dm_put_live_table(hc->md, srcu_idx);
>
> + table = NULL;
> if (hc->new_map)
> - dm_table_destroy(hc->new_map);
> + table = hc->new_map;
> dm_put(hc->md);
> free_cell(hc);
> +
> + return table;
> }
>
> static void dm_hash_remove_all(int keep_open_devices)
> @@ -278,6 +281,7 @@ static void dm_hash_remove_all(int keep_
> int i, dev_skipped;
> struct hash_cell *hc;
> struct mapped_device *md;
> + struct dm_table *t;
>
> retry:
> dev_skipped = 0;
> @@ -295,10 +299,14 @@ retry:
> continue;
> }
>
> - __hash_remove(hc);
> + t = __hash_remove(hc);
>
> up_write(&_hash_lock);
>
> + if (t) {
> + dm_sync_table(md);
> + dm_table_destroy(t);
> + }
> dm_put(md);
> if (likely(keep_open_devices))
> dm_destroy(md);
> @@ -808,6 +816,7 @@ static int dev_remove(struct dm_ioctl *p
> struct hash_cell *hc;
> struct mapped_device *md;
> int r;
> + struct dm_table *t;
>
> down_write(&_hash_lock);
> hc = __find_device_hash_cell(param);
> @@ -831,9 +840,14 @@ static int dev_remove(struct dm_ioctl *p
> return r;
> }
>
> - __hash_remove(hc);
> + t = __hash_remove(hc);
> up_write(&_hash_lock);
>
> + if (t) {
> + dm_sync_table(md);
> + dm_table_destroy(t);
> + }
> +
> if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
> param->flags |= DM_UEVENT_GENERATED_FLAG;
>
> @@ -997,6 +1011,7 @@ static int do_resume(struct dm_ioctl *pa
>
> old_map = dm_swap_table(md, new_map);
> if (IS_ERR(old_map)) {
> + dm_sync_table(md);
> dm_table_destroy(new_map);
> dm_put(md);
> return PTR_ERR(old_map);
> @@ -1014,6 +1029,10 @@ static int do_resume(struct dm_ioctl *pa
> param->flags |= DM_UEVENT_GENERATED_FLAG;
> }
>
> + /*
> + * Since dm_swap_table synchronizes RCU, nobody should be in
> + * read-side critical section already.
> + */
> if (old_map)
> dm_table_destroy(old_map);
>
> @@ -1225,7 +1244,7 @@ static int table_load(struct dm_ioctl *p
> {
> int r;
> struct hash_cell *hc;
> - struct dm_table *t;
> + struct dm_table *t, *old_map = NULL;
> struct mapped_device *md;
> struct target_type *immutable_target_type;
>
> @@ -1281,14 +1300,14 @@ static int table_load(struct dm_ioctl *p
> hc = dm_get_mdptr(md);
> if (!hc || hc->md != md) {
> DMWARN("device has been removed from the dev hash table.");
> - dm_table_destroy(t);
> up_write(&_hash_lock);
> + dm_table_destroy(t);
> r = -ENXIO;
> goto out;
> }
>
> if (hc->new_map)
> - dm_table_destroy(hc->new_map);
> + old_map = hc->new_map;
> hc->new_map = t;
> up_write(&_hash_lock);
>
> @@ -1296,6 +1315,11 @@ static int table_load(struct dm_ioctl *p
> __dev_status(md, param);
>
> out:
> + if (old_map) {
> + dm_sync_table(md);
> + dm_table_destroy(old_map);
> + }
> +
> dm_put(md);
>
> return r;
> @@ -1305,6 +1329,7 @@ static int table_clear(struct dm_ioctl *
> {
> struct hash_cell *hc;
> struct mapped_device *md;
> + struct dm_table *old_map = NULL;
>
> down_write(&_hash_lock);
>
> @@ -1316,7 +1341,7 @@ static int table_clear(struct dm_ioctl *
> }
>
> if (hc->new_map) {
> - dm_table_destroy(hc->new_map);
> + old_map = hc->new_map;
> hc->new_map = NULL;
> }
>
> @@ -1325,6 +1350,10 @@ static int table_clear(struct dm_ioctl *
> __dev_status(hc->md, param);
> md = hc->md;
> up_write(&_hash_lock);
> + if (old_map) {
> + dm_sync_table(md);
> + dm_table_destroy(old_map);
> + }
> dm_put(md);
>
> return 0;
> Index: linux-3.3/include/linux/device-mapper.h
> ===================================================================
> --- linux-3.3.orig/include/linux/device-mapper.h 2012-05-10 10:03:06.000000000 +0900
> +++ linux-3.3/include/linux/device-mapper.h 2012-05-10 12:45:48.510196182 +0900
> @@ -364,6 +364,7 @@ int dm_table_complete(struct dm_table *t
> */
> struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx);
> void dm_put_live_table(struct mapped_device *md, int srcu_idx);
> +void dm_sync_table(struct mapped_device *md);
>
> /*
> * Queries
>
More information about the dm-devel
mailing list