[dm-devel] dmcache RAID1 bug?
Leonardo Santos
heiligerstein at gmail.com
Wed Nov 19 21:39:44 UTC 2014
After run regression tests using 'git bisect', I identify that the error
was created after commit 8c081b52c6833a30a69ea3bdcef316eccc740c87
To reproduce this error just:
- create a raid1 HDDs for origin device;
- create a cache device with SSDs (could or not be raid)
- create a metadata device with SSDs (could or not be raid)
- create a cache device using prior devices
- kernel crashes.
I attached the bitseclog and diff file!
On Tue, Nov 18, 2014 at 4:18 PM, Leonardo Santos <heiligerstein at gmail.com>
wrote:
> I tested updating my xbox to same production kernel (3.17.2) and the
> problem appears now.
>
> The old kernel (ok) is 3.13.0-37.
>
> About updating lvm, I think it should not be, because this time I did not
> use them and did creating only through the dmsetup command.
>
> I will continue testing with other kernels.
>
> Can anyone help me?
>
>
> On Mon, Nov 17, 2014 at 4:14 PM, Leonardo Santos <heiligerstein at gmail.com>
> wrote:
>
>> On Mon, Nov 17, 2014 at 12:46 PM, Mike Snitzer <snitzer at redhat.com>
>> wrote:
>>
>>> On Mon, Nov 17 2014 at 9:24am -0500,
>>> Leonardo Santos <heiligerstein at gmail.com> wrote:
>>>
>>> > I'm working in tests for my msc thesis envolving dm_cache module. I'm
>>> > having problems with the following scenario:
>>> >
>>> > Slowstorage: RAID1 (mdadm) with 4 HDDs;
>>> > - /dev/md/raid1_4ssds
>>>
>>> You mean /dev/md/raid1_4hdds ?
>>>
>>
>> A symlink to raid1 created using 4 ssds.
>>
>> mdadm --create --verbose /dev/md/raid0_4ssds --raid-devices=4 --force
>> --level=1 /dev/sdd /dev/sdc /dev/sdb /dev/sda
>>
>>
>>>
>>> > -- /dev/sda
>>> > -- /dev/sdb
>>> > -- /dev/sdc
>>> > -- /dev/sdd
>>> >
>>> > Faststorage: RAID1 (mdadm) with 4 SSDs
>>> > - /dev/md/raid1_4hdds
>>>
>>> And /dev/md/raid1_4ssds here?
>>>
>>
>> mdadm --create --verbose /dev/md/raid0_4hdds --raid-devices=4 --force
>> --level=1 /dev/sdh /dev/sdg /dev/sdf /dev/sde
>>
>>
>>>
>>> > -- /dev/sde
>>> > -- /dev/sdf
>>> > -- /dev/sdg
>>> > -- /dev/sdh
>>> >
>>> >
>>> > Cachestorage: dm_cache envolving both devices
>>> > - /dev/mapper/dmcache
>>> > -- /dev/md/raid1_4ssds
>>> > -- /dev/md/raid1_4hdds
>>> >
>>>
>>
>> Commands:
>> vgcreate cache /dev/md/raid0_4ssds -y
>> lvcreate cache -n metadata -l 5%FREE
>> lvcreate cache -n block -l 100%FREE
>> dd if=/dev/zero of=/dev/cache/metadata bs=1M count=100
>> # 16773120 = blockdev --getsize /dev/md/raid0_4hdds
>> dmsetup create dmcache --table '0 16773120 cache /dev/cache/metadata
>> /dev/cache/block /dev/md/raid0_4hdds 512 1 writeback default 0'
>>
>>
>>> > Other informations (strange facts):
>>> > In RAID0 it's working very well.
>>> > In a virtual machine (vbox) it's working as well (in this case disks
>>> have
>>> > 2GB only for big tests automatization, in production are 160GB SSDs
>>> and 2TB
>>> > HDDs);
>>> >
>>> > Have you seen about this error? Syslog is bellow.
>>>
>>> Never seen this before... have you taken care to zero the dm-cache
>>> metadata's superblock (first 512B of the cache metadata device) each
>>> time you've reconfigured the cache device?
>>>
>>> Have you tried using the latest lvm2 to create the dm-cache device?
>>>
>>
>> I've used LVM2 from distribution (debian) and a latest kernel version.
>>
>> LVM2 latest today is 2.02.112 (I'll try with latest LVM2 version and post
>> results).
>>
>> # lvm version
>> LVM version: 2.02.95(2) (2012-03-06)
>> Library version: 1.02.74 (2012-03-06)
>> Driver version: 4.27.0
>>
>> # uname -a
>> Linux steinhager 3.17.2 #2 SMP Fri Nov 7 12:49:16 BRST 2014 x86_64
>> GNU/Linux
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/dm-devel/attachments/20141119/cccc9ba5/attachment.htm>
-------------- next part --------------
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326..2a156af 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}
+/*
+ * You must increment the deferred set whilst the prison cell is held. To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+ struct dm_bio_prison_cell *cell)
+{
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+ BUG_ON(!cell);
+ BUG_ON(pb->all_io_entry);
+
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
static void issue(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+ inc_ds(cache, bio, cell);
+ issue(cache, bio);
+}
+
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+ /*
+ * No need to inc_ds() here, since the cell will be held for the
+ * duration of the io.
+ */
generic_make_request(bio);
}
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
return;
INIT_LIST_HEAD(&work);
- if (pb->all_io_entry)
- dm_deferred_entry_dec(pb->all_io_entry, &work);
+ dm_deferred_entry_dec(pb->all_io_entry, &work);
if (!list_empty(&work))
queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
else
remap_to_cache(cache, bio, 0);
+ /*
+ * REQ_FLUSH is not directed at any particular block so we don't
+ * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH
+ * by dm-core.
+ */
issue(cache, bio);
}
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
- struct per_bio_data *pb,
- dm_oblock_t oblock, dm_cblock_t cblock)
-{
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
- remap_to_cache_dirty(cache, bio, oblock, cblock);
- issue(cache, bio);
-}
-
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
- size_t pb_data_size = get_per_bio_data_size(cache);
- struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
bool passthrough = passthrough_mode(&cache->features);
bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
} else {
/* FIXME: factor out issue_origin() */
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
}
} else {
inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
if (bio_data_dir(bio) == WRITE &&
writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock)) {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- issue(cache, bio);
- } else
- issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+
+ } else {
+ remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+ }
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
break;
case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
bio_list_init(&cache->deferred_flush_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -2406,16 +2432,13 @@ out:
return r;
}
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
{
- struct cache *cache = ti->private;
-
int r;
dm_oblock_t block = get_bio_block(cache, bio);
size_t pb_data_size = get_per_bio_data_size(cache);
bool can_migrate = false;
bool discarded_block;
- struct dm_bio_prison_cell *cell;
struct policy_result lookup_result;
struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
@@ -2437,15 +2460,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
/*
* Check to see if that block is currently migrating.
*/
- cell = alloc_prison_cell(cache);
- if (!cell) {
+ *cell = alloc_prison_cell(cache);
+ if (!*cell) {
defer_bio(cache, bio);
return DM_MAPIO_SUBMITTED;
}
- r = bio_detain(cache, block, bio, cell,
+ r = bio_detain(cache, block, bio, *cell,
(cell_free_fn) free_prison_cell,
- cache, &cell);
+ cache, cell);
if (r) {
if (r < 0)
defer_bio(cache, bio);
@@ -2458,11 +2481,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
bio, &lookup_result);
if (r == -EWOULDBLOCK) {
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
return DM_MAPIO_SUBMITTED;
} else if (r) {
DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2476,52 +2500,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
* We need to invalidate this block, so
* defer for the worker thread.
*/
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
r = DM_MAPIO_SUBMITTED;
} else {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
inc_miss_counter(cache, bio);
remap_to_origin_clear_discard(cache, bio, block);
-
- cell_defer(cache, cell, false);
}
} else {
inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock))
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
else
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
- cell_defer(cache, cell, false);
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (pb->req_nr != 0) {
/*
* This is a duplicate writethrough io that is no
* longer needed because the block has been demoted.
*/
bio_endio(bio, 0);
- cell_defer(cache, cell, false);
- return DM_MAPIO_SUBMITTED;
- } else {
+ cell_defer(cache, *cell, false);
+ r = DM_MAPIO_SUBMITTED;
+
+ } else
remap_to_origin_clear_discard(cache, bio, block);
- cell_defer(cache, cell, false);
- }
+
break;
default:
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
}
@@ -2529,6 +2545,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+ int r;
+ struct dm_bio_prison_cell *cell;
+ struct cache *cache = ti->private;
+
+ r = __cache_map(cache, bio, &cell);
+ if (r == DM_MAPIO_REMAPPED) {
+ inc_ds(cache, bio, cell);
+ cell_defer(cache, cell, false);
+ }
+
+ return r;
+}
+
static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct cache *cache = ti->private;
@@ -3072,7 +3103,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: bisectlog
Type: application/octet-stream
Size: 3423 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/dm-devel/attachments/20141119/cccc9ba5/attachment.obj>
More information about the dm-devel
mailing list