[dm-devel] Re: 2.6.28.2 & dm-snapshot or kcopyd Oops
Mikulas Patocka
mpatocka at redhat.com
Fri Feb 13 07:31:42 UTC 2009
On Fri, 13 Feb 2009, Mikulas Patocka wrote:
> Hi
>
> Few more questions:
> - do you write to the snapshots?
> - do you create/delete snapshots while the crash happens?
> - do all the snapshots have the same chunk size?
> - how many snapshots do you have?
another question: do you resize the origin device? or the snapshots?
One more test patch. (the code looks so terrible that I am thinking about
submitting you a patch that erases this logic all and reimplements it).
Mikulas
---
drivers/md/dm-exception-store.c | 2 +
drivers/md/dm-snap.c | 52 ++++++++++++++++++++++++++++++++++------
2 files changed, 47 insertions(+), 7 deletions(-)
Index: linux-2.6.28-clean/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.28-clean.orig/drivers/md/dm-snap.c 2009-02-13 07:49:49.000000000 +0100
+++ linux-2.6.28-clean/drivers/md/dm-snap.c 2009-02-13 08:26:54.000000000 +0100
@@ -90,6 +90,8 @@ struct dm_snap_pending_exception {
int started;
struct list_head list_all;
+
+ int magic;
};
/*
@@ -378,10 +380,13 @@ static struct dm_snap_pending_exception
atomic_inc(&s->pending_exceptions_count);
pe->snap = s;
+ pe->primary_pe = NULL;
+ pe->magic = 0x12345678;
spin_lock(&pending_spinlock);
list_for_each_entry(pe2, &pending_all, list_all) {
BUG_ON(pe2 == pe);
+ BUG_ON(pe2->primary_pe == pe);
}
list_add(&pe->list_all, &pending_all);
spin_unlock(&pending_spinlock);
@@ -400,13 +405,21 @@ static void free_pending_exception(struc
spin_lock(&pending_spinlock);
list_for_each_entry(pe2, &pending_all, list_all) {
+ if (pe2->primary_pe == pe && pe2 != pe) {
+ printk(KERN_ALERT "Freeing referenced exception, pe %p, pe->magic %x, pe2 %p, pe2->magic %x, pe2->primary_pe, %p, pe->ref_count %d\n", pe, pe->magic, pe2, pe2->magic, pe2->primary_pe, atomic_read(&pe->ref_count));
+ BUG();
+ }
+ }
+ list_for_each_entry(pe2, &pending_all, list_all) {
if (pe2 == pe) goto found;
}
+ printk(KERN_ALERT "Freeing free exception pe %p, magic %x, pe->primary_pe %p, pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, atomic_read(&pe->ref_count));
BUG();
found:
list_del(&pe->list_all);
spin_unlock(&pending_spinlock);
+ pe->magic = 0x90abcdef;
mempool_free(pe, s->pending_pool);
smp_mb__before_atomic_dec();
atomic_dec(&s->pending_exceptions_count);
@@ -862,6 +875,13 @@ static struct bio *put_pending_exception
primary_pe = pe->primary_pe;
+ if (primary_pe) {
+ if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+ printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+ BUG();
+ }
+ }
+
/*
* If this pe is involved in a write to the origin and
* it is the last sibling to complete then release
@@ -1313,11 +1333,19 @@ static int __origin_write(struct list_he
* Either every pe here has same
* primary_pe or none has one yet.
*/
- if (pe->primary_pe)
+ if (pe->primary_pe) {
primary_pe = pe->primary_pe;
- else {
+ if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+ printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+ BUG();
+ }
+ } else {
primary_pe = pe;
first = 1;
+ if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+ printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+ BUG();
+ }
}
bio_list_add(&primary_pe->origin_bios, bio);
@@ -1327,6 +1355,10 @@ static int __origin_write(struct list_he
if (!pe->primary_pe) {
pe->primary_pe = primary_pe;
+ if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+ printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+ BUG();
+ }
get_pending_exception(primary_pe);
}
@@ -1350,11 +1382,17 @@ static int __origin_write(struct list_he
* us here to remove the primary_pe and submit any origin_bios.
*/
- if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
- flush_bios(bio_list_get(&primary_pe->origin_bios));
- free_pending_exception(primary_pe);
- /* If we got here, pe_queue is necessarily empty. */
- return r;
+ if (first) {
+ if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+ printk(KERN_ALERT "Bad ref count, primary_pe %p, primary_pe->magic %x, primary_pe->primary_pe %p, primary_pe->ref_count %d\n", primary_pe, primary_pe->magic, primary_pe->primary_pe, atomic_read(&primary_pe->ref_count));
+ BUG();
+ }
+ if (atomic_dec_and_test(&primary_pe->ref_count)) {
+ flush_bios(bio_list_get(&primary_pe->origin_bios));
+ free_pending_exception(primary_pe);
+ /* If we got here, pe_queue is necessarily empty. */
+ return r;
+ }
}
/*
Index: linux-2.6.28-clean/drivers/md/dm-exception-store.c
===================================================================
--- linux-2.6.28-clean.orig/drivers/md/dm-exception-store.c 2009-02-13 08:20:45.000000000 +0100
+++ linux-2.6.28-clean/drivers/md/dm-exception-store.c 2009-02-13 08:20:50.000000000 +0100
@@ -621,6 +621,8 @@ struct dm_snap_pending_exception {
int started;
struct list_head list_all;
+
+ int magic;
};
static DEFINE_SPINLOCK(callback_spinlock);
More information about the dm-devel
mailing list