[dm-devel] Re: 2.6.28.2 & dm-snapshot or kcopyd Oops

Mikulas Patocka mpatocka at redhat.com
Fri Feb 13 07:31:42 UTC 2009


On Fri, 13 Feb 2009, Mikulas Patocka wrote:

> Hi
> 
> Few more questions:
> - do you write to the snapshots?
> - do you create/delete snapshots while the crash happens?
> - do all the snapshots have the same chunk size?
> - how many snapshots do you have?

another question: do you resize the origin device? or the snapshots?

One more test patch. (the code looks so terrible that I am thinking about 
submitting you a patch that erases this logic all and reimplements it).

Mikulas

---
 drivers/md/dm-exception-store.c |    2 +
 drivers/md/dm-snap.c            |   52 ++++++++++++++++++++++++++++++++++------
 2 files changed, 47 insertions(+), 7 deletions(-)

Index: linux-2.6.28-clean/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.28-clean.orig/drivers/md/dm-snap.c	2009-02-13 07:49:49.000000000 +0100
+++ linux-2.6.28-clean/drivers/md/dm-snap.c	2009-02-13 08:26:54.000000000 +0100
@@ -90,6 +90,8 @@ struct dm_snap_pending_exception {
 	int started;
 
 	struct list_head list_all;
+
+	int magic;
 };
 
 /*
@@ -378,10 +380,13 @@ static struct dm_snap_pending_exception 
 
 	atomic_inc(&s->pending_exceptions_count);
 	pe->snap = s;
+	pe->primary_pe = NULL;
+	pe->magic = 0x12345678;
 
 	spin_lock(&pending_spinlock);
 	list_for_each_entry(pe2, &pending_all, list_all) {
 		BUG_ON(pe2 == pe);
+		BUG_ON(pe2->primary_pe == pe);
 	}
 	list_add(&pe->list_all, &pending_all);
 	spin_unlock(&pending_spinlock);
@@ -400,13 +405,21 @@ static void free_pending_exception(struc
 
 	spin_lock(&pending_spinlock);
 	list_for_each_entry(pe2, &pending_all, list_all) {
+		if (pe2->primary_pe == pe && pe2 != pe) {
+			printk(KERN_ALERT "Freeing referenced exception, pe %p, pe->magic %x, pe2 %p, pe2->magic %x, pe2->primary_pe, %p, pe->ref_count %d\n", pe, pe->magic, pe2, pe2->magic, pe2->primary_pe, atomic_read(&pe->ref_count));
+			BUG();
+		}
+	}
+	list_for_each_entry(pe2, &pending_all, list_all) {
 		if (pe2 == pe) goto found;
 	}
+	printk(KERN_ALERT "Freeing free exception pe %p, magic %x, pe->primary_pe %p, pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, atomic_read(&pe->ref_count));
 	BUG();
 	found:
 	list_del(&pe->list_all);
 	spin_unlock(&pending_spinlock);
 
+	pe->magic = 0x90abcdef;
 	mempool_free(pe, s->pending_pool);
 	smp_mb__before_atomic_dec();
 	atomic_dec(&s->pending_exceptions_count);
@@ -862,6 +875,13 @@ static struct bio *put_pending_exception
 
 	primary_pe = pe->primary_pe;
 
+	if (primary_pe) {
+		if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+			printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+			BUG();
+		}
+	}
+
 	/*
 	 * If this pe is involved in a write to the origin and
 	 * it is the last sibling to complete then release
@@ -1313,11 +1333,19 @@ static int __origin_write(struct list_he
 			 * Either every pe here has same
 			 * primary_pe or none has one yet.
 			 */
-			if (pe->primary_pe)
+			if (pe->primary_pe) {
 				primary_pe = pe->primary_pe;
-			else {
+				if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+					printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+					BUG();
+				}
+			} else {
 				primary_pe = pe;
 				first = 1;
+				if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+					printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+					BUG();
+				}
 			}
 
 			bio_list_add(&primary_pe->origin_bios, bio);
@@ -1327,6 +1355,10 @@ static int __origin_write(struct list_he
 
 		if (!pe->primary_pe) {
 			pe->primary_pe = primary_pe;
+			if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+				printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count));
+				BUG();
+			}
 			get_pending_exception(primary_pe);
 		}
 
@@ -1350,11 +1382,17 @@ static int __origin_write(struct list_he
 	 * us here to remove the primary_pe and submit any origin_bios.
 	 */
 
-	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
-		flush_bios(bio_list_get(&primary_pe->origin_bios));
-		free_pending_exception(primary_pe);
-		/* If we got here, pe_queue is necessarily empty. */
-		return r;
+	if (first) {
+		if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) {
+			printk(KERN_ALERT "Bad ref count, primary_pe %p, primary_pe->magic %x, primary_pe->primary_pe %p, primary_pe->ref_count %d\n", primary_pe, primary_pe->magic, primary_pe->primary_pe, atomic_read(&primary_pe->ref_count));
+			BUG();
+		}
+		if (atomic_dec_and_test(&primary_pe->ref_count)) {
+			flush_bios(bio_list_get(&primary_pe->origin_bios));
+			free_pending_exception(primary_pe);
+			/* If we got here, pe_queue is necessarily empty. */
+			return r;
+		}
 	}
 
 	/*
Index: linux-2.6.28-clean/drivers/md/dm-exception-store.c
===================================================================
--- linux-2.6.28-clean.orig/drivers/md/dm-exception-store.c	2009-02-13 08:20:45.000000000 +0100
+++ linux-2.6.28-clean/drivers/md/dm-exception-store.c	2009-02-13 08:20:50.000000000 +0100
@@ -621,6 +621,8 @@ struct dm_snap_pending_exception {
 	int started;
 
 	struct list_head list_all;
+
+	int magic;
 };
 
 static DEFINE_SPINLOCK(callback_spinlock);




More information about the dm-devel mailing list