[Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-server.c
jbrassow at sourceware.org
jbrassow at sourceware.org
Wed Apr 4 21:36:02 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL45
Changes by: jbrassow at sourceware.org 2007-04-04 22:36:02
Modified files:
cmirror-kernel/src: dm-cmirror-server.c
Log message:
Bug 235252: cmirror synchronization deadlocked waiting for response fro...
Moved the check for recovery/write conflict to flush from mark_region
to avoid potential conflicts that were causing writes to indefinitely
hang on failure conditions.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL45&r1=1.1.2.26.2.2&r2=1.1.2.26.2.3
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/03 18:23:01 1.1.2.26.2.2
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/04 21:36:01 1.1.2.26.2.3
@@ -223,10 +223,13 @@
return count;
}
+struct region_user *find_ru_by_region(struct log_c *lc, region_t region);
static int _core_get_resync_work(struct log_c *lc, region_t *region)
{
+ int sync_search, conflict = 0;
+
if (lc->recovering_region != (uint64_t)-1) {
- DMDEBUG("Someone is already recovering (%Lu)", lc->recovering_region);
+ DMDEBUG("Someone is already recovering region %Lu", lc->recovering_region);
return 0;
}
@@ -242,16 +245,27 @@
return 0;
}
}
- *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
- lc->region_count,
- lc->sync_search);
- lc->sync_search = *region + 1;
+ for (sync_search = lc->sync_search;
+ sync_search < lc->region_count;
+ sync_search = (*region + 1)) {
+ *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
+ lc->region_count,
+ sync_search);
+ if (find_ru_by_region(lc, *region)) {
+ conflict = 1;
+ DMDEBUG("Recovery blocked by outstanding write on region %Lu",
+ *region);
+ } else {
+ break;
+ }
+ }
+ if (!conflict)
+ lc->sync_search = *region + 1;
if (*region >= lc->region_count)
return 0;
lc->recovering_region = *region;
- DMDEBUG("Assigning recovery work: %Lu", *region);
return 1;
}
@@ -374,6 +388,8 @@
bad_count++;
log_clear_bit(lc, lc->sync_bits, ru->ru_region);
if (ru->ru_rw == RU_RECOVER) {
+ DMINFO("Failed node was recovering region %Lu - cleared",
+ ru->ru_region);
lc->recovering_region = (uint64_t)-1;
}
list_del(&ru->ru_list);
@@ -523,14 +539,19 @@
log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
list_add(&new->ru_list, &lc->region_users);
} else if (ru->ru_rw == RU_RECOVER) {
+ /*
+ * The flush will block if a write conflicts with a
+ * recovering region. In the meantime, we add this
+ * entry to the tail of the list so the recovery
+ * gets cleared first.
+ */
DMDEBUG("Attempt to mark a region " SECTOR_FORMAT
"/%s which is being recovered.",
lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
DMDEBUG("Current recoverer: %u", ru->ru_nodeid);
DMDEBUG("Mark requester : %u", who);
-
- mempool_free(new, region_user_pool);
- return -EBUSY;
+ log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
+ list_add_tail(&new->ru_list, &lc->region_users);
} else if (!find_ru(lc, who, lr->u.lr_region)) {
list_add(&new->ru_list, &ru->ru_list);
} else {
@@ -569,6 +590,34 @@
static int server_flush(struct log_c *lc)
{
int r = 0;
+ int count = 0;
+ struct region_user *ru, *ru2;
+
+ if (lc->recovering_region != (uint64_t)-1) {
+ list_for_each_entry(ru, &lc->region_users, ru_list)
+ if (ru->ru_region == lc->recovering_region)
+ count++;
+
+ if (count > 1) {
+ list_for_each_entry(ru, &lc->region_users, ru_list)
+ if (ru->ru_rw == RU_RECOVER)
+ break;
+
+ DMDEBUG("Flush includes region which is being recovered (%u/%Lu). Delaying...",
+ ru->ru_nodeid, ru->ru_region);
+ DMDEBUG("Recovering region: %Lu", lc->recovering_region);
+ DMDEBUG(" sync_bit: %s, clean_bit: %s",
+ log_test_bit(lc->sync_bits, lc->recovering_region) ? "set" : "unset",
+ log_test_bit(lc->clean_bits, lc->recovering_region) ? "set" : "unset");
+
+ list_for_each_entry(ru2, &lc->region_users, ru_list)
+ if (ru->ru_region == ru2->ru_region)
+ DMDEBUG(" %s", (ru2->ru_rw == RU_RECOVER) ? "recover" :
+ (ru2->ru_rw == RU_WRITE) ? "writer" : "unknown");
+
+ return -EBUSY;
+ }
+ }
r = write_bits(lc);
if (!r) {
@@ -597,6 +646,7 @@
new->ru_region = lr->u.lr_region_rtn;
new->ru_rw = RU_RECOVER;
list_add(&new->ru_list, &lc->region_users);
+ DMDEBUG("Assigning recovery work to %u: %Lu", who, new->ru_region);
} else {
mempool_free(new, region_user_pool);
}
@@ -624,6 +674,9 @@
log_set_bit(lc, lc->sync_bits, lr->u.lr_region);
lc->sync_count++;
}
+ lc->sync_pass = 0;
+
+ DMDEBUG("Resync work completed: %Lu", lr->u.lr_region);
} else if (log_test_bit(lc->sync_bits, lr->u.lr_region)) {
/* gone again: lc->sync_count--;*/
log_clear_bit(lc, lc->sync_bits, lr->u.lr_region);
More information about the Cluster-devel
mailing list