[Cluster-devel] cluster/group/daemon app.c

teigland at sourceware.org teigland at sourceware.org
Thu Aug 31 18:20:51 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland at sourceware.org	2006-08-31 18:20:51

Modified files:
	group/daemon   : app.c 

Log message:
	When we set a recovery event back to the FAIL_BEGIN state, make
	sure that we process the event once before processing any new
	messages.  This is probably a better fix for bz 202635 than the
	one I added previously, where we accept messages more liberally,
	i.e. in X_BEGIN states.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.41&r2=1.42

--- cluster/group/daemon/app.c	2006/08/22 14:36:20	1.41
+++ cluster/group/daemon/app.c	2006/08/31 18:20:51	1.42
@@ -1316,6 +1316,7 @@
 	event_t *ev, *rev;
 	node_t *node;
 	struct nodeid *id, *safe;
+	int rv = 0;
 
 	ev = a->current_event;
 	if (!ev)
@@ -1361,7 +1362,7 @@
 
 		list_del(&rev->list);
 		free_event(rev);
-		return 0;
+		return 1;
 	}
 
 	/* Before starting the rev we need to apply the node addition/removal
@@ -1383,7 +1384,7 @@
 		list_del(&rev->list);
 		a->current_event = rev;
 		free_event(ev);
-
+		rv = 1;
 	} else if (event_state_stopping(a)) {
 
 		/* We'll come back through here multiple times until all the
@@ -1397,9 +1398,7 @@
 		mark_node_stopped(a, rev->nodeid);
 		list_for_each_entry(id, &rev->extended, list)
 			mark_node_stopped(a, id->nodeid);
-
-		process_current_event(g);
-
+		rv = 1;
 	} else {
 		log_group(g, "rev for %d delayed for ev %d %s",
 			  rev->nodeid, ev->nodeid, ev_state_str(ev));
@@ -1411,7 +1410,7 @@
 	/* FIXME: if the current event is a leave and the leaving node has
 	   failed, then replace the current event with the rev */
 
-	return 0;
+	return rv;
 }
 
 static int process_app(group_t *g)
@@ -1428,7 +1427,18 @@
 			goto out;
 		rv += ret;
 
-		rv += recover_current_event(g);
+		ret = recover_current_event(g);
+		if (ret > 0) {
+			rv += ret;
+
+			/* it's important that we call process_current_event()
+			   when recover_current_event() returns 1 */
+
+			ret = process_current_event(g);
+			if (ret < 0)
+				goto out;
+			rv += ret;
+		}
 	} else {
 		/* We only take on a new non-recovery event if there are
 		   no recovery sets outstanding.  The new event may be




More information about the Cluster-devel mailing list