[Cluster-devel] cluster/group/daemon joinleave.c gd_internal.h ...

teigland at sourceware.org teigland at sourceware.org
Wed Jun 28 22:16:40 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland at sourceware.org	2006-06-28 22:16:37

Modified files:
	group/daemon   : joinleave.c gd_internal.h app.c 

Log message:
	- extra checking and debugging when events get backlogged
	- prevent joins while we're still leaving and leaves while
	we're still joining

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/joinleave.c.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/gd_internal.h.diff?cvsroot=cluster&r1=1.35&r2=1.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.33&r2=1.34

--- cluster/group/daemon/joinleave.c	2006/06/21 20:43:54	1.16
+++ cluster/group/daemon/joinleave.c	2006/06/28 22:16:36	1.17
@@ -93,7 +93,7 @@
 	a->client = ci;
 
 	log_debug("%d:%s got join", level, name);
-
+	g->joining = 1;
 	rv = do_cpg_join(g);
  out:
 	return rv;
@@ -102,6 +102,7 @@
 int do_leave(char *name, int level)
 {
 	group_t *g;
+	event_t *ev;
 	int rv;
 
 	g = find_group_level(name, level);
@@ -113,8 +114,30 @@
 		return -EINVAL;
 	}
 
-	log_debug("%d:%s got leave", level, name);
+	if (g->joining) {
+		log_group(g, "leave: still joining");
+		return -EAGAIN;
+	}
+
+	if (g->leaving) {
+		log_group(g, "leave: already leaving");
+		return -EBUSY;
+	}
 
+	ev = g->app->current_event;	/* NULL when no event is current */
+	if (ev && ev->nodeid == our_nodeid) {
+		log_group(g, "leave: busy event %llx state %s",
+			  ev->id, ev_state_str(ev));
+		return -EAGAIN;
+	}
+
+	list_for_each_entry(ev, &g->app->events, list) {
+		ASSERT(ev->nodeid != our_nodeid);
+		log_group(g, "event id %llx", ev->id);
+	}
+
+	log_debug("%d:%s got leave", level, name);
+	g->leaving = 1;
 	rv = do_cpg_leave(g);
 	return rv;
 }
--- cluster/group/daemon/gd_internal.h	2006/06/22 18:39:26	1.35
+++ cluster/group/daemon/gd_internal.h	2006/06/28 22:16:36	1.36
@@ -169,6 +169,8 @@
 	int			cpg_fd;
 	int			cpg_client;
 	int			have_set_id;
+	int			joining;
+	int			leaving;
 };
 
 struct app {
--- cluster/group/daemon/app.c	2006/06/22 21:12:33	1.33
+++ cluster/group/daemon/app.c	2006/06/28 22:16:36	1.34
@@ -525,16 +525,52 @@
 	}
 }
 
+event_t *search_event(group_t *g, int nodeid)
+{
+	event_t *ev;
+
+	list_for_each_entry(ev, &g->app->events, list) {
+		if (ev->nodeid == nodeid)
+			return ev;
+	}
+	return NULL;
+}
+
+void dump_queued_events(group_t *g)
+{
+	event_t *ev;
+
+	list_for_each_entry(ev, &g->app->events, list) {
+		log_group(g, "    queued ev %d %llx %s",
+			  ev->nodeid, ev->id, ev_state_str(ev));
+	}
+}
+
 int queue_app_join(group_t *g, int nodeid)
 {
 	event_t *ev;
 
+	/* sanity check */
+	ev = g->app->current_event;
+	if (ev && ev->nodeid == nodeid) {
+		log_group(g, "queue_app_join: current event %d %llx %s",
+			  nodeid, ev->id, ev_state_str(ev));
+	}
+
+	/* sanity check */
+	ev = search_event(g, nodeid);
+	if (ev) {
+		log_group(g, "queue_app_join: queued event %d %llx %s",
+			  nodeid, ev->id, ev_state_str(ev));
+	}
+
 	ev = create_event(g);
 	ev->nodeid = nodeid;
 	ev->state = EST_JOIN_BEGIN;
 	ev->id = make_event_id(g, EST_JOIN_BEGIN, nodeid);
 
 	log_group(g, "queue join event for nodeid %d", nodeid);
+	dump_queued_events(g);
 
 	if (nodeid == our_nodeid)
 		add_event_nodes(g, ev);
@@ -547,12 +583,27 @@
 {
 	event_t *ev;
 
+	/* sanity check */
+	ev = g->app->current_event;
+	if (ev && ev->nodeid == nodeid) {
+		log_group(g, "queue_app_leave: current event %d %llx %s",
+			  nodeid, ev->id, ev_state_str(ev));
+	}
+
+	/* sanity check */
+	ev = search_event(g, nodeid);
+	if (ev) {
+		log_group(g, "queue_app_leave: queued event %d %llx %s",
+			  nodeid, ev->id, ev_state_str(ev));
+	}
+
 	ev = create_event(g);
 	ev->nodeid = nodeid;
 	ev->state = EST_LEAVE_BEGIN;
 	ev->id = make_event_id(g, EST_LEAVE_BEGIN, nodeid);
 
 	log_group(g, "queue leave event for nodeid %d", nodeid);
+	dump_queued_events(g);
 
 	list_add_tail(&ev->list, &g->app->events);
 	return 0;
@@ -845,8 +896,10 @@
 	case EST_JOIN_ALL_STARTED:
 		app_finish(a);
 
-		if (is_our_join(ev))
+		if (is_our_join(ev)) {
 			purge_messages(g);
+			g->joining = 0;
+		}
 		free_event(ev);
 		a->current_event = NULL;
 		rv = 1;




More information about the Cluster-devel mailing list