[Cluster-devel] cluster/cman/daemon cnxman-private.h commands.c

pcaulfield at sourceware.org pcaulfield at sourceware.org
Fri Sep 22 12:35:43 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2006-09-22 12:35:43

Modified files:
	cman/daemon    : cnxman-private.h commands.c 

Log message:
	Cope with a node being fenced manually and then going offline (i.e. someone
	else fenced it). Some further modifications to fenced can then prevent
	double-fencing, with the following caveats:
	
	- the clocks on the nodes are reasonably in sync
	- the node goes down within two minutes of the fence message being received
	
	If there is any ambiguity then cman will mark the node as unfenced so that
	fenced can do the job anyway.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.23&r2=1.24
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.49&r2=1.50

--- cluster/cman/daemon/cnxman-private.h	2006/08/17 13:22:39	1.23
+++ cluster/cman/daemon/cnxman-private.h	2006/09/22 12:35:42	1.24
@@ -144,6 +144,7 @@
 
 #define NODE_FLAGS_GOTTRANSITION      1
 #define NODE_FLAGS_FENCED             2
+#define NODE_FLAGS_FENCEDWHILEUP      4
 
 /* There's one of these for each node in the cluster */
 struct cluster_node {
--- cluster/cman/daemon/commands.c	2006/09/12 12:45:52	1.49
+++ cluster/cman/daemon/commands.c	2006/09/22 12:35:42	1.50
@@ -1053,6 +1053,8 @@
 		return -EINVAL;
 
 	node->flags |= NODE_FLAGS_FENCED;
+	if (node->state == NODESTATE_MEMBER)
+		node->flags |= NODE_FLAGS_FENCEDWHILEUP;
 
 	/* Tell the rest of the cluster (and us!) */
 	fence_msg->cmd = CLUSTER_MSG_FENCESTATUS;
@@ -1083,7 +1085,7 @@
 
 	f->nodeid = nodeid;
 	f->fence_time = node->fence_time;
-	f->flags = node->flags;
+	f->flags = node->flags&NODE_FLAGS_FENCED;
 
 	if (node->fence_agent)
 		strcpy(f->fence_agent, node->fence_agent);
@@ -1621,8 +1623,13 @@
 	if (node->fence_agent)
 		free(node->fence_agent);
 	node->fence_agent = strdup(msg->agent);
-	if (msg->fenced)
+	if (msg->fenced) {
 		node->flags |= NODE_FLAGS_FENCED;
+
+		if (node->state == NODESTATE_MEMBER)
+			node->flags |= NODE_FLAGS_FENCEDWHILEUP;
+	}
+
 }
 
 static void do_process_transition(int nodeid, char *data, int len)
@@ -1850,12 +1857,24 @@
 void del_ais_node(int nodeid)
 {
 	struct cluster_node *node;
+	time_t t;
 	P_MEMB("del_ais_node %d\n", nodeid);
 
 	node = find_node_by_nodeid(nodeid);
 	assert(node);
 
-	node->flags &= ~NODE_FLAGS_FENCED;
+	/* If the node was fenced while up (ie independantly of fenced) then
+	 * don't clear the fenced flag. There is a timeout associated with
+	 * this so if we get the node down more than 2 minutes after the
+	 * fence message then we still clear fenced just to be certain that
+	 * fenced will do the job too.
+	 */
+	time(&t);
+	if (!(node->flags & NODE_FLAGS_FENCEDWHILEUP) || (t - node->fence_time > 120))
+		node->flags &= ~NODE_FLAGS_FENCED;
+
+	node->flags &= ~NODE_FLAGS_FENCEDWHILEUP;
+
 	if (node->state == NODESTATE_MEMBER) {
 		node->state = NODESTATE_DEAD;
 		cluster_members--;




More information about the Cluster-devel mailing list