[Cluster-devel] [PATCH 1/3] fenced: full check for member changes

David Teigland teigland at redhat.com
Wed Sep 28 16:37:26 UTC 2011


cman members are queried in response to a callback,
and members sometimes leave and rejoin between queries
(e.g. when they leave and rejoin before corosync
detects they left.)

This means that simply checking if a node is a member
in consecutive queries sometimes misses events.  We
need to compare the incarnation numbers of members
from consecutive queries to avoid this.

bz 663397

Signed-off-by: David Teigland <teigland at redhat.com>
---
 fence/fenced/member_cman.c |   36 ++++++++++++++++++++++++++++++++++++
 1 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index 0919b8e..ee879be 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -33,6 +33,17 @@ void kick_node_from_cluster(int nodeid)
 	}
 }
 
+static cman_node_t *get_node(cman_node_t *node_list, int count, int nodeid)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (node_list[i].cn_nodeid == nodeid)
+			return &node_list[i];
+	}
+	return NULL;
+}
+
 static int is_member(cman_node_t *node_list, int count, int nodeid)
 {
 	int i;
@@ -149,6 +160,7 @@ int name_to_nodeid(char *name)
 static void update_cluster(void)
 {
 	cman_cluster_t info;
+	cman_node_t *old;
 	int quorate = cluster_quorate;
 	int removed = 0, added = 0;
 	int i, rv;
@@ -197,6 +209,30 @@ static void update_cluster(void)
 
 			node_history_cluster_add(cman_nodes[i].cn_nodeid);
 			added++;
+		} else {
+			/* look for any nodes that were members of both
+			 * old and new but have a new incarnation number
+			 * from old to new, indicating they left and rejoined
+			 * in between */
+
+			old = get_node(old_nodes, old_node_count, cman_nodes[i].cn_nodeid);
+
+			if (!old)
+				continue;
+			if (cman_nodes[i].cn_incarnation == old->cn_incarnation)
+				continue;
+
+			log_debug("cluster node %d removed and added seq %u "
+				  "old %u new %u",
+				  cman_nodes[i].cn_nodeid, cluster_ringid_seq,
+				  old->cn_incarnation,
+				  cman_nodes[i].cn_incarnation);
+
+			node_history_cluster_remove(cman_nodes[i].cn_nodeid);
+			removed++;
+
+			node_history_cluster_add(cman_nodes[i].cn_nodeid);
+			added++;
 		}
 	}
 
-- 
1.7.6




More information about the Cluster-devel mailing list