[Cluster-devel] cluster/cman cman_tool/main.c daemon/ais.c dae ...

pcaulfield at sourceware.org pcaulfield at sourceware.org
Thu Oct 5 07:48:34 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2006-10-05 07:48:33

Modified files:
	cman/cman_tool : main.c 
	cman/daemon    : ais.c ais.h cnxman-private.h cnxman-socket.h 
	                 commands.c 
	cman/lib       : libcman.c libcman.h 

Log message:
	Add some extra semantics to CMAN to cope with openAIS rejoins.
	Basically, this adds an extra state to a node: AISONLY which is only cleared
	when cman receives a valid TRANSITION message from the node.
	
	A TRANSITION message is deemed to be invalid if the join_time of the node
	has not been changed (this is the timestamp the daemon was started) and
	the node has since been down and is rejoining. cman_tool will show if this
	is the case for a cluster by displaying the DisallowedNodes flag in the
	"cman_tool status" command.
	
	If there are disallowed nodes in the cluster then the "cman_tool expected"
	command is disabled until those nodes have been removed.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.48&r2=1.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.h.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-socket.h.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.50&r2=1.51
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.c.diff?cvsroot=cluster&r1=1.27&r2=1.28
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.h.diff?cvsroot=cluster&r1=1.28&r2=1.29

--- cluster/cman/cman_tool/main.c	2006/08/11 12:34:18	1.48
+++ cluster/cman/cman_tool/main.c	2006/10/05 07:48:33	1.49
@@ -232,6 +232,8 @@
 		printf(" Shutdown");
 	if (einfo->ei_flags & CMAN_EXTRA_FLAG_ERROR)
 		printf(" Error");
+	if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED)
+		printf(" DisallowedNodes");
 	printf(" \n");
 
 	printf("Ports Bound: ");
--- cluster/cman/daemon/ais.c	2006/10/02 08:50:02	1.41
+++ cluster/cman/daemon/ais.c	2006/10/05 07:48:33	1.42
@@ -57,6 +57,7 @@
 struct totem_ip_address ifaddrs[MAX_INTERFACES];
 int num_interfaces;
 uint64_t incarnation;
+int num_ais_nodes;
 
 static int config_run;
 static char errorstring[512];
@@ -411,12 +412,15 @@
 	P_AIS("confchg_fn called type = %d, seq=%lld\n", configuration_type, ring_id->seq);
 
 	incarnation = ring_id->seq;
+	num_ais_nodes = member_list_entries;
 
 	/* Tell the cman membership layer */
 	for (i=0; i<left_list_entries; i++)
 		del_ais_node(left_list[i]);
-	for (i=0; i<joined_list_entries; i++)
-		add_ais_node(joined_list[i], incarnation, member_list_entries);
+
+	/* Joining nodes are only added after a valid TRANSITION message
+	 * is received.
+	 */
 
 	/* Save the left list for later so we can do a consolidated confchg message */
 	if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
--- cluster/cman/daemon/ais.h	2006/08/11 12:34:18	1.10
+++ cluster/cman/daemon/ais.h	2006/10/05 07:48:33	1.11
@@ -25,3 +25,4 @@
 extern struct totem_ip_address mcast_addr[MAX_INTERFACES];
 extern struct totem_ip_address ifaddrs[MAX_INTERFACES];
 extern int num_interfaces;
+extern int num_ais_nodes;
--- cluster/cman/daemon/cnxman-private.h	2006/09/22 12:35:42	1.24
+++ cluster/cman/daemon/cnxman-private.h	2006/10/05 07:48:33	1.25
@@ -81,6 +81,7 @@
 	unsigned int   config_version;
 	unsigned int   flags;
 	uint64_t       fence_time;
+	uint64_t       join_time;
         char           clustername[16];
 	char           fence_agent[];
 };
@@ -142,7 +143,10 @@
 #define RECONFIG_PARAM_CONFIG_VERSION 3
 #define RECONFIG_PARAM_CCS            4
 
-#define NODE_FLAGS_GOTTRANSITION      1
+/* NODE_FLAGS_BEENDOWN   - this node has been down.
+   NODE_FLAGS_FENCED     - This node has been fenced since it last went down.
+*/
+#define NODE_FLAGS_BEENDOWN           1
 #define NODE_FLAGS_FENCED             2
 #define NODE_FLAGS_FENCEDWHILEUP      4
 
@@ -161,6 +165,8 @@
 	uint64_t fence_time; /* A time_t */
 	char    *fence_agent;
 
+	uint64_t cman_join_time; /* A time_t */
+
 	struct timeval last_hello; /* Only used for quorum devices */
 
 	unsigned int votes;
@@ -189,6 +195,7 @@
 /* Kill reasons */
 #define CLUSTER_KILL_REJECTED   1
 #define CLUSTER_KILL_CMANTOOL   2
+#define CLUSTER_KILL_REJOIN     3
 
 #define MAX_ADDR_PRINTED_LEN (address_length*3 + 1)
 
--- cluster/cman/daemon/cnxman-socket.h	2006/08/17 13:22:39	1.16
+++ cluster/cman/daemon/cnxman-socket.h	2006/10/05 07:48:33	1.17
@@ -113,7 +113,7 @@
 #define MSG_BCASTSELF    0x4000000
 
 typedef enum { NODESTATE_JOINING=1, NODESTATE_MEMBER,
-	       NODESTATE_DEAD, NODESTATE_LEAVING } nodestate_t;
+	       NODESTATE_DEAD, NODESTATE_LEAVING, NODESTATE_AISONLY } nodestate_t;
 
 static const char CLIENT_SOCKNAME[]= "/var/run/cman_client";
 static const char ADMIN_SOCKNAME[]=  "/var/run/cman_admin";
@@ -163,9 +163,10 @@
 };
 
 /* Flags */
-#define CMAN_EXTRA_FLAG_2NODE    1
-#define CMAN_EXTRA_FLAG_ERROR    2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE     1
+#define CMAN_EXTRA_FLAG_ERROR     2
+#define CMAN_EXTRA_FLAG_SHUTDOWN  4
+#define CMAN_EXTRA_FLAG_UNCOUNTED 8
 
 struct cl_extra_info {
 	int           node_state;
--- cluster/cman/daemon/commands.c	2006/09/22 12:35:42	1.50
+++ cluster/cman/daemon/commands.c	2006/10/05 07:48:33	1.51
@@ -33,6 +33,7 @@
 #include <openais/totem/totemip.h>
 #include <openais/totem/totempg.h>
 #include <openais/service/swab.h>
+#include <openais/service/print.h>
 #include <openais/totem/aispoll.h>
 #include "list.h"
 #include "cnxman-socket.h"
@@ -72,6 +73,7 @@
 static struct cluster_node *quorum_device;
 static uint16_t cluster_id;
 static int ais_running;
+static time_t join_time;
 static poll_timer_handle quorum_device_timer;
 
 /* If CCS gets out of sync, we poll it until it isn't */
@@ -94,6 +96,7 @@
 static int send_port_enquire(int nodeid);
 static void process_internal_message(char *data, int len, int nodeid, int byteswap);
 static void recalculate_quorum(int allow_decrease);
+static void send_kill(int nodeid, uint16_t reason);
 
 static void set_port_bit(struct cluster_node *node, uint8_t port)
 {
@@ -146,6 +149,18 @@
 	if (!cluster_is_quorate && quorate)
 		log_msg(LOG_INFO, "quorum regained, resuming activity\n");
 
+	/* If we are newly quorate, then kill any AISONLY nodes */
+	if (!cluster_is_quorate && quorate) {
+		struct cluster_node *node = NULL;
+		struct list *tmp;
+
+		list_iterate(tmp, &cluster_members_list) {
+			node = list_item(tmp, struct cluster_node);
+			if (node->state == NODESTATE_AISONLY)
+				send_kill(node->node_id, CLUSTER_KILL_REJOIN);
+		}
+	}
+
 	cluster_is_quorate = quorate;
 
 }
@@ -386,6 +401,7 @@
 		strcpy(nodename, un.nodename);
 	}
 
+	time(&join_time);
 	us = add_new_node(nodename, wanted_nodeid, -1, expected_votes,
 			  NODESTATE_MEMBER);
 	set_port_bit(us, 0);
@@ -424,6 +440,7 @@
 	int total_votes = 0;
 	int max_expected = 0;
 	int addrlen;
+	int uncounted = 0;
 	struct cluster_node *node;
 	struct sockaddr_storage *ss;
 	char *ptr;
@@ -437,6 +454,8 @@
 			total_votes += node->votes;
 			max_expected = max(max_expected, node->expected_votes);
 		}
+		if (node->state == NODESTATE_AISONLY)
+			uncounted = 1;
 	}
 	if (quorum_device && quorum_device->state == NODESTATE_MEMBER)
 		total_votes += quorum_device->votes;
@@ -467,6 +486,8 @@
 		einfo->flags |= CMAN_EXTRA_FLAG_ERROR;
 	if (shutdown_con)
 		einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN;
+	if (uncounted)
+		einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED;
 
 	ptr = einfo->addresses;
 	for (i=0; i<num_interfaces; i++) {
@@ -585,9 +606,22 @@
 	unsigned int total_votes;
 	unsigned int newquorum;
 	unsigned int newexp;
+	struct cluster_node *node = NULL;
+	struct list *tmp;
 
 	if (!we_are_a_cluster_member)
 		return -ENOENT;
+
+	/* If there are any AISONLY nodes then we can't allow
+	   the user to set expected votes as it may destroy data */
+	list_iterate(tmp, &cluster_members_list) {
+		node = list_item(tmp, struct cluster_node);
+		if (node->state == NODESTATE_AISONLY) {
+			log_printf(LOG_NOTICE, "Attempt to set expected votes when cluster has AISONLY nodes in it.");
+			return -EINVAL;
+		}
+	}
+
 	memcpy(&newexp, cmdbuf, sizeof(int));
 	newquorum = calculate_quorum(1, newexp, &total_votes);
 
@@ -647,7 +681,7 @@
 	if ((node = find_node_by_nodeid(nodeid)) == NULL)
 		return -EINVAL;
 
-	if (node->state != NODESTATE_MEMBER)
+	if (node->state != NODESTATE_MEMBER && node->state != NODESTATE_AISONLY)
 		return -EINVAL;
 
 	node->leave_reason = CLUSTER_LEAVEFLAG_KILLED;
@@ -1485,6 +1519,7 @@
 	msg->config_version = config_version;
 	msg->flags = us->flags;
 	msg->fence_time = us->fence_time;
+	msg->join_time = join_time;
 	strcpy(msg->clustername, cluster_name);
 	if (us->fence_agent)
 	{
@@ -1644,10 +1679,27 @@
 	node = find_node_by_nodeid(nodeid);
 	assert(node);
 
-	if (node->flags & NODE_FLAGS_GOTTRANSITION) {
-
+        /* This is the killer. If the join_time of the node matches that already stored AND
+	   the node has been down, then we kill it as this must be a rejoin */
+	if (msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) {
+		if (cluster_is_quorate) {
+			P_MEMB("Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+			log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+			send_kill(nodeid, CLUSTER_KILL_REJOIN);
+		}
+		else {
+			P_MEMB("Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+			log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+			node->state = NODESTATE_AISONLY;
+		}
+		return;
 	}
-	node->flags = msg->flags;
+	else {
+		node->cman_join_time = msg->join_time;
+		add_ais_node(nodeid, incarnation, num_ais_nodes);
+	}
+
+	node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */
 	if (node->fence_agent && msg->fence_agent[0] && strcmp(node->fence_agent, msg->fence_agent))
 	{
 		free(node->fence_agent);
@@ -1748,7 +1800,7 @@
 			node->leave_reason = leavemsg->reason;
 
 		/* Mark it as leaving, and remove it when we get an AIS node down event for it */
-		if (node && node->state == NODESTATE_MEMBER)
+		if (node && (node->state == NODESTATE_MEMBER || node->state == NODESTATE_AISONLY))
 			node->state = NODESTATE_LEAVING;
 		break;
 
@@ -1843,11 +1895,9 @@
 		node->name = strdup(tempname);
 	}
 
-	node->incarnation = incarnation;
-
-	gettimeofday(&node->join_time, NULL);
-
 	if (node->state == NODESTATE_DEAD) {
+		gettimeofday(&node->join_time, NULL);
+		node->incarnation = incarnation;
 		node->state = NODESTATE_MEMBER;
 		cluster_members++;
 		recalculate_quorum(0);
@@ -1874,6 +1924,7 @@
 		node->flags &= ~NODE_FLAGS_FENCED;
 
 	node->flags &= ~NODE_FLAGS_FENCEDWHILEUP;
+	node->flags |= NODE_FLAGS_BEENDOWN;
 
 	if (node->state == NODESTATE_MEMBER) {
 		node->state = NODESTATE_DEAD;
--- cluster/cman/lib/libcman.c	2006/08/24 10:40:57	1.27
+++ cluster/cman/lib/libcman.c	2006/10/05 07:48:33	1.28
@@ -45,7 +45,7 @@
 	int magic;
 	int fd;
 	int zero_fd;
-	void *private;
+	void *privdata;
 	int want_reply;
 	cman_callback_t event_callback;
 	cman_datacallback_t data_callback;
@@ -161,7 +161,7 @@
 		else
 		{
 			if (h->data_callback)
-				h->data_callback(h, h->private,
+				h->data_callback(h, h->privdata,
 						 buf+sizeof(*dmsg), msg->length-sizeof(*dmsg),
 						 dmsg->port, dmsg->nodeid);
 		}
@@ -203,14 +203,14 @@
 		{
 			if (msg->command == CMAN_CMD_EVENT && h->event_callback) {
 				struct sock_event_message *emsg = (struct sock_event_message *)msg;
-				h->event_callback(h, h->private, emsg->reason, emsg->arg);
+				h->event_callback(h, h->privdata, emsg->reason, emsg->arg);
 			}
 
 			if (msg->command == CMAN_CMD_CONFCHG && h->confchg_callback)
 			{
 				struct sock_confchg_message *cmsg = (struct sock_confchg_message *)msg;
 
-				h->confchg_callback(h, h->private,
+				h->confchg_callback(h, h->privdata,
 						    cmsg->entries,cmsg->member_entries, 
 						    &cmsg->entries[cmsg->member_entries], cmsg->left_entries, 
 						    &cmsg->entries[cmsg->member_entries+cmsg->left_entries], cmsg->joined_entries);
@@ -287,7 +287,7 @@
 	return wait_for_reply(h, outbuf, outlen);
 }
 
-static cman_handle_t open_socket(const char *name, int namelen, void *private)
+static cman_handle_t open_socket(const char *name, int namelen, void *privdata)
 {
 	struct cman_handle *h;
 	struct sockaddr_un sockaddr;
@@ -297,7 +297,7 @@
 		return NULL;
 
 	h->magic = CMAN_MAGIC;
-	h->private = private;
+	h->privdata = privdata;
 	h->event_callback = NULL;
 	h->data_callback = NULL;
 	h->confchg_callback = NULL;
@@ -344,14 +344,14 @@
 	return (cman_handle_t)h;
 }
 
-cman_handle_t cman_admin_init(void *private)
+cman_handle_t cman_admin_init(void *privdata)
 {
-	return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), private);
+	return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), privdata);
 }
 
-cman_handle_t cman_init(void *private)
+cman_handle_t cman_init(void *privdata)
 {
-	return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), private);
+	return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), privdata);
 }
 
 int cman_finish(cman_handle_t handle)
@@ -367,21 +367,21 @@
 	return 0;
 }
 
-int cman_set_private(cman_handle_t *handle, void *private)
+int cman_setprivdata(cman_handle_t *handle, void *privdata)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
 	VALIDATE_HANDLE(h);
 
-	h->private = private;
+	h->privdata = privdata;
 	return 0;
 }
 
-int cman_get_private(cman_handle_t *handle, void **private)
+int cman_getprivdata(cman_handle_t *handle, void **privdata)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
 	VALIDATE_HANDLE(h);
 
-	*private = h->private;
+	*privdata = h->privdata;
 
 	return 0;
 }
@@ -631,6 +631,53 @@
 	return 0;
 }
 
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes)
+{
+	struct cman_handle *h = (struct cman_handle *)handle;
+	struct cl_cluster_node *cman_nodes;
+	int status;
+	int buflen;
+	int count = 0;
+	int out_count = 0;
+	VALIDATE_HANDLE(h);
+
+	if (!retnodes || !nodes || maxnodes < 1)
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	buflen = sizeof(struct cl_cluster_node) * maxnodes;
+	cman_nodes = malloc(buflen);
+	if (!cman_nodes)
+		return -1;
+
+	status = info_call(h, CMAN_CMD_GETALLMEMBERS, NULL, 0, cman_nodes, buflen);
+	if (status < 0)
+	{
+		int saved_errno = errno;
+		free(cman_nodes);
+		errno = saved_errno;
+		return -1;
+	}
+
+	if (cman_nodes[0].size != sizeof(struct cl_cluster_node))
+	{
+		free(cman_nodes);
+		errno = EINVAL;
+		return -1;
+	}
+
+	for (count = 0; count < status; count++)
+	{
+		if (cman_nodes[count].state == NODESTATE_AISONLY && out_count < maxnodes)
+			copy_node(&nodes[out_count++], &cman_nodes[count]);
+	}
+	free(cman_nodes);
+	*retnodes = out_count;
+	return 0;
+}
+
 int cman_get_node(cman_handle_t handle, int nodeid, cman_node_t *node)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
--- cluster/cman/lib/libcman.h	2006/09/01 08:47:40	1.28
+++ cluster/cman/lib/libcman.h	2006/10/05 07:48:33	1.29
@@ -150,9 +150,10 @@
  */
 
 /* Flags in ei_flags */
-#define CMAN_EXTRA_FLAG_2NODE    1
-#define CMAN_EXTRA_FLAG_ERROR    2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE      1
+#define CMAN_EXTRA_FLAG_ERROR      2
+#define CMAN_EXTRA_FLAG_SHUTDOWN   4
+#define CMAN_EXTRA_FLAG_DISALLOWED 8
 
 typedef struct cman_extra_info {
 	int           ei_node_state;
@@ -198,8 +199,8 @@
 int cman_finish(cman_handle_t handle);
 
 /* Update/retrieve the private data */
-int cman_set_private(cman_handle_t *h, void *privdata);
-int cman_get_private(cman_handle_t *h, void **privdata);
+int cman_setprivdata(cman_handle_t *h, void *privdata);
+int cman_getprivdata(cman_handle_t *h, void **privdata);
 
 /*
  * Notification of membership change events. Note that these are sent after
@@ -259,6 +260,10 @@
    to determine how big your array needs to be */
 int cman_get_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
 
+/* Returns a list of nodes that are known to AIS but blocked from joining the CMAN
+   cluster because they rejoined the cluster without a cman_tool join */
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
+
 /*
  * cman_get_node() can get node info by nodeid OR by name. If the first
  * char of node->cn_name is zero then the nodeid will be used, otherwise




More information about the Cluster-devel mailing list