[Cluster-devel] cluster/cman/daemon ais.c cmanccs.c commands.c

pcaulfield at sourceware.org pcaulfield at sourceware.org
Wed Aug 2 11:54:38 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2006-08-02 11:54:37

Modified files:
	cman/daemon    : ais.c cmanccs.c commands.c 

Log message:
	If we can't get the latest config from CCS, keep polling it until we can.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cmanccs.c.diff?cvsroot=cluster&r1=1.17&r2=1.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.45&r2=1.46

--- cluster/cman/daemon/ais.c	2006/07/21 12:25:21	1.33
+++ cluster/cman/daemon/ais.c	2006/08/02 11:54:36	1.34
@@ -46,6 +46,7 @@
 extern char cluster_name[MAX_CLUSTER_NAME_LEN+1];
 extern char *key_filename;
 extern unsigned int quorumdev_poll;
+extern unsigned int ccsd_poll_interval;
 extern unsigned int shutdown_timeout;
 extern int init_config(struct objdb_iface_ver0 *objdb);
 
@@ -239,6 +240,7 @@
 	{
 		objdb_get_int(objdb, object_handle, "quorum_dev_poll", &quorumdev_poll);
 		objdb_get_int(objdb, object_handle, "shutdown_timeout", &shutdown_timeout);
+		objdb_get_int(objdb, object_handle, "ccsd_poll", &ccsd_poll_interval);
 
 		/* Only use the CCS version of this if it was not overridden on the command-line */
 		if (!getenv("CMAN_DEBUGLOG"))
--- cluster/cman/daemon/cmanccs.c	2006/07/03 08:51:10	1.17
+++ cluster/cman/daemon/cmanccs.c	2006/08/02 11:54:36	1.18
@@ -107,14 +107,6 @@
     if (!ccs_get(ctree, CONFIG_VERSION_PATH, &str)) {
 	    config = atoi(str);
 	    free(str);
-
-	    /* config_version is zero at startup when we read initial config */
-	    if (*config_version && config != *config_version) {
-		    ccs_disconnect(ctree);
-		    log_msg(LOG_ERR, "CCS version is %d, we expected %d. config not updated\n",
-			    config, *config_version);
-		    return -1;
-	    }
 	    *config_version = config;
     }
 
--- cluster/cman/daemon/commands.c	2006/07/21 12:25:21	1.45
+++ cluster/cman/daemon/commands.c	2006/08/02 11:54:36	1.46
@@ -63,6 +63,7 @@
 static int two_node;
        unsigned int quorumdev_poll=10000;
        unsigned int shutdown_timeout=5000;
+       unsigned int ccsd_poll_interval=1000;
 static int cluster_is_quorate;
        char cluster_name[MAX_CLUSTER_NAME_LEN+1];
 static char nodename[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -73,6 +74,11 @@
 static int ais_running;
 static poll_timer_handle quorum_device_timer;
 
+/* If CCS gets out of sync, we poll it until it isn't */
+static poll_timer_handle ccsd_timer;
+static unsigned int wanted_config_version;
+static int config_error;
+
 static poll_timer_handle shutdown_timer;
 static struct connection *shutdown_con;
 static uint32_t shutdown_flags;
@@ -128,7 +134,7 @@
 {
 	int quorate;
 
-	if (quorum > total_votes) {
+	if (quorum > total_votes || config_error) {
 		quorate = 0;
 	}
 	else {
@@ -457,7 +463,7 @@
 	einfo->flags = 0;
 	if (two_node)
 		einfo->flags |= CMAN_EXTRA_FLAG_2NODE;
-	if (us->expected_votes == INT_MAX)
+	if (config_error)
 		einfo->flags |= CMAN_EXTRA_FLAG_ERROR;
 	if (shutdown_con)
 		einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN;
@@ -962,6 +968,27 @@
         return 0;
 }
 
+static void ccsd_timer_fn(void *arg)
+{
+	int ccs_err;
+
+	log_msg(LOG_DEBUG, "Polling ccsd for updated information\n");
+	ccs_err = read_ccs_nodes(&config_version);
+	if (ccs_err || config_version < wanted_config_version) {
+		log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
+				wanted_config_version);
+
+		poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL,
+			       ccsd_timer_fn, &ccsd_timer);
+	}
+	else {
+		log_msg(LOG_ERR, "Now got CCS information version %d, continuing\n", config_version);
+		config_error = 0;
+		recalculate_quorum(0);
+	}
+}
+
+
 static void quorum_device_timer_fn(void *arg)
 {
 	struct timeval now;
@@ -1352,20 +1379,20 @@
 static int valid_transition_msg(int nodeid, struct cl_transmsg *msg)
 {
 	if (strcmp(msg->clustername, cluster_name) != 0) {
-		log_msg(LOG_ERR, "Node %d refused, remote cluster name='%s', local='%s'\n",
+		log_msg(LOG_ERR, "Node %d conflict, remote cluster name='%s', local='%s'\n",
 			nodeid, msg->clustername, cluster_name);
 		return -1;
 	}
 
 	if (msg->cluster_id != cluster_id) {
-		log_msg(LOG_ERR, "Node %d refused, remote cluster id=%d, local=%d\n",
+		log_msg(LOG_ERR, "Node %d conflict, remote cluster id=%d, local=%d\n",
 			nodeid, msg->cluster_id, cluster_id);
 		return -1;
 	}
 
 	if (msg->major_version != CNXMAN_MAJOR_VERSION) {
 
-		log_msg(LOG_ERR, "Node %d refused, remote version id=%d, local=%d\n",
+		log_msg(LOG_ERR, "Node %d conflict, remote version id=%d, local=%d\n",
 			nodeid, msg->major_version, CNXMAN_MAJOR_VERSION);
 		return -1;
 	}
@@ -1376,9 +1403,13 @@
 
 		ccs_err = read_ccs_nodes(&config_version);
 		if (ccs_err || config_version < msg->config_version) {
-			us->expected_votes = INT_MAX; /* Force us to stop */
+			config_error = 1;
 			log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
 				msg->config_version);
+
+			wanted_config_version = msg->config_version;
+			poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL,
+				       ccsd_timer_fn, &ccsd_timer);
 		}
 		if (config_version > msg->config_version) {
 			// TODO tell everyone else to update...
@@ -1388,7 +1419,7 @@
 
 
 	if (msg->config_version != config_version) {
-		log_msg(LOG_ERR, "Node %d refused, remote config version id=%d, local=%d\n",
+		log_msg(LOG_ERR, "Node %d conflict, remote config version id=%d, local=%d\n",
 			nodeid, msg->config_version, config_version);
 		return -1;
 	}
@@ -1528,8 +1559,12 @@
 			log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
 				msg->value);
 
-			us->expected_votes = INT_MAX; /* Force us to stop */
+			config_error = 1;
 			recalculate_quorum(0);
+
+			wanted_config_version = config_version;
+			poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL,
+				       ccsd_timer_fn, &ccsd_timer);
 		}
 		break;
 	}




More information about the Cluster-devel mailing list