[Cluster-devel] cluster/cman/daemon ais.c cmanccs.c commands.c
pcaulfield at sourceware.org
Wed Aug 2 11:54:38 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: pcaulfield at sourceware.org 2006-08-02 11:54:37
Modified files:
cman/daemon : ais.c cmanccs.c commands.c
Log message:
if we can't get the latest config from CCS, poll it until we do.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cmanccs.c.diff?cvsroot=cluster&r1=1.17&r2=1.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.45&r2=1.46
--- cluster/cman/daemon/ais.c 2006/07/21 12:25:21 1.33
+++ cluster/cman/daemon/ais.c 2006/08/02 11:54:36 1.34
@@ -46,6 +46,7 @@
extern char cluster_name[MAX_CLUSTER_NAME_LEN+1];
extern char *key_filename;
extern unsigned int quorumdev_poll;
+extern unsigned int ccsd_poll_interval;
extern unsigned int shutdown_timeout;
extern int init_config(struct objdb_iface_ver0 *objdb);
@@ -239,6 +240,7 @@
{
objdb_get_int(objdb, object_handle, "quorum_dev_poll", &quorumdev_poll);
objdb_get_int(objdb, object_handle, "shutdown_timeout", &shutdown_timeout);
+ objdb_get_int(objdb, object_handle, "ccsd_poll", &ccsd_poll_interval);
/* Only use the CCS version of this if it was not overridden on the command-line */
if (!getenv("CMAN_DEBUGLOG"))
--- cluster/cman/daemon/cmanccs.c 2006/07/03 08:51:10 1.17
+++ cluster/cman/daemon/cmanccs.c 2006/08/02 11:54:36 1.18
@@ -107,14 +107,6 @@
if (!ccs_get(ctree, CONFIG_VERSION_PATH, &str)) {
config = atoi(str);
free(str);
-
- /* config_version is zero at startup when we read initial config */
- if (*config_version && config != *config_version) {
- ccs_disconnect(ctree);
- log_msg(LOG_ERR, "CCS version is %d, we expected %d. config not updated\n",
- config, *config_version);
- return -1;
- }
*config_version = config;
}
--- cluster/cman/daemon/commands.c 2006/07/21 12:25:21 1.45
+++ cluster/cman/daemon/commands.c 2006/08/02 11:54:36 1.46
@@ -63,6 +63,7 @@
static int two_node;
unsigned int quorumdev_poll=10000;
unsigned int shutdown_timeout=5000;
+ unsigned int ccsd_poll_interval=1000;
static int cluster_is_quorate;
char cluster_name[MAX_CLUSTER_NAME_LEN+1];
static char nodename[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -73,6 +74,11 @@
static int ais_running;
static poll_timer_handle quorum_device_timer;
+/* If CCS gets out of sync, we poll it until it isn't */
+static poll_timer_handle ccsd_timer;
+static unsigned int wanted_config_version;
+static int config_error;
+
static poll_timer_handle shutdown_timer;
static struct connection *shutdown_con;
static uint32_t shutdown_flags;
@@ -128,7 +134,7 @@
{
int quorate;
- if (quorum > total_votes) {
+ if (quorum > total_votes || config_error) {
quorate = 0;
}
else {
@@ -457,7 +463,7 @@
einfo->flags = 0;
if (two_node)
einfo->flags |= CMAN_EXTRA_FLAG_2NODE;
- if (us->expected_votes == INT_MAX)
+ if (config_error)
einfo->flags |= CMAN_EXTRA_FLAG_ERROR;
if (shutdown_con)
einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN;
@@ -962,6 +968,27 @@
return 0;
}
+static void ccsd_timer_fn(void *arg) /* Poll-timer callback: re-reads CCS until a config at least as new as wanted_config_version is available; arg is not used. */
+{
+ int ccs_err;
+
+ log_msg(LOG_DEBUG, "Polling ccsd for updated information\n");
+ ccs_err = read_ccs_nodes(&config_version); /* presumably updates config_version through the pointer -- confirm in cmanccs.c */
+ if (ccs_err || config_version < wanted_config_version) { /* read failed, or CCS is still older than the version being waited for */
+ log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
+ wanted_config_version);
+
+ poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL, /* schedule this function again, using ccsd_poll_interval as the delay argument */
+ ccsd_timer_fn, &ccsd_timer);
+ }
+ else {
+ log_msg(LOG_ERR, "Now got CCS information version %d, continuing\n", config_version); /* NOTE(review): success is logged at LOG_ERR -- confirm this level is intended */
+ config_error = 0; /* clear the flag that was set when the wanted config could not be read */
+ recalculate_quorum(0); /* re-evaluate quorum now that config_error is cleared */
+ }
+}
+
+
static void quorum_device_timer_fn(void *arg)
{
struct timeval now;
@@ -1352,20 +1379,20 @@
static int valid_transition_msg(int nodeid, struct cl_transmsg *msg)
{
if (strcmp(msg->clustername, cluster_name) != 0) {
- log_msg(LOG_ERR, "Node %d refused, remote cluster name='%s', local='%s'\n",
+ log_msg(LOG_ERR, "Node %d conflict, remote cluster name='%s', local='%s'\n",
nodeid, msg->clustername, cluster_name);
return -1;
}
if (msg->cluster_id != cluster_id) {
- log_msg(LOG_ERR, "Node %d refused, remote cluster id=%d, local=%d\n",
+ log_msg(LOG_ERR, "Node %d conflict, remote cluster id=%d, local=%d\n",
nodeid, msg->cluster_id, cluster_id);
return -1;
}
if (msg->major_version != CNXMAN_MAJOR_VERSION) {
- log_msg(LOG_ERR, "Node %d refused, remote version id=%d, local=%d\n",
+ log_msg(LOG_ERR, "Node %d conflict, remote version id=%d, local=%d\n",
nodeid, msg->major_version, CNXMAN_MAJOR_VERSION);
return -1;
}
@@ -1376,9 +1403,13 @@
ccs_err = read_ccs_nodes(&config_version);
if (ccs_err || config_version < msg->config_version) {
- us->expected_votes = INT_MAX; /* Force us to stop */
+ config_error = 1;
log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
msg->config_version);
+
+ wanted_config_version = msg->config_version;
+ poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL,
+ ccsd_timer_fn, &ccsd_timer);
}
if (config_version > msg->config_version) {
// TODO tell everyone else to update...
@@ -1388,7 +1419,7 @@
if (msg->config_version != config_version) {
- log_msg(LOG_ERR, "Node %d refused, remote config version id=%d, local=%d\n",
+ log_msg(LOG_ERR, "Node %d conflict, remote config version id=%d, local=%d\n",
nodeid, msg->config_version, config_version);
return -1;
}
@@ -1528,8 +1559,12 @@
log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n",
msg->value);
- us->expected_votes = INT_MAX; /* Force us to stop */
+ config_error = 1;
recalculate_quorum(0);
+
+ wanted_config_version = config_version;
+ poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL,
+ ccsd_timer_fn, &ccsd_timer);
}
break;
}
More information about the Cluster-devel mailing list