[Cluster-devel] cluster/rgmanager ChangeLog include/members.h ...
lhh at sourceware.org
lhh at sourceware.org
Wed Sep 27 16:28:42 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2006-09-27 16:28:41
Modified files:
rgmanager : ChangeLog
rgmanager/include: members.h reslist.h
rgmanager/src/clulib: members.c
rgmanager/src/daemons: fo_domain.c groups.c main.c nodeevent.c
rg_state.c
rgmanager/src/utils: clustat.c
Log message:
Fix various bugs, incl. 208011, 203762
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/members.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/members.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.31&r2=1.32
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&r1=1.20&r2=1.21
--- cluster/rgmanager/ChangeLog 2006/09/27 12:26:16 1.24
+++ cluster/rgmanager/ChangeLog 2006/09/27 16:28:41 1.25
@@ -1,6 +1,29 @@
2006-09-27 Lon Hohberger <lhh at redhat.com>
* src/daemons/rg_state.c: Fix #208011 - failed->disabled state
- transition
+ transition. Fix node ID type.
+ * include/members.h: Add a third state to note that a node does
+ not need to be fenced as a result of rgmanager crashing. Add protos
+ for memb_set_state and member_online functions.
+ * include/reslist.h: Fix type error; node IDs are 32-bit ints, not
+ 64-bit ints.
+ * src/clulib/members.c: Add member_set_state/online functions for
+ quick checks and sets of the protected member list. Zero out
+ the node structure prior to calling cman_get_nodeid so that we
+ don't get ENOENT (fixed clustat bug where clustat didn't report
+ the "local" flag). Fix node ID type.
+ * src/daemons/fo_domain.c: Fix node ID type, fix shadow declaration
+ of member_online
+ * src/daemons/groups.c: Unfix logic error that wasn't broken in the
+ first place.
+ * src/daemons/main.c: Fix node ID types. Add fourth ("clean")
+ param to node_event* to decide whether a node death is clean or not.
+ Nodes get marked clean if we get an RG_EXITING message, otherwise,
+ they are unclean, and we wait for fencing.
+ * src/daemons/nodeevent.c: Add fourth param to node_event* to help
+ decide if we need to wait for a node to be fenced or not. Fix
+ node ID type.
+ * src/utils/clustat.c: Fix logic error preventing nodes from properly
+ being flagged.
2006-09-01 Lon Hohberger <lhh at redhat.com>
* include/resgroup.h: Add proto for rg_strerror
--- cluster/rgmanager/include/members.h 2006/07/12 14:04:06 1.2
+++ cluster/rgmanager/include/members.h 2006/09/27 16:28:41 1.3
@@ -3,10 +3,19 @@
#include <rg_types.h>
+typedef enum {
+ NODE_STATE_DOWN = 0,
+ NODE_STATE_UP = 1,
+ NODE_STATE_CLEAN = 2
+} node_state_t;
+
+
int get_my_nodeid(cman_handle_t h);
int my_id(void);
cluster_member_list_t * get_member_list(cman_handle_t h);
void free_member_list(cluster_member_list_t *ml);
+void member_set_state(int nodeid, int state);
+int member_online(int nodeid);
int memb_online(cluster_member_list_t *ml, int nodeid);
int memb_online_name(cluster_member_list_t *ml, char *name);
int memb_name_to_id(cluster_member_list_t *ml, char *name);
--- cluster/rgmanager/include/reslist.h 2006/07/19 18:43:32 1.14
+++ cluster/rgmanager/include/reslist.h 2006/09/27 16:28:41 1.15
@@ -196,7 +196,7 @@
int construct_domains(int ccsfd, fod_t **domains);
void deconstruct_domains(fod_t **domains);
void print_domains(fod_t **domains);
-int node_should_start(uint64_t nodeid, cluster_member_list_t *membership,
+int node_should_start(int nodeid, cluster_member_list_t *membership,
char *rg_name, fod_t **domains);
--- cluster/rgmanager/src/clulib/members.c 2006/08/09 21:48:34 1.3
+++ cluster/rgmanager/src/clulib/members.c 2006/09/27 16:28:41 1.4
@@ -66,6 +66,7 @@
get_my_nodeid(cman_handle_t h)
{
cman_node_t node;
+ memset(&node,0,sizeof(node));
if (cman_get_node(h, CMAN_NODEID_US, &node) != 0)
return -1;
@@ -212,8 +213,51 @@
}
+void
+member_set_state(int nodeid, int state)
+{
+ int x = 0;
+
+ pthread_rwlock_wrlock(&memblock);
+ if (!membership) {
+ pthread_rwlock_unlock(&memblock);
+ return;
+ }
+
+ for (x = 0; x < membership->cml_count; x++) {
+ if (membership->cml_members[x].cn_nodeid == nodeid)
+ membership->cml_members[x].cn_member = state;
+ }
+ pthread_rwlock_unlock(&memblock);
+}
+
+
+int
+member_online(int nodeid)
+{
+ int x = 0, ret = 0;
+
+ pthread_rwlock_rdlock(&memblock);
+ if (!membership) {
+ pthread_rwlock_unlock(&memblock);
+ return 0;
+ }
+
+ for (x = 0; x < membership->cml_count; x++) {
+ if (membership->cml_members[x].cn_nodeid == nodeid) {
+ ret = membership->cml_members[x].cn_member;
+ break;
+ }
+ }
+ pthread_rwlock_unlock(&memblock);
+
+ return ret;
+}
+
+
+
char *
-member_name(uint64_t id, char *buf, int buflen)
+member_name(int id, char *buf, int buflen)
{
char *n;
--- cluster/rgmanager/src/daemons/fo_domain.c 2006/07/19 18:43:32 1.10
+++ cluster/rgmanager/src/daemons/fo_domain.c 2006/09/27 16:28:41 1.11
@@ -266,7 +266,7 @@
node_in_domain(char *nodename, fod_t *domain,
cluster_member_list_t *membership)
{
- int member_online = 0, member_match = 0, preferred = 100, myprio = -1;
+ int online = 0, member_match = 0, preferred = 100, myprio = -1;
fod_node_t *fodn;
list_do(&domain->fd_nodes, fodn) {
@@ -283,7 +283,7 @@
* If we get here, we know:
* A member of the domain is online somewhere
*/
- member_online = 1;
+ online = 1;
if (!strcmp(nodename, fodn->fdn_name)) {
/*
* If we get here, we know:
@@ -297,7 +297,7 @@
preferred = fodn->fdn_prio;
} while (!list_done(&domain->fd_nodes, fodn));
- if (!member_online)
+ if (!online)
return 0;
if (!member_match)
@@ -322,7 +322,7 @@
* @return 0 on NO, 1 for YES
*/
int
-node_should_start(uint64_t nodeid, cluster_member_list_t *membership,
+node_should_start(int nodeid, cluster_member_list_t *membership,
char *rg_name, fod_t **domains)
{
char *nodename = NULL;
--- cluster/rgmanager/src/daemons/groups.c 2006/09/01 19:02:22 1.22
+++ cluster/rgmanager/src/daemons/groups.c 2006/09/27 16:28:41 1.23
@@ -273,7 +273,7 @@
* local start.
*/
if (svcStatus->rs_state == RG_STATE_STARTED &&
- svcStatus->rs_owner != mp->cn_nodeid)
+ svcStatus->rs_owner == mp->cn_nodeid)
return;
if (svcStatus->rs_state == RG_STATE_DISABLED)
--- cluster/rgmanager/src/daemons/main.c 2006/09/01 19:02:22 1.31
+++ cluster/rgmanager/src/daemons/main.c 2006/09/27 16:28:41 1.32
@@ -41,13 +41,13 @@
int configure_logging(int ccsfd, int debug);
-void node_event(int, uint64_t, int);
-void node_event_q(int, uint64_t, int);
+void node_event(int, int, int, int);
+void node_event_q(int, int, int, int);
int daemon_init(char *);
int init_resource_groups(int);
void kill_resource_groups(void);
void set_my_id(int);
-int eval_groups(int, uint64_t, int);
+int eval_groups(int, int, int);
void flag_shutdown(int sig);
void hard_exit(void);
int send_rg_states(msgctx_t *, int);
@@ -60,7 +60,7 @@
static int signalled = 0;
static int port = RG_PORT;
-uint64_t next_node_id(cluster_member_list_t *membership, uint64_t me);
+int next_node_id(cluster_member_list_t *membership, int me);
int rg_event_q(char *svcName, uint32_t state, int owner);
@@ -190,7 +190,7 @@
clulog(LOG_INFO, "State change: LOCAL OFFLINE\n");
if (node_delta)
free_member_list(node_delta);
- node_event(1, my_id(), 0);
+ node_event(1, my_id(), 0, 0);
/* NOTREACHED */
}
@@ -202,9 +202,9 @@
locked. This is just a performance thing */
if (!rg_locked()) {
node_event_q(0, node_delta->cml_members[x].cn_nodeid,
- 0);
+ 0, 0);
} else {
- clulog(LOG_NOTICE, "Not taking action - services"
+ clulog(LOG_DEBUG, "Not taking action - services"
" locked\n");
}
}
@@ -219,7 +219,7 @@
me = memb_online(node_delta, my_id());
if (me) {
clulog(LOG_INFO, "State change: Local UP\n");
- node_event_q(1, my_id(), 1);
+ node_event_q(1, my_id(), 1, 1);
}
for (x=0; node_delta && x < node_delta->cml_count; x++) {
@@ -232,7 +232,7 @@
clulog(LOG_INFO, "State change: %s UP\n",
node_delta->cml_members[x].cn_name);
- node_event_q(0, node_delta->cml_members[x].cn_nodeid, 1);
+ node_event_q(0, node_delta->cml_members[x].cn_nodeid, 1, 1);
}
free_member_list(node_delta);
@@ -490,7 +490,13 @@
break;
case RG_EXITING:
- clulog(LOG_NOTICE, "Member %d is going offline\n", (int)nodeid);
+ if (!member_online(msg_hdr->gh_arg1))
+ break;
+
+ clulog(LOG_NOTICE, "Member %d shutting down\n",
+ msg_hdr->gh_arg1);
+ member_set_state(msg_hdr->gh_arg1, 0);
+ node_event_q(0, msg_hdr->gh_arg1, 0, 1);
break;
case VF_MESSAGE:
--- cluster/rgmanager/src/daemons/nodeevent.c 2006/07/11 23:52:41 1.3
+++ cluster/rgmanager/src/daemons/nodeevent.c 2006/09/27 16:28:41 1.4
@@ -27,8 +27,9 @@
typedef struct __ne_q {
list_head();
int ne_local;
- uint64_t ne_nodeid;
+ int ne_nodeid;
int ne_state;
+ int ne_clean;
} nevent_t;
/**
@@ -37,7 +38,7 @@
static nevent_t *event_queue = NULL;
static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_t ne_thread = 0;
-int ne_queue_request(int local, uint64_t nodeid, int state);
+int ne_queue_request(int local, int nodeid, int state);
void hard_exit(void);
int init_resource_groups(int);
@@ -59,7 +60,7 @@
@see eval_groups
*/
void
-node_event(int local, uint64_t nodeID, int nodeStatus)
+node_event(int local, int nodeID, int nodeStatus, int clean)
{
if (!running)
return;
@@ -136,7 +137,7 @@
{
cman_handle_t ch;
int fenced = 0;
- uint64_t fence_time;
+ int fence_time;
ch = cman_init(NULL);
if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0)
@@ -163,7 +164,8 @@
break; /* We're outta here */
pthread_mutex_unlock(&ne_queue_mutex);
- if (ev->ne_state == 0 && node_has_fencing(ev->ne_nodeid)) {
+ if (ev->ne_state == 0 && !ev->ne_clean &&
+ node_has_fencing(ev->ne_nodeid)) {
notice = 0;
while (!node_fenced(ev->ne_nodeid)) {
if (!notice) {
@@ -179,7 +181,8 @@
"continuing\n", ev->ne_nodeid);
}
- node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state);
+ node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state,
+ ev->ne_clean);
free(ev);
}
@@ -192,7 +195,7 @@
void
-node_event_q(int local, uint64_t nodeID, int state)
+node_event_q(int local, int nodeID, int state, int clean)
{
nevent_t *ev;
pthread_attr_t attrs;
@@ -210,6 +213,7 @@
ev->ne_state = state;
ev->ne_local = local;
ev->ne_nodeid = nodeID;
+ ev->ne_clean = clean;
pthread_mutex_lock (&ne_queue_mutex);
list_insert(&event_queue, ev);
--- cluster/rgmanager/src/daemons/rg_state.c 2006/09/27 12:26:17 1.21
+++ cluster/rgmanager/src/daemons/rg_state.c 2006/09/27 16:28:41 1.22
@@ -38,7 +38,7 @@
int node_should_start_safe(uint32_t, cluster_member_list_t *, char *);
-uint32_t next_node_id(cluster_member_list_t *membership, uint32_t me);
+int next_node_id(cluster_member_list_t *membership, int me);
int rg_exec_script(char *rgname, char *script, char *action);
static int _svc_stop_finish(char *svcName, int failed, uint32_t newstate);
@@ -50,11 +50,11 @@
int group_migratory(char *servicename);
-uint32_t
-next_node_id(cluster_member_list_t *membership, uint32_t me)
+int
+next_node_id(cluster_member_list_t *membership, int me)
{
- uint32_t low = (uint32_t)(-1);
- uint32_t next = me, curr;
+ int low = (int)(-1);
+ int next = me, curr;
int x;
for (x = 0; x < membership->cml_count; x++) {
--- cluster/rgmanager/src/utils/clustat.c 2006/09/01 19:02:22 1.20
+++ cluster/rgmanager/src/utils/clustat.c 2006/09/27 16:28:41 1.21
@@ -243,7 +243,7 @@
m = memb_name_to_p(these, all->cml_members[x].cn_name);
- if (m) {
+ if (m && m->cn_member) {
all->cml_members[x].cn_nodeid = m->cn_nodeid;
all->cml_members[x].cn_member |= flag;
}
@@ -299,7 +299,7 @@
char *
-my_memb_id_to_name(cluster_member_list_t *members, uint64_t memb_id)
+my_memb_id_to_name(cluster_member_list_t *members, int memb_id)
{
int x;
@@ -637,6 +637,7 @@
/* Grab the local node ID and flag it from the list of reported
online nodes */
*lid = get_my_nodeid(ch);
+ /* */
for (x=0; x<all->cml_count; x++) {
if (all->cml_members[x].cn_nodeid == *lid) {
m = &all->cml_members[x];
@@ -677,7 +678,7 @@
int local_node_id;
int fast = 0;
int runtype = 0;
- cman_handle_t ch;
+ cman_handle_t ch = NULL;
int refresh_sec = 0, errors = 0;
int opt, xml = 0, flags = 0;
More information about the Cluster-devel
mailing list