[Cluster-devel] cluster/rgmanager ChangeLog include/members.h ...

lhh at sourceware.org lhh at sourceware.org
Wed Sep 27 16:28:42 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-09-27 16:28:41

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: members.h reslist.h 
	rgmanager/src/clulib: members.c 
	rgmanager/src/daemons: fo_domain.c groups.c main.c nodeevent.c 
	                       rg_state.c 
	rgmanager/src/utils: clustat.c 

Log message:
	Fix various bugs, incl. 208011, 203762

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/members.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/members.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.31&r2=1.32
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&r1=1.20&r2=1.21

--- cluster/rgmanager/ChangeLog	2006/09/27 12:26:16	1.24
+++ cluster/rgmanager/ChangeLog	2006/09/27 16:28:41	1.25
@@ -1,6 +1,29 @@
 2006-09-27 Lon Hohberger <lhh at redhat.com>
 	* src/daemons/rg_state.c: Fix #208011 - failed->disabled state 
-	transition
+	transition.  Fix node ID type.
+	* include/members.h: Add a third state to note that a node does
+	not need to be fenced as a result of rgmanager crashing.  Add protos
+	for memb_set_state and member_online functions.
+	* include/reslist.h: Fix type error; node IDs are 32-bit ints, not 
+	64-bit ints.
+	* src/clulib/members.c: Add member_set_state/online functions for
+	quick checks and sets of the protected member list.  Zero out
+	the node structure prior to calling cman_get_nodeid so that we 
+	don't get ENOENT (fixed clustat bug where clustat didn't report
+	the "local" flag).  Fix node ID type.
+	* src/daemons/fo_domain.c: Fix node ID type, fix shadow declaration
+	of member_online
+	* src/daemons/groups.c: Unfix logic error that wasn't broken in the
+	first place.
+	* src/daemons/main.c: Fix node ID types.  Add fourth ("clean")
+	param to node_event* to decide whether a node death is clean or not.
+	Nodes get marked clean if we get an RG_EXITING message, otherwise,
+	they are unclean, and we wait for fencing.  
+	* src/daemons/nodeevent.c: Add fourth param to node_event* to help
+	decide if we need to wait for a node to be fenced or not.  Fix
+	node ID type.
+	* src/utils/clustat.c: Fix logic error preventing nodes from properly
+	being flagged.
 
 2006-09-01 Lon Hohberger <lhh at redhat.com>
 	* include/resgroup.h: Add proto for rg_strerror
--- cluster/rgmanager/include/members.h	2006/07/12 14:04:06	1.2
+++ cluster/rgmanager/include/members.h	2006/09/27 16:28:41	1.3
@@ -3,10 +3,19 @@
 
 #include <rg_types.h>
 
+typedef enum {
+	NODE_STATE_DOWN = 0,
+	NODE_STATE_UP = 1,
+	NODE_STATE_CLEAN = 2
+} node_state_t;
+
+
 int get_my_nodeid(cman_handle_t h);
 int my_id(void);
 cluster_member_list_t * get_member_list(cman_handle_t h);
 void free_member_list(cluster_member_list_t *ml);
+void member_set_state(int nodeid, int state);
+int member_online(int nodeid);
 int memb_online(cluster_member_list_t *ml, int nodeid);
 int memb_online_name(cluster_member_list_t *ml, char *name);
 int memb_name_to_id(cluster_member_list_t *ml, char *name);
--- cluster/rgmanager/include/reslist.h	2006/07/19 18:43:32	1.14
+++ cluster/rgmanager/include/reslist.h	2006/09/27 16:28:41	1.15
@@ -196,7 +196,7 @@
 int construct_domains(int ccsfd, fod_t **domains);
 void deconstruct_domains(fod_t **domains);
 void print_domains(fod_t **domains);
-int node_should_start(uint64_t nodeid, cluster_member_list_t *membership,
+int node_should_start(int nodeid, cluster_member_list_t *membership,
 		      char *rg_name, fod_t **domains);
 
 
--- cluster/rgmanager/src/clulib/members.c	2006/08/09 21:48:34	1.3
+++ cluster/rgmanager/src/clulib/members.c	2006/09/27 16:28:41	1.4
@@ -66,6 +66,7 @@
 get_my_nodeid(cman_handle_t h)
 {
 	cman_node_t node;
+	memset(&node,0,sizeof(node));
 
 	if (cman_get_node(h, CMAN_NODEID_US, &node) != 0)
 		return -1;
@@ -212,8 +213,51 @@
 }
 
 
+void
+member_set_state(int nodeid, int state)
+{
+	int x = 0;
+
+	pthread_rwlock_wrlock(&memblock);
+	if (!membership) {
+		pthread_rwlock_unlock(&memblock);
+		return;
+	}
+
+	for (x = 0; x < membership->cml_count; x++) {
+		if (membership->cml_members[x].cn_nodeid == nodeid)
+			membership->cml_members[x].cn_member = state;
+	}
+	pthread_rwlock_unlock(&memblock);
+}
+
+
+int
+member_online(int nodeid)
+{
+	int x = 0, ret = 0;
+
+	pthread_rwlock_rdlock(&memblock);
+	if (!membership) {
+		pthread_rwlock_unlock(&memblock);
+		return 0;
+	}
+
+	for (x = 0; x < membership->cml_count; x++) {
+		if (membership->cml_members[x].cn_nodeid == nodeid) {
+			ret = membership->cml_members[x].cn_member;
+			break;
+		}
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return ret;
+}
+
+
+
 char *
-member_name(uint64_t id, char *buf, int buflen)
+member_name(int id, char *buf, int buflen)
 {
 	char *n;
 
--- cluster/rgmanager/src/daemons/fo_domain.c	2006/07/19 18:43:32	1.10
+++ cluster/rgmanager/src/daemons/fo_domain.c	2006/09/27 16:28:41	1.11
@@ -266,7 +266,7 @@
 node_in_domain(char *nodename, fod_t *domain,
 	       cluster_member_list_t *membership)
 {
-	int member_online = 0, member_match = 0, preferred = 100, myprio = -1;
+	int online = 0, member_match = 0, preferred = 100, myprio = -1;
 	fod_node_t *fodn;
 
 	list_do(&domain->fd_nodes, fodn) {
@@ -283,7 +283,7 @@
 		 * If we get here, we know:
 		 * A member of the domain is online somewhere
 		 */
-		member_online = 1;
+		online = 1;
 		if (!strcmp(nodename, fodn->fdn_name)) {
 			/*
 			 * If we get here, we know:
@@ -297,7 +297,7 @@
 			preferred = fodn->fdn_prio;
 	} while (!list_done(&domain->fd_nodes, fodn));
 
-	if (!member_online)
+	if (!online)
 		return 0;
 
 	if (!member_match)
@@ -322,7 +322,7 @@
  * @return		0 on NO, 1 for YES
  */
 int
-node_should_start(uint64_t nodeid, cluster_member_list_t *membership,
+node_should_start(int nodeid, cluster_member_list_t *membership,
 		  char *rg_name, fod_t **domains)
 {
 	char *nodename = NULL;
--- cluster/rgmanager/src/daemons/groups.c	2006/09/01 19:02:22	1.22
+++ cluster/rgmanager/src/daemons/groups.c	2006/09/27 16:28:41	1.23
@@ -273,7 +273,7 @@
 	 * local start.
 	 */
 	if (svcStatus->rs_state == RG_STATE_STARTED &&
-	    svcStatus->rs_owner != mp->cn_nodeid)
+	    svcStatus->rs_owner == mp->cn_nodeid)
 		return;
 
 	if (svcStatus->rs_state == RG_STATE_DISABLED)
--- cluster/rgmanager/src/daemons/main.c	2006/09/01 19:02:22	1.31
+++ cluster/rgmanager/src/daemons/main.c	2006/09/27 16:28:41	1.32
@@ -41,13 +41,13 @@
 
 int configure_logging(int ccsfd, int debug);
 
-void node_event(int, uint64_t, int);
-void node_event_q(int, uint64_t, int);
+void node_event(int, int, int, int);
+void node_event_q(int, int, int, int);
 int daemon_init(char *);
 int init_resource_groups(int);
 void kill_resource_groups(void);
 void set_my_id(int);
-int eval_groups(int, uint64_t, int);
+int eval_groups(int, int, int);
 void flag_shutdown(int sig);
 void hard_exit(void);
 int send_rg_states(msgctx_t *, int);
@@ -60,7 +60,7 @@
 static int signalled = 0;
 static int port = RG_PORT;
 
-uint64_t next_node_id(cluster_member_list_t *membership, uint64_t me);
+int next_node_id(cluster_member_list_t *membership, int me);
 int rg_event_q(char *svcName, uint32_t state, int owner);
 
 
@@ -190,7 +190,7 @@
 		clulog(LOG_INFO, "State change: LOCAL OFFLINE\n");
 		if (node_delta)
 			free_member_list(node_delta);
-		node_event(1, my_id(), 0);
+		node_event(1, my_id(), 0, 0);
 		/* NOTREACHED */
 	}
 
@@ -202,9 +202,9 @@
 		   locked.  This is just a performance thing */
 		if (!rg_locked()) {
 			node_event_q(0, node_delta->cml_members[x].cn_nodeid,
-			     		0);
+				     0, 0);
 		} else {
-			clulog(LOG_NOTICE, "Not taking action - services"
+			clulog(LOG_DEBUG, "Not taking action - services"
 			       " locked\n");
 		}
 	}
@@ -219,7 +219,7 @@
 	me = memb_online(node_delta, my_id());
 	if (me) {
 		clulog(LOG_INFO, "State change: Local UP\n");
-		node_event_q(1, my_id(), 1);
+		node_event_q(1, my_id(), 1, 1);
 	}
 
 	for (x=0; node_delta && x < node_delta->cml_count; x++) {
@@ -232,7 +232,7 @@
 
 		clulog(LOG_INFO, "State change: %s UP\n",
 		       node_delta->cml_members[x].cn_name);
-		node_event_q(0, node_delta->cml_members[x].cn_nodeid, 1);
+		node_event_q(0, node_delta->cml_members[x].cn_nodeid, 1, 1);
 	}
 
 	free_member_list(node_delta);
@@ -490,7 +490,13 @@
 		break;
 
 	case RG_EXITING:
-		clulog(LOG_NOTICE, "Member %d is going offline\n", (int)nodeid);
+		if (!member_online(msg_hdr->gh_arg1))
+			break;
+
+		clulog(LOG_NOTICE, "Member %d shutting down\n",
+		       msg_hdr->gh_arg1);
+	       	member_set_state(msg_hdr->gh_arg1, 0);
+		node_event_q(0, msg_hdr->gh_arg1, 0, 1);
 		break;
 
 	case VF_MESSAGE:
--- cluster/rgmanager/src/daemons/nodeevent.c	2006/07/11 23:52:41	1.3
+++ cluster/rgmanager/src/daemons/nodeevent.c	2006/09/27 16:28:41	1.4
@@ -27,8 +27,9 @@
 typedef struct __ne_q {
 	list_head();
 	int ne_local;
-	uint64_t ne_nodeid;
+	int ne_nodeid;
 	int ne_state;
+	int ne_clean;
 } nevent_t;
 
 /**
@@ -37,7 +38,7 @@
 static nevent_t *event_queue = NULL;
 static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t ne_thread = 0;
-int ne_queue_request(int local, uint64_t nodeid, int state);
+int ne_queue_request(int local, int nodeid, int state);
 
 void hard_exit(void);
 int init_resource_groups(int);
@@ -59,7 +60,7 @@
   @see eval_groups
  */
 void
-node_event(int local, uint64_t nodeID, int nodeStatus)
+node_event(int local, int nodeID, int nodeStatus, int clean)
 {
 	if (!running)
 		return;
@@ -136,7 +137,7 @@
 {
 	cman_handle_t ch;
 	int fenced = 0;
-	uint64_t fence_time;
+	int fence_time;
 
 	ch = cman_init(NULL);
 	if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0)
@@ -163,7 +164,8 @@
 			break; /* We're outta here */
 		pthread_mutex_unlock(&ne_queue_mutex);
 
-		if (ev->ne_state == 0 && node_has_fencing(ev->ne_nodeid)) {
+		if (ev->ne_state == 0 && !ev->ne_clean &&
+		    node_has_fencing(ev->ne_nodeid)) {
 			notice = 0;
 			while (!node_fenced(ev->ne_nodeid)) {
 				if (!notice) {
@@ -179,7 +181,8 @@
 				       "continuing\n", ev->ne_nodeid);
 		}
 
-		node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state);
+		node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state,
+			   ev->ne_clean);
 
 		free(ev);
 	}
@@ -192,7 +195,7 @@
 
 
 void
-node_event_q(int local, uint64_t nodeID, int state)
+node_event_q(int local, int nodeID, int state, int clean)
 {
 	nevent_t *ev;
 	pthread_attr_t attrs;
@@ -210,6 +213,7 @@
 	ev->ne_state = state;
 	ev->ne_local = local;
 	ev->ne_nodeid = nodeID;
+	ev->ne_clean = clean;
 
 	pthread_mutex_lock (&ne_queue_mutex);
 	list_insert(&event_queue, ev);
--- cluster/rgmanager/src/daemons/rg_state.c	2006/09/27 12:26:17	1.21
+++ cluster/rgmanager/src/daemons/rg_state.c	2006/09/27 16:28:41	1.22
@@ -38,7 +38,7 @@
 
 int node_should_start_safe(uint32_t, cluster_member_list_t *, char *);
 
-uint32_t next_node_id(cluster_member_list_t *membership, uint32_t me);
+int next_node_id(cluster_member_list_t *membership, int me);
 
 int rg_exec_script(char *rgname, char *script, char *action);
 static int _svc_stop_finish(char *svcName, int failed, uint32_t newstate);
@@ -50,11 +50,11 @@
 int group_migratory(char *servicename);
 
 
-uint32_t
-next_node_id(cluster_member_list_t *membership, uint32_t me)
+int 
+next_node_id(cluster_member_list_t *membership, int me)
 {
-	uint32_t low = (uint32_t)(-1);
-	uint32_t next = me, curr;
+	int low = (int)(-1);
+	int next = me, curr;
 	int x;
 
 	for (x = 0; x < membership->cml_count; x++) {
--- cluster/rgmanager/src/utils/clustat.c	2006/09/01 19:02:22	1.20
+++ cluster/rgmanager/src/utils/clustat.c	2006/09/27 16:28:41	1.21
@@ -243,7 +243,7 @@
 
 		m = memb_name_to_p(these, all->cml_members[x].cn_name);
 
-		if (m) {
+		if (m && m->cn_member) {
 			all->cml_members[x].cn_nodeid = m->cn_nodeid;
 			all->cml_members[x].cn_member |= flag;
 		}
@@ -299,7 +299,7 @@
 
 
 char *
-my_memb_id_to_name(cluster_member_list_t *members, uint64_t memb_id)
+my_memb_id_to_name(cluster_member_list_t *members, int memb_id)
 {
 	int x;
 
@@ -637,6 +637,7 @@
 	/* Grab the local node ID and flag it from the list of reported
 	   online nodes */
 	*lid = get_my_nodeid(ch);
+	/* */
 	for (x=0; x<all->cml_count; x++) {
 		if (all->cml_members[x].cn_nodeid == *lid) {
 			m = &all->cml_members[x];
@@ -677,7 +678,7 @@
 	int local_node_id;
 	int fast = 0;
 	int runtype = 0;
-	cman_handle_t ch;
+	cman_handle_t ch = NULL;
 
 	int refresh_sec = 0, errors = 0;
 	int opt, xml = 0, flags = 0;




More information about the Cluster-devel mailing list