[Cluster-devel] cluster/rgmanager ChangeLog TODO include/res-o ...

lhh at sourceware.org lhh at sourceware.org
Tue Dec 18 17:53:00 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-12-18 17:52:56

Modified files:
	rgmanager      : ChangeLog TODO 
	rgmanager/include: res-ocf.h resgroup.h reslist.h 
	                   restart_counter.h rg_locks.h rg_queue.h 
	rgmanager/src/clulib: Makefile members.c rg_strings.c vft.c 
	rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c 
	                       resrules.c rg_event.c rg_forward.c 
	                       rg_state.c rg_thread.c test.c 
	rgmanager/src/resources: default_event_script.sl service.sh 
	rgmanager/src/utils: clustat.c 
Added files:
	rgmanager      : event-script.txt 
	rgmanager/include: ds.h event.h sets.h 
	rgmanager/src/clulib: sets.c 
	rgmanager/src/daemons: event_config.c service_op.c slang_event.c 

Log message:
	Merge RIND 0.8.1 + bugfixes from HEAD to RHEL5 branch

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/event-script.txt.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.31&r2=1.31.2.32
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/TODO.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/ds.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/event.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/sets.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/res-ocf.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.16.2&r2=1.1.16.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.8&r2=1.15.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.7&r2=1.15.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/restart_counter.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.1&r2=1.2.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_queue.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6&r2=1.6.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/sets.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.10.2.3&r2=1.10.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/members.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.5.2.5&r2=1.5.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.17.2.4&r2=1.17.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/event_config.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/service_op.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/slang_event.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.3.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14.2.4&r2=1.14.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.11.2.1&r2=1.11.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.13&r2=1.25.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.10&r2=1.34.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.8&r2=1.16.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_event.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8.2.3&r2=1.8.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.14&r2=1.24.2.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.9&r2=1.15.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6.2.6&r2=1.6.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/default_event_script.sl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.8&r2=1.7.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.9&r2=1.25.2.10

/cvs/cluster/cluster/rgmanager/event-script.txt,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/event-script.txt
+++ -	2007-12-18 17:52:56.910998000 +0000
@@ -0,0 +1,306 @@
+TODO:
+* Return correct error codes to clusvcadm (currently it always returns
+  "Unknown")
+* Write glue for 'migrate' operations and migrate-enabled services
+
+Basic configuration specification:
+
+  <rm>
+    <events>
+      <event class="node"/>        <!-- all node events -->
+      <event class="node"
+             node="bar"/>     <!-- events concerning 'bar' -->
+      <event class="node"
+             node="foo"
+             node_state="up"/>     <!-- 'up' events for 'foo' -->
+      <event class="node"
+             node_id="3"
+             node_state="down"/>   <!-- 'down' events for node ID 3 -->
+
+          (note, all service ops and such deal with node ID, not
+           with node names)
+
+      <event class="service"/>     <!-- all service events-->
+      <event class="service"
+             service_name="A"/>    <!-- events concerning 'A' -->
+      <event class="service"
+             service_name="B"
+	     service_state="started"/> <!-- when 'B' is started... -->
+      <event class="service"
+             service_name="B"
+	     service_state="started"
+	     service_owner="3"/> <!-- when 'B' is started on node 3... -->
+
+      <event class="service"
+             priority="1"
+	     service_state="started"
+	     service_owner="3"/> <!-- when 'B' is started on node 3, do this
+				      before the other event handlers ... -->
+
+
+    </events>
+    ...
+  </rm>
+
+General globals available from all scripts:
+
+   node_self - local node ID
+   event_type - event class, either:
+       EVENT_NONE - unspecified / unknown
+       EVENT_NODE - node transition
+       EVENT_SERVICE - service transition
+       EVENT_USER - a user-generated request
+       EVENT_CONFIG - [NOT CONFIGURABLE]
+
+Node event globals (i.e. when event_type == EVENT_NODE):
+  
+   node_id - node ID which is transitioning
+   node_name - name of node which is transitioning
+   node_state - new node state (NODE_ONLINE or NODE_OFFLINE, or if you prefer,
+                1 or 0, respectively)
+   node_clean - 0 if the node has not been fenced, 1 if the node has been
+                fenced
+
+Service event globals (i.e. when event_type == EVENT_SERVICE):
+
+   service_name - Name of service which transitioned
+   service_state - new state of service
+   service_owner - new owner of service (or <0 if service is no longer
+		   running)
+   service_last_owner - Last owner of service if known.  Used for when
+                   service_state = "recovering" generally, in order to
+                   apply restart/relocate/disable policy.
+
+User event globals (i.e. when event_type == EVENT_USER):
+
+   service_name - service to perform request upon
+   user_request - request to perform (USER_ENABLE, USER_DISABLE,
+                   USER_STOP, USER_RELOCATE, [TODO] USER_MIGRATE)
+   user_target - target node ID if applicable
+
+
+Scripting functions - Informational:
+
+  node_list = nodes_online();
+
+	Returns a list of all online nodes.
+
+  service_list = service_list();
+
+	Returns a list of all configured services.
+
+  (restarts, last_owner, owner, state) = service_status(service_name);
+
+	Returns the state, owner, last_owner, and restarts.  Note that
+	all return values are optional, but are right-justified per S-Lang
+	specification.  This means if you only want the 'state', you can use:
+	
+	(state) = service_status(service_name);
+
+	However, if you need the restart count, you must provide all four 
+	return values as above.
+
+  (nofailback, restricted, ordered, node_list) =
+		service_domain_info(service_name);
+
+	Returns the failover domain specification, if it exists, for the
+	specified service name.  The node list returned is an ordered list
+	according to priority levels.  In the case of unordered domains, 
+	the ordering of the returned list is pseudo-random.
+
+Scripting functions - Operational:
+
+  err = service_start(service_name, node_list, [avoid_list]);
+
+	Start a non-running, (but runnable, i.e. not failed)
+	service on the first node in node_list.  Failing that, start it on
+	the second node in node_list and so forth.  One may also specify
+	an avoid list, but it's better to just use the subtract() function
+	below.  If the start is successful, the node ID running the service
+	is returned.  If the start is unsuccessful, a value < 0 is returned.
+
+  err = service_stop(service_name, [0 = stop, 1 = disable]);
+
+	Stop a running service.  The second parameter is optional, and if
+	non-zero is specified, the service will enter the disabled state.
+
+  ... stuff that's not done but needs to be:
+
+  err = service_relocate(service_name, node_list);
+
+	Move a running service to the specified node_list in order of
+	preference.  In the case of VMs, this is actually a migrate-or-
+	relocate operation.
+
+Utility functions - Node list manipulation
+
+  node_list = union(left_node_list, right_node_list);
+
+	Calculates the union between the two node lists, removing duplicates
+	and preserving ordering according to left_node_list.  Any added
+	values from right_node_list will appear in their order, but
+	after left_node_list in the returned list.
+
+  node_list = intersection(left_node_list, right_node_list);
+
+	Calculates the intersection (items in both lists) between the two
+	node lists, removing duplicates and preserving ordering according
+	to left_node_list.  Any added values from right_node_list will
+	appear in their order, but after left_node_list in the returned list.
+
+  node_list = delta(left_node_list, right_node_list);
+
+	Calculates the delta (items not in both lists) between the two
+	node lists, removing duplicates and preserving ordering according
+	to left_node_list.  Any added values from right_node_list will
+	appear in their order, but after left_node_list in the returned list.
+
+  node_list = subtract(left_node_list, right_node_list);
+
+	Removes any duplicates as well as items specified in right_node_list
+	from left_node_list.  Example:
+
+	all_nodes = nodes_online();
+	allowed_nodes = subtract(all_nodes, node_to_avoid);
+
+Utility functions - Logging:
+
+  debug(item1, item2, ...);	LOG_DEBUG level
+  info(...);			LOG_INFO level
+  notice(...);			LOG_NOTICE level
+  warning(...);			LOG_WARNING level
+  err(...);			LOG_ERR level
+  crit(...);			LOG_CRIT level
+  alert(...);			LOG_ALERT level
+  emerg(...);			LOG_EMERG level
+
+	items - These can be strings, integer lists, or integers.  Logging
+		string lists is not supported.
+
+	level - the level is consistent with syslog(8)
+
+  stop_processing();
+
+	Calling this function will prevent further event scripts from being
+	executed on a particular event.  Call this function if, for example,
+	you do not wish for the default event handler to process the event.
+
+	Note: This does NOT terminate the caller script; that is, the
+	script being executed will run to completion.
+
+Event scripts are written in a language called S-Lang; documentation specifics
+about the language are available at http://www.s-lang.org
+
+Example script (creating a follows-but-avoid-after-start behavior):
+%
+% If the main queue server and replication queue server are on the same
+% node, relocate the replication server somewhere else if possible.
+%
+define my_sap_event_trigger()
+{
+	variable state, owner_rep, owner_main;
+	variable nodes, allowed;
+
+	%
+	% If this was a service event, don't execute the default event
+	% script trigger after this script completes.
+	%
+	if (event_type == EVENT_SERVICE) {
+		stop_processing();
+	}
+
+	(owner_main, state) = service_status("service:main_queue");
+	(owner_rep, state) = service_status("service:replication_server");
+
+	if ((event_type == EVENT_NODE) and (owner_main == node_id) and
+	    (node_state == NODE_OFFLINE) and (owner_rep >= 0)) {
+		%
+		% uh oh, the owner of the main server died.  Restart it
+		% on the node running the replication server
+		%
+		notice("Starting Main Queue Server on node ", owner_rep);
+		()=service_start("service:main_queue", owner_rep);
+		return;
+	}
+
+	%
+	% S-Lang doesn't short-circuit prior to 2.1.0
+	%
+	if ((owner_main >= 0) and
+	    ((owner_main == owner_rep) or (owner_rep < 0))) {
+
+		%
+		% Get all online nodes
+		%
+		nodes = nodes_online();
+
+		%
+		% Drop out the owner of the main server
+		%
+		allowed = subtract(nodes, owner_main);
+		if ((owner_rep >= 0) and (length(allowed) == 0)) {
+			%
+			% Only one node is online and the rep server is
+			% already running.  Don't do anything else.
+			%
+			return;
+		}
+
+		if ((length(allowed) == 0) and (owner_rep < 0)) {
+			%
+			% Only node online is the owner ... go ahead
+			% and start it, even though it doesn't increase
+			% availability to do so.
+			%
+			allowed = owner_main;
+		}
+
+		%
+		% Move the replication server off the node that is
+		% running the main server if a node's available.
+		%
+		if (owner_rep >= 0) {
+			()=service_stop("service:replication_server");
+		}
+		()=service_start("service:replication_server", allowed);
+	}
+
+	return;
+}
+
+my_sap_event_trigger();
+
+
+Relevant <rm> section from cluster.conf:
+
+        <rm central_processing="1">
+                <events>
+                        <event name="main-start" class="service"
+				service="service:main_queue"
+				service_state="started"
+				file="/tmp/sap.sl"/>
+                        <event name="rep-start" class="service"
+				service="service:replication_server"
+				service_state="started"
+				file="/tmp/sap.sl"/>
+                        <event name="node-up" node_state="up"
+				class="node"
+				file="/tmp/sap.sl"/>
+
+                </events>
+                <failoverdomains>
+                        <failoverdomain name="all" ordered="1" restricted="1">
+                                <failoverdomainnode name="molly"
+priority="2"/>
+                                <failoverdomainnode name="frederick"
+priority="1"/>
+                        </failoverdomain>
+                </failoverdomains>
+                <resources/>
+                <service name="main_queue"/>
+                <service name="replication_server" autostart="0"/>
+		<!-- replication server is started when main-server start
+		     event completes -->
+        </rm>
+
+
--- cluster/rgmanager/ChangeLog	2007/11/30 19:47:36	1.31.2.31
+++ cluster/rgmanager/ChangeLog	2007/12/18 17:52:55	1.31.2.32
@@ -1,3 +1,8 @@
+2007-12-18 Lon Hohberger <lhh at redhat.com>
+	* Merge RIND 0.8.1 + bugfixes from head branch to RHEL5
+	branch.  Must be explicitly enabled by administrators in
+	cluster.conf.  Migration (clusvcadm -M) not supported yet.
+
 2007-11-30 Lon Hohberger <lhh at redhat.com>
 	* src/resources/clusterfs.sh: Retry mount up to 3 times to avoid
 	race condition during another process mounting a GFS volume
--- cluster/rgmanager/TODO	2006/07/19 18:43:32	1.8
+++ cluster/rgmanager/TODO	2007/12/18 17:52:55	1.8.2.1
@@ -1,5 +0,0 @@
-* Make live-migration of resources work; preferrably so that admins
-can manually migrate Xen VMs to other nodes without telling the cluster
-about it.  That is, the cluster should be able to acquire running VMs
-and update its state accordingly.
-* Test against a working Xen build and shake out bugs
/cvs/cluster/cluster/rgmanager/include/ds.h,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/include/ds.h
+++ -	2007-12-18 17:52:57.216534000 +0000
@@ -0,0 +1,13 @@
+#ifndef _DS_H
+#define _DS_H
+
+int ds_init(void);
+int ds_key_init(char *keyid, int maxsize, int timeout);
+int ds_key_finish(char *keyid);
+int ds_write(char *keyid, void *buf, size_t maxlen);
+int ds_read(char *keyid, void *buf, size_t maxlen);
+int ds_finish(void);
+
+#define DS_MIN_SIZE 512
+
+#endif
/cvs/cluster/cluster/rgmanager/include/event.h,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/include/event.h
+++ -	2007-12-18 17:52:57.295789000 +0000
@@ -0,0 +1,145 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#ifndef _EVENT_H
+#define _EVENT_H
+
+/* 128 is a bit big, but it should be okay */
+typedef struct __rge_q {
+	char rg_name[128];
+	uint32_t rg_state;
+	uint32_t pad1;
+	int rg_owner;
+	int rg_last_owner;
+} group_event_t;
+
+typedef struct __ne_q {
+	int ne_local;
+	int ne_nodeid;
+	int ne_state;
+	int ne_clean;
+} node_event_t;
+
+typedef struct __cfg_q {
+	int cfg_version;
+	int cfg_oldversion;
+} config_event_t;
+
+typedef struct __user_q {
+	char u_name[128];
+	msgctx_t *u_ctx;
+	int u_request;
+	int u_arg1;
+	int u_arg2;
+	int u_target;		/* Node ID */
+} user_event_t;
+
+typedef enum {
+	EVENT_NONE=0,
+	EVENT_CONFIG,
+	EVENT_NODE,
+	EVENT_RG,
+	EVENT_USER
+} event_type_t;
+
+/* Data that's distributed which indicates which
+   node is the event master */
+typedef struct __rgm {
+	uint32_t m_magic;
+	uint32_t m_nodeid;
+	uint64_t m_master_time;
+	uint8_t  m_reserved[112];
+} event_master_t;
+
+#define swab_event_master_t(ptr) \
+{\
+	swab32((ptr)->m_nodeid);\
+	swab32((ptr)->m_magic);\
+	swab64((ptr)->m_master_time);\
+}
+
+/* Just a magic # to help us ensure we've got good
+   data from VF */
+#define EVENT_MASTER_MAGIC 0xfabab0de
+
+/* Event structure - internal to the event subsystem; use
+   the queueing functions below which allocate this struct
+   and pass it to the event handler */
+typedef struct _event {
+	/* Not used dynamically - part of config info */
+	list_head();
+	char *ev_name;
+	char *ev_script;
+	char *ev_script_file;
+	int ev_prio; 
+	int ev_pad;
+	/* --- end config part */
+	int ev_type;		/* config & generated by rgmanager*/
+	int ev_transaction;
+	union {
+		group_event_t group;
+		node_event_t node;
+		config_event_t config;
+		user_event_t user;
+	} ev;
+} event_t;
+
+#define EVENT_PRIO_COUNT 100
+
+typedef struct _event_table {
+	int max_prio;
+	int pad;
+	event_t *entries[0];
+} event_table_t;
+
+
+int construct_events(int ccsfd, event_table_t **);
+void deconstruct_events(event_table_t **);
+void print_events(event_table_t *);
+
+/* Does the event match a configured event? */
+int event_match(event_t *pattern, event_t *actual);
+
+/* Event queueing functions. */
+void node_event_q(int local, int nodeID, int state, int clean);
+void rg_event_q(char *name, uint32_t state, int owner, int last);
+void user_event_q(char *svc, int request, int arg1, int arg2,
+		  int target, msgctx_t *ctx);
+void config_event_q(int old_version, int new_version);
+
+/* Call this to see if there's a master. */
+int event_master_info_cached(event_master_t *);
+
+/* Call this to get the node ID of the current 
+   master *or* become the master if none exists */
+int event_master(void);
+
+/* Setup */
+int central_events_enabled(void);
+void set_central_events(int flag);
+int slang_process_event(event_table_t *event_table, event_t *ev);
+
+/* For distributed events. */
+void set_transition_throttling(int nsecs);
+
+/* Simplified service start. */
+int service_op_start(char *svcName, int *target_list, int target_list_len,
+		     int *new_owner);
+int service_op_stop(char *svcName, int do_disable, int event_type);
+
+
+#endif
/cvs/cluster/cluster/rgmanager/include/sets.h,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/include/sets.h
+++ -	2007-12-18 17:52:57.377956000 +0000
@@ -0,0 +1,39 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/**
+ @file sets.h - Header file for sets.c
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#ifndef _SETS_H
+#define _SETS_H
+
+/* #include <stdint.h> */
+typedef int set_type_t;
+
+int s_add(set_type_t *, int *, set_type_t);
+int s_union(set_type_t *, int, set_type_t *,
+	    int, set_type_t **, int *);
+
+int s_intersection(set_type_t *, int, set_type_t *,
+		   int, set_type_t **, int *);
+int s_delta(set_type_t *, int, set_type_t *,
+	    int, set_type_t **, int *);
+int s_subtract(set_type_t *, int, set_type_t *, int, set_type_t **, int *);
+int s_shuffle(set_type_t *, int);
+
+#endif
--- cluster/rgmanager/include/res-ocf.h	2007/07/31 17:54:54	1.1.16.2
+++ cluster/rgmanager/include/res-ocf.h	2007/12/18 17:52:56	1.1.16.3
@@ -31,6 +31,7 @@
 #define OCF_RESOURCE_INSTANCE_STR "OCF_RESOURCE_INSTANCE"
 #define OCF_CHECK_LEVEL_STR "OCF_CHECK_LEVEL"
 #define OCF_RESOURCE_TYPE_STR "OCF_RESOURCE_TYPE"
+#define OCF_RECFNT_STR "OCF_RESKEY_RGMANAGER_meta_refcnt"
 
 /*
    LSB return codes 
--- cluster/rgmanager/include/resgroup.h	2007/06/29 19:22:11	1.15.2.8
+++ cluster/rgmanager/include/resgroup.h	2007/12/18 17:52:56	1.15.2.9
@@ -50,9 +50,16 @@
 
 
 #define RG_PORT    177
+
+/* Constants moved to src/clulib/constants.c */
+/* DO NOT EDIT */
 #define RG_MAGIC   0x11398fed
 
 #define RG_ACTION_REQUEST	/* Message header */ 0x138582
+/* Argument to RG_ACTION_REQUEST */
+#define RG_ACTION_MASTER	0xfe0db143
+#define RG_ACTION_USER		0x3f173bfd
+/* */
 #define RG_EVENT		0x138583
 
 /* Requests */
@@ -109,6 +116,7 @@
 #define DEFAULT_CHECK_INTERVAL		10
 
 const char *rg_state_str(int val);
+int rg_state_str_to_id(const char *val);
 const char *agent_op_str(int val);
 
 int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -118,19 +126,22 @@
 int group_op(char *rgname, int op);
 void rg_init(void);
 
-/* FOOM */
+/* Basic service operations */
 int svc_start(char *svcName, int req);
 int svc_stop(char *svcName, int error);
 int svc_status(char *svcName);
+int svc_status_inquiry(char *svcName);
 int svc_disable(char *svcName);
 int svc_fail(char *svcName);
 int svc_migrate(char *svcName, int target);
+int check_restart(char *svcName);
+
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *resp_ctx,
        		       int max, uint32_t target, int arg0, int arg1);
 
 void send_response(int ret, int node, request_t *req);
-void send_ret(msgctx_t *ctx, char *name, int ret, int req);
+void send_ret(msgctx_t *ctx, char *name, int ret, int req, int newowner);
 
 /* do this op on all resource groups.  The handler for the request 
    will sort out whether or not it's a valid request given the state */
@@ -141,6 +152,7 @@
 /* from rg_state.c */
 int set_rg_state(char *name, rg_state_t *svcblk);
 int get_rg_state(char *servicename, rg_state_t *svcblk);
+int get_rg_state_local(char *servicename, rg_state_t *svcblk);
 uint32_t best_target_node(cluster_member_list_t *allowed, uint32_t owner,
 			  char *rg_name, int lock);
 
@@ -165,6 +177,14 @@
 int my_id(void);
 
 /* Return codes */
+#define RG_EEXCL	-16		/* Service not runnable due to
+					   the fact that it is tagged 
+					   exclusive and there are no
+					   empty nodes. */
+#define RG_EDOMAIN	-15		/* Service not runnable given the
+					   set of nodes and its failover
+					   domain */
+#define RG_ESCRIPT	-14		/* S/Lang script failed */
 #define RG_EFENCE	-13		/* Fencing operation pending */
 #define RG_ENODE	-12		/* Node is dead/nonexistent */
 //#define RG_EFROZEN    -11		/* Forward compat. with -HEAD */
@@ -182,6 +202,7 @@
 #define RG_YES		1
 #define RG_NO		2
 
+
 const char *rg_strerror(int val);
 
 
--- cluster/rgmanager/include/reslist.h	2007/11/26 21:46:26	1.15.2.7
+++ cluster/rgmanager/include/reslist.h	2007/12/18 17:52:56	1.15.2.8
@@ -139,7 +139,7 @@
 	list_head();
 	char	*fdn_name;
 	int	fdn_prio;
-	int	_pad_; /* align */
+	int	fdn_nodeid; /* on rhel4 this will be 64-bit int */
 } fod_node_t;
 
 typedef struct _fod {
@@ -202,6 +202,8 @@
 void print_domains(fod_t **domains);
 int node_should_start(int nodeid, cluster_member_list_t *membership,
 		      char *rg_name, fod_t **domains);
+int node_domain_set(fod_t *domain, int **ret, int *retlen);
+int node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags);
 
 
 /*
@@ -210,6 +212,7 @@
 resource_t *find_resource_by_ref(resource_t **reslist, char *type, char *ref);
 resource_t *find_root_by_ref(resource_t **reslist, char *ref);
 resource_rule_t *find_rule_by_type(resource_rule_t **rulelist, char *type);
+void res_build_name(char *, size_t, resource_t *);
 
 /*
    Internal functions; shouldn't be needed.
--- cluster/rgmanager/include/restart_counter.h	2007/11/26 21:46:26	1.1.2.1
+++ cluster/rgmanager/include/restart_counter.h	2007/12/18 17:52:56	1.1.2.2
@@ -1,3 +1,22 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/* Time-based restart counters for rgmanager */
+
 #ifndef _RESTART_COUNTER_H
 #define _RESTART_COUNTER_H
 
--- cluster/rgmanager/include/rg_locks.h	2006/12/18 21:48:48	1.2.2.1
+++ cluster/rgmanager/include/rg_locks.h	2007/12/18 17:52:56	1.2.2.2
@@ -1,3 +1,20 @@
+/*
+  Copyright Red Hat, Inc. 2004-2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
 #ifndef __RG_LOCKS_H
 #define __RG_LOCKS_H
 
--- cluster/rgmanager/include/rg_queue.h	2006/07/19 18:43:32	1.6
+++ cluster/rgmanager/include/rg_queue.h	2007/12/18 17:52:56	1.6.2.1
@@ -1,3 +1,20 @@
+/*
+  Copyright Red Hat, Inc. 2004-2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
 #ifndef _RG_QUEUE_H
 #define _RG_QUEUE_H
 #include <list.h>
@@ -19,7 +36,7 @@
 	uint32_t	rr_target;		/** Target node */
 	uint32_t	rr_arg0;		/** Integer argument */
 	uint32_t	rr_arg1;		/** Integer argument */
-	uint32_t	rr_arg3;		/** Integer argument */
+	uint32_t	rr_arg2;		/** Integer argument */
 	uint32_t	rr_line;		/** Line no */
 	msgctx_t *	rr_resp_ctx;		/** FD to send response */
 	char 		*rr_file;		/** Who made req */
@@ -42,5 +59,7 @@
 void rq_free(request_t *foo);
 
 void forward_request(request_t *req);
+void forward_message(msgctx_t *ctx, void *msg, int nodeid);
+
 
 #endif
/cvs/cluster/cluster/rgmanager/src/clulib/sets.c,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/src/clulib/sets.c
+++ -	2007-12-18 17:52:58.080513000 +0000
@@ -0,0 +1,370 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/**
+ @file sets.c - Order-preserving set functions (union / intersection / delta)
+                (designed for integer types; a la int, uint64_t, etc...)
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sets.h>
+#include <sys/time.h>
+
+
+/**
+ Append a value to a set unless it is already a member.  The caller must have
+ allocated room for at least one more element in s; this function performs no
+ error or bounds checking.
+
+ @param s		Set to modify
+ @param curlen		Number of elements in use (incremented on insert)
+ @param val		Value to add
+ @return		0 if val was already present, 1 if it was appended
+ */
+int
+s_add(set_type_t *s, int *curlen, set_type_t val)
+{
+	int pos, used = *curlen;
+
+	for (pos = 0; pos < used; pos++)
+		if (s[pos] == val)
+			return 0;	/* already a member */
+	s[used] = val;
+	*curlen = used + 1;
+	return 1;
+}
+
+
+/**
+ Order-preserving union.  Builds a newly allocated set holding every distinct
+ value of 'left' followed by every value of 'right' not already present.
+ Values repeated within one input are stored only once.
+
+ @param left		Left set - its element order is kept in the result.
+ @param ll		Length of left set.
+ @param right		Right set - its new values are appended after the
+			left set's.
+ @param rl		Length of right set
+ @param ret		Receives the malloc()ed result; must * not * be
+			preallocated.
+ @param retl		Receives the number of elements stored in *ret
+ @return 		0 on success, -1 if allocation fails (*retl is then
+			left untouched)
+ */
+int
+s_union(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int i, used = 0;
+	int capacity = ll + rl;	/* worst case: the sets are disjoint */
+	set_type_t *out;
+
+	out = malloc(sizeof(set_type_t)*capacity);
+	*ret = out;
+	if (!out) {
+		return -1;
+	}
+	memset((void *)out, 0, sizeof(set_type_t)*capacity);
+
+	/* Left-hand values go first so their ordering is preserved */
+	for (i = 0; i < ll; i++)
+		s_add(out, &used, left[i]);
+
+	/* Then whatever the right-hand set contributes on top of that */
+	for (i = 0; i < rl; i++)
+		s_add(out, &used, right[i]);
+
+	*retl = used;
+
+	return 0;
+}
+
+
+/**
+ Order-preserving intersection.  Builds a newly allocated set holding each
+ distinct value of 'left' that also occurs in 'right', in the order those
+ values first appear in 'left'.
+
+ @param left		Left set - its element order is kept in the result.
+ @param ll		Length of left set; also the capacity allocated for
+			the result.
+ @param right		Right set - only consulted for membership.
+ @param rl		Length of right set
+ @param ret		Receives the malloc()ed result; must * not * be
+			preallocated.
+ @param retl		Receives the number of elements stored in *ret
+ @return 		0 on success, -1 if allocation fails (*retl is then
+			left untouched)
+ */
+int
+s_intersection(set_type_t *left, int ll, set_type_t *right, int rl,
+	       set_type_t **ret, int *retl)
+{
+	int li, ri, used = 0;
+	int capacity = ll;	/* result is a subset of the left set */
+	set_type_t *out;
+
+	out = malloc(sizeof(set_type_t)*capacity);
+	*ret = out;
+	if (!out)
+		return -1;
+	memset((void *)out, 0, sizeof(set_type_t)*capacity);
+
+	for (li = 0; li < ll; li++) {
+		for (ri = 0; ri < rl; ri++) {
+			/* Done with this left value once s_add() stores it;
+			   a 0 return (already stored) keeps the scan going,
+			   exactly as a mismatch does. */
+			if (left[li] == right[ri] &&
+			    s_add(out, &used, right[ri]))
+				break;
+		}
+	}
+
+	*retl = used;
+
+	return 0;
+}
+
+
+/**
+ Order-preserving delta (symmetric difference).  Builds a newly allocated set
+ holding the values found in exactly one of 'left' and 'right': first those
+ only in 'left', then those only in 'right'.  Values repeated within one
+ input are stored only once.
+
+ @param left		Left set - its surviving values come first, in their
+			original order.
+ @param ll		Length of left set.
+ @param right		Right set - its surviving values follow, in their
+			original order.
+ @param rl		Length of right set
+ @param ret		Receives the malloc()ed result; must * not * be
+			preallocated.
+ @param retl		Receives the number of elements stored in *ret
+ @return 		0 on success, -1 if allocation fails (*retl is then
+			left untouched)
+ */
+int
+s_delta(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int li, ri, used = 0;
+	int capacity = ll + rl;	/* worst case: the sets are disjoint */
+	set_type_t *out;
+
+	out = malloc(sizeof(set_type_t)*capacity);
+	*ret = out;
+	if (!out) {
+		return -1;
+	}
+	memset((void *)out, 0, sizeof(set_type_t)*capacity);
+
+	/*
+	 * Not efficient (quadratic), but simple.
+	 *
+	 * Pass 1: keep each left-hand value that has no match on the right.
+	 * The inner scan stops early on a match, so ending it with ri >= rl
+	 * means the value was not found.  s_add() additionally drops values
+	 * that are repeated within the left set.
+	 */
+	for (li = 0; li < ll; li++) {
+		ri = 0;
+		while (ri < rl && right[ri] != left[li])
+			ri++;
+
+		if (ri >= rl)
+			s_add(out, &used, left[li]);
+	}
+
+
+	/*
+	 * Pass 2: the mirror image - keep each right-hand value that has no
+	 * match on the left.  These land after the left-hand survivors.
+	 */
+	for (ri = 0; ri < rl; ri++) {
+		li = 0;
+		while (li < ll && right[ri] != left[li])
+			li++;
+
+		if (li >= ll)
+			s_add(out, &used, right[ri]);
+	}
+
+	*retl = used;
+
+	return 0;
+}
+
+
+/**
+ Order-preserving subtraction.  Builds a newly allocated set holding each
+ distinct value of 'left' that does not occur in 'right'.
+
+ @param left		Left set - its element order is kept in the result.
+ @param ll		Length of left set; also the capacity allocated for
+			the result.
+ @param right		Right set - the values to remove; only consulted for
+			membership.
+ @param rl		Length of right set
+ @param ret		Receives the malloc()ed result; must * not * be
+			preallocated.
+ @param retl		Receives the number of elements stored in *ret
+ @return 		0 on success, -1 if allocation fails (*retl is then
+			left untouched)
+ */
+int
+s_subtract(set_type_t *left, int ll, set_type_t *right, int rl,
+	   set_type_t **ret, int *retl)
+{
+	int li, ri, used = 0;
+	int capacity = ll;	/* result is a subset of the left set */
+	set_type_t *out;
+
+	out = malloc(sizeof(set_type_t)*capacity);
+	*ret = out;
+	if (!out) {
+		return -1;
+	}
+	memset((void *)out, 0, sizeof(set_type_t)*capacity);
+
+	/*
+	 * Not efficient (quadratic), but simple: scan the right set for each
+	 * left-hand value.  The scan stops early on a match, so ending it
+	 * with ri >= rl means the value survives the subtraction.
+	 */
+	for (li = 0; li < ll; li++) {
+		ri = 0;
+		while (ri < rl && right[ri] != left[li])
+			ri++;
+
+		if (ri >= rl) {
+			s_add(out, &used, left[li]);
+		}
+	}
+
+	*retl = used;
+
+	return 0;
+}
+
+
+/**
+ Shuffle-set function.  Weakly randomizes the ordering of a set in-place,
+ using a generator seeded from the current time's microseconds.
+ @param set		Set to randomize
+ @param sl		Length of set
+ @return		0
+ */
+int
+s_shuffle(set_type_t *set, int sl)
+{
+	struct timeval now;
+	unsigned seed = 0;
+	set_type_t held;
+	int pos, other;
+
+	gettimeofday(&now, NULL);
+	seed = (int)(now.tv_usec);
+
+	for (pos = 0; pos < sl; pos++) {
+		other = (rand_r(&seed) % sl);
+		if (other != pos) {
+			held = set[pos];
+			set[pos] = set[other];
+			set[other] = held;
+		}
+	}
+
+	return 0;
+}
+
+
+#ifdef STANDALONE
+/* Testbed: runs each set operation on two fixed arrays and prints results */
+/*
+  gcc -o sets sets.c -DSTANDALONE -ggdb -I../../include \
+       -Wall -Werror -Wstrict-prototypes -Wextra
+ */
+int
+main(int __attribute__ ((unused)) argc, char __attribute__ ((unused)) **argv)
+{
+	set_type_t a[] = { 1, 2, 3, 3, 3, 2, 2, 3 };	/* has repeats */
+	set_type_t b[] = { 2, 3, 4 };
+	set_type_t *i;	/* result of the current operation; freed each time */
+	int ilen = 0, x;
+
+	s_union(a, 8, b, 3, &i, &ilen);
+
+	/* Should return length of 4 - { 1 2 3 4 } */
+	printf("set_union [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	s_shuffle(i, ilen);	/* same values, order depends on the clock */
+	printf("shuffled [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+
+	free(i);
+
+	/* Should return length of 2 - { 2 3 } */
+	s_intersection(a, 8, b, 3, &i, &ilen);
+
+	printf("set_intersection [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+	/* Should return length of 2 - { 1 4 } */
+	s_delta(a, 8, b, 3, &i, &ilen);
+
+	printf("set_delta [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+	/* Should return length of 1 - { 1 } */
+	s_subtract(a, 8, b, 3, &i, &ilen);
+
+	printf("set_subtract [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+
+	return 0;	/* NOTE(review): the s_*() return codes are not checked */
+}
+#endif
--- cluster/rgmanager/src/clulib/Makefile	2007/07/24 13:53:08	1.10.2.3
+++ cluster/rgmanager/src/clulib/Makefile	2007/12/18 17:52:56	1.10.2.4
@@ -34,7 +34,7 @@
 libclulib.a: clulog.o daemon_init.o signals.o msgsimple.o \
 		gettid.o rg_strings.o message.o members.o fdops.o \
 		lock.o cman.o vft.o msg_cluster.o msg_socket.o \
-		wrap_lock.o tmgr.o
+		wrap_lock.o tmgr.o sets.o constants.o
 	${AR} cru $@ $^
 	ranlib $@
 
--- cluster/rgmanager/src/clulib/members.c	2006/09/27 16:28:41	1.4
+++ cluster/rgmanager/src/clulib/members.c	2007/12/18 17:52:56	1.4.2.1
@@ -233,6 +233,50 @@
 
 
 int
+member_low_id(void)
+{
+	int x, low = -1;
+
+	/*
+	 * Return the lowest node ID among entries flagged cn_member, or -1
+	 * if there is no membership list or no such entry.  The list is only
+	 * read here, so a shared (read) lock suffices.
+	 */
+	pthread_rwlock_rdlock(&memblock);
+	for (x = 0; membership && x < membership->cml_count; x++) {
+		if ((membership->cml_members[x].cn_member) &&
+		    ((membership->cml_members[x].cn_nodeid < low) || (low == -1)))
+			low = membership->cml_members[x].cn_nodeid;
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return low;
+}
+
+
+int
+member_high_id(void)
+{
+	int x, high = -1;
+
+	/*
+	 * Return the highest node ID among entries flagged cn_member, or -1
+	 * if there is no membership list or no such entry.  The list is only
+	 * read here, so a shared (read) lock suffices.
+	 */
+	pthread_rwlock_rdlock(&memblock);
+	for (x = 0; membership && x < membership->cml_count; x++) {
+		if (membership->cml_members[x].cn_member &&
+		    (membership->cml_members[x].cn_nodeid > high))
+			high = membership->cml_members[x].cn_nodeid;
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return high;
+}
+
+
+int
 member_online(int nodeid)
 {
 	int x = 0, ret = 0;
--- cluster/rgmanager/src/clulib/rg_strings.c	2007/07/31 17:54:54	1.5.2.5
+++ cluster/rgmanager/src/clulib/rg_strings.c	2007/12/18 17:52:56	1.5.2.6
@@ -26,6 +26,9 @@
 
 
 const struct string_val rg_error_strings[] = {
+	{ RG_EEXCL,	"Service not runnable: cannot run exclusive" },
+	{ RG_EDOMAIN,   "Service not runnable: restricted failover domain offline" },
+	{ RG_ESCRIPT,   "S/Lang Script Error" },
 	{ RG_EFENCE,    "Fencing operation pending; try again later" },
 	{ RG_ENODE,     "Target node dead / nonexistent" },
 	{ RG_ERUN,      "Service is already running" },
@@ -126,6 +129,21 @@
 }
 
 
+/* Case-insensitive reverse lookup: the val whose str equals 'val', or -1. */
+static inline int
+rg_search_table_by_str(const struct string_val *table, const char *val)
+{
+	const struct string_val *ent;
+
+	/* The table ends at the first entry whose str is NULL */
+	for (ent = table; ent->str != NULL; ent++) {
+		if (strcasecmp(ent->str, val) == 0)
+			return ent->val;
+	}
+
+	return -1;
+}
+
 const char *
 rg_strerror(int val)
 {
@@ -139,6 +157,14 @@
 }
 
 
+/* Map a state name to its numeric ID via rg_state_strings (compared without
+   regard to case); returns -1 when the name is not in the table. */
+int
+rg_state_str_to_id(const char *val)
+{
+	return rg_search_table_by_str(rg_state_strings, val);
+}
+
 const char *
 rg_req_str(int val)
 {
--- cluster/rgmanager/src/clulib/vft.c	2007/11/14 16:56:50	1.17.2.4
+++ cluster/rgmanager/src/clulib/vft.c	2007/12/18 17:52:56	1.17.2.5
@@ -1734,55 +1734,52 @@
 		}
 		msg_close(&ctx);
 		msg = (vf_msg_t *)gh;
-		break;
-	}
-
-	if (x >= membership->cml_count)
-		return VFR_ERROR;
-
-	/* Uh oh */
-	if (!msg || (msg == &rmsg)) {
-		printf("VF: No valid message\n");
-		return VFR_ERROR;
-	}
-		
-	swab_generic_msg_hdr(&(msg->vm_hdr));
-	if (msg->vm_hdr.gh_command == VF_NACK) {
-		free(msg);
-		return VFR_NODATA;
-	}
 
-	if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) {
-		fprintf(stderr, "VF: Short reply from %d\n", x);
-		free(msg);
-		return VFR_ERROR;
-	}
-
-	if (msg->vm_hdr.gh_length > n) {
-		fprintf(stderr,"VF: Size mismatch during decode (%d > %d)\n",
-			msg->vm_hdr.gh_length, n);
-		free(msg);
-		return VFR_ERROR;
-	}
+		/* Uh oh */
+		if (!msg || (msg == &rmsg)) {
+			printf("VF: No valid message\n");
+			return VFR_ERROR;
+		}
+		swab_generic_msg_hdr(&(msg->vm_hdr));
+		if (msg->vm_hdr.gh_command == VF_NACK) {
+			free(msg);
+			continue;
+		}
+		if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) {
+			fprintf(stderr, "VF: Short reply from %d\n", x);
+			free(msg);
+			continue;
+		}
+		if (msg->vm_hdr.gh_length > n) {
+			fprintf(stderr,
+				"VF: Size mismatch during decode (%d > %d)\n",
+				msg->vm_hdr.gh_length, n);
+			free(msg);
+			continue;
+		}
 
-	swab_vf_msg_info_t(&(msg->vm_msg));
+		swab_vf_msg_info_t(&(msg->vm_msg));
 
-	if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) {
-		fprintf(stderr,"VF: Size mismatch during decode (\n");
-		free(msg);
-		return VFR_ERROR;
-	}
+		if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) {
+			fprintf(stderr,"VF: Size mismatch during decode (\n");
+			free(msg);
+			continue;
+		}
 
-	if (vf_set_current(keyid, msg->vm_msg.vf_view,
+		/* Ok... we've got data! */
+		if (vf_set_current(keyid, msg->vm_msg.vf_view,
 			   msg->vm_msg.vf_data,
 			   msg->vm_msg.vf_datalen) == VFR_ERROR) {
+			free(msg);
+			return VFR_ERROR;
+		}
+
 		free(msg);
-		return VFR_ERROR;
-	}
 
-	free(msg);
+		return VFR_OK;
+	}
 
-	return VFR_OK;
+	return VFR_NODATA;
 }
 
 
/cvs/cluster/cluster/rgmanager/src/daemons/event_config.c,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/src/daemons/event_config.c
+++ -	2007-12-18 17:52:58.662036000 +0000
@@ -0,0 +1,541 @@
+/**
+  Copyright Red Hat, Inc. 2002-2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge,
+  MA 02139, USA.
+*/
+/** @file
+ * CCS event parsing, based on failover domain parsing
+ */
+#include <string.h>
+#include <list.h>
+#include <clulog.h>
+#include <resgroup.h>
+#include <restart_counter.h>
+#include <reslist.h>
+#include <ccs.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <members.h>
+#include <reslist.h>
+#include <ctype.h>
+#include <event.h>
+
+#define CONFIG_NODE_ID_TO_NAME \
+   "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]/@name"
+#define CONFIG_NODE_NAME_TO_ID \
+   "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid"
+
+void deconstruct_events(event_table_t **);
+void print_event(event_t *ev);
+
+//#define DEBUG
+
+#ifdef DEBUG
+#define ENTER() clulog(LOG_DEBUG, "ENTER: %s\n", __FUNCTION__)
+#define RETURN(val) {\
+	clulog(LOG_DEBUG, "RETURN: %s line=%d value=%d\n", __FUNCTION__, \
+	       __LINE__, (val));\
+	return(val);\
+}
+#else
+#define ENTER()
+#define RETURN(val) return(val)
+#endif
+
+#ifdef NO_CCS
+#define ccs_get(fd, query, ret) conf_get(query, ret)
+#endif
+
+/*
+   <events>
+     <event name="helpful_name_here" class="node"
+            node="nodename"|node_id="nodeid" node_state="up|down">
+	    slang_script_stuff();
+	    start_service();
+     </event>
+   </events>
+ */
+int	/* 1 if 'actual' satisfies 'pattern', 0 otherwise */
+event_match(event_t *pattern, event_t *actual)
+{
+	if (pattern->ev_type != EVENT_NONE &&
+	    actual->ev_type != pattern->ev_type)
+		return 0;	/* pattern names a class and it differs */
+
+	/* If there's no event class specified, the rest is
+	   irrelevant */
+	if (pattern->ev_type == EVENT_NONE)
+		return 1;
+
+	switch(pattern->ev_type) {
+	case EVENT_NODE:	/* a negative pattern field is a wildcard */
+		if (pattern->ev.node.ne_nodeid >= 0 &&
+		    actual->ev.node.ne_nodeid !=
+				pattern->ev.node.ne_nodeid) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_local >= 0 && 
+		    actual->ev.node.ne_local !=
+				pattern->ev.node.ne_local) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_state >= 0 && 
+		    actual->ev.node.ne_state !=
+				pattern->ev.node.ne_state) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_clean >= 0 && 
+		    actual->ev.node.ne_clean !=
+				pattern->ev.node.ne_clean) {
+			return 0;
+		}
+		return 1; /* All specified params match */
+	case EVENT_RG:	/* wildcards: empty name, (uint32_t)-1 state, owner < 0 */
+		if (pattern->ev.group.rg_name[0] &&
+		    strcasecmp(actual->ev.group.rg_name, 
+			       pattern->ev.group.rg_name)) {
+			return 0;	/* names differ (case ignored) */
+		}
+		if (pattern->ev.group.rg_state != (uint32_t)-1 && 
+		    actual->ev.group.rg_state !=
+				pattern->ev.group.rg_state) {
+			return 0;
+		}
+		if (pattern->ev.group.rg_owner >= 0 && 
+		    actual->ev.group.rg_owner !=
+				pattern->ev.group.rg_owner) {
+			return 0;
+		}
+		return 1;
+	case EVENT_CONFIG:	/* a negative pattern version is a wildcard */
+		if (pattern->ev.config.cfg_version >= 0 && 
+		    actual->ev.config.cfg_version !=
+				pattern->ev.config.cfg_version) {
+			return 0;
+		}
+		if (pattern->ev.config.cfg_oldversion >= 0 && 
+		    actual->ev.config.cfg_oldversion !=
+				pattern->ev.config.cfg_oldversion) {
+			return 0;
+		}
+		return 1;
+	case EVENT_USER:	/* wildcards: empty name, zero request/target */
+		if (pattern->ev.user.u_name[0] &&
+		    strcasecmp(actual->ev.user.u_name, 
+			       pattern->ev.user.u_name)) {
+			return 0;	/* names differ (case ignored) */
+		}
+		if (pattern->ev.user.u_request != 0 && 
+		    actual->ev.user.u_request !=
+				pattern->ev.user.u_request) {
+			return 0;
+		}
+		if (pattern->ev.user.u_target != 0 && 
+		    actual->ev.user.u_target !=
+				pattern->ev.user.u_target) {
+			return 0;
+		}
+		return 1;
+	default:	/* a pattern class not handled above never matches */
+		break;
+	}
+			
+	return 0;
+}
+
+/* Look up a node's name by ID in the config.  Returns the string handed
+   back by ccs_get() (NULL on failure) - presumably the caller's to free(). */
+char *
+ccs_node_id_to_name(int ccsfd, int nodeid)
+{
+	char query[256];
+	char *name = 0;
+
+	snprintf(query, sizeof(query), CONFIG_NODE_ID_TO_NAME, nodeid);
+
+	return (ccs_get(ccsfd, query, &name) == 0) ? name : NULL;
+}
+
+/* Look up a node's ID by name in the config; returns 0 if the lookup fails. */
+int
+ccs_node_name_to_id(int ccsfd, char *name)
+{
+	char query[256];
+	char *idstr = 0;
+	int nodeid;
+
+	snprintf(query, sizeof(query), CONFIG_NODE_NAME_TO_ID, name);
+	if (ccs_get(ccsfd, query, &idstr) != 0)
+		return 0;
+
+	nodeid = atoi(idstr);
+	free(idstr);
+	return nodeid;
+}
+
+/* Free an event and every string it owns (free(NULL) is a no-op). */
+static void 
+deconstruct_event(event_t *ev)
+{
+	free(ev->ev_script);
+	/* Allocated by get_event()/get_default_event(); used to be leaked */
+	free(ev->ev_script_file);
+	free(ev->ev_name);
+	free(ev);
+}
+
+/* Parse the node-class match attributes of the event at 'base' into ev. */
+static int
+get_node_event(int ccsfd, char *base, event_t *ev)
+{
+	char query[256], *val = NULL;
+	int named;
+
+	/* Start with every field at -1, i.e. "not specified" */
+	ev->ev.node.ne_nodeid = -1;
+	ev->ev.node.ne_local = -1;
+	ev->ev.node.ne_state = -1;
+	ev->ev.node.ne_clean = -1;
+
+	/* A numeric node_id is tried first, then a node name */
+	snprintf(query, sizeof(query), "%s/@node_id", base);
+	named = (ccs_get(ccsfd, query, &val) == 0);
+	if (named) {
+		ev->ev.node.ne_nodeid = atoi(val);
+	} else {
+		snprintf(query, sizeof(query), "%s/@node", base);
+		named = (ccs_get(ccsfd, query, &val) == 0);
+		if (named)
+			ev->ev.node.ne_nodeid =
+				ccs_node_name_to_id(ccsfd, val);
+	}
+	if (named) {
+		free(val);
+		if (ev->ev.node.ne_nodeid <= 0)
+			return -1;	/* a given node must resolve */
+	}
+
+	snprintf(query, sizeof(query), "%s/@node_state", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (strcasecmp(val, "up") == 0)
+			ev->ev.node.ne_state = 1;
+		else if (strcasecmp(val, "down") == 0)
+			ev->ev.node.ne_state = 0;
+		else
+			ev->ev.node.ne_state = !!atoi(val);
+		free(val);
+	}
+
+	snprintf(query, sizeof(query), "%s/@node_clean", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ev->ev.node.ne_clean = !!atoi(val);
+		free(val);
+	}
+
+	snprintf(query, sizeof(query), "%s/@node_local", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ev->ev.node.ne_local = !!atoi(val);
+		free(val);
+	}
+
+	return 0;
+}
+
+/* Parse the service-class match attributes of the event at 'base' into ev.
+   Returns 0 on success, -1 if a service attribute is present but empty. */
+static int
+get_rg_event(int ccsfd, char *base, event_t *ev)
+{
+	char xpath[256], *ret = NULL;
+
+	/* Clear out the possibilities; these values match anything */
+	ev->ev.group.rg_name[0] = 0;
+	ev->ev.group.rg_state = (uint32_t)-1;
+	ev->ev.group.rg_owner = -1;
+
+	snprintf(xpath, sizeof(xpath), "%s/@service", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		/* Unlike strncpy(), snprintf() always NUL-terminates */
+		snprintf(ev->ev.group.rg_name,
+			 sizeof(ev->ev.group.rg_name), "%s", ret);
+		free(ret);
+		if (!strlen(ev->ev.group.rg_name))
+			return -1;
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@service_state", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (!isdigit((unsigned char)ret[0]))
+			ev->ev.group.rg_state = rg_state_str_to_id(ret);
+		else
+			ev->ev.group.rg_state = atoi(ret);
+		free(ret);
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@service_owner", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		/* Owner by node name or by node ID.  The ID must be kept
+		   as-is: event_match() compares it to the actual owner. */
+		if (!isdigit((unsigned char)ret[0]))
+			ev->ev.group.rg_owner =
+				ccs_node_name_to_id(ccsfd, ret);
+		else
+			ev->ev.group.rg_owner = atoi(ret);
+		free(ret);
+	}
+
+	return 0;
+}
+
+/* Placeholder: config-class events are not parsed; fails with ENOSYS. */
+static int
+get_config_event(int __attribute__((unused)) ccsfd,
+		 char __attribute__((unused)) *base,
+		 event_t __attribute__((unused)) *ev)
+{
+	errno = ENOSYS;
+	return -1;
+}
+
+/*
+ * Read event number 'idx' (XPath index) below 'base' from the config.
+ * Returns a newly allocated event, or NULL.  *_done is set to 1 only when
+ * no event exists at that index, so a NULL with *_done == 0 means this one
+ * event was unusable and the caller may go on to the next index.
+ */
+static event_t *
+get_event(int ccsfd, char *base, int idx, int *_done)
+{
+	event_t *ev;
+	char xpath[256];
+	char *ret = NULL;
+
+	*_done = 0;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@name", base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) != 0) {
+		*_done = 1;
+		return NULL;
+	}
+
+	ev = malloc(sizeof(*ev));
+	if (!ev) {
+		free(ret);	/* the name has no owner yet; don't leak it */
+		return NULL;
+	}
+	memset(ev, 0, sizeof(*ev));
+	ev->ev_name = ret;
+
+	/* Get the script file / inline from config */
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@file", base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_script_file = ret;
+	} else {
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]", base, idx);
+		if (ccs_get(ccsfd, xpath, &ret) == 0) {
+			ev->ev_script = ret;
+		} else {
+			goto out_fail;
+		}
+	}
+
+	/* Get the priority ordering (must be nonzero) */
+	ev->ev_prio = 99;
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@priority", base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_prio = atoi(ret);
+		if (ev->ev_prio <= 0 || ev->ev_prio > EVENT_PRIO_COUNT) {
+			clulog(LOG_ERR,
+			       "event %s: priority %s invalid\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+		free(ret);
+	}
+
+	/* Get the event class */
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@class", base, idx);
+	ret = NULL;
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]", base, idx);
+		if (!strcasecmp(ret, "node")) {
+			ev->ev_type = EVENT_NODE;
+			if (get_node_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "service") ||
+			   !strcasecmp(ret, "resource") ||
+			   !strcasecmp(ret, "rg") ) {
+			ev->ev_type = EVENT_RG;
+			if (get_rg_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "config") ||
+			   !strcasecmp(ret, "reconfig")) {
+			ev->ev_type = EVENT_CONFIG;
+			if (get_config_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else {
+			clulog(LOG_ERR,
+			       "event %s: class %s unrecognized\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+
+		free(ret);
+		ret = NULL;
+	}
+
+	return ev;
+out_fail:
+	free(ret);
+	deconstruct_event(ev);
+	return NULL;
+}
+
+/* Build the catch-all event that runs the stock default_event_script.sl. */
+static event_t *
+get_default_event(void)
+{
+	char path[1024];
+	event_t *dflt = malloc(sizeof(*dflt));
+
+	if (!dflt)
+		return NULL;
+	memset(dflt, 0, sizeof(*dflt));
+
+	snprintf(path, sizeof(path), "%s/default_event_script.sl",
+		 RESOURCE_ROOTDIR);
+
+	/* EVENT_NONE: event_match() treats such a pattern as matching all */
+	dflt->ev_type = EVENT_NONE;
+	dflt->ev_prio = 100;
+	dflt->ev_name = strdup("Default");
+	dflt->ev_script_file = strdup(path);
+
+	if (dflt->ev_name && dflt->ev_script_file)
+		return dflt;
+
+	/* A strdup() failed: hand the partly built event back */
+	deconstruct_event(dflt);
+	return NULL;
+}
+
+
+/**
+ * Build the event table from the config; similar API to failover domain
+ */
+int
+construct_events(int ccsfd, event_table_t **events)
+{
+	char xpath[256];
+	event_t *ev;
+	event_table_t *table;
+	int idx = 1, done = 0;
+	size_t bytes = sizeof(event_table_t) +
+		       sizeof(event_t) * (EVENT_PRIO_COUNT+1);
+
+	table = malloc(bytes);
+	*events = table;
+	if (!table)
+		return -1;	/* the table itself could not be allocated */
+	memset(table, 0, bytes);
+	table->max_prio = EVENT_PRIO_COUNT;
+
+	snprintf(xpath, sizeof(xpath), RESOURCE_TREE_ROOT "/events");
+
+	while (!done) {	/* indices start at 1; unusable events are skipped */
+		ev = get_event(ccsfd, xpath, idx++, &done);
+		if (ev)
+			list_insert(&(table->entries[ev->ev_prio]), ev);
+	}
+
+	ev = get_default_event();
+	if (ev)
+		list_insert(&(table->entries[ev->ev_prio]), ev);
+
+	return 0;
+}
+
+/*
+ * Dump one event to stdout: its name, what it matches, and whether its
+ * script is inline or kept in a file.
+ */
+void
+print_event(event_t *ev)
+{
+	printf("  Name: %s\n", ev->ev_name);
+
+	if (ev->ev_type == EVENT_NODE) {
+		printf("    Node %d State %d\n", ev->ev.node.ne_nodeid,
+		       ev->ev.node.ne_state);
+	} else if (ev->ev_type == EVENT_RG) {
+		printf("    RG %s State %s\n", ev->ev.group.rg_name,
+		       rg_state_str(ev->ev.group.rg_state));
+	} else if (ev->ev_type == EVENT_CONFIG) {
+		printf("    Config change - unsupported\n");
+	} else {
+		/* Every other type, EVENT_NONE included, lands here */
+		printf("    (Any event)\n");
+	}
+
+	if (!ev->ev_script) {
+		printf("    File: %s\n", ev->ev_script_file);
+		return;
+	}
+
+	printf("    Inline script.\n");
+}
+
+/* Print every non-empty priority level of the table, lowest index first. */
+void
+print_events(event_table_t *events)
+{
+	int prio, n;
+	event_t *cur;
+
+	for (prio = 0; prio <= events->max_prio; prio++) {
+		if (events->entries[prio]) {
+			printf("Event Priority Level %d:\n", prio);
+			list_for(&(events->entries[prio]), cur, n) {
+				print_event(cur);
+			}
+		}
+	}
+}
+
+/* Free every event in the table, then the table; *eventsp becomes NULL. */
+void
+deconstruct_events(event_table_t **eventsp)
+{
+	event_table_t *table = *eventsp;
+	event_t *head;
+	int prio;
+
+	if (!table)
+		return;
+
+	for (prio = 0; prio <= table->max_prio; prio++) {
+		/* Keep popping the list head until the list is empty */
+		while ((head = table->entries[prio]) != NULL) {
+			list_remove(&(table->entries[prio]), head);
+			deconstruct_event(head);
+		}
+	}
+	free(table);
+	*eventsp = NULL;
+}
+
+
/cvs/cluster/cluster/rgmanager/src/daemons/service_op.c,v  -->  standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/service_op.c
+++ -	2007-12-18 17:52:58.744706000 +0000
@@ -0,0 +1,204 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#include <assert.h>
+#include <platform.h>
+#include <message.h>
+#include <members.h>
+#include <stdio.h>
+#include <string.h>
+#include <resgroup.h>
+#include <clulog.h>
+#include <lock.h>
+#include <rg_locks.h>
+#include <ccs.h>
+#include <rg_queue.h>
+#include <msgsimple.h>
+#include <res-ocf.h>
+#include <event.h>
+
+
+/*
+ * Send a message to the target node to start the service.
+ */
+int svc_start_remote(char *svcName, int request, uint32_t target);
+void svc_report_failure(char *);
+int get_service_state_internal(char *svcName, rg_state_t *svcStatus);
+
+/**
+ * Ask each node in target_list, in order, to start svcName; the first
+ * definitive answer ends the loop.  *new_owner (if non-NULL) gets the owner.
+ */
+int
+service_op_start(char *svcName,
+		 int *target_list,
+		 int target_list_len,
+		 int *new_owner)
+{
+	int target;
+	int ret, x;
+	int excl = 0, dep = 0, fail = 0;	/* per-reason refusal counts */
+	rg_state_t svcStatus;
+	
+	if (get_service_state_internal(svcName, &svcStatus) < 0) {
+		return RG_EFAIL;
+	}
+
+	if (svcStatus.rs_state == RG_STATE_FAILED ||
+	    svcStatus.rs_state == RG_STATE_UNINITIALIZED)
+		return RG_EINVAL;
+
+	for (x = 0; x < target_list_len; x++) {
+
+		target = target_list[x];
+		ret = svc_start_remote(svcName, RG_START_REMOTE,
+				       target);
+		switch (ret) {
+		case RG_ERUN:
+			/* Someone stole the service while we were
+			   trying to start it; report whoever owns it now */
+			get_rg_state_local(svcName, &svcStatus);
+			if (new_owner)
+				*new_owner = svcStatus.rs_owner;
+			return 0;
+		case RG_EEXCL:	/* refused: count it and try the next node */
+			++excl;
+			continue;
+		case RG_EDEPEND:
+			++dep;
+			continue;
+		case RG_EFAIL:
+			++fail;
+			continue;
+		case RG_EABORT:
+			svc_report_failure(svcName);
+			return RG_EFAIL;
+		default:
+			/* deliberate fallthrough into the RG_NO handling */
+			clulog(LOG_ERR,
+			       "#61: Invalid reply from member %d during"
+			       " start operation!\n", target);
+		case RG_NO:
+			/* state uncertain; note that 0 is returned here */
+			clulog(LOG_CRIT, "State Uncertain: svc:%s "
+			       "nid:%d req:%s ret:%d\n", svcName,
+			       target, rg_req_str(RG_START_REMOTE), ret);
+			return 0;
+		case 0:	/* started on 'target' */
+			if (new_owner)
+				*new_owner = target;
+			clulog(LOG_NOTICE, "Service %s is now running "
+			       "on member %d\n", svcName, (int)target);
+			return 0;
+		}
+	}
+
+	ret = RG_EFAIL;	/* every target refused; pick the common reason */
+	if (excl == target_list_len) 	/* NOTE(review): true for an empty list */
+		ret = RG_EEXCL;
+	else if (dep == target_list_len)
+		ret = RG_EDEPEND;
+
+	clulog(LOG_INFO, "Start failed; node reports: %d failures, "
+	       "%d exclusive, %d dependency errors\n", fail, excl, dep);
+	return ret;
+}
+
+/* Ask the service's owner (or the local node when it has none) to stop or
+   disable svcName.  Returns the peer's d_ret, RG_EFAIL if the state cannot
+   be read, -1 on connect/send failure, 0 if the reply has the wrong size. */
+int
+service_op_stop(char *svcName, int do_disable, int event_type)
+{
+	SmMessageSt msg;
+	int msg_ret;
+	msgctx_t ctx;
+	rg_state_t svcStatus;
+	int msgtarget = my_id();
+
+	/* Build the message header */
+	msg.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
+	msg.sm_hdr.gh_command = RG_ACTION_REQUEST;
+	msg.sm_hdr.gh_arg1 = RG_ACTION_MASTER; 
+	msg.sm_hdr.gh_length = sizeof (SmMessageSt);
+
+	msg.sm_data.d_action = ((!do_disable) ? RG_STOP:RG_DISABLE);
+
+	if (msg.sm_data.d_action == RG_STOP && event_type == EVENT_USER)
+		msg.sm_data.d_action = RG_STOP_USER;
+
+	/* Unlike strncpy(), snprintf() always NUL-terminates the copy */
+	snprintf(msg.sm_data.d_svcName, sizeof(msg.sm_data.d_svcName),
+		 "%s", svcName);
+	msg.sm_data.d_ret = 0;
+	msg.sm_data.d_svcOwner = 0;
+
+	/* Talk to the current owner if there is one, else to ourselves.
+	   XXX inefficient; should queue requests locally and immediately
+	   forward requests otherwise */
+	if (get_service_state_internal(svcName, &svcStatus) < 0)
+		return RG_EFAIL;
+	if (svcStatus.rs_owner > 0)
+		msgtarget = svcStatus.rs_owner;
+
+	if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2)< 0) {
+		clulog(LOG_ERR,
+		       "#58: Failed opening connection to member #%d\n",
+		       msgtarget);	/* the node we actually dialed */
+		return -1;
+	}
+
+	/* Encode */
+	swab_SmMessageSt(&msg);
+
+	/* Send stop message to the other node */
+	if (msg_send(&ctx, &msg, sizeof (SmMessageSt)) < 
+	    (int)sizeof (SmMessageSt)) {
+		clulog(LOG_ERR, "Failed to send complete message\n");
+		msg_close(&ctx);
+		return -1;
+	}
+
+	/* Check the response */
+	do {
+		msg_ret = msg_receive(&ctx, &msg, sizeof (SmMessageSt), 10);
+		if ((msg_ret == -1 && errno != ETIMEDOUT) ||
+		    (msg_ret >= 0))
+			break;
+	} while(1);
+
+	/* Close on every path; a mis-sized reply used to leak the context */
+	msg_close(&ctx);
+
+	if (msg_ret != sizeof (SmMessageSt)) {
+		clulog(LOG_WARNING, "Strange response size: %d vs %d\n",
+		       msg_ret, (int)sizeof(SmMessageSt));
+		return 0;	/* XXX really UNKNOWN */
+	}
+
+	/* Decode */
+	swab_SmMessageSt(&msg);
+
+	return msg.sm_data.d_ret;
+}
+
+
+/*
+   TODO
+   service_op_migrate()
+ */
+
/cvs/cluster/cluster/rgmanager/src/daemons/slang_event.c,v  -->  standard output
revision 1.3.2.1
--- cluster/rgmanager/src/daemons/slang_event.c
+++ -	2007-12-18 17:52:58.827110000 +0000
@@ -0,0 +1,1249 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/**
+  @file S/Lang event handling & intrinsic functions + vars
+ */
+#include <platform.h>
+#include <resgroup.h>
+#include <list.h>
+#include <restart_counter.h>
+#include <reslist.h>
+#include <clulog.h>
+#include <members.h>
+#include <assert.h>
+#include <event.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <slang.h>
+#include <sys/syslog.h>
+#include <malloc.h>
+#include <clulog.h>
+#include <sets.h>
+
+static int __sl_initialized = 0;
+
+static char **_service_list = NULL;
+static int _service_list_len = 0;
+
+char **get_service_names(int *len); /* from groups.c */
+int get_service_property(char *rg_name, char *prop, char *buf, size_t buflen);
+void push_int_array(int *stuff, int len);
+
+
+/* ================================================================
+ * Node states 
+ * ================================================================ */
+static const int
+   _ns_online = 1,
+   _ns_offline = 0;
+
+/* ================================================================
+ * Event information 
+ * ================================================================ */
+static const int
+   _ev_none = EVENT_NONE,
+   _ev_node = EVENT_NODE,
+   _ev_service = EVENT_RG,
+   _ev_config = EVENT_CONFIG,
+   _ev_user = EVENT_USER;
+
+static const int
+   _rg_fail = RG_EFAIL,
+   _rg_success = RG_ESUCCESS,
+   _rg_edomain = RG_EDOMAIN,
+   _rg_edepend = RG_EDEPEND,
+   _rg_eabort = RG_EABORT,
+   _rg_einval = RG_EINVAL,
+   _rg_erun = RG_ERUN;
+
+static int
+   _stop_processing = 0,
+   _my_node_id = 0,
+   _node_state = 0,
+   _node_id = 0,
+   _node_clean = 0,
+   _service_owner = 0,
+   _service_last_owner = 0,
+   _user_request = 0,
+   _user_arg1 = 0,
+   _user_arg2 = 0,
+   _user_return = 0,
+   _rg_err = 0,
+   _event_type = 0;
+
+static char
+   *_node_name = NULL,
+   *_service_name = NULL,
+   *_service_state = NULL,
+   *_rg_err_str = "No Error";
+
+static int
+   _user_enable = RG_ENABLE,
+   _user_disable = RG_DISABLE,
+   _user_stop = RG_STOP_USER,		/* From clusvcadm */
+   _user_relo = RG_RELOCATE,
+   _user_restart = RG_RESTART,
+   _user_migrate = RG_MIGRATE;
+
+
+SLang_Intrin_Var_Type rgmanager_vars[] =
+{
+	/* Log levels (constants) */
+
+	/* Node state information */
+	MAKE_VARIABLE("NODE_ONLINE",	&_ns_online,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("NODE_OFFLINE",	&_ns_offline,	SLANG_INT_TYPE, 1),
+
+	/* Node event information */
+	MAKE_VARIABLE("node_self",	&_my_node_id,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_state",	&_node_state,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_id",	&_node_id,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_name",	&_node_name,	SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("node_clean",	&_node_clean,	SLANG_INT_TYPE, 1),
+
+	/* Service event information */
+	MAKE_VARIABLE("service_name",	&_service_name,	SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("service_state",	&_service_state,SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("service_owner",	&_service_owner,SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("service_last_owner", &_service_last_owner,
+		      					SLANG_INT_TYPE, 1),
+
+	/* User event information */
+	MAKE_VARIABLE("user_request",	&_user_request,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_arg1",	&_user_arg1,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_arg2",	&_user_arg2,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_service",	&_service_name, SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("user_target",	&_service_owner,SLANG_INT_TYPE, 1),
+	/* Return code to user requests; i.e. clusvcadm */
+	MAKE_VARIABLE("user_return",	&_user_return,	SLANG_INT_TYPE, 0),
+
+	/* General event information */
+	MAKE_VARIABLE("event_type",	&_event_type,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_NONE",	&_ev_none,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_NODE",	&_ev_node,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_CONFIG",	&_ev_config,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_SERVICE",	&_ev_service,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_USER",	&_ev_user,	SLANG_INT_TYPE, 1),
+
+	/* User request constants */
+	MAKE_VARIABLE("USER_ENABLE",	&_user_enable,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_DISABLE",	&_user_disable,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_STOP",	&_user_stop,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_RELOCATE",	&_user_relo,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_RESTART",	&_user_restart,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_MIGRATE",	&_user_migrate,	SLANG_INT_TYPE, 1),
+
+	/* Errors */
+	MAKE_VARIABLE("rg_error",	&_rg_err,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("rg_error_string",&_rg_err_str,	SLANG_STRING_TYPE,1),
+
+	/* From constants.c */
+	MAKE_VARIABLE("FAIL",		&_rg_fail,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("SUCCESS",	&_rg_success,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("ERR_ABORT",	&_rg_eabort,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("ERR_INVALID",	&_rg_einval,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("ERR_DEPEND",	&_rg_edepend,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("ERR_DOMAIN",	&_rg_edomain,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("ERR_RUNNING",	&_rg_erun,	SLANG_INT_TYPE, 1),
+
+	SLANG_END_INTRIN_VAR_TABLE
+};
+
+
+/*
+ * Record an error code together with its symbolic name in _rg_err and
+ * _rg_err_str, the variables exported to event scripts as rg_error and
+ * rg_error_string.
+ *
+ * The name is produced with the stringification operator (#).  The
+ * token-pasting operator (##) would glue "=" onto the argument and break
+ * compilation at the first use of this macro.
+ */
+#define rg_error(errortype) \
+do { \
+	_rg_err = errortype; \
+	_rg_err_str = #errortype; \
+} while(0)
+
+
+/**
+  Fetch the state of a service, finishing its initialization if needed.
+
+  The locally known state is read first.  Only when it is still
+  RG_STATE_UNINITIALIZED do we take the resource group lock, read the
+  state again with get_rg_state() and, if no transition time has been
+  recorded, stamp one and pick DISABLED or STOPPED from the service's
+  "autostart" property before writing the state back.
+
+  @param svcName	Service name
+  @param svcStatus	Filled in with the service state
+  @return		0 on success; -1 with errno set to ENOLCK (could not
+			lock) or ENOENT (could not read the state)
+ */
+int
+get_service_state_internal(char *svcName, rg_state_t *svcStatus)
+{
+	struct dlm_lksb lock;
+	char autostart[32];
+	int rv = 0;
+
+	get_rg_state_local(svcName, svcStatus);
+	if (svcStatus->rs_state != RG_STATE_UNINITIALIZED)
+		return 0;
+
+	if (rg_lock(svcName, &lock) < 0) {
+		errno = ENOLCK;
+		return -1;
+	}
+
+	if (get_rg_state(svcName, svcStatus) < 0) {
+		errno = ENOENT;
+		rv = -1;
+		goto unlock;
+	}
+
+	/* We got a copy from another node - don't flip the state */
+	if (svcStatus->rs_transition)
+		goto unlock;
+
+	/* Finish initializing the service state */
+	svcStatus->rs_transition = (uint64_t)time(NULL);
+
+	if (get_service_property(svcName, "autostart",
+				 autostart, sizeof(autostart)) == 0) {
+		svcStatus->rs_state =
+			(autostart[0] == '0' || !strcasecmp(autostart, "no")) ?
+			RG_STATE_DISABLED : RG_STATE_STOPPED;
+	}
+
+	set_rg_state(svcName, svcStatus);
+
+unlock:
+	rg_unlock(&lock);
+	return rv;
+}
+
+
+/*
+   (restarts, last_owner, owner, state) = service_status(servicename)
+
+   Pushes four values on the S/Lang stack, in this order: restart count,
+   last owner, owner, and the state as a string.  The owner is reported
+   as -1 for states in which a service has no owner.  On failure a
+   S/Lang run-time error is raised; values already pushed stay on the
+   stack.
+ */
+void
+sl_service_status(char *svcName)
+{
+	rg_state_t svcStatus;
+	char *state_str;
+
+	if (get_service_state_internal(svcName, &svcStatus) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to get status for %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push restarts for %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_last_owner) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push last owner of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	switch(svcStatus.rs_state) {
+	case RG_STATE_DISABLED:
+	case RG_STATE_STOPPED:
+	case RG_STATE_FAILED:
+	case RG_STATE_RECOVER:
+	case RG_STATE_ERROR:
+		/* There is no owner for these states.  Ever.  */
+		svcStatus.rs_owner = -1;
+		break;
+	default:
+		break;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_owner) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push owner of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	state_str = strdup(rg_state_str(svcStatus.rs_state));
+	if (!state_str) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to duplicate state of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	/* SLang_push_malloced_string() releases the string whether or not
+	   the push succeeds, so it must not be freed again here. */
+	if (SLang_push_malloced_string(state_str) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push state of %s",
+			     __FUNCTION__,
+			     svcName);
+	}
+}
+
+
+/**
+  (nofailback, restricted, ordered, nodelist) = service_domain_info(svcName);
+
+  Pushes the three failover-domain flags (each 0 or 1) followed by the
+  array of node IDs in the service's domain.  If the service has no
+  "domain" property, or the domain cannot be resolved, all four values
+  are pushed as the integer 0.
+ */
+void
+sl_domain_info(char *svcName)
+{
+	int *nodelist = NULL, listlen = 0;
+	char buf[64];
+	int flags = 0, x;
+
+	/* The second lookup only runs once buf has been filled in.  Either
+	   failure yields exactly four zeroes; falling through here would
+	   use an uninitialized domain name and push a second set of
+	   values. */
+	if (get_service_property(svcName, "domain", buf, sizeof(buf)) < 0 ||
+	    node_domain_set_safe(buf, &nodelist, &listlen, &flags) < 0) {
+		for (x = 0; x < 4; x++)
+			SLang_push_integer(0);
+		return;
+	}
+
+	SLang_push_integer(!!(flags & FOD_NOFAILBACK));
+	SLang_push_integer(!!(flags & FOD_RESTRICTED));
+	SLang_push_integer(!!(flags & FOD_ORDERED));
+
+	push_int_array(nodelist, listlen);
+	free(nodelist);
+}
+
+
+/**
+  Pop an integer or a one-dimensional integer array off the S/Lang stack
+  and return it as a malloc()ed C array.  A lone integer becomes an
+  array of length 1.
+
+  @param nodelist	Receives the array on success; the caller frees
+			it with free().  Untouched on failure.
+  @param len		Receives the number of elements on success.
+  @return		0 on success, -1 on failure.  If the top of the
+			stack is neither an integer nor an array, nothing
+			is popped.
+ */
+static int
+get_int_array(int **nodelist, int *len)
+{
+	SLang_Array_Type *a = NULL;
+	SLindex_Type i;
+	int *nodes = NULL, t, ret = -1;
+
+	if (!nodelist || !len)
+		return -1;
+
+	t = SLang_peek_at_stack();
+	if (t == SLANG_INT_TYPE) {
+
+		nodes = malloc(sizeof(*nodes));
+		if (!nodes)
+			goto out;
+		if (SLang_pop_integer(&nodes[0]) < 0)
+			goto out;
+
+		*len = 1;
+		ret = 0;
+
+	} else if (t == SLANG_ARRAY_TYPE) {
+		if (SLang_pop_array_of_type(&a, SLANG_INT_TYPE) < 0)
+			goto out;
+		if (a->num_dims > 1 || a->dims[0] < 0)
+			goto out;
+
+		/* Always ask for at least one element: malloc(0) may
+		   return NULL, which would turn an empty array into an
+		   error. */
+		nodes = malloc(sizeof(*nodes) *
+			       (a->dims[0] > 0 ? a->dims[0] : 1));
+		if (!nodes)
+			goto out;
+
+		for (i = 0; i < a->dims[0]; i++) {
+			if (SLang_get_array_element(a, &i, &nodes[i]) < 0)
+				goto out;
+		}
+
+		*len = a->dims[0];
+		ret = 0;
+	}
+
+out:
+	if (a)
+		SLang_free_array(a);
+	if (ret == 0)
+		*nodelist = nodes;
+	else
+		free(nodes);
+
+	return ret;
+}
+
+
+/**
+  value = service_property(service_name, property)
+
+  Returns the value of a service property, or NULL if it cannot be read.
+
+  The value is returned in a static buffer.  S/Lang copies a string
+  returned by an intrinsic and never frees the original, so a strdup()ed
+  result would leak on every call.  The buffer is overwritten by the
+  next call.
+ */
+char *
+sl_service_property(char *svcName, char *prop)
+{
+	static char buf[96];
+
+	if (get_service_property(svcName, prop, buf, sizeof(buf)) < 0)
+		return NULL;
+
+	return buf;
+}
+
+
+/**
+  usage:
+
+  service_stop(name, disable_flag);
+
+  disable_flag is optional and defaults to 0.  Arguments are popped in
+  reverse order (flag first, then name).  The result of
+  service_op_stop() is returned to the script and stored in
+  _user_return; argument type or pop errors yield -1.
+ */
+int
+sl_stop_service(void)
+{
+	char *svcname = NULL;
+	int nargs, t, ret = -1;
+	int do_disable = 0;
+
+	nargs = SLang_Num_Function_Args;
+
+	/* Takes one or two args */
+	if (nargs <= 0 || nargs > 2) {
+		SLang_verror(SL_Syntax_Error,
+		     "%s: Wrong # of args (%d), must be 1 or 2\n",
+		     __FUNCTION__,
+		     nargs);
+		return -1;
+	}
+
+	if (nargs == 2) {
+		t = SLang_peek_at_stack();
+		if (t != SLANG_INT_TYPE) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: expected type %d got %d\n",
+				     __FUNCTION__, SLANG_INT_TYPE, t);
+			goto out;
+		}
+
+		if (SLang_pop_integer(&do_disable) < 0) {
+			SLang_verror(SL_Syntax_Error,
+			    "%s: Failed to pop integer from stack!\n",
+			    __FUNCTION__);
+			goto out;
+		}
+
+		--nargs;
+	}
+
+	if (nargs == 1) {
+		t = SLang_peek_at_stack();
+		if (t != SLANG_STRING_TYPE) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: expected type %d got %d\n",
+				     __FUNCTION__,
+				     SLANG_STRING_TYPE, t);
+			goto out;
+		}
+
+		if (SLpop_string(&svcname) < 0) {
+			SLang_verror(SL_Syntax_Error,
+			    "%s: Failed to pop string from stack!\n",
+			    __FUNCTION__);
+			goto out;
+		}
+	}
+
+	ret = service_op_stop(svcname, do_disable, _event_type);
+out:
+	/* Strings handed out by SLpop_string() are released with SLfree(),
+	   as sl_clulog() already does. */
+	if (svcname)
+		SLfree(svcname);
+	_user_return = ret;
+	return ret;
+}
+
+
+/**
+  usage:
+
+  service_start(name, <array>ordered_node_list_allowed,
+  		      <array>node_list_illegal)
+
+  Both node lists are optional.  Arguments are popped in reverse order.
+  The illegal-node list is popped and freed but is not currently passed
+  on to service_op_start().
+
+  Returns (and stores in _user_return) the ID of the new owner when
+  service_op_start() succeeds and reports an owner > 0; otherwise the
+  result of service_op_start(), or -1 on argument errors.
+ */
+int
+sl_start_service(void)
+{
+	char *svcname = NULL;
+	int *pref_list = NULL, pref_list_len = 0;
+	int *illegal_list = NULL, illegal_list_len = 0;
+	int nargs, t, newowner = 0, ret = -1;
+
+	nargs = SLang_Num_Function_Args;
+
+	/* Takes one, two, or three */
+	if (nargs <= 0 || nargs > 3) {
+		SLang_verror(SL_Syntax_Error,
+		     "%s: Wrong # of args (%d), must be 1, 2, or 3\n",
+		     __FUNCTION__, nargs);
+		return -1;
+	}
+
+	if (nargs == 3) {
+		if (get_int_array(&illegal_list, &illegal_list_len) < 0)
+			goto out;
+		--nargs;
+	}
+
+	if (nargs == 2) {
+		if (get_int_array(&pref_list, &pref_list_len) < 0)
+			goto out;
+		--nargs;
+	}
+
+	if (nargs == 1) {
+		/* Just get the service name */
+		t = SLang_peek_at_stack();
+		if (t != SLANG_STRING_TYPE) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: expected type %d got %d\n",
+				     __FUNCTION__,
+				     SLANG_STRING_TYPE, t);
+			goto out;
+		}
+
+		if (SLpop_string(&svcname) < 0)
+			goto out;
+	}
+
+	ret = service_op_start(svcname, pref_list,
+			       pref_list_len, &newowner);
+
+	/* Fold the new owner into the return value: > 0 means success
+	   and names the node now running the service */
+	if (ret == 0 && newowner > 0)
+		ret = newowner;
+out:
+	/* Strings handed out by SLpop_string() are released with SLfree() */
+	if (svcname)
+		SLfree(svcname);
+	free(illegal_list);
+	free(pref_list);
+	_user_return = ret;
+	return ret;
+}
+
+
+/* Copy a C array of 'len' integers into a new one-dimensional S/Lang
+   integer array and push that array on the S/Lang stack.  Nothing is
+   pushed if the array cannot be created. */
+void
+push_int_array(int *stuff, int len)
+{
+	SLang_Array_Type *result;
+	SLindex_Type count = len, idx;
+	int value;
+
+	result = SLang_create_array(SLANG_INT_TYPE, 0, NULL, &count, 1);
+	if (result == NULL)
+		return;
+
+	idx = 0;
+	while (idx < len) {
+		value = stuff[idx];
+		SLang_set_array_element(result, &idx, &value);
+		idx++;
+	}
+
+	SLang_push_array(result, 1);
+}
+
+
+/*
+   nodes = nodes_online()
+
+   Pushes an array holding the IDs of all nodes in the current member
+   list that are flagged as members and have a non-zero node ID.
+   Nothing is pushed if the member list or the scratch array cannot be
+   obtained.
+ */
+void
+sl_nodes_online(void)
+{
+	int i, *nodes, nodecount = 0;
+	cluster_member_list_t *membership = member_list();
+
+	if (!membership)
+		return;
+
+	/* Ask for at least one slot so an empty member list does not
+	   depend on what malloc(0) returns */
+	nodes = malloc(sizeof(int) *
+		       (membership->cml_count > 0 ? membership->cml_count : 1));
+	if (!nodes) {
+		/* Don't leak the member list on allocation failure */
+		free_member_list(membership);
+		return;
+	}
+
+	for (i = 0; i < membership->cml_count; i++) {
+		if (membership->cml_members[i].cn_member &&
+		    membership->cml_members[i].cn_nodeid != 0) {
+			nodes[nodecount] = membership->cml_members[i].cn_nodeid;
+			++nodecount;
+		}
+	}
+	free_member_list(membership);
+	push_int_array(nodes, nodecount);
+	free(nodes);
+}
+
+
+/*
+   services = service_list()
+
+   Pushes an array of strings built from the file-level _service_list
+   (the rgmanager-defined services, in type:name format).  That list is
+   fetched and released once per event by slang_process_event(), so no
+   memory is allocated for the names here.
+ */
+void
+sl_service_list(void)
+{
+	SLang_Array_Type *names;
+	SLindex_Type total = _service_list_len;
+	SLindex_Type idx;
+
+	names = SLang_create_array(SLANG_STRING_TYPE, 0, NULL, &total, 1);
+	if (names == NULL)
+		return;
+
+	idx = 0;
+	while (idx < _service_list_len) {
+		SLang_set_array_element(names, &idx, &_service_list[idx]);
+		idx++;
+	}
+
+	SLang_push_array(names, 1);
+}
+
+
+/*
+ * Pop the two operands of a binary set intrinsic off the S/Lang stack.
+ * Arguments arrive reversed, so the right-hand operand is popped first.
+ * Returns 0 on success.  On failure a S/Lang run-time error is raised,
+ * anything already popped is freed and -1 is returned.
+ */
+static int
+pop_set_operands(const char *caller, int **left, int *leftlen,
+		 int **right, int *rightlen)
+{
+	*left = NULL;
+	*right = NULL;
+	*leftlen = 0;
+	*rightlen = 0;
+
+	if (get_int_array(right, rightlen) < 0 ||
+	    get_int_array(left, leftlen) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: expected two integer or integer array "
+			     "arguments", caller);
+		free(*left);
+		free(*right);
+		*left = NULL;
+		*right = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/* Push the result of a set operation, then release the operands and
+   the result array */
+static void
+push_set_result(int *left, int *right, int *result, int resultlen)
+{
+	push_int_array(result, resultlen);
+	free(left);
+	free(right);
+	free(result);
+}
+
+
+/* s_union hook (see sets.c): result = union(set1, set2) */
+void
+sl_union(void)
+{
+	int *arr1, a1len;
+	int *arr2, a2len;
+	int *ret = NULL, retlen = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+	if (pop_set_operands(__FUNCTION__, &arr1, &a1len, &arr2, &a2len) < 0)
+		return;
+
+	s_union(arr1, a1len, arr2, a2len, &ret, &retlen);
+	push_set_result(arr1, arr2, ret, retlen);
+}
+
+
+/* s_intersection hook (see sets.c): result = intersection(set1, set2) */
+void
+sl_intersection(void)
+{
+	int *arr1, a1len;
+	int *arr2, a2len;
+	int *ret = NULL, retlen = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+	if (pop_set_operands(__FUNCTION__, &arr1, &a1len, &arr2, &a2len) < 0)
+		return;
+
+	s_intersection(arr1, a1len, arr2, a2len, &ret, &retlen);
+	push_set_result(arr1, arr2, ret, retlen);
+}
+
+
+/* s_delta hook (see sets.c): result = delta(set1, set2) */
+void
+sl_delta(void)
+{
+	int *arr1, a1len;
+	int *arr2, a2len;
+	int *ret = NULL, retlen = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+	if (pop_set_operands(__FUNCTION__, &arr1, &a1len, &arr2, &a2len) < 0)
+		return;
+
+	s_delta(arr1, a1len, arr2, a2len, &ret, &retlen);
+	push_set_result(arr1, arr2, ret, retlen);
+}
+
+
+/* s_subtract hook (see sets.c): result = subtract(set1, set2) */
+void
+sl_subtract(void)
+{
+	int *arr1, a1len;
+	int *arr2, a2len;
+	int *ret = NULL, retlen = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+	if (pop_set_operands(__FUNCTION__, &arr1, &a1len, &arr2, &a2len) < 0)
+		return;
+
+	s_subtract(arr1, a1len, arr2, a2len, &ret, &retlen);
+	push_set_result(arr1, arr2, ret, retlen);
+}
+
+
+/* Shuffle array (see sets.c): result = shuffle(set)
+
+   Pops one integer or integer array, shuffles it with s_shuffle() and
+   pushes the result.  Raises a S/Lang run-time error if the argument
+   cannot be popped as an integer array. */
+void
+sl_shuffle(void)
+{
+	int *arr1 = NULL, a1len = 0;
+
+	if (SLang_Num_Function_Args != 1)
+		return;
+
+	if (get_int_array(&arr1, &a1len) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: expected an integer or integer array "
+			     "argument", __FUNCTION__);
+		return;
+	}
+
+	s_shuffle(arr1, a1len);
+	push_int_array(arr1, a1len);
+	free(arr1);
+}
+
+
+/* Render an int array as "[ a b c ]" into buf so it can be logged in
+   one shot.  Returns the length of the resulting string, or -1 if it
+   does not fit in buflen bytes (buf then holds whatever was written
+   before space ran out). */
+static int
+array_to_string(char *buf, int buflen, int *array, int arraylen)
+{
+	char piece[16];
+	int idx, n, space = buflen;
+
+	memset(piece, 0, sizeof(piece));
+	memset(buf, 0, buflen);
+
+	n = snprintf(buf, buflen - 1, "[ ");
+	if (n == buflen)
+		return -1;
+	space -= n;
+
+	for (idx = 0; idx < arraylen; idx++) {
+		n = snprintf(piece, sizeof(piece) - 1, "%d ", array[idx]);
+		space -= n;
+		/* Keep one byte in reserve for the terminating NUL */
+		if (space <= 0)
+			return -1;
+		strncat(buf, piece, n);
+	}
+
+	n = snprintf(piece, sizeof(piece) - 1 ,  "]");
+	space -= n;
+	if (space <= 0)
+		return -1;
+	strncat(buf, piece, n);
+
+	return (buflen - space);
+}
+
+
+/**
+  Common back end of the logging intrinsics (debug(), info(), notice(),
+  warning(), err(), crit(), alert(), emerg()); each of them passes its
+  own syslog level.
+
+  Start at the end of the arg list and work backwards, prepending each
+  argument to the message.  This does not support standard clulog /
+  printf formatting; rather, integers, strings and integer arrays may
+  be mixed freely on the stack.  Each one is converted to text and
+  prepended to the log message.  Any number of arguments may be
+  provided.  Examples are:
+
+    info("String", 1, "string2");
+
+  Result:  String1string2
+
+    info("String ", 1, " string2");
+
+  Result:  String 1 string2
+
+  Nothing is logged if an argument cannot be popped or the assembled
+  message does not fit in the log buffer.
+
+  @param level	syslog level to log at
+ */
+void
+sl_clulog(int level)
+{
+	int t, nargs, len;
+	int s_intval;
+	char *s_strval;
+	int *nodes = NULL, nlen = 0;
+	char logbuf[512];
+	char tmp[256];
+	int remain = sizeof(logbuf)-2;
+
+	nargs = SLang_Num_Function_Args;
+	if (nargs < 1)
+		return;
+
+	/* The message is assembled right-to-left, ending in "\n\0" */
+	memset(logbuf, 0, sizeof(logbuf));
+	memset(tmp, 0, sizeof(tmp));
+	logbuf[sizeof(logbuf)-2] = '\n';
+
+	while (nargs && (t = SLang_peek_at_stack()) >= 0 && remain) {
+		switch(t) {
+		case SLANG_ARRAY_TYPE:
+			if (get_int_array(&nodes, &nlen) < 0)
+				return;
+			len = array_to_string(tmp, sizeof(tmp),
+					      nodes, nlen);
+			free(nodes);
+			if (len < 0)
+				return;
+			break;
+		case SLANG_INT_TYPE:
+			if (SLang_pop_integer(&s_intval) < 0)
+				return;
+			len=snprintf(tmp, sizeof(tmp) - 1, "%d", s_intval);
+			break;
+		case SLANG_STRING_TYPE:
+			if (SLpop_string(&s_strval) < 0)
+				return;
+			len=snprintf(tmp, sizeof(tmp) - 1, "%s", s_strval);
+			SLfree(s_strval);
+			break;
+		default:
+			len=snprintf(tmp, sizeof(tmp) - 1,
+				     "{UnknownType %d}", t);
+			/* Discard the argument; otherwise the same item
+			   would be examined again on every iteration and
+			   left behind on the stack */
+			SLdo_pop();
+			break;
+		}
+
+		--nargs;
+
+		if (len < 0)
+			return;
+		/* snprintf() reports the length the text would have had;
+		   never copy more than was actually stored in tmp */
+		if (len > (int)sizeof(tmp) - 2)
+			len = (int)sizeof(tmp) - 2;
+
+		if (len > remain)
+			return;
+		remain -= len;
+
+		memcpy(&logbuf[remain], tmp, len);
+	}
+
+	/* The text comes from the script: never use it as a format string */
+	clulog(level, "%s", &logbuf[remain]);
+}
+
+
+/* Logging functions: one argument-less intrinsic per syslog level.  Each
+   simply hands its level to sl_clulog(), which pops the message
+   arguments off the S/Lang stack. */
+#define SL_LOG_INTRINSIC(name, level) \
+void \
+sl_log_##name(void) \
+{ \
+	sl_clulog(level); \
+}
+
+SL_LOG_INTRINSIC(debug, LOG_DEBUG)
+SL_LOG_INTRINSIC(info, LOG_INFO)
+SL_LOG_INTRINSIC(notice, LOG_NOTICE)
+SL_LOG_INTRINSIC(warning, LOG_WARNING)
+SL_LOG_INTRINSIC(err, LOG_ERR)
+SL_LOG_INTRINSIC(crit, LOG_CRIT)
+SL_LOG_INTRINSIC(alert, LOG_ALERT)
+SL_LOG_INTRINSIC(emerg, LOG_EMERG)
+
+#undef SL_LOG_INTRINSIC
+
+
+/* stop_processing(): set the flag that makes slang_process_event() stop
+   running further scripts for the current event */
+void
+sl_die(void)
+{
+	_stop_processing = 1;
+}
+
+
+/* Intrinsic functions made available to event scripts; registered by
+   do_init_slang().  Entries declared with MAKE_INTRINSIC_0 take no
+   declared arguments: those functions pop whatever they need from the
+   S/Lang stack themselves. */
+SLang_Intrin_Fun_Type rgmanager_slang[] =
+{
+	/* Cluster / service enumeration */
+	MAKE_INTRINSIC_0("nodes_online", sl_nodes_online, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("service_list", sl_service_list, SLANG_VOID_TYPE),
+
+	/* Service queries and operations */
+	MAKE_INTRINSIC_SS("service_property", sl_service_property,
+			  SLANG_STRING_TYPE),
+	MAKE_INTRINSIC_S("service_domain_info", sl_domain_info, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("service_stop", sl_stop_service, SLANG_INT_TYPE),
+	MAKE_INTRINSIC_0("service_start", sl_start_service, SLANG_INT_TYPE),
+	MAKE_INTRINSIC_S("service_status", sl_service_status,
+			 SLANG_VOID_TYPE),
+
+	/* Node list manipulation */
+	MAKE_INTRINSIC_0("union", sl_union, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("intersection", sl_intersection, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("delta", sl_delta, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("subtract", sl_subtract, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("shuffle", sl_shuffle, SLANG_VOID_TYPE),
+
+	/* Logging, one intrinsic per syslog level */
+	MAKE_INTRINSIC_0("debug", sl_log_debug, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("info", sl_log_info, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("notice", sl_log_notice, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("warning", sl_log_warning, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("err", sl_log_err, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("crit", sl_log_crit, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("alert", sl_log_alert, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("emerg", sl_log_emerg, SLANG_VOID_TYPE),
+
+	/* Stop running further scripts for the current event */
+	MAKE_INTRINSIC_0("stop_processing", sl_die, SLANG_VOID_TYPE),
+
+	SLANG_END_INTRIN_FUN_TABLE
+};
+
+
+/* Hook for when we generate a script error: installed as
+   SLang_Error_Hook by do_init_slang(); logs the interpreter's error
+   text at LOG_ERR with a "[S/Lang]" prefix */
+void
+rgmanager_slang_error_hook(char *errstr)
+{
+	/* Don't pass errstr as the format string itself: it might
+	   contain "%s", for example, which would result in a crash!
+	   Plus, we like the newline :) */
+	clulog(LOG_ERR, "[S/Lang] %s\n", errstr);
+}
+
+
+
+/* ================================================================
+ * S/Lang initialization
+ * ================================================================ */
+/*
+   Initialize the S/Lang interpreter and register the rgmanager
+   intrinsic functions and variables.  Returns 0 on success, 1 on
+   failure.  __sl_initialized is only set on success, so a failed
+   initialization is attempted again for the next event.
+ */
+int
+do_init_slang(void)
+{
+	/* Both initializers report failure with a negative return value;
+	   do not register anything with a half-initialized interpreter */
+	if (SLang_init_slang() < 0)
+		return 1;
+	if (SLang_init_slfile() < 0)
+		return 1;
+
+	if (SLadd_intrin_fun_table(rgmanager_slang, NULL) < 0)
+		return 1;
+	if (SLadd_intrin_var_table (rgmanager_vars, NULL) < 0)
+		return 1;
+
+	/* TODO: Make rgmanager S/Lang conformant.  Though, it
+	   might be a poor idea to provide access to all the 
+	   S/Lang libs */
+	SLpath_set_load_path(RESOURCE_ROOTDIR);
+
+	_my_node_id = my_id();
+	__sl_initialized = 1;
+
+	SLang_Error_Hook = rgmanager_slang_error_hook;
+
+	return 0;
+}
+
+
+/*
+   Execute a script / file and return the result to the caller
+   Log an error if we receive one.
+
+   If 'file' is set it is loaded and 'script' is ignored; otherwise the
+   'script' string is evaluated.  Returns the loader's result (< 0 on
+   failure, after which the interpreter is restarted), or -1 if neither
+   a file nor a script was supplied.
+ */
+int
+do_slang_run(const char *file, const char *script)
+{
+	int ret = 0;
+
+	if (!file && !script) {
+		/* Nothing to run; don't hand NULL to the interpreter */
+		clulog(LOG_ERR, "[S/Lang] No script or file to execute\n");
+		return -1;
+	}
+
+	if (file) 
+		ret = SLang_load_file((char *)file);
+	else
+		ret = SLang_load_string((char *)script);
+
+	if (ret < 0) {
+		clulog(LOG_ERR, "[S/Lang] Script Execution Failure\n");
+		SLang_restart(1);
+	}
+
+	return ret;
+}
+
+
+/*
+   Run a script for a node event.  The node_name, node_state, node_clean
+   and node_id script variables are set for the duration of the script
+   and reset afterwards.  node_name is left NULL when the member list or
+   the node's name is not available.
+
+   Returns the result of do_slang_run().
+ */
+int
+S_node_event(const char *file, const char *script, int nodeid,
+	     int state, int clean)
+{
+	int ret;
+	const char *name;
+	cluster_member_list_t *membership = member_list();
+
+	_node_name = NULL;
+	if (membership) {
+		/* strdup(NULL) would crash: only copy a name we really got */
+		name = memb_id_to_name(membership, nodeid);
+		if (name)
+			_node_name = strdup(name);
+		free_member_list(membership);
+	}
+	_node_state = state;
+	_node_clean = clean;
+	_node_id = nodeid;
+
+	ret = do_slang_run(file, script);
+
+	_node_state = 0;
+	_node_clean = 0;
+	_node_id = 0;
+	free(_node_name);
+	_node_name = NULL;
+
+	return ret;
+}
+
+
+/*
+   Run a script for a service (resource group) event.  The service_name,
+   service_state, service_owner and service_last_owner script variables
+   are set for the duration of the script and reset afterwards.  The
+   owner is reported as -1 for states in which a service has no owner.
+
+   Returns the result of do_slang_run().
+ */
+int
+S_service_event(const char *file, const char *script, char *name,
+	        int state, int owner, int last_owner)
+{
+	int ret;
+	int effective_owner = owner;
+
+	switch(state) {
+	case RG_STATE_DISABLED:
+	case RG_STATE_STOPPED:
+	case RG_STATE_FAILED:
+	case RG_STATE_RECOVER:
+	case RG_STATE_ERROR:
+		/* There is no owner for these states.  Ever.  */
+		effective_owner = -1;
+		break;
+	default:
+		break;
+	}
+
+	_service_name = name;
+	_service_state = (char *)rg_state_str(state);
+	_service_owner = effective_owner;
+	_service_last_owner = last_owner;
+
+	ret = do_slang_run(file, script);
+
+	_service_last_owner = 0;
+	_service_owner = 0;
+	_service_state = 0;
+	_service_name = NULL;
+
+	return ret;
+}
+
+
+/*
+   Run a script for a user request.  The service name, target, request
+   and argument script variables are set for the duration of the script
+   and reset afterwards.  If the script fails to run, the reply code is
+   RG_ESCRIPT.
+
+   When a message context is supplied, the reply is sent on it and the
+   context is closed and freed here.
+
+   Returns the result of do_slang_run().
+ */
+int
+S_user_event(const char *file, const char *script, char *name,
+	     int request, int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	int ret;
+	int reply_code, reply_owner;
+
+	_service_name = name;
+	_service_owner = target;
+	_user_request = request;
+	_user_arg1 = arg1;
+	_user_arg2 = arg2;
+	_user_return = 0;
+
+	ret = do_slang_run(file, script);
+	if (ret < 0)
+		_user_return = RG_ESCRIPT;
+
+	_user_arg2 = 0;
+	_user_arg1 = 0;
+	_user_request = 0;
+	_service_owner = 0;
+	_service_name = NULL;
+
+	if (ctx) {
+		if (_user_return > 0) {
+			/* sl_start_service() squashes return code and
+			   node ID into one value: > 0 means success and
+			   is the ID of the node running the service */
+			reply_code = 0;
+			reply_owner = _user_return;
+		} else {
+			/* <= 0: pass the code straight back, no owner */
+			reply_code = _user_return;
+			reply_owner = 0;
+		}
+		send_ret(ctx, name, reply_code, request, reply_owner);
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+
+	_user_return = 0;
+	return ret;
+}
+
+
+/*
+   Run the script attached to 'pattern' for event 'ev', dispatching on
+   the event type.  The event_type script variable holds the type while
+   the script runs and is reset to EVENT_NONE afterwards.  Event types
+   other than node, service and user events run nothing and yield 0.
+ */
+int
+slang_do_script(event_t *pattern, event_t *ev)
+{
+	const char *file = pattern->ev_script_file;
+	const char *script = pattern->ev_script;
+	int ret = 0;
+
+	_event_type = ev->ev_type;
+
+	if (ev->ev_type == EVENT_NODE) {
+		ret = S_node_event(file, script,
+				   ev->ev.node.ne_nodeid,
+				   ev->ev.node.ne_state,
+				   ev->ev.node.ne_clean);
+	} else if (ev->ev_type == EVENT_RG) {
+		ret = S_service_event(file, script,
+				      ev->ev.group.rg_name,
+				      ev->ev.group.rg_state,
+				      ev->ev.group.rg_owner,
+				      ev->ev.group.rg_last_owner);
+	} else if (ev->ev_type == EVENT_USER) {
+		ret = S_user_event(file, script,
+				   ev->ev.user.u_name,
+				   ev->ev.user.u_request,
+				   ev->ev.user.u_arg1,
+				   ev->ev.user.u_arg2,
+				   ev->ev.user.u_target,
+				   ev->ev.user.u_ctx);
+	}
+
+	_event_type = EVENT_NONE;
+	return ret;
+}
+
+
+
+/**
+  Process an event given our event table and the event that
+  occurred.  Note that the caller is responsible for freeing the
+  event - do not free (ev) ...
+
+  Scripts whose pattern matches the event are run in priority order
+  (levels 1..max_prio first, then the default level 0) until the list
+  is exhausted or a script calls stop_processing().  The service name
+  list is fetched before, and released after, the scripts are run.
+
+  @return	0 once the event has been processed, -1 if the S/Lang
+		interpreter could not be initialized (no script is run).
+ */
+int
+slang_process_event(event_table_t *event_table, event_t *ev)
+{
+	int x, y;
+	event_t *pattern;
+
+	/* Never run scripts on an interpreter that failed to initialize */
+	if (!__sl_initialized && do_init_slang() != 0) {
+		clulog(LOG_ERR,
+		       "[S/Lang] Interpreter initialization failed\n");
+		return -1;
+	}
+
+	/* Get the service list once before processing events */
+	if (!_service_list || !_service_list_len)
+		_service_list = get_service_names(&_service_list_len);
+
+	_stop_processing = 0;
+	for (x = 1; x <= event_table->max_prio; x++) {
+		list_for(&event_table->entries[x], pattern, y) {
+			if (event_match(pattern, ev))
+				slang_do_script(pattern, ev);
+			if (_stop_processing)
+				goto out;
+		}
+	}
+
+	/* Default level = 0 */
+	list_for(&event_table->entries[0], pattern, y) {
+		if (event_match(pattern, ev))
+			slang_do_script(pattern, ev);
+		if (_stop_processing)
+			goto out;
+	}
+
+out:
+	/* Free the service list */
+	if (_service_list) {
+		for(x = 0; x < _service_list_len; x++) {
+			free(_service_list[x]);
+		}
+		free(_service_list);
+		_service_list = NULL;
+		_service_list_len = 0;
+	}
+
+	return 0;
+}
--- cluster/rgmanager/src/daemons/Makefile	2007/11/26 21:46:27	1.14.2.4
+++ cluster/rgmanager/src/daemons/Makefile	2007/12/18 17:52:56	1.14.2.5
@@ -15,9 +15,9 @@
 
 include ${top_srcdir}/make/defines.mk
 INCLUDE += -I $(top_srcdir)/include -I $(top_srcdir)/../cman/lib -I $(top_srcdir)/../ccs/lib -I $(top_srcdir)/../dlm/lib
-INCLUDE += -I${incdir} -I/usr/include/libxml2
+INCLUDE += -I${incdir} -I/usr/include/libxml2 -I/usr/include/slang
 
-CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -DWRAP_THREADS
+CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -DWRAP_THREADS -Wall -Wextra
 
 LDFLAGS+= -L ../clulib -L../../../cman/lib -L../../../ccs/lib -L../../../dlm/lib -L${libdir} -lclulib -lxml2 -lpthread -ldl -Wl,-wrap,pthread_create,-wrap,pthread_exit -rdynamic
 TARGETS=clurgmgrd clurmtabd rg_test
@@ -37,10 +37,14 @@
 
 clurgmgrd: rg_thread.o rg_locks.o main.o groups.o  \
 		rg_queue.o rg_forward.o reslist.o \
-		resrules.o restree.o fo_domain.o nodeevent.o \
-		rg_event.o watchdog.o rg_state.o \
-		restart_counter.o ../clulib/libclulib.a
-	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm
+		resrules.o restree.o fo_domain.o  \
+		rg_event.o watchdog.o rg_state.o event_config.o \
+		slang_event.o service_op.o restart_counter.o \
+		../clulib/libclulib.a
+	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm -lslang
+
+rg_script_test: slang_event.o
+	$(CC) -o rg_script_test slang_event.o $(INCLUDE) $(CFLAGS) -lslang $(LDFLAGS)
 
 #
 # Our test program links against the local allocator so that
@@ -58,7 +62,7 @@
 #
 rg_test: rg_locks-noccs.o test-noccs.o reslist-noccs.o \
 		resrules-noccs.o restree-noccs.o fo_domain-noccs.o \
-		restart_counter.o 
+		event_config-noccs.o restart_counter.o
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) -llalloc $(LDFLAGS) -lccs -lcman
 
 clurmtabd: clurmtabd.o clurmtabd_lib.o
--- cluster/rgmanager/src/daemons/fo_domain.c	2007/11/26 21:46:27	1.11.2.1
+++ cluster/rgmanager/src/daemons/fo_domain.c	2007/12/18 17:52:56	1.11.2.2
@@ -34,6 +34,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <members.h>
+#include <sets.h>
 
 
 //#define DEBUG
@@ -96,6 +97,23 @@
 	fodn->fdn_name = ret;
 	fodn->fdn_prio = 0;
 
+	snprintf(xpath, sizeof(xpath),
+		 "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid",
+		 ret);
+	if (ccs_get(ccsfd, xpath, &ret) != 0) {
+		clulog(LOG_WARNING, "Node %s has no nodeid attribute\n",
+		       fodn->fdn_name);
+		fodn->fdn_nodeid = -1;
+	} else {
+		/* 64-bit-ism on rhel4? */
+		fodn->fdn_nodeid = atoi(ret);
+	}
+
+	/* Don't even bother getting priority if we're not ordered (it's set
+	   to 0 above */
+	if (!(domain->fd_flags & FOD_ORDERED))
+		return fodn;
+
 	snprintf(xpath, sizeof(xpath), "%s/failoverdomainnode[%d]/@priority",
 		 base, idx);
 	if (ccs_get(ccsfd, xpath, &ret) != 0)
@@ -228,6 +246,11 @@
 {
 	fod_t *fod;
 	fod_node_t *fodn = NULL;
+	/*
+	int x;
+	int *node_set = NULL;
+	int node_set_len = 0;
+	 */
 
 	list_do(domains, fod) {
 		printf("Failover domain: %s\n", fod->fd_name);
@@ -245,9 +268,21 @@
 		}
 
 		list_do(&fod->fd_nodes, fodn) {
-			printf("  Node %s (priority %d)\n",
-			       fodn->fdn_name, fodn->fdn_prio);
+			printf("  Node %s (id %d, priority %d)\n",
+			       fodn->fdn_name, fodn->fdn_nodeid,
+			       fodn->fdn_prio);
 		} while (!list_done(&fod->fd_nodes, fodn));
+
+		/*
+		node_domain_set(fod, &node_set, &node_set_len);
+		printf("  Failover Order = {");
+		for (x = 0; x < node_set_len; x++) {
+			printf(" %d ", node_set[x]);
+		}
+		free(node_set);
+		printf("}\n");
+		*/
+		
 	} while (!list_done(domains, fod));
 }
 
@@ -313,6 +348,70 @@
 }
 
 
+/* Build the failover order of a domain: node IDs by ascending priority
+   (1..100, each level passed through s_shuffle()), then priority-0 nodes.
+   Returns 0 with *ret malloc()ed for the caller; -1 if malloc() fails. */
+int
+node_domain_set(fod_t *domain, int **ret, int *retlen)
+{
+	int x, i, j;
+	int *tmpset;
+	int ts_count;
+	fod_node_t *fodn;
+
+	/* Count domain length */
+	list_for(&domain->fd_nodes, fodn, x) { }
+
+	*retlen = 0;
+	*ret = malloc(sizeof(int) * x);
+	if (!(*ret))
+		return -1;
+	tmpset = malloc(sizeof(int) * x);
+	if (!tmpset) {
+		/* Check the pointer itself, and don't leak the result set */
+		free(*ret);
+		*ret = NULL;
+		return -1;
+	}
+
+	if (domain->fd_flags & FOD_ORDERED) {
+		for (i = 1; i <= 100; i++) {
+			ts_count = 0;
+			list_for(&domain->fd_nodes, fodn, x) {
+				if (fodn->fdn_prio == i)
+					s_add(tmpset, &ts_count,
+					      fodn->fdn_nodeid);
+			}
+
+			if (!ts_count)
+				continue;
+
+			/* Shuffle stuff at this prio level */
+			if (ts_count > 1)
+				s_shuffle(tmpset, ts_count);
+			for (j = 0; j < ts_count; j++)
+				s_add(*ret, retlen, tmpset[j]);
+		}
+	}
+
+	/* Add unprioritized nodes */
+	ts_count = 0;
+	list_for(&domain->fd_nodes, fodn, x) {
+		if (!fodn->fdn_prio)
+			s_add(tmpset, &ts_count, fodn->fdn_nodeid);
+	}
+
+	/* Shuffle stuff at this prio level */
+	if (ts_count > 1)
+		s_shuffle(tmpset, ts_count);
+	for (j = 0; j < ts_count; j++)
+		s_add(*ret, retlen, tmpset[j]);
+
+	free(tmpset);	/* scratch buffer is not handed to the caller */
+	return 0;
+}
+
+
 /**
  * See if a given nodeid should start a specified service svcid.
  *
--- cluster/rgmanager/src/daemons/groups.c	2007/11/26 21:46:27	1.25.2.13
+++ cluster/rgmanager/src/daemons/groups.c	2007/12/18 17:52:56	1.25.2.14
@@ -30,6 +30,7 @@
 #include <list.h>
 #include <reslist.h>
 #include <assert.h>
+#include <event.h>
 
 /* Use address field in this because we never use it internally,
    and there is no extra space in the cman_node_t type.
@@ -38,6 +39,8 @@
 #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */
 #define cn_svcexcl  cn_address.cna_address[1]
 
+extern event_table_t *master_event_table;
+
 static int config_version = 0;
 static resource_t *_resources = NULL;
 static resource_rule_t *_rules = NULL;
@@ -54,7 +57,7 @@
 pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER;
 
 void res_build_name(char *, size_t, resource_t *);
-int get_rg_state_local(char *, rg_state_t *);
+int group_migratory(char *groupname, int lock);
 
 
 struct status_arg {
@@ -83,6 +86,32 @@
 
 
 int
+node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags)
+{
+	fod_t *fod;
+	int rv = -1, found = 0, x = 0;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	list_for(&_domains, fod, x) {
+		if (!strcasecmp(fod->fd_name, domainname)) {
+			found = 1;
+			break;
+		}
+	} // while (!list_done(&_domains, fod));
+
+	if (found) {
+		rv = node_domain_set(fod, ret, retlen);
+		*flags = fod->fd_flags;
+	}
+
+	pthread_rwlock_unlock(&resource_lock);
+
+	return rv;
+}
+
+
+int
 count_resource_groups(cluster_member_list_t *ml)
 {
 	resource_t *res;
@@ -187,7 +216,7 @@
 	char rgname[64];
 	int x;
 
-	list_for(&_tree, node, x) {
+	list_for(tree, node, x) {
 
 		res = node->rn_resource;
 		res_build_name(rgname, sizeof(rgname), res);
@@ -564,6 +593,60 @@
 }
 
 
+char **
+get_service_names(int *len)
+{
+	resource_node_t *node = NULL;
+	int nservices, ncopied = 0, x;
+	char **ret = NULL;
+	char rg_name[64];
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	nservices = 0;
+	list_do(&_tree, node) {
+		++nservices;
+	} while (!list_done(&_tree, node));
+	
+	ret = malloc(sizeof(char *) * (nservices + 1));
+	if (!ret)
+		goto out_fail;
+
+	memset(ret, 0, sizeof(char *) * (nservices + 1));
+	nservices = 0;
+	list_for(&_tree, node, nservices) {
+		res_build_name(rg_name, sizeof(rg_name),
+			       node->rn_resource);
+
+		if (!strlen(rg_name))
+			continue;
+
+		ret[ncopied] = strdup(rg_name);
+		if (ret[ncopied]) {
+			ncopied++;
+		} else {
+			goto out_fail;
+		}
+	}
+
+	if (len)
+		*len = ncopied;
+	pthread_rwlock_unlock(&resource_lock);
+	return ret;
+
+out_fail:
+	pthread_rwlock_unlock(&resource_lock);
+	for (x = 0; x < ncopied; x++)
+		free(ret[x]);
+	if (ret)
+		free(ret);
+	return NULL;
+}
+
+
+
+
+
 /**
  * Called to decide what services to start locally during a node_event.
  * Originally a part of node_event, it is now its own function to cut down
@@ -1054,6 +1137,14 @@
 
 	free(arg);
 
+	if (central_events_enabled()) {
+		/* Never call get_rg_state() (distributed) if 
+		   central events are enabled, otherwise we
+		   might overwrite the rg state with 'stopped' 
+		   when it should be 'disabled' (e.g. autostart="0") */
+		fast = 1;
+	}
+
 	/* See if we have a slot... */
 	if (rg_inc_status() < 0) {
 		/* Too many outstanding status checks.  try again later. */
@@ -1063,6 +1154,8 @@
 		pthread_exit(NULL);
 	}
 	
+	/*send_master_state(ctx);*/
+
 	pthread_rwlock_rdlock(&resource_lock);
 
 	list_do(&_tree, node) {
@@ -1191,11 +1284,12 @@
   Stop changed resources.
  */
 void *
-q_status_checks(void *arg)
+q_status_checks(void __attribute__ ((unused)) *arg)
 {
 	resource_node_t *curr;
 	rg_state_t svcblk;
 	char rg[64];
+	struct dlm_lksb lockp;
 	
 	/* Only one status thread at a time, please! */
 	if (pthread_mutex_trylock(&status_mutex) != 0)
@@ -1209,7 +1303,13 @@
 
 		/* Local check - no one will make us take a service */
 		if (get_rg_state_local(rg, &svcblk) < 0) {
-			continue;
+			if (rg_lock(rg, &lockp) != 0)
+				continue;
+			if (get_rg_state(rg, &svcblk) < 0) {
+				rg_unlock(&lockp);
+				continue;
+			}
+			rg_unlock(&lockp);
 		}
 
 		if (svcblk.rs_owner != my_id() ||
@@ -1430,7 +1530,7 @@
 
 
 int
-check_config_update(void)
+check_config_update(int *new, int *old)
 {
 	int newver = 0, fd, ret = 0;
 	char *val = NULL;
@@ -1450,6 +1550,8 @@
 	pthread_mutex_lock(&config_mutex);
 	if (newver && newver != config_version)
 		ret = 1;
+	if (new) *new = newver;
+	if (old) *old = config_version;
 	pthread_mutex_unlock(&config_mutex);
 	ccs_unlock(fd);
 
@@ -1473,12 +1575,14 @@
 int
 init_resource_groups(int reconfigure)
 {
-	int fd, x;
+	int fd, x, y, cnt;
 
+	event_table_t *evt = NULL;
 	resource_t *reslist = NULL, *res;
 	resource_rule_t *rulelist = NULL, *rule;
 	resource_node_t *tree = NULL;
 	fod_t *domains = NULL, *fod;
+	event_t *evp;
 	char *val;
 
 	if (reconfigure)
@@ -1539,6 +1643,24 @@
 	x = 0;
 	list_do(&domains, fod) { ++x; } while (!list_done(&domains, fod));
 	clulog(LOG_DEBUG, "%d domains defined\n", x);
+	construct_events(fd, &evt);
+	cnt = 0;
+	if (evt) {
+		for (x=0; x <= evt->max_prio; x++) {
+			if (!evt->entries[x])
+				continue;
+
+			/* Tally this priority level into the total (cnt) */
+			y = 0;
+			list_do(&evt->entries[x], evp) {
+				++y;
+			} while (!list_done(&evt->entries[x], evp));
+
+			cnt += y;
+		}
+	}
+	clulog(LOG_DEBUG, "%d events defined\n", cnt);
+	
 
 	/* Reconfiguration done */
 	ccs_unlock(fd);
@@ -1567,6 +1689,9 @@
 	if (_domains)
 		deconstruct_domains(&_domains);
 	_domains = domains;
+	if (master_event_table)
+		deconstruct_events(&master_event_table);
+	master_event_table = evt;
 	pthread_rwlock_unlock(&resource_lock);
 
 	if (reconfigure) {
@@ -1608,6 +1733,60 @@
 
 
 int
+get_service_property(char *rg_name, char *prop, char *buf, size_t buflen)
+{
+	/* Copies attribute `prop' of the resource find_root_by_ref() returns
+	   for `rg_name' into buf as a NUL-terminated (possibly truncated)
+	   string; buf is left zeroed if nothing is found.  Returns 0. */
+	int ret = 0;
+	resource_t *res;
+	char *val;
+
+	memset(buf, 0, buflen);
+#if 0
+	if (!strcmp(prop, "domain")) {
+		/* not needed */
+		strncpy(buf, "", buflen);
+	} else if (!strcmp(prop, "autostart")) {
+		strncpy(buf, "1", buflen);
+	} else if (!strcmp(prop, "hardrecovery")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "exclusive")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "nfslock")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "recovery")) {
+		strncpy(buf, "restart", buflen);
+	} else if (!strcmp(prop, "depend")) {
+		/* not needed */
+		strncpy(buf, "", buflen);
+	} else {
+		/* not found / no defaults */
+		ret = -1;
+	}
+#endif
+	pthread_rwlock_rdlock(&resource_lock);
+	res = find_root_by_ref(&_resources, rg_name);
+	if (res) {
+		val = res_attr_value(res, prop);
+		/* Leave the final zeroed byte alone so buf stays terminated */
+		if (val && buflen > 0)
+			strncpy(buf, val, buflen - 1);
+	}
+	pthread_rwlock_unlock(&resource_lock);
+
+#if 0
+	if (ret == 0)
+		printf("%s(%s, %s) = %s\n", __FUNCTION__, rg_name, prop, buf);
+	else 
+		printf("%s(%s, %s) = NOT FOUND\n", __FUNCTION__, rg_name, prop);
+#endif
+
+	return ret;
+}
+
+
+int
 check_restart(char *rg_name)
 {
 	resource_node_t *node;
--- cluster/rgmanager/src/daemons/main.c	2007/11/26 21:46:27	1.34.2.10
+++ cluster/rgmanager/src/daemons/main.c	2007/12/18 17:52:56	1.34.2.11
@@ -35,6 +35,7 @@
 #include <rg_queue.h>
 #include <malloc.h>
 #include <cman-private.h>
+#include <event.h>
 
 #define L_SHUTDOWN (1<<2)
 #define L_SYS (1<<1)
@@ -54,9 +55,10 @@
 void flag_shutdown(int sig);
 void hard_exit(void);
 int send_rg_states(msgctx_t *, int);
-int check_config_update(void);
+int check_config_update(int *, int *);
 int svc_exists(char *);
 int watchdog_init(void);
+int32_t master_event_callback(char *key, uint64_t viewno, void *data, uint32_t datalen);
 
 int shutdown_pending = 0, running = 1, need_reconfigure = 0;
 char debug = 0; /* XXX* */
@@ -65,11 +67,10 @@
 static char *rgmanager_lsname = "rgmanager"; /* XXX default */
 
 int next_node_id(cluster_member_list_t *membership, int me);
-int rg_event_q(char *svcName, uint32_t state, int owner);
 void malloc_dump_table(FILE *, size_t, size_t);
 
 void
-segfault(int sig)
+segfault(int __attribute__ ((unused)) sig)
 {
 	char ow[64];
 
@@ -94,13 +95,20 @@
 send_node_states(msgctx_t *ctx)
 {
 	int x;
+	event_master_t master;
 	generic_msg_hdr hdr;
 	cluster_member_list_t *ml = member_list();
 
+	master.m_nodeid = 0;
+	event_master_info_cached(&master);
+
 	for (x = 0; x < ml->cml_count; x++) {
 		if (ml->cml_members[x].cn_member == 1) {
 			msg_send_simple(ctx, RG_STATUS_NODE,
-					ml->cml_members[x].cn_nodeid, 0);
+					ml->cml_members[x].cn_nodeid, 
+					(ml->cml_members[x].cn_nodeid &&
+					 (ml->cml_members[x].cn_nodeid == 
+					  (int)master.m_nodeid)));
 		}
 	}
 	msg_send_simple(ctx, RG_SUCCESS, 0, 0);
@@ -110,7 +118,7 @@
 
 
 void
-flag_reconfigure(int sig)
+flag_reconfigure(int __attribute__ ((unused)) sig)
 {
 	need_reconfigure++;
 }
@@ -167,15 +175,25 @@
 	new_ml = get_member_list(h);
 	memb_mark_down(new_ml, 0);
 
-	for (x = 0; x < new_ml->cml_count; x++) {
+	for(x=0; new_ml && x<new_ml->cml_count;x++) {
+		if (new_ml->cml_members[x].cn_nodeid == 0) {
+		    new_ml->cml_members[x].cn_member = 0;
+		}
+	}
+
+	for (x = 0; new_ml && x < new_ml->cml_count; x++) {
 
-		if (new_ml->cml_members[x].cn_member == 0)
+		if (new_ml->cml_members[x].cn_member == 0) {
+			printf("skipping %d - node not member\n",
+			       new_ml->cml_members[x].cn_nodeid);
 			continue;
+		}
 		if (new_ml->cml_members[x].cn_nodeid == my_id())
 			continue;
 
 #ifdef DEBUG
-		printf("Checking for listening status of %d\n", new_ml->cml_members[x].cn_nodeid);
+		printf("Checking for listening status of %d\n",
+		       new_ml->cml_members[x].cn_nodeid);
 #endif
 
 		do {
@@ -187,6 +205,7 @@
 				clulog(LOG_DEBUG, "Node %d is not listening\n",
 					new_ml->cml_members[x].cn_nodeid);
 				new_ml->cml_members[x].cn_member = 0;
+				break;
 			} else if (quorate < 0) {
 				if (errno == ENOTCONN) {
 					new_ml->cml_members[x].cn_member = 0;
@@ -276,7 +295,9 @@
 
 
 int
-lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen)
+lock_commit_cb(char __attribute__ ((unused)) *key,
+	       uint64_t __attribute__ ((unused)) viewno,
+	       void *data, uint32_t datalen)
 {
 	char lockstate;
 
@@ -403,7 +424,7 @@
 int
 dispatch_msg(msgctx_t *ctx, int nodeid, int need_close)
 {
-	int ret = 0, sz = -1;
+	int ret = 0, sz = -1, nid;
 	char msgbuf[4096];
 	generic_msg_hdr	*msg_hdr = (generic_msg_hdr *)msgbuf;
 	SmMessageSt	*msg_sm = (SmMessageSt *)msgbuf;
@@ -412,7 +433,7 @@
 
 	/* Peek-a-boo */
 	sz = msg_receive(ctx, msg_hdr, sizeof(msgbuf), 1);
-	if (sz < sizeof (generic_msg_hdr)) {
+	if (sz < (int)sizeof (generic_msg_hdr)) {
 		clulog(LOG_ERR,
 		       "#37: Error receiving header from %d sz=%d CTX %p\n",
 		       nodeid, sz, ctx);
@@ -422,7 +443,7 @@
 	if (sz < 0)
 		return -1;
 
-	if (sz > sizeof(msgbuf)) {
+	if (sz > (int)sizeof(msgbuf)) {
 		raise(SIGSTOP);
 	}
 
@@ -441,7 +462,7 @@
 		goto out;
 	}
 
-	if (msg_hdr->gh_length != sz) {
+	if ((int)msg_hdr->gh_length != sz) {
 		clulog(LOG_ERR, "#XX: Read size mismatch: %d %d\n",
 		       ret, msg_hdr->gh_length);
 		goto out;
@@ -449,13 +470,13 @@
 
 	switch (msg_hdr->gh_command) {
 	case RG_STATUS:
-		clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx);
+		//clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx);
 		if (send_rg_states(ctx, msg_hdr->gh_arg1) == 0)
 			need_close = 0;
 		break;
 
 	case RG_STATUS_NODE:
-		clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx);
+		//clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx);
 		send_node_states(ctx);
 		break;
 
@@ -474,7 +495,7 @@
 
 	case RG_ACTION_REQUEST:
 
-		if (sz < sizeof(msg_sm)) {
+		if (sz < (int)sizeof(msg_sm)) {
 			clulog(LOG_ERR,
 			       "#39: Error receiving entire request (%d/%d)\n",
 			       ret, (int)sizeof(msg_sm));
@@ -493,14 +514,37 @@
 			swab_SmMessageSt(msg_sm);
 
 			if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) <
-		    	    sizeof (SmMessageSt))
+		    	    (int)sizeof (SmMessageSt))
 				clulog(LOG_ERR, "#40: Error replying to "
 				       "action request.\n");
 			ret = -1;
 			goto out;
 		}
 
-		/* Queue request */
+		if (central_events_enabled() &&
+		    msg_sm->sm_hdr.gh_arg1 != RG_ACTION_MASTER) {
+			
+			/* Centralized processing or request is from
+			   clusvcadm */
+			nid = event_master();
+			if (nid != my_id()) {
+				/* Forward the message to the event master */
+				forward_message(ctx, msg_sm, nid);
+			} else {
+				/* for us: queue it */
+				user_event_q(msg_sm->sm_data.d_svcName,
+					     msg_sm->sm_data.d_action,
+					     msg_sm->sm_hdr.gh_arg1,
+					     msg_sm->sm_hdr.gh_arg2,
+					     msg_sm->sm_data.d_svcOwner,
+					     ctx);
+			}
+
+			return 0;
+		}
+
+		/* Distributed processing and/or request is from master node
+		   -- Queue request */
 		rt_enqueue_request(msg_sm->sm_data.d_svcName,
 		  		   msg_sm->sm_data.d_action,
 		  		   ctx, 0, msg_sm->sm_data.d_svcOwner,
@@ -510,7 +554,7 @@
 
 	case RG_EVENT:
 		/* Service event.  Run a dependency check */
-		if (sz < sizeof(msg_sm)) {
+		if (sz < (int)sizeof(msg_sm)) {
 			clulog(LOG_ERR,
 			       "#39: Error receiving entire request (%d/%d)\n",
 			       ret, (int)sizeof(msg_sm));
@@ -526,7 +570,8 @@
 		/* Send to our rg event handler */
 		rg_event_q(msg_sm->sm_data.d_svcName,
 			   msg_sm->sm_data.d_action,
-			   msg_sm->sm_data.d_svcOwner);
+			   msg_sm->sm_hdr.gh_arg1,
+			   msg_sm->sm_hdr.gh_arg2);
 		break;
 
 	case RG_EXITING:
@@ -664,7 +709,7 @@
 int
 event_loop(msgctx_t *localctx, msgctx_t *clusterctx)
 {
-	int n, max, ret;
+	int n = 0, max, ret, oldver, newver;
 	fd_set rfds;
 	msgctx_t *newctx;
 	struct timeval tv;
@@ -733,10 +778,10 @@
 	if (!running)
 		return 0;
 
-	if (need_reconfigure || check_config_update()) {
+	if (need_reconfigure || check_config_update(&newver, &oldver)) {
 		need_reconfigure = 0;
 		configure_rgmanager(-1, 0);
-		init_resource_groups(1);
+		config_event_q(oldver, newver);
 		return 0;
 	}
 
@@ -755,7 +800,7 @@
 
 
 void
-flag_shutdown(int sig)
+flag_shutdown(int __attribute__ ((unused)) sig)
 {
 	shutdown_pending = 1;
 }
@@ -781,7 +826,7 @@
 
 
 void
-statedump(int sig)
+statedump(int __attribute__ ((unused)) sig)
 {
 	signalled++;
 }
@@ -818,8 +863,15 @@
 	}
 
 	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
-		if (!dbg)
-			set_transition_throttling(atoi(v));
+		set_transition_throttling(atoi(v));
+		free(v);
+	}
+
+	if (ccs_get(ccsfd, "/cluster/rm/@central_processing", &v) == 0) {
+		set_central_events(atoi(v));
+		if (atoi(v))
+			clulog(LOG_NOTICE,
+			       "Centralized Event Processing enabled\n");
 		free(v);
 	}
 
@@ -873,7 +925,7 @@
 
 
 void *
-shutdown_thread(void *arg)
+shutdown_thread(void __attribute__ ((unused)) *arg)
 {
 	rg_lockall(L_SYS|L_SHUTDOWN);
 	rg_doall(RG_STOP_EXITING, 1, NULL);
@@ -1013,6 +1065,7 @@
 	}
 
 	vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb);
+	vf_key_init("Transition-Master", 10, NULL, master_event_callback);
 #endif
 
 	/*
--- cluster/rgmanager/src/daemons/resrules.c	2007/11/26 21:46:27	1.16.2.8
+++ cluster/rgmanager/src/daemons/resrules.c	2007/12/18 17:52:56	1.16.2.9
@@ -27,6 +27,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <list.h>
+#include <ctype.h>
 #include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
--- cluster/rgmanager/src/daemons/rg_event.c	2007/07/24 13:53:08	1.1.2.1
+++ cluster/rgmanager/src/daemons/rg_event.c	2007/12/18 17:52:56	1.1.2.2
@@ -1,10 +1,9 @@
 /*
-  Copyright Red Hat, Inc. 2006
+  Copyright Red Hat, Inc. 2006-2007
 
   This program is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License as published by the
-  Free Software Foundation; either version 2, or (at your option) any
-  later version.
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
 
   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -23,81 +22,552 @@
 #include <libcman.h>
 #include <ccs.h>
 #include <clulog.h>
-
-typedef struct __rge_q {
-	list_head();
-	char rg_name[128];
-	uint32_t rg_state;
-	int rg_owner;
-} rgevent_t;
+#include <lock.h>
+#include <event.h>
+#include <stdint.h>
+#include <vf.h>
+#include <members.h>
 
 
 /**
  * resource group event queue.
  */
-static rgevent_t *rg_ev_queue = NULL;
-static pthread_mutex_t rg_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_t rg_ev_thread = 0;
+static event_t *event_queue = NULL;
+#ifdef WRAP_LOCKS
+static pthread_mutex_t event_queue_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+static pthread_mutex_t mi_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
+static pthread_mutex_t event_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t mi_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+static pthread_t event_thread = 0;
+static int transition_throttling = 5;
+static int central_events = 0;
+
+extern int running;
+extern int shutdown_pending;
+static int _master = 0;
+static struct dlm_lksb _master_lock;
+static int _xid = 0;
+static event_master_t *mi = NULL;
+
+void hard_exit(void);
+int init_resource_groups(int);
+void flag_shutdown(int sig);
+void flag_reconfigure(int sig);
 
-void group_event(char *name, uint32_t state, int owner);
+event_table_t *master_event_table = NULL;
+
+
+void
+set_transition_throttling(int nsecs)
+{
+	if (nsecs < 0)
+		nsecs = 0;
+	transition_throttling = nsecs;
+}
+
+
+void
+set_central_events(int flag)
+{
+	central_events = flag;
+}
+
+
+int
+central_events_enabled(void)
+{
+	return central_events;
+}
+
+
+/**
+  Called to handle the transition of a cluster member from up->down or
+  down->up.  This handles initializing services (in the local node-up case),
+  exiting due to loss of quorum (local node-down), and service fail-over
+  (remote node down).  This is the distributed node event processor;
+  for the local-only node event processor, see slang_event.c
+ 
+  @param nodeID		ID of the member which has come up/gone down.
+  @param nodeStatus		New state of the member in question.
+  @see eval_groups
+ */
+void
+node_event(int local, int nodeID, int nodeStatus, int clean)
+{
+	if (!running)
+		return;
+
+	if (local) {
+
+		/* Local Node Event */
+		if (nodeStatus == 0) {
+			clulog(LOG_ERR, "Exiting uncleanly\n");
+			hard_exit();
+		}
+
+		if (!rg_initialized()) {
+			if (init_resource_groups(0) != 0) {
+				clulog(LOG_ERR,
+				       "#36: Cannot initialize services\n");
+				hard_exit();
+			}
+		}
+
+		if (shutdown_pending) {
+			clulog(LOG_NOTICE, "Processing delayed exit signal\n");
+			running = 0;
+			return;
+		}
+		setup_signal(SIGINT, flag_shutdown);
+		setup_signal(SIGTERM, flag_shutdown);
+		setup_signal(SIGHUP, flag_reconfigure);
+
+		eval_groups(1, nodeID, 1);
+		return;
+	}
+
+	/*
+	 * Nothing to do for events from other nodes if we are not ready.
+	 */
+	if (!rg_initialized()) {
+		clulog(LOG_DEBUG, "Services not initialized.\n");
+		return;
+	}
+
+	eval_groups(0, nodeID, nodeStatus);
+}
 
 
+/**
+   Query CCS to see whether a node has fencing enabled or not in
+   the configuration.  This does not check to see if it's in the
+   fence domain.
+ */
+int
+node_has_fencing(int nodeid)
+{
+	int ccs_desc;
+	char *val = NULL;
+	char buf[1024];
+	int ret = 1;
+	
+	ccs_desc = ccs_connect();
+	if (ccs_desc < 0) {
+		clulog(LOG_ERR, "Unable to connect to ccsd; cannot handle"
+		       " node event!\n");
+		/* Assume node has fencing */
+		return 1;
+	}
+
+	snprintf(buf, sizeof(buf), 
+		 "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]"
+		 "/fence/method/device/@name", nodeid);
+
+	if (ccs_get(ccs_desc, buf, &val) != 0)
+		ret = 0;
+	if (val) 
+		free(val);
+	ccs_disconnect(ccs_desc);
+	return ret;
+}
+
+
+/**
+   Quick query to cman to see if a node has been fenced.
+ */
+int
+node_fenced(int nodeid)
+{
+	cman_handle_t ch;
+	int fenced = 0;
+	uint64_t fence_time;
+
+	ch = cman_init(NULL);
+	if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0)
+		fenced = 0;
+
+	cman_finish(ch);
+
+	return fenced;
+}
+
+
+/**
+   Callback from view-formation when a commit occurs for the Transition-
+   Master key.  Validates the payload and caches it as the master record,
+   freeing the previously cached one.  key and viewno are unused.
+
+   @param data		event_master_t; run through swab_event_master_t().
+   @param datalen	Size of data; must equal sizeof(event_master_t).
+   @return		0 if the record was cached, 1 if it was rejected.
+ */
+int32_t
+master_event_callback(char *key, uint64_t viewno,
+		      void *data, uint32_t datalen)
+{
+	event_master_t *m = data;
+
+	if (datalen != (uint32_t)sizeof(*m)) {
+		clulog(LOG_ERR, "%s: wrong size\n", __FUNCTION__);
+		return 1;
+	}
+	swab_event_master_t(m);
+	if (m->m_magic != EVENT_MASTER_MAGIC) {
+		clulog(LOG_ERR, "%s: wrong magic\n", __FUNCTION__);
+		return 1;
+	}
+
+	if (m->m_nodeid == my_id())
+		clulog(LOG_DEBUG, "Master Commit: I am master\n");
+	else
+		clulog(LOG_DEBUG, "Master Commit: %d is master\n", m->m_nodeid);
+	pthread_mutex_lock(&mi_mutex);
+	free(mi);	/* free(NULL) is a no-op */
+	mi = m;
+	pthread_mutex_unlock(&mi_mutex);
+	return 0;
+}
+
+
+/**
+  Read the Transition-Master key from vf if it exists.  If it doesn't,
+  attempt to become the transition-master.
+ */
+static int
+find_master(void)
+{
+	event_master_t *masterinfo = NULL;
+	void *data;
+	uint32_t sz;
+	cluster_member_list_t *m;
+	uint64_t vn;
+	int master_id = -1;
+
+	m = member_list();
+	if (vf_read(m, "Transition-Master", &vn,
+		    (void **)(&data), &sz) < 0) {
+		clulog(LOG_ERR, "Unable to discover master"
+		       " status\n");
+		masterinfo = NULL;
+	} else {
+		masterinfo = (event_master_t *)data;
+	}
+	free_member_list(m);
+
+	if (masterinfo && (sz >= sizeof(*masterinfo))) {
+		swab_event_master_t(masterinfo);
+		if (masterinfo->m_magic == EVENT_MASTER_MAGIC) {
+			clulog(LOG_DEBUG, "Master Locate: %d is master\n",
+			       masterinfo->m_nodeid);
+			pthread_mutex_lock(&mi_mutex);
+			if (mi)
+				free(mi);
+			mi = masterinfo;
+			pthread_mutex_unlock(&mi_mutex);
+			master_id = masterinfo->m_nodeid;
+		}
+	}
+
+	return master_id;
+}
+
+
+/**
+  Return a copy of the cached event_master_t structure to the
+  caller.  Returns 0 on success; -1 with errno set to EINVAL (central
+  events disabled or NULL mi_out) or ENOENT (no master cached yet).
+ */
+int
+event_master_info_cached(event_master_t *mi_out)
+{
+	if (!central_events || !mi_out) {
+		errno = EINVAL;	/* errno codes are positive */
+		return -1;
+	}
+
+	pthread_mutex_lock(&mi_mutex);
+	if (!mi) {
+		pthread_mutex_unlock(&mi_mutex);
+		errno = ENOENT;
+		return -1;
+	}
+	memcpy(mi_out, mi, sizeof(*mi));
+	pthread_mutex_unlock(&mi_mutex);
+	return 0;
+}
+
+
+/**
+  Return the node ID of the master.  If none exists, become
+  the master and return our own node ID.  Returns -1 if the master
+  cannot be determined (lock status error or find_master() failure).
+ */
+int
+event_master(void)
+{
+	cluster_member_list_t *m = NULL;
+	event_master_t masterinfo;
+	int master_id = -1, cached = 0;
+
+	/* We hold this forever. */
+	if (_master)
+		return my_id();
+
+	m = member_list();
+	pthread_mutex_lock(&mi_mutex);
+	if (mi) {
+		master_id = mi->m_nodeid;
+		cached = 1;
+	}
+	/* Unlock exactly once, whether or not a master was cached */
+	pthread_mutex_unlock(&mi_mutex);
+
+	/* A cached master that is still online remains the master */
+	if (cached && memb_online(m, master_id))
+		goto out;
+
+	memset(&_master_lock, 0, sizeof(_master_lock));
+	if (clu_lock(LKM_EXMODE, &_master_lock, LKF_NOQUEUE,
+		     "Transition-Master") < 0) {
+		/* not us, find out who is master */
+		master_id = find_master();
+		goto out;
+	}
+
+	if (_master_lock.sb_status != 0) {
+		master_id = -1;
+		goto out;
+	}
+
+	_master = 1;
+
+	memset(&masterinfo, 0, sizeof(masterinfo));
+	masterinfo.m_magic = EVENT_MASTER_MAGIC;
+	masterinfo.m_nodeid = my_id();
+	masterinfo.m_master_time = (uint64_t)time(NULL);
+	swab_event_master_t(&masterinfo);
+
+	if (vf_write(m, VFF_IGN_CONN_ERRORS | VFF_RETRY,
+		     "Transition-Master", &masterinfo,
+		     sizeof(masterinfo)) < 0) {
+		clulog(LOG_ERR, "Unable to advertise master"
+		       " status to all nodes\n");
+	}
+
+	master_id = my_id();
+out:
+	free_member_list(m);
+	return master_id;
+}
+
+
+
+void group_event(char *name, uint32_t state, int owner);
+
+/**
+  Event handling function.  This only stays around as long as
+  events are on the queue.
+ */
 void *
-rg_event_thread(void *arg)
+_event_thread_f(void *arg)
 {
-	rgevent_t *ev;
+	event_t *ev;
+	int notice = 0, count = 0;
 
 	while (1) {
-		pthread_mutex_lock(&rg_queue_mutex);
-		ev = rg_ev_queue;
+		pthread_mutex_lock(&event_queue_mutex);
+		ev = event_queue;
 		if (ev)
-			list_remove(&rg_ev_queue, ev);
+			list_remove(&event_queue, ev);
 		else
 			break; /* We're outta here */
-		pthread_mutex_unlock(&rg_queue_mutex);
 
-		group_event(ev->rg_name, ev->rg_state, ev->rg_owner);
+		++count;
+		/* Event thread usually doesn't hang around.  When it's
+	   	   spawned, sleep for this many seconds in order to let
+	   	   some events queue up */
+		if ((count==1) && transition_throttling && !central_events)
+			sleep(transition_throttling);
+
+		pthread_mutex_unlock(&event_queue_mutex);
+
+		if (ev->ev_type == EVENT_CONFIG) {
+			/*
+			clulog(LOG_NOTICE, "Config Event: %d -> %d\n",
+			       ev->ev.config.cfg_oldversion,
+			       ev->ev.config.cfg_version);
+			 */
+			init_resource_groups(1);
+			free(ev);
+			continue;
+		}
+
+		if (central_events) {
+			/* If the master node died or there isn't
+			   one yet, take the master lock. */
+			if (event_master() == my_id()) {
+				slang_process_event(master_event_table,
+						    ev);
+			} 
+			free(ev);
+			continue;
+			/* ALL OF THE CODE BELOW IS DISABLED
+			   when using central_events */
+		}
+
+		if (ev->ev_type == EVENT_RG) {
+			/*
+			clulog(LOG_NOTICE, "RG Event: %s %s %d\n",
+			       ev->ev.group.rg_name,
+			       rg_state_str(ev->ev.group.rg_state),
+			       ev->ev.group.rg_owner);
+			 */
+			group_event(ev->ev.group.rg_name,
+				    ev->ev.group.rg_state,
+				    ev->ev.group.rg_owner);
+		} else if (ev->ev_type == EVENT_NODE) {
+			/*
+			clulog(LOG_NOTICE, "Node Event: %s %d %s %s\n",
+			       ev->ev.node.ne_local?"Local":"Remote",
+			       ev->ev.node.ne_nodeid,
+			       ev->ev.node.ne_state?"UP":"DOWN",
+			       ev->ev.node.ne_clean?"Clean":"Dirty")
+			 */
+
+			if (ev->ev.node.ne_state == 0 &&
+			    !ev->ev.node.ne_clean &&
+			    node_has_fencing(ev->ev.node.ne_nodeid)) {
+				notice = 0;
+				while (!node_fenced(ev->ev.node.ne_nodeid)) {
+					if (!notice) {
+						notice = 1;
+						clulog(LOG_INFO, "Waiting for "
+						       "node #%d to be fenced\n",
+						       ev->ev.node.ne_nodeid);
+					}
+					sleep(2);
+				}
+
+				if (notice)
+					clulog(LOG_INFO, "Node #%d fenced; "
+					       "continuing\n",
+					       ev->ev.node.ne_nodeid);
+			}
+
+			node_event(ev->ev.node.ne_local,
+				   ev->ev.node.ne_nodeid,
+				   ev->ev.node.ne_state,
+				   ev->ev.node.ne_clean);
+		}
 
 		free(ev);
 	}
 
+	if (!central_events || _master) {
+		clulog(LOG_DEBUG, "%d events processed\n", count);
+	}
 	/* Mutex held */
-	rg_ev_thread = 0;
-	pthread_mutex_unlock(&rg_queue_mutex);
+	event_thread = 0;
+	pthread_mutex_unlock(&event_queue_mutex);
 	pthread_exit(NULL);
 }
 
 
-void
-rg_event_q(char *name, uint32_t state, int owner)
+static void
+insert_event(event_t *ev)
 {
-	rgevent_t *ev;
 	pthread_attr_t attrs;
+	pthread_mutex_lock (&event_queue_mutex);
+	ev->ev_transaction = ++_xid;
+	list_insert(&event_queue, ev);
+	if (event_thread == 0) {
+        	pthread_attr_init(&attrs);
+        	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+        	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+		pthread_attr_setstacksize(&attrs, 262144);
+
+		pthread_create(&event_thread, &attrs, _event_thread_f, NULL);
+        	pthread_attr_destroy(&attrs);
+	}
+	pthread_mutex_unlock (&event_queue_mutex);
+}
+
+
+static event_t *
+new_event(void)
+{
+	event_t *ev;
 
 	while (1) {
-		ev = malloc(sizeof(rgevent_t));
+		ev = malloc(sizeof(*ev));
 		if (ev) {
 			break;
 		}
 		sleep(1);
 	}
-
 	memset(ev,0,sizeof(*ev));
+	ev->ev_type = EVENT_NONE;
 
-	strncpy(ev->rg_name, name, 128);
-	ev->rg_state = state;
-	ev->rg_owner = owner;
-
-	pthread_mutex_lock (&rg_queue_mutex);
-	list_insert(&rg_ev_queue, ev);
-	if (rg_ev_thread == 0) {
-        	pthread_attr_init(&attrs);
-        	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
-        	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
-		pthread_attr_setstacksize(&attrs, 262144);
+	return ev;
+}
 
-		pthread_create(&rg_ev_thread, &attrs, rg_event_thread, NULL);
-        	pthread_attr_destroy(&attrs);
-	}
-	pthread_mutex_unlock (&rg_queue_mutex);
+
+/* Queue a resource-group state event via insert_event(). */
+void
+rg_event_q(char *name, uint32_t state, int owner, int last)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_RG;
+	/* new_event() zero-fills ev: copy one byte less to stay terminated */
+	strncpy(ev->ev.group.rg_name, name, sizeof(ev->ev.group.rg_name) - 1);
+	ev->ev.group.rg_state = state;
+	ev->ev.group.rg_owner = owner;
+	ev->ev.group.rg_last_owner = last;
+	insert_event(ev);
+}
+
+
+void
+node_event_q(int local, int nodeID, int state, int clean)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_NODE;
+	ev->ev.node.ne_state = state;
+	ev->ev.node.ne_local = local;
+	ev->ev.node.ne_nodeid = nodeID;
+	ev->ev.node.ne_clean = clean;
+	insert_event(ev);
 }
+
+
+void
+config_event_q(int old_version, int new_version)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_CONFIG;
+	ev->ev.config.cfg_version = new_version;
+	ev->ev.config.cfg_oldversion = old_version;
+	insert_event(ev);
+}
+
+void
+user_event_q(char *svc, int request,
+	     int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	event_t *ev = new_event();
+	/* new_event() zero-fills ev: copy one byte less to stay terminated */
+	ev->ev_type = EVENT_USER;
+	strncpy(ev->ev.user.u_name, svc, sizeof(ev->ev.user.u_name) - 1);
+	ev->ev.user.u_request = request;
+	ev->ev.user.u_arg1 = arg1;
+	ev->ev.user.u_arg2 = arg2;
+	ev->ev.user.u_target = target;
+	ev->ev.user.u_ctx = ctx;
+	insert_event(ev);
+}
+
--- cluster/rgmanager/src/daemons/rg_forward.c	2007/08/02 14:46:51	1.8.2.3
+++ cluster/rgmanager/src/daemons/rg_forward.c	2007/12/18 17:52:56	1.8.2.4
@@ -27,11 +27,21 @@
 #include <members.h>
 
 
+struct fw_message {
+	msgctx_t *ctx;
+	SmMessageSt msg;
+	int nodeid;
+};
+
+
 void
-build_message(SmMessageSt *msgp, int action, char *svcName, int target)
+build_message(SmMessageSt *msgp, int action, char *svcName, int target,
+	      int arg1, int arg2)
 {
 	msgp->sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msgp->sm_hdr.gh_command = RG_ACTION_REQUEST;
+	msgp->sm_hdr.gh_arg1 = arg1;
+	msgp->sm_hdr.gh_arg2 = arg2;
 	msgp->sm_hdr.gh_length = sizeof(*msgp);
 	msgp->sm_data.d_action = action;
 	strncpy(msgp->sm_data.d_svcName, svcName,
@@ -90,7 +100,8 @@
 	}
 
 	/* Construct message */
-	build_message(&msg, req->rr_request, req->rr_group, req->rr_target);
+	build_message(&msg, req->rr_request, req->rr_group, req->rr_target,
+		      req->rr_arg0, req->rr_arg1);
 
 	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0) {
 		clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
@@ -166,3 +177,121 @@
         pthread_attr_destroy(&attrs);
 }
 
+/* Thread body: forward fwmsg->msg to fwmsg->nodeid and relay the reply (or an
+ * error code) to fwmsg->ctx via send_ret().  Frees arg and both contexts. */
+void *
+forwarding_thread_v2(void *arg)
+{
+	msgctx_t *ctx = NULL, *resp_ctx = NULL;
+	cluster_member_list_t *m = NULL;
+	SmMessageSt *msgp = NULL, msg;
+	int response_code = RG_EAGAIN, ret, target = -1;
+	int retries = 0, action;
+	struct fw_message *fwmsg = (struct fw_message *)arg;
+
+	msgp = &fwmsg->msg;
+	resp_ctx = fwmsg->ctx;
+	target = fwmsg->nodeid;
+	/* Save the action now: msgp is byte-swapped in place before sending */
+	action = msgp->sm_data.d_action;
+
+	clulog(LOG_DEBUG, "FW: Forwarding SM request to %d\n", target);
+
+	ctx = msg_new_ctx();
+	if (ctx == NULL) {
+		clulog(LOG_DEBUG, "FW: Failed to allocate socket context: %s\n",
+		       strerror(errno));
+		goto out_fail;
+	}
+	if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 10) < 0) {
+		clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+	/* swap + send; the (int) cast stops a negative return being promoted */
+	swab_SmMessageSt(msgp);
+	if (msg_send(ctx, msgp, sizeof(*msgp)) < (int)sizeof(*msgp)) {
+		clulog(LOG_DEBUG, "FW: Failed to send message to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+	/*
+	 * Ok, we're forwarding a message to another node.  Keep tabs on
+	 * the node to make sure it doesn't die.  Basically, wake up every
+	 * now and again to make sure it's still online.  If it isn't, send
+	 * a response back to the caller.
+	 */
+	do {
+		ret = msg_receive(ctx, &msg, sizeof(msg), 10);
+		if (ret < (int)sizeof(msg)) {
+			if (ret < 0 && errno == ETIMEDOUT) {
+				m = member_list();
+				if (!memb_online(m, target)) {
+					response_code = RG_ENODE;
+					goto out_fail;
+				}
+				free_member_list(m);
+				m = NULL;
+				continue;
+			}
+			if (ret == 0)
+				continue;
+		}
+		break;
+	} while(++retries < 60); /* old 600 second rule */
+
+	/* No complete reply: msg is not valid, so report RG_EAGAIN instead */
+	if (ret < (int)sizeof(msg))
+		goto out_fail;
+	swab_SmMessageSt(&msg);
+	response_code = msg.sm_data.d_ret;
+	target = msg.sm_data.d_svcOwner;
+
+out_fail:
+	if (resp_ctx) {
+		send_ret(resp_ctx, msgp->sm_data.d_svcName, response_code,
+			 action, target);
+		msg_close(resp_ctx);
+		msg_free_ctx(resp_ctx);
+	}
+	/* d_svcName lives inside fwmsg: free it only after send_ret() */
+	free(fwmsg);
+	if (ctx) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+	if (m)
+		free_member_list(m);
+
+	pthread_exit(NULL);
+}
+
+/* Forward a copy of *msgp to nodeid from a detached thread; consumes ctx. */
+void
+forward_message(msgctx_t *ctx, void *msgp, int nodeid)
+{
+	pthread_t newthread;
+	pthread_attr_t attrs;
+	struct fw_message *fwmsg = malloc(sizeof(*fwmsg));
+	int err;
+
+	if (!fwmsg)
+		goto out_drop;
+	memcpy(&fwmsg->msg, msgp, sizeof(fwmsg->msg));
+	fwmsg->ctx = ctx;
+	fwmsg->nodeid = nodeid;
+	pthread_attr_init(&attrs);
+	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+	pthread_attr_setstacksize(&attrs, 262144);
+	err = pthread_create(&newthread, &attrs, forwarding_thread_v2, fwmsg);
+	pthread_attr_destroy(&attrs);
+	if (err == 0)
+		return;	/* the thread now owns fwmsg and ctx */
+	free(fwmsg);	/* no thread was started: clean up here */
+out_drop:
+	msg_close(ctx);
+	msg_free_ctx(ctx);
+}
--- cluster/rgmanager/src/daemons/rg_state.c	2007/11/26 21:46:27	1.24.2.14
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/12/18 17:52:56	1.24.2.15
@@ -36,6 +36,7 @@
 #include <rg_queue.h>
 #include <msgsimple.h>
 #include <res-ocf.h>
+#include <event.h>
 
 /* XXX - copied :( */
 #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */
@@ -86,8 +87,24 @@
 }
 
 
+/* Strip a leading "service:" from svcName.  The result points into svcName;
+ * a name with any other prefix, or none, is returned unchanged. */
+char *
+c_name(char *svcName)
+{
+	static const char prefix[] = "service:";
+	size_t skip = sizeof(prefix) - 1;	/* prefix length without the NUL */
+
+	/* Matching all of "service:" means the first ':' is at offset 7 */
+	if (strncmp(svcName, prefix, skip) == 0)
+		return svcName + skip;
+
+	return svcName;
+}
+
+
 void
-broadcast_event(char *svcName, uint32_t state)
+broadcast_event(char *svcName, uint32_t state, int owner, int last)
 {
 	SmMessageSt msgp;
 	msgctx_t everyone;
@@ -95,10 +112,12 @@
 	msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msgp.sm_hdr.gh_command = RG_EVENT;
 	msgp.sm_hdr.gh_length = sizeof(msgp);
+	msgp.sm_hdr.gh_arg1 = owner; 
+	msgp.sm_hdr.gh_arg2 = last; 
 	msgp.sm_data.d_action = state;
 	strncpy(msgp.sm_data.d_svcName, svcName,
 		sizeof(msgp.sm_data.d_svcName));
-	msgp.sm_data.d_svcOwner = 0;
+	msgp.sm_data.d_svcOwner = owner;
 	msgp.sm_data.d_ret = 0;
 
 	swab_SmMessageSt(&msgp);
@@ -201,7 +220,7 @@
 
 
 void
-send_ret(msgctx_t *ctx, char *name, int ret, int orig_request)
+send_ret(msgctx_t *ctx, char *name, int ret, int orig_request, int new_owner)
 {
 	SmMessageSt msg, *msgp = &msg;
 	if (!ctx)
@@ -213,7 +232,9 @@
 	msgp->sm_data.d_action = orig_request;
 	strncpy(msgp->sm_data.d_svcName, name,
 		sizeof(msgp->sm_data.d_svcName));
-	msgp->sm_data.d_svcOwner = my_id(); /* XXX Broken */
+	if (!new_owner)
+		new_owner = my_id();
+	msgp->sm_data.d_svcOwner = new_owner; /* XXX Broken */
 	msgp->sm_data.d_ret = ret;
 
 	swab_SmMessageSt(msgp);
@@ -343,6 +364,7 @@
 	return 0;
 #else
 	membership = member_list();
+
 	ret = vf_read(membership, res, &viewno, &data, &datalen);
 
 	if (ret != VFR_OK || datalen == 0) {
@@ -652,7 +674,7 @@
 		/*
 		 * Starting failed service...
 		 */
-		if (req == RG_START_RECOVER) {
+		if (req == RG_START_RECOVER || central_events_enabled()) {
 			clulog(LOG_NOTICE,
 			       "Recovering failed service %s\n",
 			       svcName);
@@ -684,7 +706,7 @@
 	
 	case RG_STATE_DISABLED:
 	case RG_STATE_UNINITIALIZED:
-		if (req == RG_ENABLE) {
+		if (req == RG_ENABLE || req == RG_START_REMOTE) {
 			/* Don't actually enable if the RG is locked! */
 			if (rg_locked()) {
 				ret = 3;
@@ -808,7 +830,8 @@
 		       "Service %s started\n",
 		       svcName);
 
-		broadcast_event(svcName, RG_STATE_STARTED);
+		broadcast_event(svcName, RG_STATE_STARTED, svcStatus.rs_owner,
+				svcStatus.rs_last_owner);
 	} else {
 		clulog(LOG_WARNING,
 		       "#68: Failed to start %s; return value: %d\n",
@@ -1264,8 +1287,8 @@
 
 	clulog(LOG_NOTICE, "Stopping service %s\n", svcName);
 
-	if (recover)
-		svcStatus.rs_state = RG_STATE_ERROR;
+	if (recover) 
+	       	svcStatus.rs_state = RG_STATE_ERROR;
 	else
 		svcStatus.rs_state = RG_STATE_STOPPING;
 	svcStatus.rs_transition = (uint64_t)time(NULL);
@@ -1347,7 +1370,7 @@
 	}
 	rg_unlock(&lockp);
 
-	broadcast_event(svcName, newstate);
+	broadcast_event(svcName, newstate, -1, svcStatus.rs_last_owner);
 
 	return 0;
 }
@@ -1428,7 +1451,8 @@
 	}
 	rg_unlock(&lockp);
 
-	broadcast_event(svcName, RG_STATE_FAILED);
+	broadcast_event(svcName, RG_STATE_FAILED, -1,
+			svcStatus.rs_last_owner);
 
 	return 0;
 }
@@ -1437,8 +1461,8 @@
 /*
  * Send a message to the target node to start the service.
  */
-static int
-relocate_service(char *svcName, int request, uint32_t target)
+int
+svc_start_remote(char *svcName, int request, uint32_t target)
 {
 	SmMessageSt msg_relo;
 	int msg_ret;
@@ -1448,6 +1472,8 @@
 	/* Build the message header */
 	msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msg_relo.sm_hdr.gh_command = RG_ACTION_REQUEST;
+	/* XXX XXX */
+	msg_relo.sm_hdr.gh_arg1 = RG_ACTION_MASTER;
 	msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt);
 	msg_relo.sm_data.d_action = request;
 	strncpy(msg_relo.sm_data.d_svcName, svcName,
@@ -1470,13 +1496,13 @@
 	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) < 
 	    sizeof (SmMessageSt)) {
 		clulog(LOG_ERR,
-		       "#59: Error sending relocate request to member #%d\n",
+		       "#59: Error sending remote-start request to member #%d\n",
 		       target);
 		msg_close(&ctx);
 		return -1;
 	}
 
-	clulog(LOG_DEBUG, "Sent relocate request to %d\n", (int)target);
+	clulog(LOG_DEBUG, "Sent remote-start request to %d\n", (int)target);
 
 	/* Check the response */
 	do {
@@ -1649,7 +1675,7 @@
 		 	 * It's legal to start the service on the given
 		 	 * node.  Try to do so.
 		 	 */
-			if (relocate_service(svcName, request, target) == 0) {
+			if (svc_start_remote(svcName, request, target) == 0) {
 				*new_owner = target;
 				/*
 				 * Great! We're done...
@@ -1679,7 +1705,7 @@
 		if (target == me)
 			goto exhausted;
 
-		ret = relocate_service(svcName, request, target);
+		ret = svc_start_remote(svcName, request, target);
 		switch (ret) {
 		case RG_ERUN:
 			/* Someone stole the service while we were 
@@ -1932,7 +1958,7 @@
 		if (check_exclusive_resources(membership, svcName) != 0) {
 			free_member_list(membership);
 			pthread_mutex_unlock(&exclusive_mutex);
-			return RG_EFAIL;
+			return RG_EEXCL;
 		}
 	}
 	free_member_list(membership);
@@ -2013,7 +2039,7 @@
 			ret = RG_EFAIL;
 			goto out;
 		} else {
-			ret = relocate_service(svcName, RG_START_REMOTE, target);
+			ret = svc_start_remote(svcName, RG_START_REMOTE, target);
 		}
 
 		switch(ret) {
@@ -2033,7 +2059,7 @@
 		default:
 			clulog(LOG_ERR,
 			       "#6X: Invalid reply [%d] from member %d during"
-			       " relocate operation!\n", ret, target);
+			       " operation!\n", ret, target);
 		}
 	}
 
--- cluster/rgmanager/src/daemons/rg_thread.c	2007/07/24 13:58:47	1.15.2.9
+++ cluster/rgmanager/src/daemons/rg_thread.c	2007/12/18 17:52:56	1.15.2.10
@@ -16,12 +16,12 @@
   Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
   MA 02139, USA.
 */
+#include <message.h>
 #include <resgroup.h>
 #include <rg_locks.h>
 #include <gettid.h>
 #include <rg_queue.h>
 #include <assert.h>
-#include <message.h>
 
 /**
  * Resource thread list entry.
@@ -54,6 +54,7 @@
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *response_ctx, int max, uint32_t target,
 		       int arg0, int arg1);
+int central_events_enabled(void);
 
 
 /**
@@ -446,6 +447,11 @@
 
 			error = svc_stop(myname, RG_STOP_RECOVER);
 			if (error == 0) {
+				/* Stop generates an event - whatever the
+				   result.  If central events are enabled
+				   don't bother trying to recover */
+				if (central_events_enabled())
+					break;
 				error = handle_recover_req(myname, &newowner);
 				if (error == 0)
 					ret = RG_SUCCESS;
@@ -678,7 +684,7 @@
 	} else {
 		if (max) {
 			list_do(resgroup->rt_queue, curr) {
-				if (curr->rr_request == request)
+				if ((int)curr->rr_request == request)
 					count++;
 			} while (!list_done(resgroup->rt_queue, curr));
 	
@@ -701,7 +707,7 @@
 		case RG_START:
 		case RG_ENABLE:
 			send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
-				 request);
+				 request, 0);
 			msg_close(response_ctx);
 			msg_free_ctx(response_ctx);
 			break;
--- cluster/rgmanager/src/daemons/test.c	2007/11/26 21:46:27	1.6.2.6
+++ cluster/rgmanager/src/daemons/test.c	2007/12/18 17:52:56	1.6.2.7
@@ -28,6 +28,7 @@
 #include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
+#include <event.h>
 
 #ifndef NO_CCS
 #error "Can not be built with CCS support."
@@ -131,6 +132,7 @@
 	resource_t *reslist = NULL, *curres;
 	resource_node_t *tree = NULL, *tmp, *rn = NULL;
 	int ccsfd, ret = 0, rules = 0;
+	event_table_t *events = NULL;
 
 	fprintf(stderr,"Running in test mode.\n");
 
@@ -143,6 +145,7 @@
 
 	load_resource_rules(agentpath, &rulelist);
 	construct_domains(ccsfd, &domains);
+	construct_events(ccsfd, &events);
 	load_resources(ccsfd, &reslist, &rulelist);
 	build_resource_tree(ccsfd, &tree, &rulelist, &reslist);
 
@@ -177,6 +180,11 @@
 			printf("=== Failover Domains ===\n");
 			print_domains(&domains);
 		}
+
+		if (events) {
+			printf("=== Event Triggers ===\n");
+			print_events(events);
+		}
 	}
 
 	ccs_unlock(ccsfd);
@@ -247,6 +255,7 @@
 	}
 
 out:
+	deconstruct_events(&events);
 	deconstruct_domains(&domains);
 	destroy_resource_tree(&tree);
 	destroy_resources(&reslist);
--- cluster/rgmanager/src/resources/default_event_script.sl	2007/12/04 21:59:54	1.1.2.1
+++ cluster/rgmanager/src/resources/default_event_script.sl	2007/12/18 17:52:56	1.1.2.2
@@ -192,7 +192,8 @@
 		}
 
 		(owner, state) = service_status(services[x]);
-		if ((service_state == "started") and (owner < 0)) {
+		if ((service_state == "started") and (owner < 0) and
+		    (state == "stopped")) {
 			info("Dependency met; starting ", services[x]);
 			nodes = allowed_nodes(services[x]);
 			()=move_or_start(services[x], nodes);
@@ -245,6 +246,10 @@
 
 		if (user_target > 0) {
 			for (x = 0; x < length(nodes); x++) {
+				%
+				% Put the preferred node at the front of the 
+				% list for a user-relocate operation
+				%
 				if (nodes[x] == user_target) {
 					reordered = union(user_target, nodes);
 					nodes = reordered;
@@ -262,6 +267,13 @@
 			if (service_stop(service_name) < 0) {
 				return ERR_ABORT;
 			}
+
+			%
+			% The current owner shouldn't be the default
+			% for a relocate operation
+			%
+			reordered = subtract(nodes, owner);
+			nodes = union(reordered, owner);
 		}
 
 		ret = move_or_start(service_name, nodes);
@@ -275,7 +287,10 @@
 		ret = service_stop(service_name);
 
 	} 
+
+	%
 	% todo - migrate
+	%
 
 	return ret;
 }
--- cluster/rgmanager/src/resources/service.sh	2007/11/30 19:44:34	1.7.2.8
+++ cluster/rgmanager/src/resources/service.sh	2007/12/18 17:52:56	1.7.2.9
@@ -222,6 +222,7 @@
 #
 case $1 in
 	start)
+		[ -d "/var/run/cluster/rgmanager" ] && touch "/var/run/cluster/rgmanager/$OCF_RESOURCE_INSTANCE"
 		#
 		# XXX If this is set, we kill lockd.  If there is no
 		# child IP address, then clients will NOT get the reclaim
@@ -236,6 +237,7 @@
 		exit 0
 		;;
 	stop)
+		[ -d "/var/run/cluster/rgmanager" ] && rm -f "/var/run/cluster/rgmanager/$OCF_RESOURCE_INSTANCE"
 		exit 0
 		;;
 	recover|restart)
--- cluster/rgmanager/src/utils/clustat.c	2007/12/10 18:24:12	1.25.2.9
+++ cluster/rgmanager/src/utils/clustat.c	2007/12/18 17:52:56	1.25.2.10
@@ -472,7 +472,8 @@
 	if (rs->rs_state == RG_STATE_STOPPED ||
 	    rs->rs_state == RG_STATE_DISABLED ||
 	    rs->rs_state == RG_STATE_ERROR ||
-	    rs->rs_state == RG_STATE_FAILED) {
+	    rs->rs_state == RG_STATE_FAILED ||
+	    rs->rs_state == RG_STATE_UNINITIALIZED) {
 
 		snprintf(owner, sizeof(owner)-1, "(%-.*s)", nodesize-2,
 			 my_memb_id_to_name(members, rs->rs_last_owner));




More information about the Cluster-devel mailing list