[Cluster-devel] cluster/rgmanager ChangeLog TODO include/res-o ...
lhh at sourceware.org
lhh at sourceware.org
Tue Dec 18 17:53:00 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-12-18 17:52:56
Modified files:
rgmanager : ChangeLog TODO
rgmanager/include: res-ocf.h resgroup.h reslist.h
restart_counter.h rg_locks.h rg_queue.h
rgmanager/src/clulib: Makefile members.c rg_strings.c vft.c
rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c
resrules.c rg_event.c rg_forward.c
rg_state.c rg_thread.c test.c
rgmanager/src/resources: default_event_script.sl service.sh
rgmanager/src/utils: clustat.c
Added files:
rgmanager : event-script.txt
rgmanager/include: ds.h event.h sets.h
rgmanager/src/clulib: sets.c
rgmanager/src/daemons: event_config.c service_op.c slang_event.c
Log message:
Merge RIND 0.8.1 + bugfixes from HEAD to RHEL5 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/event-script.txt.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.31&r2=1.31.2.32
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/TODO.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/ds.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/event.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/sets.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/res-ocf.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.16.2&r2=1.1.16.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.8&r2=1.15.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.7&r2=1.15.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/restart_counter.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.1&r2=1.2.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_queue.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6&r2=1.6.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/sets.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.10.2.3&r2=1.10.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/members.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.5.2.5&r2=1.5.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.17.2.4&r2=1.17.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/event_config.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/service_op.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/slang_event.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.3.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14.2.4&r2=1.14.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.11.2.1&r2=1.11.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.13&r2=1.25.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.10&r2=1.34.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.8&r2=1.16.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_event.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8.2.3&r2=1.8.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.14&r2=1.24.2.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.9&r2=1.15.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6.2.6&r2=1.6.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/default_event_script.sl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.8&r2=1.7.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.9&r2=1.25.2.10
/cvs/cluster/cluster/rgmanager/event-script.txt,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/event-script.txt
+++ - 2007-12-18 17:52:56.910998000 +0000
@@ -0,0 +1,306 @@
+TODO:
+* Return correct error codes to clusvcadm (currently it always returns
+ "Unknown")
+* Write glue for 'migrate' operations and migrate-enabled services
+
+Basic configuration specification:
+
+ <rm>
+ <events>
+ <event class="node"/> <!-- all node events -->
+ <event class="node"
+ node="bar"/> <!-- events concerning 'bar' -->
+ <event class="node"
+ node="foo"
+ node_state="up"/> <!-- 'up' events for 'foo' -->
+ <event class="node"
+ node_id="3"
+ node_state="down"/> <!-- 'down' events for node ID 3 -->
+
+ (note, all service ops and such deal with node ID, not
+ with node names)
+
+ <event class="service"/> <!-- all service events-->
+ <event class="service"
+ service_name="A"/> <!-- events concerning 'A' -->
+ <event class="service"
+ service_name="B"
+ service_state="started"/> <!-- when 'B' is started... -->
+ <event class="service"
+ service_name="B"
+ service_state="started"
+ service_owner="3"/> <!-- when 'B' is started on node 3... -->
+
+ <event class="service"
+ priority="1"
+ service_state="started"
+ service_owner="3"/> <!-- when 'B' is started on node 3, do this
+ before the other event handlers ... -->
+
+
+ </events>
+ ...
+ </rm>
+
+General globals available from all scripts:
+
+ node_self - local node ID
+ event_type - event class, either:
+ EVENT_NONE - unspecified / unknown
+ EVENT_NODE - node transition
+ EVENT_SERVICE - service transition
+ EVENT_USER - a user-generated request
+ EVENT_CONFIG - [NOT CONFIGURABLE]
+
+Node event globals (i.e. when event_type == EVENT_NODE):
+
+ node_id - node ID which is transitioning
+ node_name - name of node which is transitioning
+ node_state - new node state (NODE_ONLINE or NODE_OFFLINE, or if you prefer,
+ 1 or 0, respectively)
+ node_clean - 0 if the node has not been fenced, 1 if the node has been
+ fenced
+
+Service event globals (i.e. when event_type == EVENT_SERVICE):
+
+ service_name - Name of service which transitioned
+ service_state - new state of service
+ service_owner - new owner of service (or <0 if service is no longer
+ running)
+ service_last_owner - Last owner of service if known. Used for when
+ service_state = "recovering" generally, in order to
+ apply restart/relocate/disable policy.
+
+User event globals (i.e. when event_type == EVENT_USER):
+
+ service_name - service to perform request upon
+ user_request - request to perform (USER_ENABLE, USER_DISABLE,
+ USER_STOP, USER_RELOCATE, [TODO] USER_MIGRATE)
+ user_target - target node ID if applicable
+
+
+Scripting functions - Informational:
+
+ node_list = nodes_online();
+
+ Returns a list of all online nodes.
+
+ service_list = service_list();
+
+ Returns a list of all configured services.
+
+ (restarts, last_owner, owner, state) = service_status(service_name);
+
+ Returns the state, owner, last_owner, and restarts. Note that
+ all return values are optional, but are right-justified per S-Lang
+ specification. This means if you only want the 'state', you can use:
+
+ (state) = service_status(service_name);
+
+ However, if you need the restart count, you must provide all four
+ return values as above.
+
+ (nofailback, restricted, ordered, node_list) =
+ service_domain_info(service_name);
+
+ Returns the failover domain specification, if it exists, for the
+ specified service name. The node list returned is an ordered list
+ according to priority levels. In the case of unordered domains,
+ the ordering of the returned list is pseudo-random.
+
+Scripting functions - Operational:
+
+ err = service_start(service_name, node_list, [avoid_list]);
+
+ Start a non-running (but runnable, i.e. not failed)
+ service on the first node in node_list. Failing that, start it on
+ the second node in node_list and so forth. One may also specify
+ an avoid list, but it's better to just use the subtract() function
+ below. If the start is successful, the node ID running the service
+ is returned. If the start is unsuccessful, a value < 0 is returned.
+
+ err = service_stop(service_name, [0 = stop, 1 = disable]);
+
+ Stop a running service. The second parameter is optional, and if
+ non-zero is specified, the service will enter the disabled state.
+
+ ... stuff that's not done but needs to be:
+
+ err = service_relocate(service_name, node_list);
+
+ Move a running service to the specified node_list in order of
+ preference. In the case of VMs, this is actually a migrate-or-
+ relocate operation.
+
+Utility functions - Node list manipulation
+
+ node_list = union(left_node_list, right_node_list);
+
+ Calculates the union between the two node lists, removing duplicates
+ and preserving ordering according to left_node_list. Any added
+ values from right_node_list will appear in their order, but
+ after left_node_list in the returned list.
+
+ node_list = intersection(left_node_list, right_node_list);
+
+ Calculates the intersection (items in both lists) between the two
+ node lists, removing duplicates and preserving ordering according
+ to left_node_list. Only values present in both lists appear in
+ the returned list.
+
+ node_list = delta(left_node_list, right_node_list);
+
+ Calculates the delta (items not in both lists) between the two
+ node lists, removing duplicates and preserving ordering according
+ to left_node_list. Any added values from right_node_list will
+ appear in their order, but after left_node_list in the returned list.
+
+ node_list = subtract(left_node_list, right_node_list);
+
+ Removes any duplicates as well as items specified in right_node_list
+ from left_node_list. Example:
+
+ all_nodes = nodes_online();
+ allowed_nodes = subtract(all_nodes, node_to_avoid);
+
+Utility functions - Logging:
+
+ debug(item1, item2, ...); LOG_DEBUG level
+ info(...); LOG_INFO level
+ notice(...); LOG_NOTICE level
+ warning(...); LOG_WARNING level
+ err(...); LOG_ERR level
+ crit(...); LOG_CRIT level
+ alert(...); LOG_ALERT level
+ emerg(...); LOG_EMERG level
+
+ items - These can be strings, integer lists, or integers. Logging
+ string lists is not supported.
+
+ level - the level is consistent with syslog(8)
+
+ stop_processing();
+
+ Calling this function will prevent further event scripts from being
+ executed on a particular event. Call this function if, for example,
+ you do not wish for the default event handler to process the event.
+
+ Note: This does NOT terminate the caller script; that is, the
+ script being executed will run to completion.
+
+Event scripts are written in a language called S-Lang; documentation specifics
+about the language are available at http://www.s-lang.org
+
+Example script (creating a follows-but-avoid-after-start behavior):
+%
+% If the main queue server and replication queue server are on the same
+% node, relocate the replication server somewhere else if possible.
+%
+define my_sap_event_trigger()
+{
+ variable state, owner_rep, owner_main;
+ variable nodes, allowed;
+
+ %
+ % If this was a service event, don't execute the default event
+ % script trigger after this script completes.
+ %
+ if (event_type == EVENT_SERVICE) {
+ stop_processing();
+ }
+
+ (owner_main, state) = service_status("service:main_queue");
+ (owner_rep, state) = service_status("service:replication_server");
+
+ if ((event_type == EVENT_NODE) and (owner_main == node_id) and
+ (node_state == NODE_OFFLINE) and (owner_rep >= 0)) {
+ %
+ % uh oh, the owner of the main server died. Restart it
+ % on the node running the replication server
+ %
+ notice("Starting Main Queue Server on node ", owner_rep);
+ ()=service_start("service:main_queue", owner_rep);
+ return;
+ }
+
+ %
+ % S-Lang doesn't short-circuit prior to 2.1.0
+ %
+ if ((owner_main >= 0) and
+ ((owner_main == owner_rep) or (owner_rep < 0))) {
+
+ %
+ % Get all online nodes
+ %
+ nodes = nodes_online();
+
+ %
+ % Drop out the owner of the main server
+ %
+ allowed = subtract(nodes, owner_main);
+ if ((owner_rep >= 0) and (length(allowed) == 0)) {
+ %
+ % Only one node is online and the rep server is
+ % already running. Don't do anything else.
+ %
+ return;
+ }
+
+ if ((length(allowed) == 0) and (owner_rep < 0)) {
+ %
+ % Only node online is the owner ... go ahead
+ % and start it, even though it doesn't increase
+ % availability to do so.
+ %
+ allowed = owner_main;
+ }
+
+ %
+ % Move the replication server off the node that is
+ % running the main server if a node's available.
+ %
+ if (owner_rep >= 0) {
+ ()=service_stop("service:replication_server");
+ }
+ ()=service_start("service:replication_server", allowed);
+ }
+
+ return;
+}
+
+my_sap_event_trigger();
+
+
+Relevant <rm> section from cluster.conf:
+
+ <rm central_processing="1">
+ <events>
+ <event name="main-start" class="service"
+ service="service:main_queue"
+ service_state="started"
+ file="/tmp/sap.sl"/>
+ <event name="rep-start" class="service"
+ service="service:replication_server"
+ service_state="started"
+ file="/tmp/sap.sl"/>
+ <event name="node-up" node_state="up"
+ class="node"
+ file="/tmp/sap.sl"/>
+
+ </events>
+ <failoverdomains>
+ <failoverdomain name="all" ordered="1" restricted="1">
+ <failoverdomainnode name="molly"
+priority="2"/>
+ <failoverdomainnode name="frederick"
+priority="1"/>
+ </failoverdomain>
+ </failoverdomains>
+ <resources/>
+ <service name="main_queue"/>
+ <service name="replication_server" autostart="0"/>
+ <!-- replication server is started when main-server start
+ event completes -->
+ </rm>
+
+
--- cluster/rgmanager/ChangeLog 2007/11/30 19:47:36 1.31.2.31
+++ cluster/rgmanager/ChangeLog 2007/12/18 17:52:55 1.31.2.32
@@ -1,3 +1,8 @@
+2007-12-18 Lon Hohberger <lhh at redhat.com>
+ * Merge RIND 0.8.1 + bugfixes from head branch to RHEL5
+ branch. Must be explicitly enabled by administrators in
+ cluster.conf. Migration (clusvcadm -M) not supported yet.
+
2007-11-30 Lon Hohberger <lhh at redhat.com>
* src/resources/clusterfs.sh: Retry mount up to 3 times to avoid
race condition during another process mounting a GFS volume
--- cluster/rgmanager/TODO 2006/07/19 18:43:32 1.8
+++ cluster/rgmanager/TODO 2007/12/18 17:52:55 1.8.2.1
@@ -1,5 +0,0 @@
-* Make live-migration of resources work; preferrably so that admins
-can manually migrate Xen VMs to other nodes without telling the cluster
-about it. That is, the cluster should be able to acquire running VMs
-and update its state accordingly.
-* Test against a working Xen build and shake out bugs
/cvs/cluster/cluster/rgmanager/include/ds.h,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/include/ds.h
+++ - 2007-12-18 17:52:57.216534000 +0000
@@ -0,0 +1,13 @@
+#ifndef _DS_H
+#define _DS_H
+
+int ds_init(void);
+int ds_key_init(char *keyid, int maxsize, int timeout);
+int ds_key_finish(char *keyid);
+int ds_write(char *keyid, void *buf, size_t maxlen);
+int ds_read(char *keyid, void *buf, size_t maxlen);
+int ds_finish(void);
+
+#define DS_MIN_SIZE 512
+
+#endif
/cvs/cluster/cluster/rgmanager/include/event.h,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/include/event.h
+++ - 2007-12-18 17:52:57.295789000 +0000
@@ -0,0 +1,145 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+#ifndef _EVENT_H
+#define _EVENT_H
+
+/* 128 is a bit big, but it should be okay */
+typedef struct __rge_q {
+ char rg_name[128];
+ uint32_t rg_state;
+ uint32_t pad1;
+ int rg_owner;
+ int rg_last_owner;
+} group_event_t;
+
+typedef struct __ne_q {
+ int ne_local;
+ int ne_nodeid;
+ int ne_state;
+ int ne_clean;
+} node_event_t;
+
+typedef struct __cfg_q {
+ int cfg_version;
+ int cfg_oldversion;
+} config_event_t;
+
+typedef struct __user_q {
+ char u_name[128];
+ msgctx_t *u_ctx;
+ int u_request;
+ int u_arg1;
+ int u_arg2;
+ int u_target; /* Node ID */
+} user_event_t;
+
+typedef enum {
+ EVENT_NONE=0,
+ EVENT_CONFIG,
+ EVENT_NODE,
+ EVENT_RG,
+ EVENT_USER
+} event_type_t;
+
+/* Data that's distributed which indicates which
+ node is the event master */
+typedef struct __rgm {
+ uint32_t m_magic;
+ uint32_t m_nodeid;
+ uint64_t m_master_time;
+ uint8_t m_reserved[112];
+} event_master_t;
+
+#define swab_event_master_t(ptr) \
+{\
+ swab32((ptr)->m_nodeid);\
+ swab32((ptr)->m_magic);\
+ swab64((ptr)->m_master_time);\
+}
+
+/* Just a magic # to help us ensure we've got good
+ data from VF */
+#define EVENT_MASTER_MAGIC 0xfabab0de
+
+/* Event structure - internal to the event subsystem; use
+ the queueing functions below which allocate this struct
+ and pass it to the event handler */
+typedef struct _event {
+ /* Not used dynamically - part of config info */
+ list_head();
+ char *ev_name;
+ char *ev_script;
+ char *ev_script_file;
+ int ev_prio;
+ int ev_pad;
+ /* --- end config part */
+ int ev_type; /* config & generated by rgmanager*/
+ int ev_transaction;
+ union {
+ group_event_t group;
+ node_event_t node;
+ config_event_t config;
+ user_event_t user;
+ } ev;
+} event_t;
+
+#define EVENT_PRIO_COUNT 100
+
+typedef struct _event_table {
+ int max_prio;
+ int pad;
+ event_t *entries[0];
+} event_table_t;
+
+
+int construct_events(int ccsfd, event_table_t **);
+void deconstruct_events(event_table_t **);
+void print_events(event_table_t *);
+
+/* Does the event match a configured event? */
+int event_match(event_t *pattern, event_t *actual);
+
+/* Event queueing functions. */
+void node_event_q(int local, int nodeID, int state, int clean);
+void rg_event_q(char *name, uint32_t state, int owner, int last);
+void user_event_q(char *svc, int request, int arg1, int arg2,
+ int target, msgctx_t *ctx);
+void config_event_q(int old_version, int new_version);
+
+/* Call this to see if there's a master. */
+int event_master_info_cached(event_master_t *);
+
+/* Call this to get the node ID of the current
+ master *or* become the master if none exists */
+int event_master(void);
+
+/* Setup */
+int central_events_enabled(void);
+void set_central_events(int flag);
+int slang_process_event(event_table_t *event_table, event_t *ev);
+
+/* For distributed events. */
+void set_transition_throttling(int nsecs);
+
+/* Simplified service start. */
+int service_op_start(char *svcName, int *target_list, int target_list_len,
+ int *new_owner);
+int service_op_stop(char *svcName, int do_disable, int event_type);
+
+
+#endif
/cvs/cluster/cluster/rgmanager/include/sets.h,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/include/sets.h
+++ - 2007-12-18 17:52:57.377956000 +0000
@@ -0,0 +1,39 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+/**
+ @file sets.h - Header file for sets.c
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#ifndef _SETS_H
+#define _SETS_H
+
+/* #include <stdint.h> */
+typedef int set_type_t;
+
+int s_add(set_type_t *, int *, set_type_t);
+int s_union(set_type_t *, int, set_type_t *,
+ int, set_type_t **, int *);
+
+int s_intersection(set_type_t *, int, set_type_t *,
+ int, set_type_t **, int *);
+int s_delta(set_type_t *, int, set_type_t *,
+ int, set_type_t **, int *);
+int s_subtract(set_type_t *, int, set_type_t *, int, set_type_t **, int *);
+int s_shuffle(set_type_t *, int);
+
+#endif
--- cluster/rgmanager/include/res-ocf.h 2007/07/31 17:54:54 1.1.16.2
+++ cluster/rgmanager/include/res-ocf.h 2007/12/18 17:52:56 1.1.16.3
@@ -31,6 +31,7 @@
#define OCF_RESOURCE_INSTANCE_STR "OCF_RESOURCE_INSTANCE"
#define OCF_CHECK_LEVEL_STR "OCF_CHECK_LEVEL"
#define OCF_RESOURCE_TYPE_STR "OCF_RESOURCE_TYPE"
+#define OCF_RECFNT_STR "OCF_RESKEY_RGMANAGER_meta_refcnt"
/*
LSB return codes
--- cluster/rgmanager/include/resgroup.h 2007/06/29 19:22:11 1.15.2.8
+++ cluster/rgmanager/include/resgroup.h 2007/12/18 17:52:56 1.15.2.9
@@ -50,9 +50,16 @@
#define RG_PORT 177
+
+/* Constants moved to src/clulib/constants.c */
+/* DO NOT EDIT */
#define RG_MAGIC 0x11398fed
#define RG_ACTION_REQUEST /* Message header */ 0x138582
+/* Argument to RG_ACTION_REQUEST */
+#define RG_ACTION_MASTER 0xfe0db143
+#define RG_ACTION_USER 0x3f173bfd
+/* */
#define RG_EVENT 0x138583
/* Requests */
@@ -109,6 +116,7 @@
#define DEFAULT_CHECK_INTERVAL 10
const char *rg_state_str(int val);
+int rg_state_str_to_id(const char *val);
const char *agent_op_str(int val);
int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -118,19 +126,22 @@
int group_op(char *rgname, int op);
void rg_init(void);
-/* FOOM */
+/* Basic service operations */
int svc_start(char *svcName, int req);
int svc_stop(char *svcName, int error);
int svc_status(char *svcName);
+int svc_status_inquiry(char *svcName);
int svc_disable(char *svcName);
int svc_fail(char *svcName);
int svc_migrate(char *svcName, int target);
+int check_restart(char *svcName);
+
int rt_enqueue_request(const char *resgroupname, int request,
msgctx_t *resp_ctx,
int max, uint32_t target, int arg0, int arg1);
void send_response(int ret, int node, request_t *req);
-void send_ret(msgctx_t *ctx, char *name, int ret, int req);
+void send_ret(msgctx_t *ctx, char *name, int ret, int req, int newowner);
/* do this op on all resource groups. The handler for the request
will sort out whether or not it's a valid request given the state */
@@ -141,6 +152,7 @@
/* from rg_state.c */
int set_rg_state(char *name, rg_state_t *svcblk);
int get_rg_state(char *servicename, rg_state_t *svcblk);
+int get_rg_state_local(char *servicename, rg_state_t *svcblk);
uint32_t best_target_node(cluster_member_list_t *allowed, uint32_t owner,
char *rg_name, int lock);
@@ -165,6 +177,14 @@
int my_id(void);
/* Return codes */
+#define RG_EEXCL -16 /* Service not runnable due to
+ the fact that it is tagged
+ exclusive and there are no
+ empty nodes. */
+#define RG_EDOMAIN -15 /* Service not runnable given the
+ set of nodes and its failover
+ domain */
+#define RG_ESCRIPT -14 /* S/Lang script failed */
#define RG_EFENCE -13 /* Fencing operation pending */
#define RG_ENODE -12 /* Node is dead/nonexistent */
//#define RG_EFROZEN -11 /* Forward compat. with -HEAD */
@@ -182,6 +202,7 @@
#define RG_YES 1
#define RG_NO 2
+
const char *rg_strerror(int val);
--- cluster/rgmanager/include/reslist.h 2007/11/26 21:46:26 1.15.2.7
+++ cluster/rgmanager/include/reslist.h 2007/12/18 17:52:56 1.15.2.8
@@ -139,7 +139,7 @@
list_head();
char *fdn_name;
int fdn_prio;
- int _pad_; /* align */
+ int fdn_nodeid; /* on rhel4 this will be 64-bit int */
} fod_node_t;
typedef struct _fod {
@@ -202,6 +202,8 @@
void print_domains(fod_t **domains);
int node_should_start(int nodeid, cluster_member_list_t *membership,
char *rg_name, fod_t **domains);
+int node_domain_set(fod_t *domain, int **ret, int *retlen);
+int node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags);
/*
@@ -210,6 +212,7 @@
resource_t *find_resource_by_ref(resource_t **reslist, char *type, char *ref);
resource_t *find_root_by_ref(resource_t **reslist, char *ref);
resource_rule_t *find_rule_by_type(resource_rule_t **rulelist, char *type);
+void res_build_name(char *, size_t, resource_t *);
/*
Internal functions; shouldn't be needed.
--- cluster/rgmanager/include/restart_counter.h 2007/11/26 21:46:26 1.1.2.1
+++ cluster/rgmanager/include/restart_counter.h 2007/12/18 17:52:56 1.1.2.2
@@ -1,3 +1,22 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+/* Time-based restart counters for rgmanager */
+
#ifndef _RESTART_COUNTER_H
#define _RESTART_COUNTER_H
--- cluster/rgmanager/include/rg_locks.h 2006/12/18 21:48:48 1.2.2.1
+++ cluster/rgmanager/include/rg_locks.h 2007/12/18 17:52:56 1.2.2.2
@@ -1,3 +1,20 @@
+/*
+ Copyright Red Hat, Inc. 2004-2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
#ifndef __RG_LOCKS_H
#define __RG_LOCKS_H
--- cluster/rgmanager/include/rg_queue.h 2006/07/19 18:43:32 1.6
+++ cluster/rgmanager/include/rg_queue.h 2007/12/18 17:52:56 1.6.2.1
@@ -1,3 +1,20 @@
+/*
+ Copyright Red Hat, Inc. 2004-2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
#ifndef _RG_QUEUE_H
#define _RG_QUEUE_H
#include <list.h>
@@ -19,7 +36,7 @@
uint32_t rr_target; /** Target node */
uint32_t rr_arg0; /** Integer argument */
uint32_t rr_arg1; /** Integer argument */
- uint32_t rr_arg3; /** Integer argument */
+ uint32_t rr_arg2; /** Integer argument */
uint32_t rr_line; /** Line no */
msgctx_t * rr_resp_ctx; /** FD to send response */
char *rr_file; /** Who made req */
@@ -42,5 +59,7 @@
void rq_free(request_t *foo);
void forward_request(request_t *req);
+void forward_message(msgctx_t *ctx, void *msg, int nodeid);
+
#endif
/cvs/cluster/cluster/rgmanager/src/clulib/sets.c,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/src/clulib/sets.c
+++ - 2007-12-18 17:52:58.080513000 +0000
@@ -0,0 +1,370 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+/**
+ @file sets.c - Order-preserving set functions (union / intersection / delta)
+ (designed for integer types; a la int, uint64_t, etc...)
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sets.h>
+#include <sys/time.h>
+
+
+/**
+  Append a value to a set unless the set already contains it.
+
+  No bounds checking is done here: the caller must have allocated room
+  for at least one element beyond *curlen.
+
+  @param s		Set storage to modify
+  @param curlen	In/out: current element count; incremented on insert
+  @param val		Value to insert
+  @return		1 if val was appended, 0 if it was already present
+ */
+int
+s_add(set_type_t *s, int *curlen, set_type_t val)
+{
+	int len = *curlen;
+	int pos = 0;
+
+	/* Linear membership scan */
+	while (pos < len) {
+		if (s[pos] == val)
+			return 0;
+		pos++;
+	}
+
+	s[len] = val;
+	*curlen = len + 1;
+	return 1;
+}
+
+
+/**
+  Union-set function.  Allocates and returns a new set which is the union of
+  the two given sets 'left' and 'right'.  Also returns the new set length.
+  Duplicate values (within or across the inputs) appear once in the result.
+
+  @param left		Left set - its elements come first in the result, in
+			their original order.
+  @param ll		Length of left set.
+  @param right		Right set - its elements are appended after the left
+			ones, skipping values already present.
+  @param rl		Length of right set
+  @param ret		Return set.  Should * not * be preallocated; the caller
+			frees it with free().
+  @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+  @return		0 on success, -1 if the result could not be allocated
+ */
+int
+s_union(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0;
+	int total = ll + rl;	/* Union will never exceed both sets */
+
+	/* calloc zero-fills and refuses a count * size that would overflow */
+	*ret = calloc((size_t)total, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* Add all the ones on the left; s_add drops duplicates */
+	for (l = 0; l < ll; l++)
+		s_add(*ret, &cnt, left[l]);
+
+	/* Add the ones on the right which are not already present */
+	for (r = 0; r < rl; r++)
+		s_add(*ret, &cnt, right[r]);
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+  Intersection-set function.  Allocates and returns a new set which is the
+  intersection of the two given sets 'left' and 'right'.  Also returns the new
+  set length.  Each common value appears once in the result.
+
+  @param left		Left set - the result follows the order in which
+			values first appear in this set.
+  @param ll		Length of left set.
+  @param right		Right set - only consulted for membership.
+  @param rl		Length of right set
+  @param ret		Return set.  Should * not * be preallocated; the caller
+			frees it with free().
+  @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+  @return		0 on success, -1 if the result could not be allocated
+ */
+int
+s_intersection(set_type_t *left, int ll, set_type_t *right, int rl,
+	       set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0;
+	int total = ll;	/* At most one result entry per left element */
+
+	/* calloc zero-fills and refuses a count * size that would overflow */
+	*ret = calloc((size_t)total, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* Keep each left element which also appears on the right */
+	for (l = 0; l < ll; l++) {
+		for (r = 0; r < rl; r++) {
+			if (left[l] != right[r])
+				continue;
+			/*
+			 * One match settles it.  s_add ignores a value that
+			 * is already in the result, so there is no reason
+			 * to keep scanning the right set afterwards.
+			 */
+			s_add(*ret, &cnt, left[l]);
+			break;
+		}
+	}
+
+	*retl = cnt;
+	return 0;
+}
+
+
+/**
+  Delta-set function.  Allocates and returns a new set which is the delta (i.e.
+  numbers not in both sets) of the two given sets 'left' and 'right'.  Also
+  returns the new set length.
+
+  @param left		Left set - values found only here come first in the
+			result, in their original order.
+  @param ll		Length of left set.
+  @param right		Right set - values found only here are appended after
+			the left-only values.
+  @param rl		Length of right set
+  @param ret		Return set.  Should * not * be preallocated; the caller
+			frees it with free().
+  @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+  @return		0 on success, -1 if the result could not be allocated
+ */
+int
+s_delta(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, found;
+	int total = ll + rl;	/* Delta will never exceed both sets */
+
+	/* calloc zero-fills and refuses a count * size that would overflow */
+	*ret = calloc((size_t)total, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* not efficient, but it works */
+	/* Add the ones on the left which are missing from the right */
+	for (l = 0; l < ll; l++) {
+		found = 0;
+		for (r = 0; r < rl; r++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found)
+			s_add(*ret, &cnt, left[l]);
+	}
+
+	/* Add the ones on the right which are missing from the left */
+	for (r = 0; r < rl; r++) {
+		found = 0;
+		for (l = 0; l < ll; l++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found)
+			s_add(*ret, &cnt, right[r]);
+	}
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+  Subtract-set function.  Allocates and returns a new set which is the
+  subtraction of the right set from the left set.
+  Also returns the new set length.
+
+  @param left		Left set - the result keeps the order in which values
+			first appear in this set.
+  @param ll		Length of left set.
+  @param right		Right set - values to remove; only consulted for
+			membership.
+  @param rl		Length of right set
+  @param ret		Return set.  Should * not * be preallocated; the caller
+			frees it with free().
+  @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+  @return		0 on success, -1 if the result could not be allocated
+ */
+int
+s_subtract(set_type_t *left, int ll, set_type_t *right, int rl,
+	   set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, found;
+	int total = ll;	/* Result will never exceed left set length */
+
+	/* calloc zero-fills and refuses a count * size that would overflow */
+	*ret = calloc((size_t)total, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* not efficient, but it works */
+	for (l = 0; l < ll; l++) {
+		found = 0;
+		for (r = 0; r < rl; r++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		/* Only left values absent from the right set survive */
+		if (!found)
+			s_add(*ret, &cnt, left[l]);
+	}
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+  Shuffle-set function.  Reorders a set in place by swapping every position
+  with a pseudo-randomly chosen one.  The generator is seeded from the
+  microsecond field of the current time, so the ordering is only weakly random.
+
+  @param set		Set to randomize
+  @param sl		Length of set
+  @return		0
+ */
+int
+s_shuffle(set_type_t *set, int sl)
+{
+	struct timeval now;
+	unsigned seed;
+	set_type_t tmp;
+	int cur, other;
+
+	gettimeofday(&now, NULL);
+	seed = (int)(now.tv_usec);
+
+	for (cur = 0; cur < sl; cur++) {
+		other = (rand_r(&seed) % sl);
+		if (other != cur) {
+			tmp = set[cur];
+			set[cur] = set[other];
+			set[other] = tmp;
+		}
+	}
+
+	return 0;
+}
+
+
+#ifdef STANDALONE
+/*
+ Testbed: runs each set operation on two fixed arrays and prints the
+ result so it can be compared by eye with the expected set given in the
+ comment next to each call.  Only built when STANDALONE is defined.
+ */
+/*
+ gcc -o sets sets.c -DSTANDALONE -ggdb -I../../include \
+ -Wall -Werror -Wstrict-prototypes -Wextra
+ */
+int
+main(int __attribute__ ((unused)) argc, char __attribute__ ((unused)) **argv)
+{
+ /* 'a' deliberately contains repeats to exercise duplicate removal */
+ set_type_t a[] = { 1, 2, 3, 3, 3, 2, 2, 3 };
+ set_type_t b[] = { 2, 3, 4 };
+ set_type_t *i;
+ int ilen = 0, x;
+
+ /* NOTE(review): return values of the s_* calls are not checked here;
+    on allocation failure 'i' would be NULL when printed/freed below */
+ s_union(a, 8, b, 3, &i, &ilen);
+
+ /* Should return length of 4 - { 1 2 3 4 } */
+ printf("set_union [%d] = ", ilen);
+ for ( x = 0; x < ilen; x++) {
+ printf("%d ", (int)i[x]);
+ }
+ printf("\n");
+
+ /* Same elements, order varies from run to run */
+ s_shuffle(i, ilen);
+ printf("shuffled [%d] = ", ilen);
+ for ( x = 0; x < ilen; x++) {
+ printf("%d ", (int)i[x]);
+ }
+ printf("\n");
+
+
+ free(i);
+
+ /* Should return length of 2 - { 2 3 } */
+ s_intersection(a, 8, b, 3, &i, &ilen);
+
+ printf("set_intersection [%d] = ", ilen);
+ for ( x = 0; x < ilen; x++) {
+ printf("%d ", (int)i[x]);
+ }
+ printf("\n");
+
+ free(i);
+
+ /* Should return length of 2 - { 1 4 } */
+ s_delta(a, 8, b, 3, &i, &ilen);
+
+ printf("set_delta [%d] = ", ilen);
+ for ( x = 0; x < ilen; x++) {
+ printf("%d ", (int)i[x]);
+ }
+ printf("\n");
+
+ free(i);
+
+ /* Should return length of 1 - { 1 } */
+ s_subtract(a, 8, b, 3, &i, &ilen);
+
+ printf("set_subtract [%d] = ", ilen);
+ for ( x = 0; x < ilen; x++) {
+ printf("%d ", (int)i[x]);
+ }
+ printf("\n");
+
+ free(i);
+
+
+ return 0;
+}
+#endif
--- cluster/rgmanager/src/clulib/Makefile 2007/07/24 13:53:08 1.10.2.3
+++ cluster/rgmanager/src/clulib/Makefile 2007/12/18 17:52:56 1.10.2.4
@@ -34,7 +34,7 @@
libclulib.a: clulog.o daemon_init.o signals.o msgsimple.o \
gettid.o rg_strings.o message.o members.o fdops.o \
lock.o cman.o vft.o msg_cluster.o msg_socket.o \
- wrap_lock.o tmgr.o
+ wrap_lock.o tmgr.o sets.o constants.o
${AR} cru $@ $^
ranlib $@
--- cluster/rgmanager/src/clulib/members.c 2006/09/27 16:28:41 1.4
+++ cluster/rgmanager/src/clulib/members.c 2007/12/18 17:52:56 1.4.2.1
@@ -233,6 +233,50 @@
int
+member_low_id(void)
+{
+	/*
+	 * Returns the smallest cn_nodeid among membership entries flagged
+	 * cn_member, or -1 when there is no membership list or no such
+	 * entry.  The list is walked with memblock held.
+	 */
+	int idx, lowest = -1;
+
+	pthread_rwlock_wrlock(&memblock);
+	if (membership) {
+		for (idx = 0; idx < membership->cml_count; idx++) {
+			if (!(membership->cml_members[idx].cn_member))
+				continue;
+			/* The first flagged entry always replaces the -1 */
+			if ((lowest == -1) ||
+			    (membership->cml_members[idx].cn_nodeid < lowest))
+				lowest = membership->cml_members[idx].cn_nodeid;
+		}
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return lowest;
+}
+
+
+int
+member_high_id(void)
+{
+	/*
+	 * Returns the largest cn_nodeid among membership entries flagged
+	 * cn_member, or -1 when there is no membership list or no entry
+	 * compares greater than -1.  The list is walked with memblock held.
+	 */
+	int idx, highest = -1;
+
+	pthread_rwlock_wrlock(&memblock);
+	if (membership) {
+		for (idx = 0; idx < membership->cml_count; idx++) {
+			if (!membership->cml_members[idx].cn_member)
+				continue;
+			if (membership->cml_members[idx].cn_nodeid > highest)
+				highest = membership->cml_members[idx].cn_nodeid;
+		}
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return highest;
+}
+
+
+int
member_online(int nodeid)
{
int x = 0, ret = 0;
--- cluster/rgmanager/src/clulib/rg_strings.c 2007/07/31 17:54:54 1.5.2.5
+++ cluster/rgmanager/src/clulib/rg_strings.c 2007/12/18 17:52:56 1.5.2.6
@@ -26,6 +26,9 @@
const struct string_val rg_error_strings[] = {
+ { RG_EEXCL, "Service not runnable: cannot run exclusive" },
+ { RG_EDOMAIN, "Service not runnable: restricted failover domain offline" },
+ { RG_ESCRIPT, "S/Lang Script Error" },
{ RG_EFENCE, "Fencing operation pending; try again later" },
{ RG_ENODE, "Target node dead / nonexistent" },
{ RG_ERUN, "Service is already running" },
@@ -126,6 +129,21 @@
}
+/**
+ * Case-insensitive reverse lookup in a string table.
+ *
+ * @param table	Table to search; its last entry must have a NULL str
+ * @param val		String to look for
+ * @return		The val member of the first matching entry, or -1
+ *			if no entry matches
+ */
+static inline int
+rg_search_table_by_str(const struct string_val *table, const char *val)
+{
+	const struct string_val *ent;
+
+	for (ent = table; ent->str != NULL; ent++) {
+		if (strcasecmp(ent->str, val) == 0)
+			return ent->val;
+	}
+
+	return -1;
+}
+
+
+
const char *
rg_strerror(int val)
{
@@ -139,6 +157,14 @@
}
+/**
+ * Translate a state name to its numeric ID by searching rg_state_strings
+ * case-insensitively.
+ *
+ * @param val		State name to look up
+ * @return		Matching table value, or -1 if the name is not found
+ */
+int
+rg_state_str_to_id(const char *val)
+{
+ return rg_search_table_by_str(rg_state_strings, val);
+}
+
+
+
const char *
rg_req_str(int val)
{
--- cluster/rgmanager/src/clulib/vft.c 2007/11/14 16:56:50 1.17.2.4
+++ cluster/rgmanager/src/clulib/vft.c 2007/12/18 17:52:56 1.17.2.5
@@ -1734,55 +1734,52 @@
}
msg_close(&ctx);
msg = (vf_msg_t *)gh;
- break;
- }
-
- if (x >= membership->cml_count)
- return VFR_ERROR;
-
- /* Uh oh */
- if (!msg || (msg == &rmsg)) {
- printf("VF: No valid message\n");
- return VFR_ERROR;
- }
-
- swab_generic_msg_hdr(&(msg->vm_hdr));
- if (msg->vm_hdr.gh_command == VF_NACK) {
- free(msg);
- return VFR_NODATA;
- }
- if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) {
- fprintf(stderr, "VF: Short reply from %d\n", x);
- free(msg);
- return VFR_ERROR;
- }
-
- if (msg->vm_hdr.gh_length > n) {
- fprintf(stderr,"VF: Size mismatch during decode (%d > %d)\n",
- msg->vm_hdr.gh_length, n);
- free(msg);
- return VFR_ERROR;
- }
+ /* Uh oh */
+ if (!msg || (msg == &rmsg)) {
+ printf("VF: No valid message\n");
+ return VFR_ERROR;
+ }
+ swab_generic_msg_hdr(&(msg->vm_hdr));
+ if (msg->vm_hdr.gh_command == VF_NACK) {
+ free(msg);
+ continue;
+ }
+ if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) {
+ fprintf(stderr, "VF: Short reply from %d\n", x);
+ free(msg);
+ continue;
+ }
+ if (msg->vm_hdr.gh_length > n) {
+ fprintf(stderr,
+ "VF: Size mismatch during decode (%d > %d)\n",
+ msg->vm_hdr.gh_length, n);
+ free(msg);
+ continue;
+ }
- swab_vf_msg_info_t(&(msg->vm_msg));
+ swab_vf_msg_info_t(&(msg->vm_msg));
- if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) {
- fprintf(stderr,"VF: Size mismatch during decode (\n");
- free(msg);
- return VFR_ERROR;
- }
+ if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) {
+ fprintf(stderr,"VF: Size mismatch during decode (\n");
+ free(msg);
+ continue;
+ }
- if (vf_set_current(keyid, msg->vm_msg.vf_view,
+ /* Ok... we've got data! */
+ if (vf_set_current(keyid, msg->vm_msg.vf_view,
msg->vm_msg.vf_data,
msg->vm_msg.vf_datalen) == VFR_ERROR) {
+ free(msg);
+ return VFR_ERROR;
+ }
+
free(msg);
- return VFR_ERROR;
- }
- free(msg);
+ return VFR_OK;
+ }
- return VFR_OK;
+ return VFR_NODATA;
}
/cvs/cluster/cluster/rgmanager/src/daemons/event_config.c,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/src/daemons/event_config.c
+++ - 2007-12-18 17:52:58.662036000 +0000
@@ -0,0 +1,541 @@
+/**
+ Copyright Red Hat, Inc. 2002-2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+/** @file
+ * CCS event parsing, based on failover domain parsing
+ */
+#include <string.h>
+#include <list.h>
+#include <clulog.h>
+#include <resgroup.h>
+#include <restart_counter.h>
+#include <reslist.h>
+#include <ccs.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <members.h>
+#include <reslist.h>
+#include <ctype.h>
+#include <event.h>
+
+#define CONFIG_NODE_ID_TO_NAME \
+ "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]/@name"
+#define CONFIG_NODE_NAME_TO_ID \
+ "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid"
+
+void deconstruct_events(event_table_t **);
+void print_event(event_t *ev);
+
+//#define DEBUG
+
+/*
+ Function entry/exit tracing.  With DEBUG defined, ENTER() logs the
+ function name and RETURN(val) logs the function, line and value before
+ returning it; without DEBUG, ENTER() expands to nothing and RETURN(val)
+ is a plain return.
+ NOTE(review): the DEBUG form of RETURN is a bare { } block and evaluates
+ 'val' twice, so "if (x) RETURN(v); else ..." and arguments with side
+ effects would misbehave.
+ */
+#ifdef DEBUG
+#define ENTER() clulog(LOG_DEBUG, "ENTER: %s\n", __FUNCTION__)
+#define RETURN(val) {\
+ clulog(LOG_DEBUG, "RETURN: %s line=%d value=%d\n", __FUNCTION__, \
+ __LINE__, (val));\
+ return(val);\
+}
+#else
+#define ENTER()
+#define RETURN(val) return(val)
+#endif
+
+#ifdef NO_CCS
+#define ccs_get(fd, query, ret) conf_get(query, ret)
+#endif
+
+/*
+ <events>
+ <event name="helpful_name_here" class="node"
+ node="nodename" (or node_id="nodeid") node_state="up|down">
+ slang_script_stuff();
+ start_service();
+ </event>
+ </events>
+ */
+/**
+ * Decide whether an event satisfies a configured pattern.
+ *
+ * A pattern of type EVENT_NONE matches everything.  Otherwise the types
+ * must be equal and every pattern field which is specified must equal the
+ * corresponding field of the event.  A field counts as unspecified when it
+ * is negative (node and config fields, rg_owner), (uint32_t)-1 (rg_state),
+ * an empty string (names) or 0 (user request / target).
+ *
+ * @param pattern	Pattern to test against
+ * @param actual	Event which occurred
+ * @return		1 on match, 0 otherwise
+ */
+int
+event_match(event_t *pattern, event_t *actual)
+{
+	/* If there's no event class specified, the rest is irrelevant */
+	if (pattern->ev_type == EVENT_NONE)
+		return 1;
+
+	if (actual->ev_type != pattern->ev_type)
+		return 0;
+
+	switch(pattern->ev_type) {
+	case EVENT_NODE:
+		if ((pattern->ev.node.ne_nodeid >= 0 &&
+		     actual->ev.node.ne_nodeid !=
+				pattern->ev.node.ne_nodeid) ||
+		    (pattern->ev.node.ne_local >= 0 &&
+		     actual->ev.node.ne_local !=
+				pattern->ev.node.ne_local) ||
+		    (pattern->ev.node.ne_state >= 0 &&
+		     actual->ev.node.ne_state !=
+				pattern->ev.node.ne_state) ||
+		    (pattern->ev.node.ne_clean >= 0 &&
+		     actual->ev.node.ne_clean !=
+				pattern->ev.node.ne_clean))
+			return 0;
+		return 1; /* All specified params match */
+
+	case EVENT_RG:
+		if ((pattern->ev.group.rg_name[0] &&
+		     strcasecmp(actual->ev.group.rg_name,
+				pattern->ev.group.rg_name)) ||
+		    (pattern->ev.group.rg_state != (uint32_t)-1 &&
+		     actual->ev.group.rg_state !=
+				pattern->ev.group.rg_state) ||
+		    (pattern->ev.group.rg_owner >= 0 &&
+		     actual->ev.group.rg_owner !=
+				pattern->ev.group.rg_owner))
+			return 0;
+		return 1;
+
+	case EVENT_CONFIG:
+		if ((pattern->ev.config.cfg_version >= 0 &&
+		     actual->ev.config.cfg_version !=
+				pattern->ev.config.cfg_version) ||
+		    (pattern->ev.config.cfg_oldversion >= 0 &&
+		     actual->ev.config.cfg_oldversion !=
+				pattern->ev.config.cfg_oldversion))
+			return 0;
+		return 1;
+
+	case EVENT_USER:
+		if ((pattern->ev.user.u_name[0] &&
+		     strcasecmp(actual->ev.user.u_name,
+				pattern->ev.user.u_name)) ||
+		    (pattern->ev.user.u_request != 0 &&
+		     actual->ev.user.u_request !=
+				pattern->ev.user.u_request) ||
+		    (pattern->ev.user.u_target != 0 &&
+		     actual->ev.user.u_target !=
+				pattern->ev.user.u_target))
+			return 0;
+		return 1;
+
+	default:
+		break;
+	}
+
+	/* Unknown pattern type: never matches */
+	return 0;
+}
+
+
+/**
+ * Look up the configured name of a cluster node by its node ID.
+ *
+ * @param ccsfd	CCS descriptor handed to ccs_get
+ * @param nodeid	Node ID to look up
+ * @return		String produced by ccs_get (the caller owns it), or
+ *			NULL if the query failed
+ */
+char *
+ccs_node_id_to_name(int ccsfd, int nodeid)
+{
+	char query[256];
+	char *name = 0;
+
+	snprintf(query, sizeof(query), CONFIG_NODE_ID_TO_NAME,
+		 nodeid);
+	if (ccs_get(ccsfd, query, &name) != 0)
+		return NULL;
+	return name;
+}
+
+
+/**
+ * Look up the configured node ID of a cluster node by its name.
+ *
+ * @param ccsfd	CCS descriptor handed to ccs_get
+ * @param name		Node name to look up
+ * @return		atoi() of the configured nodeid, or 0 if the query
+ *			failed
+ */
+int
+ccs_node_name_to_id(int ccsfd, char *name)
+{
+	char query[256];
+	char *val = 0;
+	int id = 0;
+
+	snprintf(query, sizeof(query), CONFIG_NODE_NAME_TO_ID,
+		 name);
+	if (ccs_get(ccsfd, query, &val) != 0)
+		return 0;
+
+	id = atoi(val);
+	free(val);
+	return id;
+}
+
+
+/**
+ * Release an event together with the strings it owns: the inline script,
+ * the script file name and the event name.
+ *
+ * @param ev		Event to free; must not be NULL
+ */
+static void
+deconstruct_event(event_t *ev)
+{
+	/* free(NULL) is a no-op, so unset members need no guard */
+	free(ev->ev_script);
+	/* Allocated by ccs_get or strdup when the event was built */
+	free(ev->ev_script_file);
+	free(ev->ev_name);
+	free(ev);
+}
+
+
+/**
+ * Fill in the node-specific part of an event pattern from the attributes
+ * node_id (or node), node_state, node_clean and node_local found under
+ * 'base'.  Attributes which are absent leave their field at -1.
+ *
+ * @param ccsfd	CCS descriptor handed to ccs_get
+ * @param base		XPath of the event element
+ * @param ev		Event to fill in
+ * @return		0 on success, -1 if a node was given but did not
+ *			resolve to a node ID greater than zero
+ */
+static int
+get_node_event(int ccsfd, char *base, event_t *ev)
+{
+	char query[256];
+	char *val = NULL;
+
+	/* Start with every field unspecified */
+	ev->ev.node.ne_nodeid = -1;
+	ev->ev.node.ne_local = -1;
+	ev->ev.node.ne_state = -1;
+	ev->ev.node.ne_clean = -1;
+
+	/* A numeric node_id takes precedence over a node name */
+	snprintf(query, sizeof(query), "%s/@node_id", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ev->ev.node.ne_nodeid = atoi(val);
+		free(val);
+		if (ev->ev.node.ne_nodeid <= 0)
+			return -1;
+	} else {
+		snprintf(query, sizeof(query), "%s/@node", base);
+		if (ccs_get(ccsfd, query, &val) == 0) {
+			ev->ev.node.ne_nodeid =
+				ccs_node_name_to_id(ccsfd, val);
+			free(val);
+			if (ev->ev.node.ne_nodeid <= 0)
+				return -1;
+		}
+	}
+
+	/* "up" / "down", or any integer (nonzero meaning up) */
+	snprintf(query, sizeof(query), "%s/@node_state", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (!strcasecmp(val, "up"))
+			ev->ev.node.ne_state = 1;
+		else if (!strcasecmp(val, "down"))
+			ev->ev.node.ne_state = 0;
+		else
+			ev->ev.node.ne_state = !!atoi(val);
+		free(val);
+	}
+
+	/* The remaining attributes are integers reduced to 0 / 1 */
+	snprintf(query, sizeof(query), "%s/@node_clean", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ev->ev.node.ne_clean = !!atoi(val);
+		free(val);
+	}
+
+	snprintf(query, sizeof(query), "%s/@node_local", base);
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ev->ev.node.ne_local = !!atoi(val);
+		free(val);
+	}
+
+	return 0;
+}
+
+
+/**
+ * Fill in the service-specific part of an event pattern from the
+ * attributes service, service_state and service_owner found under 'base'.
+ * Attributes which are absent leave the name empty, the state at
+ * (uint32_t)-1 and the owner at -1.
+ *
+ * @param ccsfd	CCS descriptor handed to ccs_get
+ * @param base		XPath of the event element
+ * @param ev		Event to fill in
+ * @return		0 on success, -1 if the service attribute is present
+ *			but empty
+ */
+static int
+get_rg_event(int ccsfd, char *base, event_t *ev)
+{
+	char xpath[256], *ret = NULL;
+
+	/* Clear out the possibilities */
+	ev->ev.group.rg_name[0] = 0;
+	ev->ev.group.rg_state = (uint32_t)-1;
+	ev->ev.group.rg_owner = -1;
+
+	snprintf(xpath, sizeof(xpath), "%s/@service", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		/*
+		 * strncpy does not terminate the destination when the
+		 * source fills it, so copy one byte less and terminate
+		 * explicitly before strlen looks at the buffer.
+		 */
+		strncpy(ev->ev.group.rg_name, ret,
+			sizeof(ev->ev.group.rg_name) - 1);
+		ev->ev.group.rg_name[sizeof(ev->ev.group.rg_name) - 1] = 0;
+		free(ret);
+		if (!strlen(ev->ev.group.rg_name)) {
+			return -1;
+		}
+	}
+
+	/* State may be given by name or by number */
+	snprintf(xpath, sizeof(xpath), "%s/@service_state", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		/* <ctype.h> functions need an unsigned char value */
+		if (!isdigit((unsigned char)ret[0])) {
+			ev->ev.group.rg_state =
+				rg_state_str_to_id(ret);
+		} else {
+			ev->ev.group.rg_state = atoi(ret);
+		}
+		free(ret);
+	}
+
+	/* Owner may be given by node name or by node ID */
+	snprintf(xpath, sizeof(xpath), "%s/@service_owner", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (!isdigit((unsigned char)ret[0])) {
+			ev->ev.group.rg_owner =
+				ccs_node_name_to_id(ccsfd, ret);
+		} else {
+			/*
+			 * Keep the actual node ID.  Reducing it to 0 / 1
+			 * would make every numeric owner other than node 1
+			 * compare unequal to the real owner in event_match.
+			 */
+			ev->ev.group.rg_owner = atoi(ret);
+		}
+		free(ret);
+	}
+
+	return 0;
+}
+
+
+/**
+ * Placeholder for parsing configuration-change events.  Not implemented:
+ * all parameters are ignored, errno is set to ENOSYS and -1 is returned.
+ */
+static int
+get_config_event(int __attribute__((unused)) ccsfd,
+ char __attribute__((unused)) *base,
+ event_t __attribute__((unused)) *ev)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+
+/**
+ * Build one event from the idx'th <event> element under 'base'.
+ *
+ * Reads the name, the script (file attribute, else the element's inline
+ * text), the optional priority (default 99) and the optional class with
+ * its class-specific attributes.
+ *
+ * @param ccsfd	CCS descriptor handed to ccs_get
+ * @param base		XPath of the element containing the events
+ * @param idx		Index of the event to read
+ * @param _done	Set to 1 when no event name exists at idx, otherwise
+ *			set to 0
+ * @return		Newly allocated event, or NULL when there is no event
+ *			at idx or the event could not be built
+ */
+static event_t *
+get_event(int ccsfd, char *base, int idx, int *_done)
+{
+	event_t *ev;
+	char xpath[256];
+	char *ret = NULL;
+
+	*_done = 0;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@name",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) != 0) {
+		*_done = 1;
+		return NULL;
+	}
+
+	ev = malloc(sizeof(*ev));
+	if (!ev) {
+		/* The name has no owner yet; do not leak it */
+		free(ret);
+		return NULL;
+	}
+	memset(ev, 0, sizeof(*ev));
+	ev->ev_name = ret;
+
+	/* Get the script file / inline from config */
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@file",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_script_file = ret;
+	} else {
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]",
+			 base, idx);
+		if (ccs_get(ccsfd, xpath, &ret) == 0) {
+			ev->ev_script = ret;
+		} else {
+			goto out_fail;
+		}
+	}
+
+	/* Get the priority ordering (must be nonzero) */
+	ev->ev_prio = 99;
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@priority",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_prio = atoi(ret);
+		if (ev->ev_prio <= 0 || ev->ev_prio > EVENT_PRIO_COUNT) {
+			clulog(LOG_ERR,
+			       "event %s: priority %s invalid\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+		free(ret);
+	}
+
+	/* Get the event class */
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@class",
+		 base, idx);
+	ret = NULL;
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		/* The class-specific parsers take the element's own path */
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]",
+			 base, idx);
+		if (!strcasecmp(ret, "node")) {
+			ev->ev_type = EVENT_NODE;
+			if (get_node_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "service") ||
+			   !strcasecmp(ret, "resource") ||
+			   !strcasecmp(ret, "rg") ) {
+			ev->ev_type = EVENT_RG;
+			if (get_rg_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "config") ||
+			   !strcasecmp(ret, "reconfig")) {
+			ev->ev_type = EVENT_CONFIG;
+			if (get_config_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else {
+			clulog(LOG_ERR,
+			       "event %s: class %s unrecognized\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+
+		free(ret);
+		ret = NULL;
+	}
+
+	return ev;
+out_fail:
+	if (ret)
+		free(ret);
+	/*
+	 * Release the script file name here and clear the pointer, so the
+	 * string is freed exactly once whether or not deconstruct_event
+	 * also releases it.
+	 */
+	free(ev->ev_script_file);
+	ev->ev_script_file = NULL;
+	deconstruct_event(ev);
+	return NULL;
+}
+
+
+/**
+ * Build the catch-all event: named "Default", type EVENT_NONE, priority
+ * 100, running default_event_script.sl from RESOURCE_ROOTDIR.
+ *
+ * @return		Newly allocated event, or NULL on allocation failure
+ */
+static event_t *
+get_default_event(void)
+{
+	char path[1024];
+	event_t *def;
+
+	def = malloc(sizeof(*def));
+	if (!def)
+		return NULL;
+	memset(def, 0, sizeof(*def));
+
+	/* Location of the default script */
+	snprintf(path, sizeof(path), "%s/default_event_script.sl",
+		 RESOURCE_ROOTDIR);
+
+	def->ev_name = strdup("Default");
+	def->ev_script_file = strdup(path);
+	def->ev_prio = 100;
+	def->ev_type = EVENT_NONE;
+
+	/* Either copy may have failed; clean up whatever was allocated */
+	if (!def->ev_script_file || !def->ev_name) {
+		deconstruct_event(def);
+		return NULL;
+	}
+
+	return def;
+}
+
+
+/**
+ * Build the event table from the configuration (API similar to the
+ * failover domain one).  Every event found under RESOURCE_TREE_ROOT
+ * "/events" is inserted into the list for its priority, followed by the
+ * built-in default event.
+ *
+ * @param ccsfd	CCS descriptor handed to get_event
+ * @param events	Receives the newly allocated table
+ * @return		0 on success, -1 if the table could not be allocated
+ */
+int
+construct_events(int ccsfd, event_table_t **events)
+{
+	char xpath[256];
+	event_t *ev;
+	int idx = 1, done = 0;
+	size_t tbl_size = sizeof(event_table_t) +
+			  sizeof(event_t) * (EVENT_PRIO_COUNT+1);
+
+	/* Allocate the event list table */
+	*events = malloc(tbl_size);
+	if (!*events)
+		return -1;
+	memset(*events, 0, tbl_size);
+	(*events)->max_prio = EVENT_PRIO_COUNT;
+
+	snprintf(xpath, sizeof(xpath),
+		 RESOURCE_TREE_ROOT "/events");
+
+	/* Event indices start at 1; stop once get_event reports the end */
+	while (!done) {
+		ev = get_event(ccsfd, xpath, idx, &done);
+		idx++;
+		if (ev)
+			list_insert(&((*events)->entries[ev->ev_prio]), ev);
+	}
+
+	ev = get_default_event();
+	if (ev)
+		list_insert(&((*events)->entries[ev->ev_prio]), ev);
+
+	return 0;
+}
+
+
+/**
+ * Print a human-readable description of one event to stdout: its name,
+ * what it matches, and where its script comes from.
+ *
+ * @param ev		Event to print
+ */
+void
+print_event(event_t *ev)
+{
+	printf(" Name: %s\n", ev->ev_name);
+
+	switch(ev->ev_type) {
+	case EVENT_NODE:
+		printf(" Node %d State %d\n", ev->ev.node.ne_nodeid,
+		       ev->ev.node.ne_state);
+		break;
+	case EVENT_RG:
+		printf(" RG %s State %s\n", ev->ev.group.rg_name,
+		       rg_state_str(ev->ev.group.rg_state));
+		break;
+	case EVENT_CONFIG:
+		printf(" Config change - unsupported\n");
+		break;
+	default:
+		/* Every other type is reported as a catch-all */
+		printf(" (Any event)\n");
+		break;
+	}
+
+	/* An inline script is reported in preference to a file name */
+	if (!ev->ev_script) {
+		printf(" File: %s\n", ev->ev_script_file);
+		return;
+	}
+	printf(" Inline script.\n");
+}
+
+
+/**
+ * Print every event in the table to stdout, grouped by priority level.
+ * Levels with no events are skipped.
+ *
+ * @param events	Event table to print
+ */
+void
+print_events(event_table_t *events)
+{
+ int x, y;
+ event_t *ev;
+
+ for (x = 0; x <= events->max_prio; x++) {
+ if (!events->entries[x])
+ continue;
+ printf("Event Priority Level %d:\n", x);
+ /* list_for comes from list.h; presumably it visits each entry
+    of the list once, with y as its counter - TODO confirm */
+ list_for(&(events->entries[x]), ev, y) {
+ print_event(ev);
+ }
+ }
+}
+
+
+/**
+ * Free an event table and every event in it, then set the caller's
+ * pointer to NULL.  Does nothing if *eventsp is already NULL.
+ *
+ * @param eventsp	Address of the table pointer
+ */
+void
+deconstruct_events(event_table_t **eventsp)
+{
+ int x;
+ event_table_t *events = *eventsp;
+ event_t *ev = NULL;
+
+ if (!events)
+ return;
+
+ for (x = 0; x <= events->max_prio; x++) {
+ /* Keep unlinking and freeing the head of this priority's list
+    until the list is empty */
+ while ((ev = (events->entries[x]))) {
+ list_remove(&(events->entries[x]), ev);
+ deconstruct_event(ev);
+ }
+ }
+
+ free(events);
+ *eventsp = NULL;
+}
+
+
/cvs/cluster/cluster/rgmanager/src/daemons/service_op.c,v --> standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/service_op.c
+++ - 2007-12-18 17:52:58.744706000 +0000
@@ -0,0 +1,204 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+#include <assert.h>
+#include <platform.h>
+#include <message.h>
+#include <members.h>
+#include <stdio.h>
+#include <string.h>
+#include <resgroup.h>
+#include <clulog.h>
+#include <lock.h>
+#include <rg_locks.h>
+#include <ccs.h>
+#include <rg_queue.h>
+#include <msgsimple.h>
+#include <res-ocf.h>
+#include <event.h>
+
+
+/*
+ * Send a message to the target node to start the service.
+ */
+int svc_start_remote(char *svcName, int request, uint32_t target);
+void svc_report_failure(char *);
+int get_service_state_internal(char *svcName, rg_state_t *svcStatus);
+
+
+/**
+ * Try to start a service on each node of target_list in turn, stopping
+ * at the first node which gives a conclusive answer.
+ *
+ * @param svcName		Service to start
+ * @param target_list		Node IDs to try, in order
+ * @param target_list_len	Number of entries in target_list
+ * @param new_owner		If non-NULL, receives the owner when the
+ *				service starts or is found already running
+ * @return 0 if the service was started, was already running, or its
+ *         state is uncertain; RG_EINVAL if the service is failed or
+ *         uninitialized; RG_EEXCL / RG_EDEPEND if every target refused
+ *         for that reason; RG_EFAIL otherwise
+ */
+int
+service_op_start(char *svcName,
+ int *target_list,
+ int target_list_len,
+ int *new_owner)
+{
+ int target;
+ int ret, x;
+ int excl = 0, dep = 0, fail = 0;
+ rg_state_t svcStatus;
+
+ if (get_service_state_internal(svcName, &svcStatus) < 0) {
+ return RG_EFAIL;
+ }
+
+ if (svcStatus.rs_state == RG_STATE_FAILED ||
+ svcStatus.rs_state == RG_STATE_UNINITIALIZED)
+ return RG_EINVAL;
+
+ for (x = 0; x < target_list_len; x++) {
+
+ target = target_list[x];
+ ret = svc_start_remote(svcName, RG_START_REMOTE,
+ target);
+ switch (ret) {
+ case RG_ERUN:
+ /* Someone stole the service while we were
+ trying to start it */
+ get_rg_state_local(svcName, &svcStatus);
+ if (new_owner)
+ *new_owner = svcStatus.rs_owner;
+ return 0;
+ /* The next three results are counted and the next target
+    in the list is tried */
+ case RG_EEXCL:
+ ++excl;
+ continue;
+ case RG_EDEPEND:
+ ++dep;
+ continue;
+ case RG_EFAIL:
+ ++fail;
+ continue;
+ case RG_EABORT:
+ svc_report_failure(svcName);
+ return RG_EFAIL;
+ default:
+ /* deliberate fallthrough */
+ clulog(LOG_ERR,
+ "#61: Invalid reply from member %d during"
+ " start operation!\n", target);
+ case RG_NO:
+ /* state uncertain */
+ clulog(LOG_CRIT, "State Uncertain: svc:%s "
+ "nid:%d req:%s ret:%d\n", svcName,
+ target, rg_req_str(RG_START_REMOTE), ret);
+ return 0;
+ case 0:
+ if (new_owner)
+ *new_owner = target;
+ clulog(LOG_NOTICE, "Service %s is now running "
+ "on member %d\n", svcName, (int)target);
+ return 0;
+ }
+ }
+
+ /* Every target was tried and none started the service */
+ /* NOTE(review): with an empty target list excl == target_list_len
+    (both 0), so RG_EEXCL is returned - confirm this is intended */
+ ret = RG_EFAIL;
+ if (excl == target_list_len)
+ ret = RG_EEXCL;
+ else if (dep == target_list_len)
+ ret = RG_EDEPEND;
+
+ clulog(LOG_INFO, "Start failed; node reports: %d failures, "
+ "%d exclusive, %d dependency errors\n", fail, excl, dep);
+ return ret;
+}
+
+
+/**
+ * Ask the node which owns a service (or the local node, if the service
+ * has no owner) to stop or disable it, and wait for the reply.
+ *
+ * @param svcName	Service to stop
+ * @param do_disable	Nonzero requests RG_DISABLE rather than RG_STOP
+ * @param event_type	A stop triggered by EVENT_USER is sent as
+ *			RG_STOP_USER
+ * @return		d_ret from the reply; RG_EFAIL if the service state
+ *			could not be read; -1 if the request could not be
+ *			sent; 0 if the reply had an unexpected size
+ */
+int
+service_op_stop(char *svcName, int do_disable, int event_type)
+{
+	SmMessageSt msg;
+	int msg_ret;
+	msgctx_t ctx;
+	rg_state_t svcStatus;
+	int msgtarget = my_id();
+
+	/* Build the message header */
+	msg.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
+	msg.sm_hdr.gh_command = RG_ACTION_REQUEST;
+	msg.sm_hdr.gh_arg1 = RG_ACTION_MASTER;
+	msg.sm_hdr.gh_length = sizeof (SmMessageSt);
+
+	msg.sm_data.d_action = ((!do_disable) ? RG_STOP:RG_DISABLE);
+
+	if (msg.sm_data.d_action == RG_STOP && event_type == EVENT_USER)
+		msg.sm_data.d_action = RG_STOP_USER;
+
+	/*
+	 * strncpy does not terminate the destination when the source
+	 * fills it; copy one byte less and terminate explicitly.
+	 */
+	strncpy(msg.sm_data.d_svcName, svcName,
+		sizeof(msg.sm_data.d_svcName) - 1);
+	msg.sm_data.d_svcName[sizeof(msg.sm_data.d_svcName) - 1] = 0;
+	msg.sm_data.d_ret = 0;
+	msg.sm_data.d_svcOwner = 0;
+
+	/* Open a connection to the owner, or to the local node if there
+	   is none - it will decide what to do in this case.  XXX
+	   inefficient; should queue requests locally and immediately
+	   forward requests otherwise */
+
+	if (get_service_state_internal(svcName, &svcStatus) < 0)
+		return RG_EFAIL;
+	if (svcStatus.rs_owner > 0)
+		msgtarget = svcStatus.rs_owner;
+
+	if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2)< 0) {
+		/* Report the node actually contacted, not the local one */
+		clulog(LOG_ERR,
+		       "#58: Failed opening connection to member #%d\n",
+		       msgtarget);
+		return -1;
+	}
+
+	/* Encode */
+	swab_SmMessageSt(&msg);
+
+	/* Send stop message to the other node */
+	if (msg_send(&ctx, &msg, sizeof (SmMessageSt)) <
+	    (int)sizeof (SmMessageSt)) {
+		clulog(LOG_ERR, "Failed to send complete message\n");
+		msg_close(&ctx);
+		return -1;
+	}
+
+	/* Check the response; keep waiting across receive timeouts */
+	do {
+		msg_ret = msg_receive(&ctx, &msg,
+				      sizeof (SmMessageSt), 10);
+		if ((msg_ret == -1 && errno != ETIMEDOUT) ||
+		    (msg_ret >= 0)) {
+			break;
+		}
+	} while(1);
+
+	/* The exchange is over either way; never leave the context open */
+	msg_close(&ctx);
+
+	if (msg_ret != sizeof (SmMessageSt)) {
+		clulog(LOG_WARNING, "Strange response size: %d vs %d\n",
+		       msg_ret, (int)sizeof(SmMessageSt));
+		return 0;	/* XXX really UNKNOWN */
+	}
+
+	/* Got a valid response from other node. */
+
+	/* Decode */
+	swab_SmMessageSt(&msg);
+
+	return msg.sm_data.d_ret;
+}
+
+
+/*
+ TODO
+ service_op_migrate()
+ */
+
/cvs/cluster/cluster/rgmanager/src/daemons/slang_event.c,v --> standard output
revision 1.3.2.1
--- cluster/rgmanager/src/daemons/slang_event.c
+++ - 2007-12-18 17:52:58.827110000 +0000
@@ -0,0 +1,1249 @@
+/*
+ Copyright Red Hat, Inc. 2007
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+/**
+ @file S/Lang event handling & intrinsic functions + vars
+ */
+#include <platform.h>
+#include <resgroup.h>
+#include <list.h>
+#include <restart_counter.h>
+#include <reslist.h>
+#include <clulog.h>
+#include <members.h>
+#include <assert.h>
+#include <event.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <slang.h>
+#include <sys/syslog.h>
+#include <malloc.h>
+#include <clulog.h>
+#include <sets.h>
+
+/* Set to 1 by do_init_slang() once the interpreter has been set up. */
+static int __sl_initialized = 0;
+
+/* Service name list handed to scripts by sl_service_list(); it is filled
+   and released again inside slang_process_event(). */
+static char **_service_list = NULL;
+static int _service_list_len = 0;
+
+char **get_service_names(int *len); /* from groups.c */
+int get_service_property(char *rg_name, char *prop, char *buf, size_t buflen);
+void push_int_array(int *stuff, int len);
+
+
+/* ================================================================
+ * Node states
+ * ================================================================ */
+static const int
+ _ns_online = 1,
+ _ns_offline = 0;
+
+/* ================================================================
+ * Event information
+ * ================================================================ */
+static const int
+ _ev_none = EVENT_NONE,
+ _ev_node = EVENT_NODE,
+ _ev_service = EVENT_RG,
+ _ev_config = EVENT_CONFIG,
+ _ev_user = EVENT_USER;
+
+/* Result codes exported to scripts (see the "From constants.c" entries
+   in rgmanager_vars) */
+static const int
+ _rg_fail = RG_EFAIL,
+ _rg_success = RG_ESUCCESS,
+ _rg_edomain = RG_EDOMAIN,
+ _rg_edepend = RG_EDEPEND,
+ _rg_eabort = RG_EABORT,
+ _rg_einval = RG_EINVAL,
+ _rg_erun = RG_ERUN;
+
+/* Per-event values: the S_*_event() handlers fill these in before running
+   a script and reset them afterwards. */
+static int
+ _stop_processing = 0,
+ _my_node_id = 0,
+ _node_state = 0,
+ _node_id = 0,
+ _node_clean = 0,
+ _service_owner = 0,
+ _service_last_owner = 0,
+ _user_request = 0,
+ _user_arg1 = 0,
+ _user_arg2 = 0,
+ _user_return = 0,
+ _rg_err = 0,
+ _event_type = 0;
+
+static char
+ *_node_name = NULL,
+ *_service_name = NULL,
+ *_service_state = NULL,
+ *_rg_err_str = "No Error";
+
+/* Request codes exported to scripts as the USER_* variables */
+static int
+ _user_enable = RG_ENABLE,
+ _user_disable = RG_DISABLE,
+ _user_stop = RG_STOP_USER, /* From clusvcadm */
+ _user_relo = RG_RELOCATE,
+ _user_restart = RG_RESTART,
+ _user_migrate = RG_MIGRATE;
+
+
+/*
+ * Intrinsic variables made visible to event scripts; the table is
+ * registered by do_init_slang().  Each entry maps a script-level name to
+ * one of the C variables above.  The last MAKE_VARIABLE argument is
+ * presumably S/Lang's read-only flag (confirm against the S/Lang C API):
+ * it is 1 for every entry except "user_return".  Note that
+ * "user_service" and "user_target" share storage with "service_name"
+ * and "service_owner".
+ */
+SLang_Intrin_Var_Type rgmanager_vars[] =
+{
+ /* Log levels (constants) */
+
+ /* Node state information */
+ MAKE_VARIABLE("NODE_ONLINE", &_ns_online, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("NODE_OFFLINE", &_ns_offline, SLANG_INT_TYPE, 1),
+
+ /* Node event information */
+ MAKE_VARIABLE("node_self", &_my_node_id, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("node_state", &_node_state, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("node_id", &_node_id, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("node_name", &_node_name, SLANG_STRING_TYPE,1),
+ MAKE_VARIABLE("node_clean", &_node_clean, SLANG_INT_TYPE, 1),
+
+ /* Service event information */
+ MAKE_VARIABLE("service_name", &_service_name, SLANG_STRING_TYPE,1),
+ MAKE_VARIABLE("service_state", &_service_state,SLANG_STRING_TYPE,1),
+ MAKE_VARIABLE("service_owner", &_service_owner,SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("service_last_owner", &_service_last_owner,
+ SLANG_INT_TYPE, 1),
+
+ /* User event information */
+ MAKE_VARIABLE("user_request", &_user_request, SLANG_INT_TYPE,1),
+ MAKE_VARIABLE("user_arg1", &_user_arg1, SLANG_INT_TYPE,1),
+ MAKE_VARIABLE("user_arg2", &_user_arg2, SLANG_INT_TYPE,1),
+ MAKE_VARIABLE("user_service", &_service_name, SLANG_STRING_TYPE,1),
+ MAKE_VARIABLE("user_target", &_service_owner,SLANG_INT_TYPE, 1),
+ /* Return code to user requests; i.e. clusvcadm */
+ MAKE_VARIABLE("user_return", &_user_return, SLANG_INT_TYPE, 0),
+
+ /* General event information */
+ MAKE_VARIABLE("event_type", &_event_type, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("EVENT_NONE", &_ev_none, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("EVENT_NODE", &_ev_node, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("EVENT_CONFIG", &_ev_config, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("EVENT_SERVICE", &_ev_service, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("EVENT_USER", &_ev_user, SLANG_INT_TYPE, 1),
+
+ /* User request constants */
+ MAKE_VARIABLE("USER_ENABLE", &_user_enable, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("USER_DISABLE", &_user_disable, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("USER_STOP", &_user_stop, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("USER_RELOCATE", &_user_relo, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("USER_RESTART", &_user_restart, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("USER_MIGRATE", &_user_migrate, SLANG_INT_TYPE, 1),
+
+ /* Errors */
+ MAKE_VARIABLE("rg_error", &_rg_err, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("rg_error_string",&_rg_err_str, SLANG_STRING_TYPE,1),
+
+ /* From constants.c */
+ MAKE_VARIABLE("FAIL", &_rg_fail, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("SUCCESS", &_rg_success, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("ERR_ABORT", &_rg_eabort, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("ERR_INVALID", &_rg_einval, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("ERR_DEPEND", &_rg_edepend, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("ERR_DOMAIN", &_rg_edomain, SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("ERR_RUNNING", &_rg_erun, SLANG_INT_TYPE, 1),
+
+ SLANG_END_INTRIN_VAR_TABLE
+};
+
+
+/*
+ * Record an error for scripts to inspect: the numeric code goes into
+ * _rg_err ("rg_error") and the spelling of the constant that was passed
+ * goes into _rg_err_str ("rg_error_string").  The argument is turned into
+ * a string with the # operator; ## (token pasting) is not valid here.
+ */
+#define rg_error(errortype) \
+do { \
+	_rg_err = errortype; \
+	_rg_err_str = #errortype; \
+} while(0)
+
+
+/*
+ * Fetch the state of a service into *svcStatus.
+ *
+ * The local copy is read first.  Only if that copy is still
+ * RG_STATE_UNINITIALIZED is the service lock taken and the state read
+ * with get_rg_state(); a state that has no transition time yet is then
+ * completed (transition time set, state derived from the "autostart"
+ * property) and written back with set_rg_state().
+ *
+ * Returns 0 on success.  Returns -1 with errno set to ENOLCK if the lock
+ * could not be taken, or to ENOENT if the state could not be read.
+ */
+int
+get_service_state_internal(char *svcName, rg_state_t *svcStatus)
+{
+	struct dlm_lksb lock;
+	char buf[32];
+
+	get_rg_state_local(svcName, svcStatus);
+	if (svcStatus->rs_state != RG_STATE_UNINITIALIZED)
+		return 0;
+
+	if (rg_lock(svcName, &lock) < 0) {
+		errno = ENOLCK;
+		return -1;
+	}
+
+	if (get_rg_state(svcName, svcStatus) < 0) {
+		/* Unlock before setting errno so that rg_unlock() cannot
+		   overwrite the value reported to the caller */
+		rg_unlock(&lock);
+		errno = ENOENT;
+		return -1;
+	}
+
+	/* We got a copy from another node - don't flip the state */
+	if (svcStatus->rs_transition) {
+		rg_unlock(&lock);
+		return 0;
+	}
+
+	/* Finish initializing the service state */
+	svcStatus->rs_transition = (uint64_t)time(NULL);
+
+	if (get_service_property(svcName, "autostart",
+				 buf, sizeof(buf)) == 0) {
+		if (buf[0] == '0' || !strcasecmp(buf, "no")) {
+			svcStatus->rs_state = RG_STATE_DISABLED;
+		} else {
+			svcStatus->rs_state = RG_STATE_STOPPED;
+		}
+	}
+
+	set_rg_state(svcName, svcStatus);
+
+	rg_unlock(&lock);
+
+	return 0;
+}
+
+
+/*
+ * S/Lang intrinsic "service_status":
+ *
+ *   (restarts, last_owner, owner, state) = service_status(servicename)
+ *
+ * Pushes three integers followed by the state name.  For the states which
+ * never have an owner, -1 is pushed as the owner.  If anything fails, a
+ * S/Lang run-time error is raised and the remaining values are not pushed.
+ */
+void
+sl_service_status(char *svcName)
+{
+	rg_state_t svcStatus;
+	char *state_str;
+
+	if (get_service_state_internal(svcName, &svcStatus) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to get status for %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push restarts for %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_last_owner) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push last owner of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	switch(svcStatus.rs_state) {
+	case RG_STATE_DISABLED:
+	case RG_STATE_STOPPED:
+	case RG_STATE_FAILED:
+	case RG_STATE_RECOVER:
+	case RG_STATE_ERROR:
+		/* There is no owner for these states.  Ever. */
+		svcStatus.rs_owner = -1;
+		break;
+	default:
+		break;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_owner) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push owner of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	state_str = strdup(rg_state_str(svcStatus.rs_state));
+	if (!state_str) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to duplicate state of %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
+	/* SLang_push_malloced_string() takes over state_str and releases
+	   it whether or not the push succeeds, so it must not be freed
+	   again here (doing so would be a double free). */
+	if (SLang_push_malloced_string(state_str) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push state of %s",
+			     __FUNCTION__,
+			     svcName);
+	}
+}
+
+
+/**
+  S/Lang intrinsic "service_domain_info":
+
+    (nofailback, restricted, ordered, nodelist) = service_domain_info(svcName);
+
+  The three flags are pushed as 0/1 integers, followed by an integer
+  array of node IDs.  If the service has no "domain" property, or the
+  domain cannot be resolved, four integer zeroes are pushed instead.
+ */
+void
+sl_domain_info(char *svcName)
+{
+	int *nodelist = NULL, listlen;
+	char buf[64];
+	int flags = 0;
+
+	if (get_service_property(svcName, "domain", buf, sizeof(buf)) < 0) {
+		/* nofailback not set */
+		SLang_push_integer(0);
+		/* not restricted */
+		SLang_push_integer(0);
+		/* not ordered */
+		SLang_push_integer(0);
+		/* no nodes */
+		SLang_push_integer(0);
+
+		/* Stop here: buf holds nothing usable, and carrying on
+		   would push a second set of results */
+		return;
+	}
+
+	if (node_domain_set_safe(buf, &nodelist, &listlen, &flags) < 0) {
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		return;
+	}
+
+	SLang_push_integer(!!(flags & FOD_NOFAILBACK));
+	SLang_push_integer(!!(flags & FOD_RESTRICTED));
+	SLang_push_integer(!!(flags & FOD_ORDERED));
+
+	push_int_array(nodelist, listlen);
+	free(nodelist);
+}
+
+
+/*
+ * Pop a node list from the top of the S/Lang stack.  Either a single
+ * integer (giving a one-element list) or a one-dimensional integer array
+ * is accepted.
+ *
+ * On success 0 is returned, *nodelist points to a malloc()ed copy which
+ * the caller must free(), and *len holds the element count.  On failure
+ * -1 is returned and *nodelist / *len are left untouched.  A value of any
+ * other type is not popped.
+ */
+static int
+get_int_array(int **nodelist, int *len)
+{
+	SLang_Array_Type *a = NULL;
+	SLindex_Type i;
+	int *nodes = NULL, t, ret = -1;
+
+	if (!nodelist || !len)
+		return -1;
+
+	t = SLang_peek_at_stack();
+	if (t == SLANG_INT_TYPE) {
+
+		nodes = malloc(sizeof(int));
+		if (!nodes)
+			goto out;
+		if (SLang_pop_integer(&nodes[0]) < 0)
+			goto out;
+
+		*len = 1;
+		ret = 0;
+
+	} else if (t == SLANG_ARRAY_TYPE) {
+		if (SLang_pop_array_of_type(&a, SLANG_INT_TYPE) < 0)
+			goto out;
+		if (a->num_dims > 1 || a->dims[0] < 0)
+			goto out;
+
+		/* Always request at least one element: malloc(0) is allowed
+		   to return NULL, which would make an empty array look like
+		   an allocation failure */
+		nodes = malloc(sizeof(int) * (a->dims[0] ? a->dims[0] : 1));
+		if (!nodes)
+			goto out;
+
+		for (i = 0; i < a->dims[0]; i++) {
+			if (SLang_get_array_element(a, &i, &nodes[i]) < 0)
+				goto out;
+		}
+
+		*len = a->dims[0];
+		ret = 0;
+	}
+
+out:
+	if (a)
+		SLang_free_array(a);
+	if (ret == 0)
+		*nodelist = nodes;
+	else
+		free(nodes);
+
+	return ret;
+}
+
+
+/**
+  S/Lang intrinsic "service_property":
+
+    value = service_property(service_name, property)
+
+  Returns NULL if the property cannot be read.
+
+  The value is returned in a static buffer which the next call
+  overwrites.  This intrinsic is registered with a string return type;
+  for such functions the interpreter keeps its own copy of the returned
+  string and never frees the pointer, so returning a strdup()ed string
+  (as was done before) leaked one allocation per call.
+  NOTE(review): confirm the copy semantics against the S/Lang C API in use.
+ */
+char *
+sl_service_property(char *svcName, char *prop)
+{
+	static char buf[96];
+
+	if (get_service_property(svcName, prop, buf, sizeof(buf)) < 0)
+		return NULL;
+
+	return buf;
+}
+
+
+/**
+  S/Lang intrinsic "service_stop":
+
+    ret = service_stop(name [, disable_flag]);
+
+  Arguments are taken off the S/Lang stack, last argument first.  The
+  result of service_op_stop() is returned and is also stored in
+  _user_return; -1 is used when the arguments cannot be fetched.
+ */
+int
+sl_stop_service(void)
+{
+	char *name = NULL;
+	int argc = SLang_Num_Function_Args;
+	int type, rv = -1;
+	int disable = 0;
+
+	/* Takes one or two args */
+	if (argc < 1 || argc > 2) {
+		SLang_verror(SL_Syntax_Error,
+			     "%s: Wrong # of args (%d), must be 1 or 2\n",
+			     __FUNCTION__,
+			     argc);
+		return -1;
+	}
+
+	/* The optional disable flag, when given, is on top of the stack */
+	if (argc == 2) {
+		type = SLang_peek_at_stack();
+		if (type != SLANG_INT_TYPE) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: expected type %d got %d\n",
+				     __FUNCTION__, SLANG_INT_TYPE, type);
+			goto done;
+		}
+
+		if (SLang_pop_integer(&disable) < 0) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: Failed to pop integer from stack!\n",
+				     __FUNCTION__);
+			goto done;
+		}
+	}
+
+	/* What is left is the service name */
+	type = SLang_peek_at_stack();
+	if (type != SLANG_STRING_TYPE) {
+		SLang_verror(SL_Syntax_Error,
+			     "%s: expected type %d got %d\n",
+			     __FUNCTION__,
+			     SLANG_STRING_TYPE, type);
+		goto done;
+	}
+
+	if (SLpop_string(&name) < 0) {
+		SLang_verror(SL_Syntax_Error,
+			     "%s: Failed to pop string from stack!\n",
+			     __FUNCTION__);
+		goto done;
+	}
+
+	rv = service_op_stop(name, disable, _event_type);
+
+done:
+	if (name)
+		free(name);
+	_user_return = rv;
+	return rv;
+}
+
+
+/**
+  S/Lang intrinsic "service_start":
+
+    ret = service_start(name [, <array>ordered_node_list_allowed
+                              [, <array>node_list_illegal]])
+
+  Arguments are taken off the S/Lang stack, last argument first.  The
+  illegal-node list is fetched but is not passed on to
+  service_op_start().
+
+  Returns the new owner's node ID when service_op_start() returns 0 and
+  reports an owner greater than zero; otherwise the service_op_start()
+  result, or -1 if the arguments cannot be fetched.  The value is also
+  stored in _user_return.
+ */
+int
+sl_start_service(void)
+{
+	char *svcname = NULL;
+	int *pref_list = NULL, pref_list_len = 0;
+	int *illegal_list = NULL, illegal_list_len = 0;
+	int nargs, t, newowner = 0, ret = -1;
+
+	nargs = SLang_Num_Function_Args;
+
+	/* Takes one, two, or three */
+	if (nargs <= 0 || nargs > 3) {
+		SLang_verror(SL_Syntax_Error,
+			     "%s: Wrong # of args (%d), must be 1, 2, or 3\n",
+			     __FUNCTION__, nargs);
+		return -1;
+	}
+
+	if (nargs == 3) {
+		if (get_int_array(&illegal_list, &illegal_list_len) < 0)
+			goto out;
+		--nargs;
+	}
+
+	if (nargs == 2) {
+		if (get_int_array(&pref_list, &pref_list_len) < 0)
+			goto out;
+		--nargs;
+	}
+
+	if (nargs == 1) {
+		/* Just get the service name */
+		t = SLang_peek_at_stack();
+		if (t != SLANG_STRING_TYPE) {
+			SLang_verror(SL_Syntax_Error,
+				     "%s: expected type %d got %d\n",
+				     __FUNCTION__,
+				     SLANG_STRING_TYPE, t);
+			goto out;
+		}
+
+		if (SLpop_string(&svcname) < 0)
+			goto out;
+	}
+
+	ret = service_op_start(svcname, pref_list,
+			       pref_list_len, &newowner);
+
+	/* Fold success and the new owner into a single return value */
+	if (ret == 0 && newowner > 0)
+		ret = newowner;
+out:
+	if (svcname)
+		free(svcname);
+	if (illegal_list)
+		free(illegal_list);
+	if (pref_list)
+		free(pref_list);
+	_user_return = ret;
+	return ret;
+}
+
+
+/* Build a one-dimensional S/Lang integer array holding the first (len)
+   entries of (stuff) and push it on to the S/Lang stack.  Nothing is
+   pushed if the array cannot be created. */
+void
+push_int_array(int *stuff, int len)
+{
+	SLindex_Type count = len, pos;
+	SLang_Array_Type *out;
+	int value;
+
+	out = SLang_create_array(SLANG_INT_TYPE, 0, NULL, &count, 1);
+	if (out == NULL)
+		return;
+
+	for (pos = 0; pos < len; pos++) {
+		value = stuff[pos];
+		SLang_set_array_element(out, &pos, &value);
+	}
+
+	SLang_push_array(out, 1);
+}
+
+
+/*
+  S/Lang intrinsic "nodes_online": pushes an integer array with the IDs
+  of the entries in the current member list which are flagged as members
+  and have a non-zero node ID.  Nothing is pushed if the member list or
+  the scratch buffer cannot be obtained.
+ */
+void
+sl_nodes_online(void)
+{
+	int i, *nodes, nodecount = 0;
+	cluster_member_list_t *membership = member_list();
+
+	if (!membership)
+		return;
+
+	/* Always request at least one slot: malloc(0) is allowed to return
+	   NULL, which would make an empty list look like a failure */
+	nodes = malloc(sizeof(int) *
+		       (membership->cml_count ? membership->cml_count : 1));
+	if (!nodes) {
+		/* Don't leak the member list on the error path */
+		free_member_list(membership);
+		return;
+	}
+
+	for (i = 0; i < membership->cml_count; i++) {
+		if (membership->cml_members[i].cn_member &&
+		    membership->cml_members[i].cn_nodeid != 0) {
+			nodes[nodecount] = membership->cml_members[i].cn_nodeid;
+			++nodecount;
+		}
+	}
+	free_member_list(membership);
+	push_int_array(nodes, nodecount);
+	free(nodes);
+}
+
+
+/*
+  S/Lang intrinsic "service_list": pushes a string array built from
+  _service_list (the rgmanager-defined services, in type:name format).
+  The list itself is allocated and released once per event by
+  slang_process_event(), so nothing is allocated for it here.
+ */
+void
+sl_service_list(void)
+{
+	SLindex_Type total = _service_list_len, idx;
+	SLang_Array_Type *names;
+
+	names = SLang_create_array(SLANG_STRING_TYPE, 0, NULL, &total, 1);
+	if (names == NULL)
+		return;
+
+	for (idx = 0; idx < _service_list_len; idx++)
+		SLang_set_array_element(names, &idx, &_service_list[idx]);
+
+	SLang_push_array(names, 1);
+}
+
+
+/* S/Lang intrinsic "union": pops two node lists, runs s_union() (see
+   sets.c) on them and pushes the result as an integer array.  Does
+   nothing unless called with exactly two arguments. */
+void
+sl_union(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *result = NULL, result_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it comes off first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_union(left, left_len, right, right_len, &result, &result_len);
+	push_int_array(result, result_len);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(left);
+	free(right);
+	free(result);
+}
+
+
+/* S/Lang intrinsic "intersection": pops two node lists, runs
+   s_intersection() (see sets.c) on them and pushes the result as an
+   integer array.  Does nothing unless called with exactly two arguments. */
+void
+sl_intersection(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *result = NULL, result_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it comes off first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_intersection(left, left_len, right, right_len, &result, &result_len);
+	push_int_array(result, result_len);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(left);
+	free(right);
+	free(result);
+}
+
+
+/* S/Lang intrinsic "delta": pops two node lists, runs s_delta() (see
+   sets.c) on them and pushes the result as an integer array.  Does
+   nothing unless called with exactly two arguments. */
+void
+sl_delta(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *result = NULL, result_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it comes off first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_delta(left, left_len, right, right_len, &result, &result_len);
+	push_int_array(result, result_len);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(left);
+	free(right);
+	free(result);
+}
+
+
+/* S/Lang intrinsic "subtract": pops two node lists, runs s_subtract()
+   (see sets.c) on them and pushes the result as an integer array.  Does
+   nothing unless called with exactly two arguments. */
+void
+sl_subtract(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *result = NULL, result_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it comes off first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_subtract(left, left_len, right, right_len, &result, &result_len);
+	push_int_array(result, result_len);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(left);
+	free(right);
+	free(result);
+}
+
+
+/* S/Lang intrinsic "shuffle": pops one node list, runs s_shuffle() (see
+   sets.c) on it and pushes the list back as an integer array.  Does
+   nothing unless called with exactly one argument. */
+void
+sl_shuffle(void)
+{
+	int *list = NULL, list_len = 0;
+
+	if (SLang_Num_Function_Args != 1)
+		return;
+
+	get_int_array(&list, &list_len);
+	s_shuffle(list, list_len);
+	push_int_array(list, list_len);
+
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(list);
+}
+
+
+/* Render an int array as "[ a b c ]" into (buf) so that it can be logged
+   in one shot.  Returns the length of the resulting string, or -1 if it
+   does not fit in (buflen) bytes including the terminating NUL. */
+static int
+array_to_string(char *buf, int buflen, int *array, int arraylen)
+{
+	char piece[16];
+	int idx, n, left;
+
+	memset(piece, 0, sizeof(piece));
+	memset(buf, 0, buflen);
+
+	n = snprintf(buf, buflen - 1, "[ ");
+	if (n == buflen)
+		return -1;
+	left = buflen - n;
+
+	/* One "<number> " piece per element */
+	for (idx = 0; idx < arraylen; idx++) {
+		n = snprintf(piece, sizeof(piece) - 1, "%d ", array[idx]);
+		left -= n;
+		if (left <= 0)
+			return -1;
+		strncat(buf, piece, n);
+	}
+
+	/* Closing bracket */
+	n = snprintf(piece, sizeof(piece) - 1 , "]");
+	left -= n;
+	if (left <= 0)
+		return -1;
+	strncat(buf, piece, n);
+
+	return (buflen - left);
+}
+
+
+/**
+  Common back end for the logging intrinsics (debug, info, notice,
+  warning, err, crit, alert, emerg); each wrapper supplies the log level.
+
+  The script's arguments are popped from the S/Lang stack starting with
+  the last one, and each is prepended to the message, so the text comes
+  out in the order it was written.  printf-style formatting is not
+  supported; integers, strings and integer arrays may simply be mixed:
+
+    info("String", 1, "string2");
+
+      Result: String1string2
+
+    info("String ", 1, " string2");
+
+      Result: String 1 string2
+
+  Any other type is logged as "{UnknownType N}".  Nothing is logged if
+  the assembled message would not fit the internal buffer.
+ */
+void
+sl_clulog(int level)
+{
+	int t, nargs, len;
+	int s_intval;
+	char *s_strval;
+	int *nodes = NULL, nlen = 0;
+	char logbuf[512];
+	char tmp[256];
+	int remain = sizeof(logbuf)-2;
+
+	nargs = SLang_Num_Function_Args;
+	if (nargs < 1)
+		return;
+
+	/* The message is assembled backwards from the end of logbuf, which
+	   already holds the trailing newline and terminator */
+	memset(logbuf, 0, sizeof(logbuf));
+	memset(tmp, 0, sizeof(tmp));
+	logbuf[sizeof(logbuf)-1] = 0;
+	logbuf[sizeof(logbuf)-2] = '\n';
+
+	while (nargs && (t = SLang_peek_at_stack()) >= 0 && remain) {
+		switch(t) {
+		case SLANG_ARRAY_TYPE:
+			if (get_int_array(&nodes, &nlen) < 0)
+				return;
+			len = array_to_string(tmp, sizeof(tmp),
+					      nodes, nlen);
+			free(nodes);
+			if (len < 0)
+				return;
+			break;
+		case SLANG_INT_TYPE:
+			if (SLang_pop_integer(&s_intval) < 0)
+				return;
+			snprintf(tmp, sizeof(tmp) - 1, "%d", s_intval);
+			break;
+		case SLANG_STRING_TYPE:
+			if (SLpop_string(&s_strval) < 0)
+				return;
+			snprintf(tmp, sizeof(tmp) - 1, "%s", s_strval);
+			SLfree(s_strval);
+			break;
+		default:
+			snprintf(tmp, sizeof(tmp) - 1,
+				 "{UnknownType %d}", t);
+			/* Discard the value; otherwise the same item would
+			   be looked at again for every remaining argument
+			   and left behind on the stack */
+			SLdo_pop();
+			break;
+		}
+
+		--nargs;
+
+		/* Use what is really in tmp: snprintf() returns the length
+		   the text would have had, which is larger than the buffer
+		   contents when the text was truncated */
+		len = (int)strlen(tmp);
+
+		if (len > remain)
+			return;
+		remain -= len;
+
+		memcpy(&logbuf[remain], tmp, len);
+	}
+
+	/* Never hand script-supplied text to clulog() as the format string */
+	clulog(level, "%s", &logbuf[remain]);
+}
+
+
+/* Logging functions: one intrinsic per log level.  Each one passes its
+   fixed level to sl_clulog(), which takes the message parts from the
+   S/Lang stack. */
+void
+sl_log_debug(void)
+{
+ sl_clulog(LOG_DEBUG);
+}
+
+
+/* Log at LOG_INFO */
+void
+sl_log_info(void)
+{
+ sl_clulog(LOG_INFO);
+}
+
+
+/* Log at LOG_NOTICE */
+void
+sl_log_notice(void)
+{
+ sl_clulog(LOG_NOTICE);
+}
+
+
+/* Log at LOG_WARNING */
+void
+sl_log_warning(void)
+{
+ sl_clulog(LOG_WARNING);
+}
+
+
+/* Log at LOG_ERR */
+void
+sl_log_err(void)
+{
+ sl_clulog(LOG_ERR);
+}
+
+
+/* Log at LOG_CRIT */
+void
+sl_log_crit(void)
+{
+ sl_clulog(LOG_CRIT);
+}
+
+
+/* Log at LOG_ALERT */
+void
+sl_log_alert(void)
+{
+ sl_clulog(LOG_ALERT);
+}
+
+
+/* Log at LOG_EMERG */
+void
+sl_log_emerg(void)
+{
+ sl_clulog(LOG_EMERG);
+}
+
+
+/* S/Lang intrinsic "stop_processing": sets the flag which makes
+   slang_process_event() stop running further scripts for this event. */
+void
+sl_die(void)
+{
+	_stop_processing = 1;
+}
+
+
+/*
+ * Intrinsic functions made visible to event scripts; the table is
+ * registered by do_init_slang().  Each entry gives the script-level
+ * name, the C function behind it and the type of its return value.
+ * Functions entered with MAKE_INTRINSIC_0 take no C parameters and fetch
+ * their arguments from the S/Lang stack themselves; those entered with
+ * MAKE_INTRINSIC_S / MAKE_INTRINSIC_SS take one / two char * parameters.
+ */
+SLang_Intrin_Fun_Type rgmanager_slang[] =
+{
+ MAKE_INTRINSIC_0("nodes_online", sl_nodes_online, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("service_list", sl_service_list, SLANG_VOID_TYPE),
+
+ MAKE_INTRINSIC_SS("service_property", sl_service_property,
+ SLANG_STRING_TYPE),
+ MAKE_INTRINSIC_S("service_domain_info", sl_domain_info, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("service_stop", sl_stop_service, SLANG_INT_TYPE),
+ MAKE_INTRINSIC_0("service_start", sl_start_service, SLANG_INT_TYPE),
+ MAKE_INTRINSIC_S("service_status", sl_service_status,
+ SLANG_VOID_TYPE),
+
+ /* Node list manipulation */
+ MAKE_INTRINSIC_0("union", sl_union, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("intersection", sl_intersection, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("delta", sl_delta, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("subtract", sl_subtract, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("shuffle", sl_shuffle, SLANG_VOID_TYPE),
+
+ /* Logging */
+ MAKE_INTRINSIC_0("debug", sl_log_debug, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("info", sl_log_info, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("notice", sl_log_notice, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("warning", sl_log_warning, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("err", sl_log_err, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("crit", sl_log_crit, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("alert", sl_log_alert, SLANG_VOID_TYPE),
+ MAKE_INTRINSIC_0("emerg", sl_log_emerg, SLANG_VOID_TYPE),
+
+ MAKE_INTRINSIC_0("stop_processing", sl_die, SLANG_VOID_TYPE),
+
+ SLANG_END_INTRIN_FUN_TABLE
+};
+
+
+/* Error hook installed by do_init_slang() (SLang_Error_Hook): logs the
+   interpreter's error text (errstr) at LOG_ERR with a "[S/Lang]" prefix. */
+void
+rgmanager_slang_error_hook(char *errstr)
+{
+ /* Don't pass errstr as the format string itself: it might contain
+ "%s", for example, which would result in a crash.  Going through
+ our own format also lets us add the newline. */
+ clulog(LOG_ERR, "[S/Lang] %s\n", errstr);
+}
+
+
+
+/* ================================================================
+ * S/Lang initialization
+ * ================================================================ */
+/*
+ * Set up the interpreter: initialize S/Lang and its file support,
+ * register the rgmanager intrinsic functions and variables, set the
+ * script load path and install the error hook.
+ *
+ * Returns 0 on success and 1 on failure.  __sl_initialized is only set
+ * once every step has succeeded.
+ */
+int
+do_init_slang(void)
+{
+	/* Don't register anything with an interpreter that failed to start */
+	if (SLang_init_slang() < 0)
+		return 1;
+	if (SLang_init_slfile() < 0)
+		return 1;
+
+	if (SLadd_intrin_fun_table(rgmanager_slang, NULL) < 0)
+		return 1;
+	if (SLadd_intrin_var_table (rgmanager_vars, NULL) < 0)
+		return 1;
+
+	/* TODO: Make rgmanager S/Lang conformant.  Though, it
+	   might be a poor idea to provide access to all the
+	   S/Lang libs */
+	SLpath_set_load_path(RESOURCE_ROOTDIR);
+
+	_my_node_id = my_id();
+	__sl_initialized = 1;
+
+	SLang_Error_Hook = rgmanager_slang_error_hook;
+
+	return 0;
+}
+
+
+/*
+  Run a script and hand the interpreter's result back to the caller.
+  When (file) is given it is loaded; otherwise (script) is run as
+  inline text.  A negative result is logged and the interpreter is
+  restarted.
+ */
+int
+do_slang_run(const char *file, const char *script)
+{
+	int rv;
+
+	rv = file ? SLang_load_file((char *)file)
+		  : SLang_load_string((char *)script);
+
+	if (rv >= 0)
+		return rv;
+
+	clulog(LOG_ERR, "[S/Lang] Script Execution Failure\n");
+	SLang_restart(1);
+
+	return rv;
+}
+
+
+/*
+ * Run a script for a node event.  The node_* script variables are filled
+ * in for the duration of the script and reset afterwards.  node_name is
+ * left unset (NULL) if the member list or the node's name is not
+ * available.
+ *
+ * Returns the result of do_slang_run().
+ */
+int
+S_node_event(const char *file, const char *script, int nodeid,
+	     int state, int clean)
+{
+	int ret;
+	const char *name = NULL;
+	cluster_member_list_t *membership = member_list();
+
+	if (membership)
+		name = memb_id_to_name(membership, nodeid);
+
+	/* strdup(NULL) is undefined; copy the name only if we have one, and
+	   do so before the member list it may point into is released */
+	_node_name = name ? strdup(name) : NULL;
+	_node_state = state;
+	_node_clean = clean;
+	_node_id = nodeid;
+	if (membership)
+		free_member_list(membership);
+
+	ret = do_slang_run(file, script);
+
+	_node_state = 0;
+	_node_clean = 0;
+	_node_id = 0;
+	if (_node_name)
+		free(_node_name);
+	_node_name = NULL;
+
+	return ret;
+}
+
+
+/*
+ * Run a script for a service event.  The service_* script variables are
+ * filled in for the duration of the script and reset afterwards; for
+ * states which never have an owner, service_owner is presented as -1.
+ *
+ * Returns the result of do_slang_run().
+ */
+int
+S_service_event(const char *file, const char *script, char *name,
+		int state, int owner, int last_owner)
+{
+	int rv;
+
+	_service_name = name;
+	_service_state = (char *)rg_state_str(state);
+	_service_last_owner = last_owner;
+
+	/* There is no owner for these states.  Ever. */
+	if (state == RG_STATE_DISABLED || state == RG_STATE_STOPPED ||
+	    state == RG_STATE_FAILED || state == RG_STATE_RECOVER ||
+	    state == RG_STATE_ERROR)
+		_service_owner = -1;
+	else
+		_service_owner = owner;
+
+	rv = do_slang_run(file, script);
+
+	_service_name = NULL;
+	_service_state = 0;
+	_service_owner = 0;
+	_service_last_owner = 0;
+
+	return rv;
+}
+
+
+/*
+ * Run a script for a user request.  The user_* script variables are
+ * filled in for the duration of the script and reset afterwards.  If
+ * the script fails to run, user_return is forced to RG_ESCRIPT.
+ *
+ * When (ctx) is given, the outcome is sent back over it and the context
+ * is closed and freed.  A user_return above zero is sent as the last
+ * send_ret() argument with a result of 0; any other value is sent as the
+ * result itself.
+ *
+ * Returns the result of do_slang_run().
+ */
+int
+S_user_event(const char *file, const char *script, char *name,
+	     int request, int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	int rv;
+	int result = 0, new_owner = 0;
+
+	_service_name = name;
+	_service_owner = target;
+	_user_request = request;
+	_user_arg1 = arg1;
+	_user_arg2 = arg2;
+	_user_return = 0;
+
+	rv = do_slang_run(file, script);
+	if (rv < 0)
+		_user_return = RG_ESCRIPT;
+
+	_service_name = NULL;
+	_service_owner = 0;
+	_user_request = 0;
+	_user_arg1 = 0;
+	_user_arg2 = 0;
+
+	if (ctx != NULL) {
+		/* sl_start_service() squashes return code and node ID
+		   into one value: above zero it is the ID of the node
+		   running the service, otherwise it is passed back
+		   as-is */
+		if (_user_return > 0)
+			new_owner = _user_return;
+		else
+			result = _user_return;
+
+		send_ret(ctx, name, result, request, new_owner);
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+
+	_user_return = 0;
+	return rv;
+}
+
+
+/*
+ * Run the script attached to (pattern) for the event (ev), dispatching
+ * on the event type.  event_type is visible to the script while it runs
+ * and is reset to EVENT_NONE afterwards.  Event types without a handler
+ * run nothing and yield 0.
+ */
+int
+slang_do_script(event_t *pattern, event_t *ev)
+{
+	int rv = 0;
+
+	_event_type = ev->ev_type;
+
+	if (ev->ev_type == EVENT_NODE) {
+		rv = S_node_event(pattern->ev_script_file,
+				  pattern->ev_script,
+				  ev->ev.node.ne_nodeid,
+				  ev->ev.node.ne_state,
+				  ev->ev.node.ne_clean);
+	} else if (ev->ev_type == EVENT_RG) {
+		rv = S_service_event(pattern->ev_script_file,
+				     pattern->ev_script,
+				     ev->ev.group.rg_name,
+				     ev->ev.group.rg_state,
+				     ev->ev.group.rg_owner,
+				     ev->ev.group.rg_last_owner);
+	} else if (ev->ev_type == EVENT_USER) {
+		rv = S_user_event(pattern->ev_script_file,
+				  pattern->ev_script,
+				  ev->ev.user.u_name,
+				  ev->ev.user.u_request,
+				  ev->ev.user.u_arg1,
+				  ev->ev.user.u_arg2,
+				  ev->ev.user.u_target,
+				  ev->ev.user.u_ctx);
+	}
+
+	_event_type = EVENT_NONE;
+	return rv;
+}
+
+
+
+/**
+  Process an event given our event table and the event that
+  occurred.  Note that the caller is responsible for freeing the
+  event - do not free (ev) ...
+
+  Patterns are tried in priority order 1 .. max_prio, followed by the
+  default level 0.  Processing stops early once a script has called
+  stop_processing().
+
+  Returns 0 once the event has been processed, or -1 if the interpreter
+  could not be initialized (no script is run in that case).
+ */
+int
+slang_process_event(event_table_t *event_table, event_t *ev)
+{
+	int x, y;
+	event_t *pattern;
+
+	/* Never run scripts in an interpreter that failed to come up */
+	if (!__sl_initialized && do_init_slang() != 0) {
+		clulog(LOG_ERR,
+		       "[S/Lang] Interpreter initialization failed\n");
+		return -1;
+	}
+
+	/* Get the service list once before processing events */
+	if (!_service_list || !_service_list_len)
+		_service_list = get_service_names(&_service_list_len);
+
+	_stop_processing = 0;
+	for (x = 1; x <= event_table->max_prio; x++) {
+		list_for(&event_table->entries[x], pattern, y) {
+			if (event_match(pattern, ev))
+				slang_do_script(pattern, ev);
+			if (_stop_processing)
+				goto out;
+		}
+	}
+
+	/* Default level = 0 */
+	list_for(&event_table->entries[0], pattern, y) {
+		if (event_match(pattern, ev))
+			slang_do_script(pattern, ev);
+		if (_stop_processing)
+			goto out;
+	}
+
+out:
+	/* Free the service list */
+	if (_service_list) {
+		for(x = 0; x < _service_list_len; x++) {
+			free(_service_list[x]);
+		}
+		free(_service_list);
+		_service_list = NULL;
+		_service_list_len = 0;
+	}
+
+	return 0;
+}
--- cluster/rgmanager/src/daemons/Makefile 2007/11/26 21:46:27 1.14.2.4
+++ cluster/rgmanager/src/daemons/Makefile 2007/12/18 17:52:56 1.14.2.5
@@ -15,9 +15,9 @@
include ${top_srcdir}/make/defines.mk
INCLUDE += -I $(top_srcdir)/include -I $(top_srcdir)/../cman/lib -I $(top_srcdir)/../ccs/lib -I $(top_srcdir)/../dlm/lib
-INCLUDE += -I${incdir} -I/usr/include/libxml2
+INCLUDE += -I${incdir} -I/usr/include/libxml2 -I/usr/include/slang
-CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -DWRAP_THREADS
+CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -DWRAP_THREADS -Wall -Wextra
LDFLAGS+= -L ../clulib -L../../../cman/lib -L../../../ccs/lib -L../../../dlm/lib -L${libdir} -lclulib -lxml2 -lpthread -ldl -Wl,-wrap,pthread_create,-wrap,pthread_exit -rdynamic
TARGETS=clurgmgrd clurmtabd rg_test
@@ -37,10 +37,14 @@
clurgmgrd: rg_thread.o rg_locks.o main.o groups.o \
rg_queue.o rg_forward.o reslist.o \
- resrules.o restree.o fo_domain.o nodeevent.o \
- rg_event.o watchdog.o rg_state.o \
- restart_counter.o ../clulib/libclulib.a
- $(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm
+ resrules.o restree.o fo_domain.o \
+ rg_event.o watchdog.o rg_state.o event_config.o \
+ slang_event.o service_op.o restart_counter.o \
+ ../clulib/libclulib.a
+ $(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm -lslang
+
+rg_script_test: slang_event.o
+ $(CC) -o rg_script_test slang_event.o $(INCLUDE) $(CFLAGS) -lslang $(LDFLAGS)
#
# Our test program links against the local allocator so that
@@ -58,7 +62,7 @@
#
rg_test: rg_locks-noccs.o test-noccs.o reslist-noccs.o \
resrules-noccs.o restree-noccs.o fo_domain-noccs.o \
- restart_counter.o
+ event_config-noccs.o restart_counter.o
$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) -llalloc $(LDFLAGS) -lccs -lcman
clurmtabd: clurmtabd.o clurmtabd_lib.o
--- cluster/rgmanager/src/daemons/fo_domain.c 2007/11/26 21:46:27 1.11.2.1
+++ cluster/rgmanager/src/daemons/fo_domain.c 2007/12/18 17:52:56 1.11.2.2
@@ -34,6 +34,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <members.h>
+#include <sets.h>
//#define DEBUG
@@ -96,6 +97,23 @@
fodn->fdn_name = ret;
fodn->fdn_prio = 0;
+ snprintf(xpath, sizeof(xpath),
+ "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid",
+ ret);
+ if (ccs_get(ccsfd, xpath, &ret) != 0) {
+ clulog(LOG_WARNING, "Node %s has no nodeid attribute\n",
+ fodn->fdn_name);
+ fodn->fdn_nodeid = -1;
+ } else {
+ /* 64-bit-ism on rhel4? */
+ fodn->fdn_nodeid = atoi(ret);
+ }
+
+ /* Don't even bother getting priority if we're not ordered (it's set
+ to 0 above) */
+ if (!(domain->fd_flags & FOD_ORDERED))
+ return fodn;
+
snprintf(xpath, sizeof(xpath), "%s/failoverdomainnode[%d]/@priority",
base, idx);
if (ccs_get(ccsfd, xpath, &ret) != 0)
@@ -228,6 +246,11 @@
{
fod_t *fod;
fod_node_t *fodn = NULL;
+ /*
+ int x;
+ int *node_set = NULL;
+ int node_set_len = 0;
+ */
list_do(domains, fod) {
printf("Failover domain: %s\n", fod->fd_name);
@@ -245,9 +268,21 @@
}
list_do(&fod->fd_nodes, fodn) {
- printf(" Node %s (priority %d)\n",
- fodn->fdn_name, fodn->fdn_prio);
+ printf(" Node %s (id %d, priority %d)\n",
+ fodn->fdn_name, fodn->fdn_nodeid,
+ fodn->fdn_prio);
} while (!list_done(&fod->fd_nodes, fodn));
+
+ /*
+ node_domain_set(fod, &node_set, &node_set_len);
+ printf(" Failover Order = {");
+ for (x = 0; x < node_set_len; x++) {
+ printf(" %d ", node_set[x]);
+ }
+ free(node_set);
+ printf("}\n");
+ */
+
} while (!list_done(domains, fod));
}
@@ -313,6 +348,70 @@
}
+/*
+ * Build the list of node IDs of a failover domain in failover order.
+ *
+ * For an ordered domain, nodes with priority 1 .. 100 come first, lowest
+ * number first, with the nodes sharing a priority shuffled among
+ * themselves.  Nodes with priority 0 follow, shuffled as well.
+ *
+ * On success 0 is returned, *ret points to a malloc()ed array which the
+ * caller must free(), and *retlen holds the number of entries.  On
+ * allocation failure -1 is returned, *ret is NULL and *retlen is 0.
+ */
+int
+node_domain_set(fod_t *domain, int **ret, int *retlen)
+{
+	int x, i, j;
+	int *tmpset;
+	int ts_count;
+	size_t slots;
+
+	fod_node_t *fodn;
+
+	/* Count domain length */
+	list_for(&domain->fd_nodes, fodn, x) { }
+
+	/* Always request at least one slot: malloc(0) is allowed to return
+	   NULL, which would make an empty domain look like a failure */
+	slots = (x > 0) ? (size_t)x : 1;
+
+	*retlen = 0;
+	*ret = malloc(sizeof(int) * slots);
+	if (!(*ret))
+		return -1;
+
+	/* Test the pointer itself, not the (uninitialized) first element */
+	tmpset = malloc(sizeof(int) * slots);
+	if (!tmpset) {
+		free(*ret);
+		*ret = NULL;
+		return -1;
+	}
+
+	if (domain->fd_flags & FOD_ORDERED) {
+		for (i = 1; i <= 100; i++) {
+
+			ts_count = 0;
+			list_for(&domain->fd_nodes, fodn, x) {
+				if (fodn->fdn_prio == i) {
+					s_add(tmpset, &ts_count,
+					      fodn->fdn_nodeid);
+				}
+			}
+
+			if (!ts_count)
+				continue;
+
+			/* Shuffle stuff at this prio level */
+			if (ts_count > 1)
+				s_shuffle(tmpset, ts_count);
+			for (j = 0; j < ts_count; j++)
+				s_add(*ret, retlen, tmpset[j]);
+		}
+	}
+
+	/* Add unprioritized nodes */
+	ts_count = 0;
+	list_for(&domain->fd_nodes, fodn, x) {
+		if (!fodn->fdn_prio) {
+			s_add(tmpset, &ts_count,
+			      fodn->fdn_nodeid);
+		}
+	}
+
+	/* Shuffle stuff at this prio level */
+	if (ts_count > 1)
+		s_shuffle(tmpset, ts_count);
+	for (j = 0; j < ts_count; j++)
+		s_add(*ret, retlen, tmpset[j]);
+
+	/* The scratch set is only needed while building the result */
+	free(tmpset);
+
+	return 0;
+}
+
+
/**
* See if a given nodeid should start a specified service svcid.
*
--- cluster/rgmanager/src/daemons/groups.c 2007/11/26 21:46:27 1.25.2.13
+++ cluster/rgmanager/src/daemons/groups.c 2007/12/18 17:52:56 1.25.2.14
@@ -30,6 +30,7 @@
#include <list.h>
#include <reslist.h>
#include <assert.h>
+#include <event.h>
/* Use address field in this because we never use it internally,
and there is no extra space in the cman_node_t type.
@@ -38,6 +39,8 @@
#define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */
#define cn_svcexcl cn_address.cna_address[1]
+extern event_table_t *master_event_table;
+
static int config_version = 0;
static resource_t *_resources = NULL;
static resource_rule_t *_rules = NULL;
@@ -54,7 +57,7 @@
pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER;
void res_build_name(char *, size_t, resource_t *);
-int get_rg_state_local(char *, rg_state_t *);
+int group_migratory(char *groupname, int lock);
struct status_arg {
@@ -83,6 +86,32 @@
+/**
+ * Locked wrapper around node_domain_set(): look up a failover domain by
+ * name (case-insensitively) while holding resource_lock for reading and
+ * compute its failover order.
+ *
+ * @param domainname	Name of the failover domain to look up.
+ * @param ret		Passed through to node_domain_set().
+ * @param retlen	Passed through to node_domain_set().
+ * @param flags		Receives the fd_flags of the matching domain.
+ * @return		Result of node_domain_set(), or -1 if no domain
+ *			with that name exists.
+ */
int
+node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags)
+{
+	fod_t *cur, *match = NULL;
+	int result = -1;
+	int idx = 0;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	list_for(&_domains, cur, idx) {
+		if (strcasecmp(cur->fd_name, domainname) == 0) {
+			match = cur;
+			break;
+		}
+	}
+
+	if (match != NULL) {
+		result = node_domain_set(match, ret, retlen);
+		*flags = match->fd_flags;
+	}
+
+	pthread_rwlock_unlock(&resource_lock);
+
+	return result;
+}
+
+
+int
count_resource_groups(cluster_member_list_t *ml)
{
resource_t *res;
@@ -187,7 +216,7 @@
char rgname[64];
int x;
- list_for(&_tree, node, x) {
+ list_for(tree, node, x) {
res = node->rn_resource;
res_build_name(rgname, sizeof(rgname), res);
@@ -564,6 +593,60 @@
}
+/**
+ * Collect the names of all entries in the resource tree.
+ *
+ * The tree is walked under resource_lock (read).  Entries for which
+ * res_build_name() produces an empty string are skipped.
+ *
+ * @param len	If non-NULL, receives the number of names returned.
+ * @return	A malloc()ed, NULL-terminated array of strdup()ed names
+ *		(the caller frees each string and the array), or NULL if
+ *		an allocation failed.
+ */
+char **
+get_service_names(int *len)
+{
+	resource_node_t *node = NULL;
+	char **names = NULL;
+	char name[64];
+	int total = 0, ncopied = 0, i;
+	size_t bytes;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	/* First pass: count tree entries so the array can be sized */
+	list_do(&_tree, node) {
+		total++;
+	} while (!list_done(&_tree, node));
+
+	/* One extra slot keeps the array NULL-terminated */
+	bytes = sizeof(char *) * (total + 1);
+	names = malloc(bytes);
+	if (!names)
+		goto out_fail;
+	memset(names, 0, bytes);
+
+	/* Second pass: duplicate every non-empty name */
+	total = 0;
+	list_for(&_tree, node, total) {
+		res_build_name(name, sizeof(name), node->rn_resource);
+		if (name[0] == '\0')
+			continue;
+
+		names[ncopied] = strdup(name);
+		if (!names[ncopied])
+			goto out_fail;
+		ncopied++;
+	}
+
+	if (len)
+		*len = ncopied;
+	pthread_rwlock_unlock(&resource_lock);
+	return names;
+
+out_fail:
+	pthread_rwlock_unlock(&resource_lock);
+	/* Release whatever was duplicated before the failure */
+	for (i = 0; i < ncopied; i++)
+		free(names[i]);
+	if (names)
+		free(names);
+	return NULL;
+}
+
+
+
+
+
/**
* Called to decide what services to start locally during a node_event.
* Originally a part of node_event, it is now its own function to cut down
@@ -1054,6 +1137,14 @@
free(arg);
+ if (central_events_enabled()) {
+ /* Never call get_rg_state() (distributed) if
+ central events are enabled, otherwise we
+ might overwrite the rg state with 'stopped'
+ when it should be 'disabled' (e.g. autostart="0") */
+ fast = 1;
+ }
+
/* See if we have a slot... */
if (rg_inc_status() < 0) {
/* Too many outstanding status checks. try again later. */
@@ -1063,6 +1154,8 @@
pthread_exit(NULL);
}
+ /*send_master_state(ctx);*/
+
pthread_rwlock_rdlock(&resource_lock);
list_do(&_tree, node) {
@@ -1191,11 +1284,12 @@
Stop changed resources.
*/
void *
-q_status_checks(void *arg)
+q_status_checks(void __attribute__ ((unused)) *arg)
{
resource_node_t *curr;
rg_state_t svcblk;
char rg[64];
+ struct dlm_lksb lockp;
/* Only one status thread at a time, please! */
if (pthread_mutex_trylock(&status_mutex) != 0)
@@ -1209,7 +1303,13 @@
/* Local check - no one will make us take a service */
if (get_rg_state_local(rg, &svcblk) < 0) {
- continue;
+ if (rg_lock(rg, &lockp) != 0)
+ continue;
+ if (get_rg_state(rg, &svcblk) < 0) {
+ rg_unlock(&lockp);
+ continue;
+ }
+ rg_unlock(&lockp);
}
if (svcblk.rs_owner != my_id() ||
@@ -1430,7 +1530,7 @@
int
-check_config_update(void)
+check_config_update(int *new, int *old)
{
int newver = 0, fd, ret = 0;
char *val = NULL;
@@ -1450,6 +1550,8 @@
pthread_mutex_lock(&config_mutex);
if (newver && newver != config_version)
ret = 1;
+ if (new) *new = newver;
+ if (old) *old = config_version;
pthread_mutex_unlock(&config_mutex);
ccs_unlock(fd);
@@ -1473,12 +1575,14 @@
int
init_resource_groups(int reconfigure)
{
- int fd, x;
+ int fd, x, y, cnt;
+ event_table_t *evt = NULL;
resource_t *reslist = NULL, *res;
resource_rule_t *rulelist = NULL, *rule;
resource_node_t *tree = NULL;
fod_t *domains = NULL, *fod;
+ event_t *evp;
char *val;
if (reconfigure)
@@ -1539,6 +1643,24 @@
x = 0;
list_do(&domains, fod) { ++x; } while (!list_done(&domains, fod));
clulog(LOG_DEBUG, "%d domains defined\n", x);
+ construct_events(fd, &evt);
+ cnt = 0;
+ if (evt) {
+ for (x=0; x <= evt->max_prio; x++) {
+ if (!evt->entries[x])
+ continue;
+
+ y = 0;
+
+ list_do(&evt->entries[x], evp) {
+ ++y;
+ } while (!list_done(&evt->entries[x], evp));
+
+ cnt += y;
+ }
+ }
+ clulog(LOG_DEBUG, "%d events defined\n", x);
+
/* Reconfiguration done */
ccs_unlock(fd);
@@ -1567,6 +1689,9 @@
if (_domains)
deconstruct_domains(&_domains);
_domains = domains;
+ if (master_event_table)
+ deconstruct_events(&master_event_table);
+ master_event_table = evt;
pthread_rwlock_unlock(&resource_lock);
if (reconfigure) {
@@ -1608,6 +1733,60 @@
+/**
+ * Look up an attribute of a top-level resource (service) by reference.
+ *
+ * The lookup runs under resource_lock (read).
+ *
+ * @param rg_name	Reference of the root resource to search for.
+ * @param prop		Name of the attribute to fetch.
+ * @param buf		Output buffer.  Always zero-filled first; receives
+ *			the attribute value as a NUL-terminated (possibly
+ *			truncated) string, and stays empty if the resource
+ *			or attribute is not found.
+ * @param buflen	Size of buf in bytes.
+ * @return		Always 0; callers detect a missing attribute by
+ *			checking for an empty buf.
+ */
int
+get_service_property(char *rg_name, char *prop, char *buf, size_t buflen)
+{
+	int ret = 0;
+	resource_t *res;
+	char *val;
+
+	/* Nothing can be stored; also avoids buflen - 1 wrapping below */
+	if (!buf || !buflen)
+		return ret;
+
+	memset(buf, 0, buflen);
+
+	pthread_rwlock_rdlock(&resource_lock);
+	res = find_root_by_ref(&_resources, rg_name);
+	if (res) {
+		val = res_attr_value(res, prop);
+		if (val) {
+			/* Copy at most buflen - 1 bytes so the zero fill
+			   above always leaves a terminating NUL */
+			strncpy(buf, val, buflen - 1);
+		}
+	}
+	pthread_rwlock_unlock(&resource_lock);
+
+	return ret;
+}
+
+
+int
check_restart(char *rg_name)
{
resource_node_t *node;
--- cluster/rgmanager/src/daemons/main.c 2007/11/26 21:46:27 1.34.2.10
+++ cluster/rgmanager/src/daemons/main.c 2007/12/18 17:52:56 1.34.2.11
@@ -35,6 +35,7 @@
#include <rg_queue.h>
#include <malloc.h>
#include <cman-private.h>
+#include <event.h>
#define L_SHUTDOWN (1<<2)
#define L_SYS (1<<1)
@@ -54,9 +55,10 @@
void flag_shutdown(int sig);
void hard_exit(void);
int send_rg_states(msgctx_t *, int);
-int check_config_update(void);
+int check_config_update(int *, int *);
int svc_exists(char *);
int watchdog_init(void);
+int32_t master_event_callback(char *key, uint64_t viewno, void *data, uint32_t datalen);
int shutdown_pending = 0, running = 1, need_reconfigure = 0;
char debug = 0; /* XXX* */
@@ -65,11 +67,10 @@
static char *rgmanager_lsname = "rgmanager"; /* XXX default */
int next_node_id(cluster_member_list_t *membership, int me);
-int rg_event_q(char *svcName, uint32_t state, int owner);
void malloc_dump_table(FILE *, size_t, size_t);
void
-segfault(int sig)
+segfault(int __attribute__ ((unused)) sig)
{
char ow[64];
@@ -94,13 +95,20 @@
send_node_states(msgctx_t *ctx)
{
int x;
+ event_master_t master;
generic_msg_hdr hdr;
cluster_member_list_t *ml = member_list();
+ master.m_nodeid = 0;
+ event_master_info_cached(&master);
+
for (x = 0; x < ml->cml_count; x++) {
if (ml->cml_members[x].cn_member == 1) {
msg_send_simple(ctx, RG_STATUS_NODE,
- ml->cml_members[x].cn_nodeid, 0);
+ ml->cml_members[x].cn_nodeid,
+ (ml->cml_members[x].cn_nodeid &&
+ (ml->cml_members[x].cn_nodeid ==
+ (int)master.m_nodeid)));
}
}
msg_send_simple(ctx, RG_SUCCESS, 0, 0);
@@ -110,7 +118,7 @@
void
-flag_reconfigure(int sig)
+flag_reconfigure(int __attribute__ ((unused)) sig)
{
need_reconfigure++;
}
@@ -167,15 +175,25 @@
new_ml = get_member_list(h);
memb_mark_down(new_ml, 0);
- for (x = 0; x < new_ml->cml_count; x++) {
+ for(x=0; new_ml && x<new_ml->cml_count;x++) {
+ if (new_ml->cml_members[x].cn_nodeid == 0) {
+ new_ml->cml_members[x].cn_member = 0;
+ }
+ }
+
+ for (x = 0; new_ml && x < new_ml->cml_count; x++) {
- if (new_ml->cml_members[x].cn_member == 0)
+ if (new_ml->cml_members[x].cn_member == 0) {
+ printf("skipping %d - node not member\n",
+ new_ml->cml_members[x].cn_nodeid);
continue;
+ }
if (new_ml->cml_members[x].cn_nodeid == my_id())
continue;
#ifdef DEBUG
- printf("Checking for listening status of %d\n", new_ml->cml_members[x].cn_nodeid);
+ printf("Checking for listening status of %d\n",
+ new_ml->cml_members[x].cn_nodeid);
#endif
do {
@@ -187,6 +205,7 @@
clulog(LOG_DEBUG, "Node %d is not listening\n",
new_ml->cml_members[x].cn_nodeid);
new_ml->cml_members[x].cn_member = 0;
+ break;
} else if (quorate < 0) {
if (errno == ENOTCONN) {
new_ml->cml_members[x].cn_member = 0;
@@ -276,7 +295,9 @@
int
-lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen)
+lock_commit_cb(char __attribute__ ((unused)) *key,
+ uint64_t __attribute__ ((unused)) viewno,
+ void *data, uint32_t datalen)
{
char lockstate;
@@ -403,7 +424,7 @@
int
dispatch_msg(msgctx_t *ctx, int nodeid, int need_close)
{
- int ret = 0, sz = -1;
+ int ret = 0, sz = -1, nid;
char msgbuf[4096];
generic_msg_hdr *msg_hdr = (generic_msg_hdr *)msgbuf;
SmMessageSt *msg_sm = (SmMessageSt *)msgbuf;
@@ -412,7 +433,7 @@
/* Peek-a-boo */
sz = msg_receive(ctx, msg_hdr, sizeof(msgbuf), 1);
- if (sz < sizeof (generic_msg_hdr)) {
+ if (sz < (int)sizeof (generic_msg_hdr)) {
clulog(LOG_ERR,
"#37: Error receiving header from %d sz=%d CTX %p\n",
nodeid, sz, ctx);
@@ -422,7 +443,7 @@
if (sz < 0)
return -1;
- if (sz > sizeof(msgbuf)) {
+ if (sz > (int)sizeof(msgbuf)) {
raise(SIGSTOP);
}
@@ -441,7 +462,7 @@
goto out;
}
- if (msg_hdr->gh_length != sz) {
+ if ((int)msg_hdr->gh_length != sz) {
clulog(LOG_ERR, "#XX: Read size mismatch: %d %d\n",
ret, msg_hdr->gh_length);
goto out;
@@ -449,13 +470,13 @@
switch (msg_hdr->gh_command) {
case RG_STATUS:
- clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx);
+ //clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx);
if (send_rg_states(ctx, msg_hdr->gh_arg1) == 0)
need_close = 0;
break;
case RG_STATUS_NODE:
- clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx);
+ //clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx);
send_node_states(ctx);
break;
@@ -474,7 +495,7 @@
case RG_ACTION_REQUEST:
- if (sz < sizeof(msg_sm)) {
+ if (sz < (int)sizeof(msg_sm)) {
clulog(LOG_ERR,
"#39: Error receiving entire request (%d/%d)\n",
ret, (int)sizeof(msg_sm));
@@ -493,14 +514,37 @@
swab_SmMessageSt(msg_sm);
if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) <
- sizeof (SmMessageSt))
+ (int)sizeof (SmMessageSt))
clulog(LOG_ERR, "#40: Error replying to "
"action request.\n");
ret = -1;
goto out;
}
- /* Queue request */
+ if (central_events_enabled() &&
+ msg_sm->sm_hdr.gh_arg1 != RG_ACTION_MASTER) {
+
+ /* Centralized processing or request is from
+ clusvcadm */
+ nid = event_master();
+ if (nid != my_id()) {
+ /* Forward the message to the event master */
+ forward_message(ctx, msg_sm, nid);
+ } else {
+ /* for us: queue it */
+ user_event_q(msg_sm->sm_data.d_svcName,
+ msg_sm->sm_data.d_action,
+ msg_sm->sm_hdr.gh_arg1,
+ msg_sm->sm_hdr.gh_arg2,
+ msg_sm->sm_data.d_svcOwner,
+ ctx);
+ }
+
+ return 0;
+ }
+
+ /* Distributed processing and/or request is from master node
+ -- Queue request */
rt_enqueue_request(msg_sm->sm_data.d_svcName,
msg_sm->sm_data.d_action,
ctx, 0, msg_sm->sm_data.d_svcOwner,
@@ -510,7 +554,7 @@
case RG_EVENT:
/* Service event. Run a dependency check */
- if (sz < sizeof(msg_sm)) {
+ if (sz < (int)sizeof(msg_sm)) {
clulog(LOG_ERR,
"#39: Error receiving entire request (%d/%d)\n",
ret, (int)sizeof(msg_sm));
@@ -526,7 +570,8 @@
/* Send to our rg event handler */
rg_event_q(msg_sm->sm_data.d_svcName,
msg_sm->sm_data.d_action,
- msg_sm->sm_data.d_svcOwner);
+ msg_sm->sm_hdr.gh_arg1,
+ msg_sm->sm_hdr.gh_arg2);
break;
case RG_EXITING:
@@ -664,7 +709,7 @@
int
event_loop(msgctx_t *localctx, msgctx_t *clusterctx)
{
- int n, max, ret;
+ int n = 0, max, ret, oldver, newver;
fd_set rfds;
msgctx_t *newctx;
struct timeval tv;
@@ -733,10 +778,10 @@
if (!running)
return 0;
- if (need_reconfigure || check_config_update()) {
+ if (need_reconfigure || check_config_update(&oldver, &newver)) {
need_reconfigure = 0;
configure_rgmanager(-1, 0);
- init_resource_groups(1);
+ config_event_q(oldver, newver);
return 0;
}
@@ -755,7 +800,7 @@
void
-flag_shutdown(int sig)
+flag_shutdown(int __attribute__ ((unused)) sig)
{
shutdown_pending = 1;
}
@@ -781,7 +826,7 @@
void
-statedump(int sig)
+statedump(int __attribute__ ((unused)) sig)
{
signalled++;
}
@@ -818,8 +863,15 @@
}
if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
- if (!dbg)
- set_transition_throttling(atoi(v));
+ set_transition_throttling(atoi(v));
+ free(v);
+ }
+
+ if (ccs_get(ccsfd, "/cluster/rm/@central_processing", &v) == 0) {
+ set_central_events(atoi(v));
+ if (atoi(v))
+ clulog(LOG_NOTICE,
+ "Centralized Event Processing enabled\n");
free(v);
}
@@ -873,7 +925,7 @@
void *
-shutdown_thread(void *arg)
+shutdown_thread(void __attribute__ ((unused)) *arg)
{
rg_lockall(L_SYS|L_SHUTDOWN);
rg_doall(RG_STOP_EXITING, 1, NULL);
@@ -1013,6 +1065,7 @@
}
vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb);
+ vf_key_init("Transition-Master", 10, NULL, master_event_callback);
#endif
/*
--- cluster/rgmanager/src/daemons/resrules.c 2007/11/26 21:46:27 1.16.2.8
+++ cluster/rgmanager/src/daemons/resrules.c 2007/12/18 17:52:56 1.16.2.9
@@ -27,6 +27,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <list.h>
+#include <ctype.h>
#include <restart_counter.h>
#include <reslist.h>
#include <pthread.h>
--- cluster/rgmanager/src/daemons/rg_event.c 2007/07/24 13:53:08 1.1.2.1
+++ cluster/rgmanager/src/daemons/rg_event.c 2007/12/18 17:52:56 1.1.2.2
@@ -1,10 +1,9 @@
/*
- Copyright Red Hat, Inc. 2006
+ Copyright Red Hat, Inc. 2006-2007
This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
+ under the terms of the GNU General Public License version 2 as published
+ by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -23,81 +22,552 @@
#include <libcman.h>
#include <ccs.h>
#include <clulog.h>
-
-typedef struct __rge_q {
- list_head();
- char rg_name[128];
- uint32_t rg_state;
- int rg_owner;
-} rgevent_t;
+#include <lock.h>
+#include <event.h>
+#include <stdint.h>
+#include <vf.h>
+#include <members.h>
/**
* resource group event queue.
*/
-static rgevent_t *rg_ev_queue = NULL;
-static pthread_mutex_t rg_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_t rg_ev_thread = 0;
+static event_t *event_queue = NULL;
+#ifdef WRAP_LOCKS
+static pthread_mutex_t event_queue_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+static pthread_mutex_t mi_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
+static pthread_mutex_t event_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t mi_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+static pthread_t event_thread = 0;
+static int transition_throttling = 5;
+static int central_events = 0;
+
+extern int running;
+extern int shutdown_pending;
+static int _master = 0;
+static struct dlm_lksb _master_lock;
+static int _xid = 0;
+static event_master_t *mi = NULL;
+
+void hard_exit(void);
+int init_resource_groups(int);
+void flag_shutdown(int sig);
+void flag_reconfigure(int sig);
-void group_event(char *name, uint32_t state, int owner);
+event_table_t *master_event_table = NULL;
+
+
+/**
+ * Set the number of seconds a freshly spawned event thread sleeps before
+ * processing its first event.  Negative values are clamped to 0.
+ *
+ * @param nsecs	Delay in seconds.
+ */
+void
+set_transition_throttling(int nsecs)
+{
+	transition_throttling = (nsecs < 0) ? 0 : nsecs;
+}
+
+
+/**
+ Store the flag selecting centralized event processing.  The value is
+ saved as given; the rest of this file only tests it for zero/non-zero.
+
+ @param flag New value of the central_events setting.
+ */
+void
+set_central_events(int flag)
+{
+ central_events = flag;
+}
+
+
+/**
+ Report the current central_events setting.
+
+ @return The value last stored by set_central_events() (0 initially).
+ */
+int
+central_events_enabled(void)
+{
+ return central_events;
+}
+
+
+/**
+ Distributed node event processor: reacts to a cluster member going
+ up->down or down->up.  For the local node this initializes services
+ (node up) or exits uncleanly (node down); for remote nodes it
+ re-evaluates the resource groups so services can fail over.  For the
+ local-only node event processor, see slang_event.c
+
+ @param local		Non-zero if the event concerns this node.
+ @param nodeID		ID of the member which has come up/gone down.
+ @param nodeStatus	New state of the member in question.
+ @param clean		Not used by this function.
+ @see eval_groups
+ */
+void
+node_event(int local, int nodeID, int nodeStatus, int clean)
+{
+	if (!running)
+		return;
+
+	if (!local) {
+		/*
+		 * Nothing to do for events from other nodes if we are
+		 * not ready.
+		 */
+		if (!rg_initialized()) {
+			clulog(LOG_DEBUG, "Services not initialized.\n");
+			return;
+		}
+
+		eval_groups(0, nodeID, nodeStatus);
+		return;
+	}
+
+	/* Local Node Event */
+	if (nodeStatus == 0) {
+		clulog(LOG_ERR, "Exiting uncleanly\n");
+		hard_exit();
+	}
+
+	/* First local node-up: bring up the resource group configuration */
+	if (!rg_initialized() && init_resource_groups(0) != 0) {
+		clulog(LOG_ERR,
+		       "#36: Cannot initialize services\n");
+		hard_exit();
+	}
+
+	if (shutdown_pending) {
+		clulog(LOG_NOTICE, "Processing delayed exit signal\n");
+		running = 0;
+		return;
+	}
+
+	setup_signal(SIGINT, flag_shutdown);
+	setup_signal(SIGTERM, flag_shutdown);
+	setup_signal(SIGHUP, flag_reconfigure);
+
+	eval_groups(1, nodeID, 1);
+}
+/**
+ Ask CCS whether the configuration defines a fence device for a node.
+ This does not check to see if the node is in the fence domain.
+
+ @param nodeid	ID of the node to look up.
+ @return	1 if a fence device name is configured, or if ccsd
+		cannot be reached (fencing is then assumed); 0 otherwise.
+ */
+int
+node_has_fencing(int nodeid)
+{
+	char xpath[1024];
+	char *name = NULL;
+	int has_fencing;
+	int fd = ccs_connect();
+
+	if (fd < 0) {
+		clulog(LOG_ERR, "Unable to connect to ccsd; cannot handle"
+		       " node event!\n");
+		/* Assume node has fencing */
+		return 1;
+	}
+
+	snprintf(xpath, sizeof(xpath),
+		 "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]"
+		 "/fence/method/device/@name", nodeid);
+
+	/* A successful query means a device name exists for this node */
+	has_fencing = (ccs_get(fd, xpath, &name) == 0) ? 1 : 0;
+
+	if (name)
+		free(name);
+	ccs_disconnect(fd);
+	return has_fencing;
+}
+
+
+/**
+ Quick query to cman to see if a node has been fenced.
+
+ @param nodeid	ID of the node to query.
+ @return	The fenced flag reported by cman_get_fenceinfo(), or 0 if
+		cman could not be contacted or the query failed.
+ */
+int
+node_fenced(int nodeid)
+{
+	cman_handle_t ch;
+	int fenced = 0;
+	uint64_t fence_time;
+
+	ch = cman_init(NULL);
+	if (!ch) {
+		/* No handle: do not pass an invalid handle on to cman;
+		   report "not fenced", like any other failed query */
+		return 0;
+	}
+
+	if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0)
+		fenced = 0;
+
+	cman_finish(ch);
+
+	return fenced;
+}
+
+
+/**
+ Callback from view-formation when a commit occurs for the Transition-
+ Master key.  Validates the committed event_master_t and installs it
+ as the cached master information.
+
+ NOTE(review): on success this keeps the data pointer in mi and later
+ releases it with free(); confirm that vf hands ownership of the buffer
+ to the callback.
+
+ @param key	Key which was committed (unused).
+ @param viewno	View number of the commit (unused).
+ @param data	Committed payload; expected to be an event_master_t.
+ @param datalen	Size of data in bytes.
+ @return	0 if the master information was accepted, 1 if the
+		payload had the wrong size or magic.
+ */
+int32_t
+master_event_callback(char *key, uint64_t viewno,
+		      void *data, uint32_t datalen)
+{
+	event_master_t *m;
+
+	m = data;
+	if (datalen != (uint32_t)sizeof(*m)) {
+		clulog(LOG_ERR, "%s: wrong size\n", __FUNCTION__);
+		return 1;
+	}
+
+	swab_event_master_t(m);
+	if (m->m_magic != EVENT_MASTER_MAGIC) {
+		/* Distinct message so a magic mismatch is not mistaken
+		   for the size check above */
+		clulog(LOG_ERR, "%s: wrong magic\n", __FUNCTION__);
+		return 1;
+	}
+
+	if (m->m_nodeid == my_id())
+		clulog(LOG_DEBUG, "Master Commit: I am master\n");
+	else
+		clulog(LOG_DEBUG, "Master Commit: %d is master\n", m->m_nodeid);
+
+	/* Replace the cached master information */
+	pthread_mutex_lock(&mi_mutex);
+	if (mi)
+		free(mi);
+	mi = m;
+	pthread_mutex_unlock(&mi_mutex);
+
+	return 0;
+}
+
+
+/**
+ Read the Transition-Master key from vf and, if it holds a valid
+ event_master_t, cache it in mi.  This function does not try to become
+ master itself; event_master() does that.
+
+ @return	Node ID of the master found, or -1 if the key could not
+		be read or did not contain valid master information.
+ */
+static int
+find_master(void)
+{
+	event_master_t *masterinfo = NULL;
+	void *data = NULL;
+	uint32_t sz = 0;
+	cluster_member_list_t *m;
+	uint64_t vn;
+	int master_id = -1;
+
+	m = member_list();
+	if (vf_read(m, "Transition-Master", &vn,
+		    (void **)(&data), &sz) < 0) {
+		clulog(LOG_ERR, "Unable to discover master"
+		       " status\n");
+		masterinfo = NULL;
+	} else {
+		masterinfo = (event_master_t *)data;
+	}
+	free_member_list(m);
+
+	if (masterinfo && (sz >= sizeof(*masterinfo))) {
+		swab_event_master_t(masterinfo);
+		if (masterinfo->m_magic == EVENT_MASTER_MAGIC) {
+			clulog(LOG_DEBUG, "Master Locate: %d is master\n",
+			       masterinfo->m_nodeid);
+			pthread_mutex_lock(&mi_mutex);
+			if (mi)
+				free(mi);
+			mi = masterinfo;
+			/* Read the ID while still holding the lock: once
+			   it is dropped another thread may replace and
+			   free mi */
+			master_id = masterinfo->m_nodeid;
+			pthread_mutex_unlock(&mi_mutex);
+			/* Ownership moved to mi */
+			masterinfo = NULL;
+		}
+	}
+
+	/* Too short or bad magic: the buffer was not adopted, release it */
+	free(masterinfo);
+
+	return master_id;
+}
+
+
+/**
+ Return a copy of the cached event_master_t structure to the
+ caller.
+
+ @param mi_out	Receives the copy; left untouched on failure.
+ @return	0 on success.  -1 with errno = EINVAL if central event
+		processing is disabled or mi_out is NULL; -1 with
+		errno = ENOENT if no master information is cached yet.
+ */
+int
+event_master_info_cached(event_master_t *mi_out)
+{
+	if (!central_events || !mi_out) {
+		/* errno codes are positive; negating them produces a
+		   value no caller can match against EINVAL */
+		errno = EINVAL;
+		return -1;
+	}
+
+	pthread_mutex_lock(&mi_mutex);
+	if (!mi) {
+		pthread_mutex_unlock(&mi_mutex);
+		errno = ENOENT;
+		return -1;
+	}
+
+	memcpy(mi_out, mi, sizeof(*mi));
+	pthread_mutex_unlock(&mi_mutex);
+	return 0;
+}
+
+
+/**
+ Return the node ID of the master.  If none exists (or the cached
+ master is no longer online), try to take the "Transition-Master" lock,
+ become the master, advertise that through vf and return our own node ID.
+
+ @return	Node ID of the master, or -1 if it could not be determined.
+ */
+int
+event_master(void)
+{
+	cluster_member_list_t *m = NULL;
+	event_master_t masterinfo;
+	int master_id = -1;
+	int cached = 0;
+
+	/* We hold this forever. */
+	if (_master)
+		return my_id();
+
+	m = member_list();
+
+	/* Snapshot the cached master and release mi_mutex exactly once;
+	   unlocking it a second time on the "cached master is offline"
+	   path is undefined behaviour */
+	pthread_mutex_lock(&mi_mutex);
+	if (mi) {
+		master_id = mi->m_nodeid;
+		cached = 1;
+	}
+	pthread_mutex_unlock(&mi_mutex);
+
+	if (cached && memb_online(m, master_id)) {
+		//clulog(LOG_DEBUG, "%d is master\n", master_id);
+		goto out;
+	}
+
+	memset(&_master_lock, 0, sizeof(_master_lock));
+	if (clu_lock(LKM_EXMODE, &_master_lock, LKF_NOQUEUE,
+		     "Transition-Master") < 0) {
+		/* not us, find out who is master */
+		master_id = find_master();
+		goto out;
+	}
+
+	if (_master_lock.sb_status != 0) {
+		master_id = -1;
+		goto out;
+	}
+
+	_master = 1;
+
+	/* Advertise ourselves as master to the other nodes */
+	memset(&masterinfo, 0, sizeof(masterinfo));
+	masterinfo.m_magic = EVENT_MASTER_MAGIC;
+	masterinfo.m_nodeid = my_id();
+	masterinfo.m_master_time = (uint64_t)time(NULL);
+	swab_event_master_t(&masterinfo);
+
+	if (vf_write(m, VFF_IGN_CONN_ERRORS | VFF_RETRY,
+		     "Transition-Master", &masterinfo,
+		     sizeof(masterinfo)) < 0) {
+		clulog(LOG_ERR, "Unable to advertise master"
+		       " status to all nodes\n");
+	}
+
+	master_id = my_id();
+out:
+	free_member_list(m);
+	return master_id;
+}
+
+
+
+void group_event(char *name, uint32_t state, int owner);
+
+/**
+ Event handling function. This only stays around as long as
+ events are on the queue: each pass removes one event from
+ event_queue under event_queue_mutex, processes and frees it, and
+ the thread exits (clearing event_thread) once the queue is empty.
+
+ Processing per event:
+ - EVENT_CONFIG: reload the configuration via init_resource_groups(1).
+ - central_events set: the event is handed to slang_process_event()
+   only if event_master() reports this node as master; otherwise it
+   is simply freed.  The distributed handling below is skipped.
+ - EVENT_RG: group_event().
+ - EVENT_NODE: for an unclean node-down with fencing configured, wait
+   (polling every 2 seconds) until the node is fenced, then node_event().
+ - any other type is freed without further action.
+
+ @param arg Not referenced by this function.
+ */
void *
-rg_event_thread(void *arg)
+_event_thread_f(void *arg)
{
- rgevent_t *ev;
+ event_t *ev;
+ int notice = 0, count = 0;
while (1) {
- pthread_mutex_lock(&rg_queue_mutex);
- ev = rg_ev_queue;
+ pthread_mutex_lock(&event_queue_mutex);
+ ev = event_queue;
if (ev)
- list_remove(&rg_ev_queue, ev);
+ list_remove(&event_queue, ev);
else
break; /* We're outta here */
- pthread_mutex_unlock(&rg_queue_mutex);
- group_event(ev->rg_name, ev->rg_state, ev->rg_owner);
+ ++count;
+ /* Event thread usually doesn't hang around. When it's
+ spawned, sleep for this many seconds in order to let
+ some events queue up.
+ NOTE(review): this sleep runs before the unlock below, i.e.
+ with event_queue_mutex held, so insert_event() callers
+ block for the whole delay instead of queueing events.
+ Confirm whether the sleep should come after the unlock. */
+ if ((count==1) && transition_throttling && !central_events)
+ sleep(transition_throttling);
+
+ pthread_mutex_unlock(&event_queue_mutex);
+
+ if (ev->ev_type == EVENT_CONFIG) {
+ /*
+ clulog(LOG_NOTICE, "Config Event: %d -> %d\n",
+ ev->ev.config.cfg_oldversion,
+ ev->ev.config.cfg_version);
+ */
+ init_resource_groups(1);
+ free(ev);
+ continue;
+ }
+
+ if (central_events) {
+ /* If the master node died or there isn't
+ one yet, take the master lock. */
+ if (event_master() == my_id()) {
+ slang_process_event(master_event_table,
+ ev);
+ }
+ free(ev);
+ continue;
+ /* ALL OF THE CODE BELOW IS DISABLED
+ when using central_events */
+ }
+
+ if (ev->ev_type == EVENT_RG) {
+ /*
+ clulog(LOG_NOTICE, "RG Event: %s %s %d\n",
+ ev->ev.group.rg_name,
+ rg_state_str(ev->ev.group.rg_state),
+ ev->ev.group.rg_owner);
+ */
+ group_event(ev->ev.group.rg_name,
+ ev->ev.group.rg_state,
+ ev->ev.group.rg_owner);
+ } else if (ev->ev_type == EVENT_NODE) {
+ /*
+ clulog(LOG_NOTICE, "Node Event: %s %d %s %s\n",
+ ev->ev.node.ne_local?"Local":"Remote",
+ ev->ev.node.ne_nodeid,
+ ev->ev.node.ne_state?"UP":"DOWN",
+ ev->ev.node.ne_clean?"Clean":"Dirty")
+ */
+
+ if (ev->ev.node.ne_state == 0 &&
+ !ev->ev.node.ne_clean &&
+ node_has_fencing(ev->ev.node.ne_nodeid)) {
+ notice = 0;
+ while (!node_fenced(ev->ev.node.ne_nodeid)) {
+ if (!notice) {
+ notice = 1;
+ clulog(LOG_INFO, "Waiting for "
+ "node #%d to be fenced\n",
+ ev->ev.node.ne_nodeid);
+ }
+ sleep(2);
+ }
+
+ if (notice)
+ clulog(LOG_INFO, "Node #%d fenced; "
+ "continuing\n",
+ ev->ev.node.ne_nodeid);
+ }
+
+ node_event(ev->ev.node.ne_local,
+ ev->ev.node.ne_nodeid,
+ ev->ev.node.ne_state,
+ ev->ev.node.ne_clean);
+ }
free(ev);
}
+ if (!central_events || _master) {
+ clulog(LOG_DEBUG, "%d events processed\n", count);
+ }
/* Mutex held */
- rg_ev_thread = 0;
- pthread_mutex_unlock(&rg_queue_mutex);
+ event_thread = 0;
+ pthread_mutex_unlock(&event_queue_mutex);
pthread_exit(NULL);
}
-void
-rg_event_q(char *name, uint32_t state, int owner)
+/**
+ Put an event on event_queue and make sure an event thread exists to
+ process it.  Runs entirely under event_queue_mutex: the event gets the
+ next transaction number (_xid), is inserted into the queue and, if
+ event_thread is 0, a detached thread running _event_thread_f is
+ created.
+
+ NOTE(review): the return value of pthread_create() is not checked;
+ confirm what should happen to queued events if thread creation fails.
+
+ @param ev Event to queue.
+ */
+static void
+insert_event(event_t *ev)
{
- rgevent_t *ev;
pthread_attr_t attrs;
+ pthread_mutex_lock (&event_queue_mutex);
+ ev->ev_transaction = ++_xid;
+ list_insert(&event_queue, ev);
+ if (event_thread == 0) {
+ pthread_attr_init(&attrs);
+ pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+ pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+ pthread_attr_setstacksize(&attrs, 262144);
+
+ pthread_create(&event_thread, &attrs, _event_thread_f, NULL);
+ pthread_attr_destroy(&attrs);
+ }
+ pthread_mutex_unlock (&event_queue_mutex);
+}
+
+
+/**
+ Allocate a zero-filled event with type EVENT_NONE.  If malloc() fails
+ the allocation is retried once per second until it succeeds, so this
+ function never returns NULL.
+
+ @return Newly allocated event.
+ */
+static event_t *
+new_event(void)
+{
+ event_t *ev;
while (1) {
- ev = malloc(sizeof(rgevent_t));
+ ev = malloc(sizeof(*ev));
if (ev) {
break;
}
sleep(1);
}
-
memset(ev,0,sizeof(*ev));
+ ev->ev_type = EVENT_NONE;
- strncpy(ev->rg_name, name, 128);
- ev->rg_state = state;
- ev->rg_owner = owner;
-
- pthread_mutex_lock (&rg_queue_mutex);
- list_insert(&rg_ev_queue, ev);
- if (rg_ev_thread == 0) {
- pthread_attr_init(&attrs);
- pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
- pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
- pthread_attr_setstacksize(&attrs, 262144);
+ return ev;
+}
- pthread_create(&rg_ev_thread, &attrs, rg_event_thread, NULL);
- pthread_attr_destroy(&attrs);
- }
- pthread_mutex_unlock (&rg_queue_mutex);
+
+/**
+ * Queue a resource group (service) event for the event thread.
+ *
+ * @param name	Resource group name; copied into the event, truncated to
+ *		127 bytes.
+ * @param state	New state of the resource group.
+ * @param owner	Current owner of the resource group.
+ * @param last	Last owner of the resource group.
+ */
+void
+rg_event_q(char *name, uint32_t state, int owner, int last)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_RG;
+
+	/* new_event() returns zero-filled memory, so stopping one byte
+	   short of the 128-byte bound keeps the name NUL-terminated
+	   even when it has to be truncated */
+	strncpy(ev->ev.group.rg_name, name, 127);
+	ev->ev.group.rg_state = state;
+	ev->ev.group.rg_owner = owner;
+	ev->ev.group.rg_last_owner = last;
+
+	insert_event(ev);
+}
+
+
+/**
+ * Queue a node up/down event for the event thread.
+ *
+ * @param local		Non-zero if the event concerns the local node.
+ * @param nodeID	ID of the node which changed state.
+ * @param state		New state of the node.
+ * @param clean		Stored as the event's ne_clean flag.
+ */
+void
+node_event_q(int local, int nodeID, int state, int clean)
+{
+	event_t *event = new_event();
+
+	event->ev_type = EVENT_NODE;
+
+	event->ev.node.ne_local = local;
+	event->ev.node.ne_nodeid = nodeID;
+	event->ev.node.ne_state = state;
+	event->ev.node.ne_clean = clean;
+
+	insert_event(event);
}
+
+
+/**
+ * Queue a configuration-change event for the event thread.
+ *
+ * @param old_version	Configuration version before the change.
+ * @param new_version	Configuration version after the change.
+ */
+void
+config_event_q(int old_version, int new_version)
+{
+	event_t *event = new_event();
+
+	event->ev_type = EVENT_CONFIG;
+
+	event->ev.config.cfg_oldversion = old_version;
+	event->ev.config.cfg_version = new_version;
+
+	insert_event(event);
+}
+
+/**
+ * Queue a user request as an event for the event thread.
+ *
+ * @param svc		Service name; copied into the event, truncated to
+ *			fit u_name.
+ * @param request	Requested action.
+ * @param arg1		First request argument.
+ * @param arg2		Second request argument.
+ * @param target	Target stored with the request.
+ * @param ctx		Message context stored in the event.
+ *			NOTE(review): presumably used later to answer
+ *			the requester - confirm who closes and frees it.
+ */
+void
+user_event_q(char *svc, int request,
+	     int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_USER;
+	/* new_event() returns zero-filled memory; leaving the last byte
+	   untouched keeps u_name NUL-terminated on truncation */
+	strncpy(ev->ev.user.u_name, svc, sizeof(ev->ev.user.u_name) - 1);
+	ev->ev.user.u_request = request;
+	ev->ev.user.u_arg1 = arg1;
+	ev->ev.user.u_arg2 = arg2;
+	ev->ev.user.u_target = target;
+	ev->ev.user.u_ctx = ctx;
+	insert_event(ev);
+}
+
--- cluster/rgmanager/src/daemons/rg_forward.c 2007/08/02 14:46:51 1.8.2.3
+++ cluster/rgmanager/src/daemons/rg_forward.c 2007/12/18 17:52:56 1.8.2.4
@@ -27,11 +27,21 @@
#include <members.h>
+/* Argument block handed from forward_message() to forwarding_thread_v2() */
+struct fw_message {
+ msgctx_t *ctx; /* context of the original requester; the reply goes here */
+ SmMessageSt msg; /* private copy of the request being forwarded */
+ int nodeid; /* node the request is forwarded to */
+};
+
+
void
-build_message(SmMessageSt *msgp, int action, char *svcName, int target)
+build_message(SmMessageSt *msgp, int action, char *svcName, int target,
+ int arg1, int arg2)
{
msgp->sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
msgp->sm_hdr.gh_command = RG_ACTION_REQUEST;
+ msgp->sm_hdr.gh_arg1 = arg1;
+ msgp->sm_hdr.gh_arg2 = arg2;
msgp->sm_hdr.gh_length = sizeof(*msgp);
msgp->sm_data.d_action = action;
strncpy(msgp->sm_data.d_svcName, svcName,
@@ -90,7 +100,8 @@
}
/* Construct message */
- build_message(&msg, req->rr_request, req->rr_group, req->rr_target);
+ build_message(&msg, req->rr_request, req->rr_group, req->rr_target,
+ req->rr_arg0, req->rr_arg1);
if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0) {
clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
@@ -166,3 +177,121 @@
pthread_attr_destroy(&attrs);
}
+
+
+/**
+ * Thread body: forward a service-manager request to another node and
+ * relay that node's answer to the original requester.
+ *
+ * Takes ownership of the struct fw_message passed in arg and of the
+ * requester context stored in it; both are released before the thread
+ * exits.  If the request cannot be delivered or no complete reply
+ * arrives, RG_EAGAIN is reported to the requester (RG_ENODE if the
+ * target node went offline while waiting).
+ *
+ * @param arg	Pointer to a malloc()ed struct fw_message.
+ * @return	Does not return; the thread ends via pthread_exit(NULL).
+ */
+void *
+forwarding_thread_v2(void *arg)
+{
+	msgctx_t *ctx = NULL, *resp_ctx = NULL;
+	cluster_member_list_t *m = NULL;
+	SmMessageSt *msgp = NULL, msg;
+	int response_code = RG_EAGAIN, ret, target = -1;
+	int retries = 0, action;
+	struct fw_message *fwmsg = (struct fw_message *)arg;
+
+	msgp = &fwmsg->msg;
+	resp_ctx = fwmsg->ctx;
+	target = fwmsg->nodeid;
+	/* Remember the action before the request is byte-swapped for
+	   sending; the reply must carry the un-swapped value */
+	action = msgp->sm_data.d_action;
+
+	clulog(LOG_DEBUG, "FW: Forwarding SM request to %d\n",
+	       target);
+
+	ctx = msg_new_ctx();
+	if (ctx == NULL) {
+		clulog(LOG_DEBUG, "FW: Failed to allocate socket context: %s\n",
+		       strerror(errno));
+		goto out_fail;
+	}
+	if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 10) < 0) {
+		clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+	/* swap + send */
+	swab_SmMessageSt(msgp);
+	/* Compare as int: against an unsigned size a negative error
+	   return would convert to a huge value and look like success */
+	if (msg_send(ctx, msgp, sizeof(*msgp)) < (int)sizeof(*msgp)) {
+		clulog(LOG_DEBUG, "FW: Failed to send message to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+	/*
+	 * Ok, we're forwarding a message to another node.  Keep tabs on
+	 * the node to make sure it doesn't die.  Basically, wake up every
+	 * now and again to make sure it's still online.  If it isn't, send
+	 * a response back to the caller.
+	 */
+	do {
+		ret = msg_receive(ctx, &msg, sizeof(msg), 10);
+		if (ret >= (int)sizeof(msg))
+			break;
+
+		if (ret < 0 && errno == ETIMEDOUT) {
+			m = member_list();
+			if (!memb_online(m, target)) {
+				response_code = RG_ENODE;
+				goto out_fail;
+			}
+			free_member_list(m);
+			m = NULL;
+			continue;
+		}
+
+		if (ret == 0)
+			continue;
+
+		break;
+	} while (++retries < 60); /* old 600 second rule */
+
+	/* Only trust msg if a complete reply was actually received */
+	if (ret >= (int)sizeof(msg)) {
+		swab_SmMessageSt(&msg);
+
+		response_code = msg.sm_data.d_ret;
+		target = msg.sm_data.d_svcOwner;
+	}
+
+out_fail:
+	if (resp_ctx) {
+		send_ret(resp_ctx, msgp->sm_data.d_svcName, response_code,
+			 action, target);
+		msg_close(resp_ctx);
+		msg_free_ctx(resp_ctx);
+	}
+
+	if (ctx) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+	if (m)
+		free_member_list(m);
+
+	/* msgp points into fwmsg, so release it only after the reply
+	   above has been built */
+	free(fwmsg);
+
+	pthread_exit(NULL);
+}
+
+
+/**
+ * Forward a service-manager request to another node in the background.
+ *
+ * The message is copied and a detached forwarding_thread_v2 is started
+ * to deliver it and relay the reply to ctx.  Ownership of ctx passes to
+ * this function: it is released by the forwarding thread or, if the
+ * thread cannot be set up, closed and freed here without a reply.
+ *
+ * @param ctx		Context of the requester.
+ * @param msgp		Request to forward; sizeof(SmMessageSt) bytes are
+ *			copied from it.
+ * @param nodeid	Node to forward the request to.
+ */
+void
+forward_message(msgctx_t *ctx, void *msgp, int nodeid)
+{
+	pthread_t newthread;
+	pthread_attr_t attrs;
+	struct fw_message *fwmsg;
+
+	fwmsg = malloc(sizeof(struct fw_message));
+	if (!fwmsg) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+		return;
+	}
+
+	memcpy(&fwmsg->msg, msgp, sizeof(fwmsg->msg));
+	fwmsg->ctx = ctx;
+	fwmsg->nodeid = nodeid;
+
+	pthread_attr_init(&attrs);
+	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+	pthread_attr_setstacksize(&attrs, 262144);
+
+	if (pthread_create(&newthread, &attrs, forwarding_thread_v2,
+			   fwmsg) != 0) {
+		/* The thread never ran, so nothing else will release
+		   these; clean up as in the allocation failure above */
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+		free(fwmsg);
+	}
+	pthread_attr_destroy(&attrs);
+}
--- cluster/rgmanager/src/daemons/rg_state.c 2007/11/26 21:46:27 1.24.2.14
+++ cluster/rgmanager/src/daemons/rg_state.c 2007/12/18 17:52:56 1.24.2.15
@@ -36,6 +36,7 @@
#include <rg_queue.h>
#include <msgsimple.h>
#include <res-ocf.h>
+#include <event.h>
/* XXX - copied :( */
#define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */
@@ -86,8 +87,24 @@
}
+/**
+ * Strip a leading "service:" prefix from a resource group name.
+ *
+ * @param svcName	Name to examine; it is not modified.
+ * @return		Pointer into svcName just past "service:" if the
+ *			name starts with exactly that prefix, otherwise
+ *			svcName itself.
+ */
+char *
+c_name(char *svcName)
+{
+	static const char prefix[] = "service:";
+	const size_t prefix_len = sizeof(prefix) - 1;
+
+	/* A match means the first ':' follows the literal "service" */
+	if (strncmp(svcName, prefix, prefix_len) == 0)
+		return svcName + prefix_len;
+
+	return svcName;
+}
+
+
void
-broadcast_event(char *svcName, uint32_t state)
+broadcast_event(char *svcName, uint32_t state, int owner, int last)
{
SmMessageSt msgp;
msgctx_t everyone;
@@ -95,10 +112,12 @@
msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
msgp.sm_hdr.gh_command = RG_EVENT;
msgp.sm_hdr.gh_length = sizeof(msgp);
+ msgp.sm_hdr.gh_arg1 = owner;
+ msgp.sm_hdr.gh_arg2 = last;
msgp.sm_data.d_action = state;
strncpy(msgp.sm_data.d_svcName, svcName,
sizeof(msgp.sm_data.d_svcName));
- msgp.sm_data.d_svcOwner = 0;
+ msgp.sm_data.d_svcOwner = owner;
msgp.sm_data.d_ret = 0;
swab_SmMessageSt(&msgp);
@@ -201,7 +220,7 @@
void
-send_ret(msgctx_t *ctx, char *name, int ret, int orig_request)
+send_ret(msgctx_t *ctx, char *name, int ret, int orig_request, int new_owner)
{
SmMessageSt msg, *msgp = &msg;
if (!ctx)
@@ -213,7 +232,9 @@
msgp->sm_data.d_action = orig_request;
strncpy(msgp->sm_data.d_svcName, name,
sizeof(msgp->sm_data.d_svcName));
- msgp->sm_data.d_svcOwner = my_id(); /* XXX Broken */
+ if (!new_owner)
+ new_owner = my_id();
+ msgp->sm_data.d_svcOwner = new_owner; /* XXX Broken */
msgp->sm_data.d_ret = ret;
swab_SmMessageSt(msgp);
@@ -343,6 +364,7 @@
return 0;
#else
membership = member_list();
+
ret = vf_read(membership, res, &viewno, &data, &datalen);
if (ret != VFR_OK || datalen == 0) {
@@ -652,7 +674,7 @@
/*
* Starting failed service...
*/
- if (req == RG_START_RECOVER) {
+ if (req == RG_START_RECOVER || central_events_enabled()) {
clulog(LOG_NOTICE,
"Recovering failed service %s\n",
svcName);
@@ -684,7 +706,7 @@
case RG_STATE_DISABLED:
case RG_STATE_UNINITIALIZED:
- if (req == RG_ENABLE) {
+ if (req == RG_ENABLE || req == RG_START_REMOTE) {
/* Don't actually enable if the RG is locked! */
if (rg_locked()) {
ret = 3;
@@ -808,7 +830,8 @@
"Service %s started\n",
svcName);
- broadcast_event(svcName, RG_STATE_STARTED);
+ broadcast_event(svcName, RG_STATE_STARTED, svcStatus.rs_owner,
+ svcStatus.rs_last_owner);
} else {
clulog(LOG_WARNING,
"#68: Failed to start %s; return value: %d\n",
@@ -1264,8 +1287,8 @@
clulog(LOG_NOTICE, "Stopping service %s\n", svcName);
- if (recover)
- svcStatus.rs_state = RG_STATE_ERROR;
+ if (recover)
+ svcStatus.rs_state = RG_STATE_ERROR;
else
svcStatus.rs_state = RG_STATE_STOPPING;
svcStatus.rs_transition = (uint64_t)time(NULL);
@@ -1347,7 +1370,7 @@
}
rg_unlock(&lockp);
- broadcast_event(svcName, newstate);
+ broadcast_event(svcName, newstate, -1, svcStatus.rs_last_owner);
return 0;
}
@@ -1428,7 +1451,8 @@
}
rg_unlock(&lockp);
- broadcast_event(svcName, RG_STATE_FAILED);
+ broadcast_event(svcName, RG_STATE_FAILED, -1,
+ svcStatus.rs_last_owner);
return 0;
}
@@ -1437,8 +1461,8 @@
/*
* Send a message to the target node to start the service.
*/
-static int
-relocate_service(char *svcName, int request, uint32_t target)
+int
+svc_start_remote(char *svcName, int request, uint32_t target)
{
SmMessageSt msg_relo;
int msg_ret;
@@ -1448,6 +1472,8 @@
/* Build the message header */
msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
msg_relo.sm_hdr.gh_command = RG_ACTION_REQUEST;
+ /* XXX XXX */
+ msg_relo.sm_hdr.gh_arg1 = RG_ACTION_MASTER;
msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt);
msg_relo.sm_data.d_action = request;
strncpy(msg_relo.sm_data.d_svcName, svcName,
@@ -1470,13 +1496,13 @@
if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) <
sizeof (SmMessageSt)) {
clulog(LOG_ERR,
- "#59: Error sending relocate request to member #%d\n",
+ "#59: Error sending remote-start request to member #%d\n",
target);
msg_close(&ctx);
return -1;
}
- clulog(LOG_DEBUG, "Sent relocate request to %d\n", (int)target);
+ clulog(LOG_DEBUG, "Sent remote-start request to %d\n", (int)target);
/* Check the response */
do {
@@ -1649,7 +1675,7 @@
* It's legal to start the service on the given
* node. Try to do so.
*/
- if (relocate_service(svcName, request, target) == 0) {
+ if (svc_start_remote(svcName, request, target) == 0) {
*new_owner = target;
/*
* Great! We're done...
@@ -1679,7 +1705,7 @@
if (target == me)
goto exhausted;
- ret = relocate_service(svcName, request, target);
+ ret = svc_start_remote(svcName, request, target);
switch (ret) {
case RG_ERUN:
/* Someone stole the service while we were
@@ -1932,7 +1958,7 @@
if (check_exclusive_resources(membership, svcName) != 0) {
free_member_list(membership);
pthread_mutex_unlock(&exclusive_mutex);
- return RG_EFAIL;
+ return RG_EEXCL;
}
}
free_member_list(membership);
@@ -2013,7 +2039,7 @@
ret = RG_EFAIL;
goto out;
} else {
- ret = relocate_service(svcName, RG_START_REMOTE, target);
+ ret = svc_start_remote(svcName, RG_START_REMOTE, target);
}
switch(ret) {
@@ -2033,7 +2059,7 @@
default:
clulog(LOG_ERR,
"#6X: Invalid reply [%d] from member %d during"
- " relocate operation!\n", ret, target);
+ " operation!\n", ret, target);
}
}
--- cluster/rgmanager/src/daemons/rg_thread.c 2007/07/24 13:58:47 1.15.2.9
+++ cluster/rgmanager/src/daemons/rg_thread.c 2007/12/18 17:52:56 1.15.2.10
@@ -16,12 +16,12 @@
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
+#include <message.h>
#include <resgroup.h>
#include <rg_locks.h>
#include <gettid.h>
#include <rg_queue.h>
#include <assert.h>
-#include <message.h>
/**
* Resource thread list entry.
@@ -54,6 +54,7 @@
int rt_enqueue_request(const char *resgroupname, int request,
msgctx_t *response_ctx, int max, uint32_t target,
int arg0, int arg1);
+int central_events_enabled(void);
/**
@@ -446,6 +447,11 @@
error = svc_stop(myname, RG_STOP_RECOVER);
if (error == 0) {
+ /* Stop generates an event - whatever the
+ result. If central events are enabled
+ don't bother trying to recover */
+ if (central_events_enabled())
+ break;
error = handle_recover_req(myname, &newowner);
if (error == 0)
ret = RG_SUCCESS;
@@ -678,7 +684,7 @@
} else {
if (max) {
list_do(resgroup->rt_queue, curr) {
- if (curr->rr_request == request)
+ if ((int)curr->rr_request == request)
count++;
} while (!list_done(resgroup->rt_queue, curr));
@@ -701,7 +707,7 @@
case RG_START:
case RG_ENABLE:
send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
- request);
+ request, 0);
msg_close(response_ctx);
msg_free_ctx(response_ctx);
break;
--- cluster/rgmanager/src/daemons/test.c 2007/11/26 21:46:27 1.6.2.6
+++ cluster/rgmanager/src/daemons/test.c 2007/12/18 17:52:56 1.6.2.7
@@ -28,6 +28,7 @@
#include <restart_counter.h>
#include <reslist.h>
#include <pthread.h>
+#include <event.h>
#ifndef NO_CCS
#error "Can not be built with CCS support."
@@ -131,6 +132,7 @@
resource_t *reslist = NULL, *curres;
resource_node_t *tree = NULL, *tmp, *rn = NULL;
int ccsfd, ret = 0, rules = 0;
+ event_table_t *events = NULL;
fprintf(stderr,"Running in test mode.\n");
@@ -143,6 +145,7 @@
load_resource_rules(agentpath, &rulelist);
construct_domains(ccsfd, &domains);
+ construct_events(ccsfd, &events);
load_resources(ccsfd, &reslist, &rulelist);
build_resource_tree(ccsfd, &tree, &rulelist, &reslist);
@@ -177,6 +180,11 @@
printf("=== Failover Domains ===\n");
print_domains(&domains);
}
+
+ if (events) {
+ printf("=== Event Triggers ===\n");
+ print_events(events);
+ }
}
ccs_unlock(ccsfd);
@@ -247,6 +255,7 @@
}
out:
+ deconstruct_events(&events);
deconstruct_domains(&domains);
destroy_resource_tree(&tree);
destroy_resources(&reslist);
--- cluster/rgmanager/src/resources/default_event_script.sl 2007/12/04 21:59:54 1.1.2.1
+++ cluster/rgmanager/src/resources/default_event_script.sl 2007/12/18 17:52:56 1.1.2.2
@@ -192,7 +192,8 @@
}
(owner, state) = service_status(services[x]);
- if ((service_state == "started") and (owner < 0)) {
+ if ((service_state == "started") and (owner < 0) and
+ (state == "stopped")) {
info("Dependency met; starting ", services[x]);
nodes = allowed_nodes(services[x]);
()=move_or_start(services[x], nodes);
@@ -245,6 +246,10 @@
if (user_target > 0) {
for (x = 0; x < length(nodes); x++) {
+ %
+ % Put the preferred node at the front of the
+ % list for a user-relocate operation
+ %
if (nodes[x] == user_target) {
reordered = union(user_target, nodes);
nodes = reordered;
@@ -262,6 +267,13 @@
if (service_stop(service_name) < 0) {
return ERR_ABORT;
}
+
+ %
+ % The current owner shouldn't be the default
+ % for a relocate operation
+ %
+ reordered = subtract(nodes, owner);
+ nodes = union(reordered, owner);
}
ret = move_or_start(service_name, nodes);
@@ -275,7 +287,10 @@
ret = service_stop(service_name);
}
+
+ %
% todo - migrate
+ %
return ret;
}
--- cluster/rgmanager/src/resources/service.sh 2007/11/30 19:44:34 1.7.2.8
+++ cluster/rgmanager/src/resources/service.sh 2007/12/18 17:52:56 1.7.2.9
@@ -222,6 +222,7 @@
#
case $1 in
start)
+ [ -d "/var/run/cluster/rgmanager" ] && touch "/var/run/cluster/rgmanager/$OCF_RESOURCE_INSTANCE"
#
# XXX If this is set, we kill lockd. If there is no
# child IP address, then clients will NOT get the reclaim
@@ -236,6 +237,7 @@
exit 0
;;
stop)
+ [ -d "/var/run/cluster/rgmanager" ] && rm -f "/var/run/cluster/rgmanager/$OCF_RESOURCE_INSTANCE"
exit 0
;;
recover|restart)
--- cluster/rgmanager/src/utils/clustat.c 2007/12/10 18:24:12 1.25.2.9
+++ cluster/rgmanager/src/utils/clustat.c 2007/12/18 17:52:56 1.25.2.10
@@ -472,7 +472,8 @@
if (rs->rs_state == RG_STATE_STOPPED ||
rs->rs_state == RG_STATE_DISABLED ||
rs->rs_state == RG_STATE_ERROR ||
- rs->rs_state == RG_STATE_FAILED) {
+ rs->rs_state == RG_STATE_FAILED ||
+ rs->rs_state == RG_STATE_UNINITIALIZED) {
snprintf(owner, sizeof(owner)-1, "(%-.*s)", nodesize-2,
my_memb_id_to_name(members, rs->rs_last_owner));
More information about the Cluster-devel
mailing list