[Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ...
lhh at sourceware.org
Thu May 31 18:58:47 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-05-31 18:58:46
Modified files:
rgmanager : ChangeLog
rgmanager/include: reslist.h
rgmanager/src/daemons: groups.c resrules.c restree.c
rgmanager/src/resources: script.sh
Log message:
Fix bugzilla #229650; implement __independent_subtree feature
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.9&r2=1.31.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.2&r2=1.15.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.5&r2=1.25.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.4&r2=1.16.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.3&r2=1.23.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1
--- cluster/rgmanager/ChangeLog 2007/05/31 18:38:44 1.31.2.9
+++ cluster/rgmanager/ChangeLog 2007/05/31 18:58:46 1.31.2.10
@@ -1,6 +1,8 @@
2007-05-31 Lon Hohberger <lhh at redhat.com>
* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
in /usr/share/cluster when processing resource rules
+ * src/daemons/restree.c, src/daemons/groups.c, include/reslist.h:
+ Implement independent subtrees, per bug #229650
2007-05-22 Lon Hohberger <lhh at redhat.com>
* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h 2007/03/23 00:06:34 1.15.2.2
+++ cluster/rgmanager/include/reslist.h 2007/05/31 18:58:46 1.15.2.3
@@ -35,6 +35,8 @@
#define RF_NEEDSTART (1<<2) /** Used when adding/changing resources */
#define RF_NEEDSTOP (1<<3) /** Used when deleting/changing resources */
#define RF_COMMON (1<<4) /** " */
+#define RF_INDEPENDENT (1<<5) /** Define this for a resource if it is
+ otherwise an independent subtree */
#define RES_STOPPED (0)
#define RES_STARTED (1)
@@ -56,10 +58,10 @@
typedef struct _resource_attribute {
- int ra_flags;
- /* XXX possible alignment problem on ia64 */
char *ra_name;
char *ra_value;
+ int ra_flags;
+ int _pad_;
} resource_attr_t;
@@ -78,6 +80,7 @@
time_t ra_last;
time_t ra_interval;
int ra_depth;
+ int _pad_;
} resource_act_t;
--- cluster/rgmanager/src/daemons/groups.c 2007/05/10 16:23:43 1.25.2.5
+++ cluster/rgmanager/src/daemons/groups.c 2007/05/31 18:58:46 1.25.2.6
@@ -813,6 +813,7 @@
}
pthread_rwlock_unlock(&resource_lock);
+#if 0
/*
Do NOT return error codes if we failed to stop for one of these
reasons. It didn't start, either, so it's safe to assume that
@@ -830,6 +831,7 @@
break;
}
}
+#endif
return ret;
}
--- cluster/rgmanager/src/daemons/resrules.c 2007/05/31 18:37:50 1.16.2.4
+++ cluster/rgmanager/src/daemons/resrules.c 2007/05/31 18:58:46 1.16.2.5
@@ -262,6 +262,7 @@
acts[0].ra_depth = depth;
acts[0].ra_timeout = timeout;
acts[0].ra_interval = interval;
+ acts[0].ra_last = 0;
acts[1].ra_name = NULL;
*actsp = acts;
@@ -271,7 +272,7 @@
for (x = 0; acts[x].ra_name; x++) {
if (!strcmp(acts[x].ra_name, name) &&
(depth == acts[x].ra_depth || depth == -1)) {
- printf("Replacing action '%s' depth %d: ",
+ fprintf(stderr, "Replacing action '%s' depth %d: ",
name, acts[x].ra_depth);
if (timeout >= 0) {
printf("timeout: %d->%d ",
@@ -306,6 +307,7 @@
acts[x].ra_depth = depth;
acts[x].ra_timeout = timeout;
acts[x].ra_interval = interval;
+ acts[x].ra_last = 0;
acts[x+1].ra_name = NULL;
--- cluster/rgmanager/src/daemons/restree.c 2007/05/03 15:14:16 1.23.2.3
+++ cluster/rgmanager/src/daemons/restree.c 2007/05/31 18:58:46 1.23.2.4
@@ -39,6 +39,9 @@
void malloc_zap_mutex(void);
#endif
+#define FL_FAILURE 0x1
+#define FL_RECOVERABLE 0x2
+
/* XXX from resrules.c */
int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
node->rn_resource = curres;
node->rn_state = RES_STOPPED;
node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+ snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+ if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+ if (conf_get(tok, &ref) == 0) {
+#endif
+ if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+ node->rn_flags |= RF_INDEPENDENT;
+ free(ref);
+ }
+
+
curres->r_refs++;
*newnode = node;
@@ -718,7 +734,6 @@
resource_rule_t **rulelist,
resource_t **reslist)
{
- resource_rule_t *curr;
resource_node_t *root = NULL;
char tok[512];
@@ -777,6 +792,8 @@
printf("NEEDSTART ");
if (node->rn_flags & RF_COMMON)
printf("COMMON ");
+ if (node->rn_flags & RF_INDEPENDENT)
+ printf("INDEPENDENT ");
printf("]");
}
printf(" {\n");
@@ -838,10 +855,11 @@
#endif
/* Do op on all children at our level */
- rv += _res_op(&node->rn_child, first,
+ rv |= _res_op(&node->rn_child, first,
rule->rr_childtypes[x].rc_name,
ret, op);
- if (rv != 0 && op != RS_STOP)
+
+ if (rv & FL_FAILURE && op != RS_STOP)
return rv;
}
@@ -853,46 +871,6 @@
}
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
- void *ret, int op)
-{
- resource_node_t *node = *tree;
- resource_t *res = node->rn_resource;
- resource_rule_t *rule = res->r_rule;
- int x, rv = 0, lev;
-
- for (x = 0; rule->rr_childtypes &&
- rule->rr_childtypes[x].rc_name; x++) {
-
- if(op == RS_STOP)
- lev = rule->rr_childtypes[x].rc_stoplevel;
- else
- lev = rule->rr_childtypes[x].rc_startlevel;
-
- if (lev)
- continue;
-
- /*
- printf("%s children of %s type %s (default level)\n",
- agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- rule->rr_childtypes[x].rc_name);
- */
-
- rv = _res_op(&node->rn_child, first,
- rule->rr_childtypes[x].rc_name,
- ret, op);
- if (rv != 0)
- return rv;
- }
-
- return 0;
-}
-#endif
-
-
static inline int
_xx_child_internal(resource_node_t *node, resource_t *first,
resource_node_t *child, void *ret, int op)
@@ -926,13 +904,14 @@
if (op == RS_START || op == RS_STATUS) {
list_for(&node->rn_child, child, y) {
- rv = _xx_child_internal(node, first, child, ret, op);
- if (rv)
+ rv |= _xx_child_internal(node, first, child, ret, op);
+
+ if (rv & FL_FAILURE)
return rv;
}
} else {
list_for_rev(&node->rn_child, child, y) {
- rv += _xx_child_internal(node, first, child, ret, op);
+ rv |= _xx_child_internal(node, first, child, ret, op);
}
}
@@ -973,7 +952,7 @@
if (op == RS_START || op == RS_STATUS) {
rv = _do_child_levels(tree, first, ret, op);
- if (rv != 0)
+ if (rv & FL_FAILURE)
return rv;
/* Start default level after specified ones */
@@ -992,6 +971,22 @@
}
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+ int x;
+ resource_node_t *child;
+
+ list_for(&node->rn_child, child, x) {
+ if (child->rn_child)
+ mark_nodes(child->rn_child, state, flags);
+ }
+
+ node->rn_state = state;
+ node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
/**
Do a status on a resource node. This takes into account the last time the
status operation was run and selects the highest possible resource depth
@@ -1123,130 +1118,6 @@
in the subtree).
@see _res_op_by_level res_exec
*/
-#if 0
-int
-_res_op(resource_node_t **tree, resource_t *first,
- char *type, void * __attribute__((unused))ret, int realop)
-{
- int rv, me;
- resource_node_t *node;
- int op;
-
- list_do(tree, node) {
-
- /* Restore default operation. */
- op = realop;
-
- /* If we're starting by type, do that funky thing. */
- if (type && strlen(type) &&
- strcmp(node->rn_resource->r_rule->rr_type, type))
- continue;
-
- /* If the resource is found, all nodes in the subtree must
- have the operation performed as well. */
- me = !first || (node->rn_resource == first);
-
- /*
- printf("begin %s: %s %s [0x%x]\n", agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource),
- node->rn_flags);
- */
-
- if (me) {
- /*
- If we've been marked as a node which
- needs to be started or stopped, clear
- that flag and start/stop this resource
- and all resource babies.
-
- Otherwise, don't do anything; look for
- children with RF_NEEDSTART and
- RF_NEEDSTOP flags.
-
- CONDSTART and CONDSTOP are no-ops if
- the appropriate flag is not set.
- */
- if ((op == RS_CONDSTART) &&
- (node->rn_flags & RF_NEEDSTART)) {
- /*
- printf("Node %s:%s - CONDSTART\n",
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- op = RS_START;
- }
-
- if ((op == RS_CONDSTOP) &&
- (node->rn_flags & RF_NEEDSTOP)) {
- /*
- printf("Node %s:%s - CONDSTOP\n",
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- op = RS_STOP;
- }
- }
-
- /* Start starts before children */
- if (me && (op == RS_START)) {
- node->rn_flags &= ~RF_NEEDSTART;
-
- rv = res_exec(node, agent_op_str(op), NULL, 0);
- if (rv != 0) {
- node->rn_state = RES_FAILED;
- return rv;
- }
-
- set_time("start", 0, node);
- clear_checks(node);
-
- if (node->rn_state != RES_STARTED) {
- ++node->rn_resource->r_incarnations;
- node->rn_state = RES_STARTED;
- }
- }
-
- if (node->rn_child) {
- rv = _res_op_by_level(&node, me?NULL:first, ret, op);
- if (rv != 0)
- return rv;
- }
-
- /* Stop/status/etc stops after children have stopped */
- if (me && (op == RS_STOP)) {
- node->rn_flags &= ~RF_NEEDSTOP;
- rv = res_exec(node, agent_op_str(op), NULL, 0);
-
- if (rv != 0) {
- node->rn_state = RES_FAILED;
- return rv;
- }
-
- if (node->rn_state != RES_STOPPED) {
- --node->rn_resource->r_incarnations;
- node->rn_state = RES_STOPPED;
- }
-
- } else if (me && (op == RS_STATUS)) {
-
- rv = do_status(node);
- if (rv != 0)
- return rv;
- }
-
- /*
- printf("end %s: %s %s\n", agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- } while (!list_done(tree, node));
-
- return 0;
-}
-#endif
-
-
static inline int
_res_op_internal(resource_node_t **tree, resource_t *first,
char *type, void *__attribute__((unused))ret, int realop,
@@ -1309,7 +1180,7 @@
rv = res_exec(node, agent_op_str(op), NULL, 0);
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
set_time("start", 0, node);
@@ -1322,14 +1193,43 @@
} else if (me && (op == RS_STATUS)) {
/* Check status before children*/
rv = do_status(node);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ /*
+ If this node's status has failed, all of its
+ dependent children are failed, whether or not this
+ node is independent or not.
+ */
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If we're an independent subtree, return a flag
+ stating that this section is recoverable apart
+ from siblings in the resource tree. All child
+ resources of this node must be restarted,
+ but siblings of this node are not affected. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
+
}
if (node->rn_child) {
rv = _res_op_by_level(&node, me?NULL:first, ret, op);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If this node is independent of its siblings,
+ that one of its dependent children failed
+ does not matter: its dependent children must
+ also be independent of this node's siblings. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
}
/* Stop should occur after children have stopped */
@@ -1339,7 +1239,7 @@
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
if (node->rn_state != RES_STOPPED) {
@@ -1378,24 +1278,31 @@
char *type, void * __attribute__((unused))ret, int realop)
{
resource_node_t *node;
- int count = 0, rv;
+ int count = 0, rv = 0;
if (realop == RS_STOP) {
list_for_rev(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
- return rv;
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
}
} else {
list_for(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
+
+ /* If we hit a problem during a 'status' op in an
+ independent subtree, rv will have the
+ FL_RECOVERABLE bit set, but not FL_FAILURE.
+ If we ever hit FL_FAILURE during a status
+ operation, we're *DONE* - even if the subtree
+ is flagged w/ indy-subtree */
+
+ if (rv & FL_FAILURE)
return rv;
}
}
- return 0;
+
+ return rv;
}
/**
@@ -1464,7 +1371,30 @@
int
res_status(resource_node_t **tree, resource_t *res, void *ret)
{
- return _res_op(tree, res, NULL, ret, RS_STATUS);
+ int rv;
+ rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+ if (rv & FL_FAILURE)
+ return rv;
+
+ clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+ "Attempting inline recovery\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+
+ rv = res_condstop(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+ rv = res_condstart(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+
+ clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 0;
+out_fail:
+ clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 1;
}
--- cluster/rgmanager/src/resources/script.sh 2006/08/18 15:26:23 1.8
+++ cluster/rgmanager/src/resources/script.sh 2007/05/31 18:58:46 1.8.2.1
@@ -115,5 +115,5 @@
declare -i rv=$?
if [ $rv -ne 0 ]; then
ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
- return $OCF_ERR_GENERIC
+ exit $OCF_ERR_GENERIC
fi
More information about the Cluster-devel mailing list