[Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ...
lhh at sourceware.org
lhh at sourceware.org
Thu May 31 19:08:15 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-05-31 19:08:14
Modified files:
rgmanager : ChangeLog
rgmanager/include: reslist.h
rgmanager/src/daemons: groups.c resrules.c restree.c
rgmanager/src/resources: script.sh
Log message:
Fix 234249, 229650
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.43&r2=1.44
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.32&r2=1.33
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.30&r2=1.31
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.9&r2=1.10
--- cluster/rgmanager/ChangeLog 2007/05/22 17:01:08 1.43
+++ cluster/rgmanager/ChangeLog 2007/05/31 19:08:13 1.44
@@ -1,3 +1,9 @@
+2007-05-31 Lon Hohberger <lhh at redhat.com>
+ * src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
+ in /usr/share/cluster when processing resource rules
+ * src/daemons/restree.c, src/daemons/groups.c, include/reslist.h:
+ Implement independent subtrees, per bug #229650
+
2007-05-22 Lon Hohberger <lhh at redhat.com>
* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h 2007/03/22 23:46:58 1.19
+++ cluster/rgmanager/include/reslist.h 2007/05/31 19:08:14 1.20
@@ -35,6 +35,8 @@
#define RF_NEEDSTART (1<<2) /** Used when adding/changing resources */
#define RF_NEEDSTOP (1<<3) /** Used when deleting/changing resources */
#define RF_COMMON (1<<4) /** " */
+#define RF_INDEPENDENT (1<<5) /** Set this on a resource node if it is
+ the root of an independent subtree */
#define RES_STOPPED (0)
#define RES_STARTED (1)
--- cluster/rgmanager/src/daemons/groups.c 2007/04/27 18:10:10 1.32
+++ cluster/rgmanager/src/daemons/groups.c 2007/05/31 19:08:14 1.33
@@ -816,6 +816,7 @@
}
pthread_rwlock_unlock(&resource_lock);
+#if 0
/*
Do NOT return error codes if we failed to stop for one of these
reasons. It didn't start, either, so it's safe to assume that
@@ -833,6 +834,7 @@
break;
}
}
+#endif
return ret;
}
--- cluster/rgmanager/src/daemons/resrules.c 2007/04/04 19:22:29 1.21
+++ cluster/rgmanager/src/daemons/resrules.c 2007/05/31 19:08:14 1.22
@@ -1025,7 +1025,7 @@
{
DIR *dir;
struct dirent *de;
- char *fn;//, *dot;
+ char *fn, *dot;
char path[2048];
struct stat st_buf;
@@ -1040,10 +1040,23 @@
if (!fn)
continue;
+ /* Ignore files with common backup extension */
if ((fn != NULL) && (strlen(fn) > 0) &&
(fn[strlen(fn)-1] == '~'))
continue;
+ dot = strrchr(fn, '.');
+ if (dot) {
+ /* Ignore RPM installed save files, patches,
+ diffs, etc. */
+ if (!strncasecmp(dot, ".rpm", 4)) {
+ fprintf(stderr, "Warning: "
+ "Ignoring %s/%s: Bad extension %s\n",
+ rpath, de->d_name, dot);
+ continue;
+ }
+ }
+
snprintf(path, sizeof(path), "%s/%s",
rpath, de->d_name);
@@ -1053,8 +1066,10 @@
if (S_ISDIR(st_buf.st_mode))
continue;
- if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP))
- load_resource_rulefile(path, rules);
+ if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP)) {
+ printf("Loading resource rule from %s\n", path);
+ load_resource_rulefile(path, rules);
+ }
}
xmlCleanupParser();
--- cluster/rgmanager/src/daemons/restree.c 2007/05/03 15:15:17 1.30
+++ cluster/rgmanager/src/daemons/restree.c 2007/05/31 19:08:14 1.31
@@ -39,6 +39,9 @@
void malloc_zap_mutex(void);
#endif
+#define FL_FAILURE 0x1
+#define FL_RECOVERABLE 0x2
+
/* XXX from resrules.c */
int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
node->rn_resource = curres;
node->rn_state = RES_STOPPED;
node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+ snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+ if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+ if (conf_get(tok, &ref) == 0) {
+#endif
+ if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+ node->rn_flags |= RF_INDEPENDENT;
+ free(ref);
+ }
+
+
curres->r_refs++;
*newnode = node;
@@ -777,6 +793,8 @@
printf("NEEDSTART ");
if (node->rn_flags & RF_COMMON)
printf("COMMON ");
+ if (node->rn_flags & RF_INDEPENDENT)
+ printf("INDEPENDENT ");
printf("]");
}
printf(" {\n");
@@ -841,10 +859,11 @@
#endif
/* Do op on all children at our level */
- rv += _res_op(&node->rn_child, first,
+ rv |= _res_op(&node->rn_child, first,
rule->rr_childtypes[x].rc_name,
ret, op);
- if (rv != 0 && op != RS_STOP)
+
+ if (rv & FL_FAILURE && op != RS_STOP)
return rv;
}
@@ -856,46 +875,6 @@
}
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
- void *ret, int op)
-{
- resource_node_t *node = *tree;
- resource_t *res = node->rn_resource;
- resource_rule_t *rule = res->r_rule;
- int x, rv = 0, lev;
-
- for (x = 0; rule->rr_childtypes &&
- rule->rr_childtypes[x].rc_name; x++) {
-
- if(op == RS_STOP)
- lev = rule->rr_childtypes[x].rc_stoplevel;
- else
- lev = rule->rr_childtypes[x].rc_startlevel;
-
- if (lev)
- continue;
-
- /*
- printf("%s children of %s type %s (default level)\n",
- agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- rule->rr_childtypes[x].rc_name);
- */
-
- rv = _res_op(&node->rn_child, first,
- rule->rr_childtypes[x].rc_name,
- ret, op);
- if (rv != 0)
- return rv;
- }
-
- return 0;
-}
-#endif
-
-
static inline int
_xx_child_internal(resource_node_t *node, resource_t *first,
resource_node_t *child, void *ret, int op)
@@ -929,13 +908,14 @@
if (op == RS_START || op == RS_STATUS) {
list_for(&node->rn_child, child, y) {
- rv = _xx_child_internal(node, first, child, ret, op);
- if (rv)
+ rv |= _xx_child_internal(node, first, child, ret, op);
+
+ if (rv & FL_FAILURE)
return rv;
}
} else {
list_for_rev(&node->rn_child, child, y) {
- rv += _xx_child_internal(node, first, child, ret, op);
+ rv |= _xx_child_internal(node, first, child, ret, op);
}
}
@@ -976,7 +956,7 @@
if (op == RS_START || op == RS_STATUS) {
rv = _do_child_levels(tree, first, ret, op);
- if (rv != 0)
+ if (rv & FL_FAILURE)
return rv;
/* Start default level after specified ones */
@@ -995,6 +975,22 @@
}
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+ int x;
+ resource_node_t *child;
+
+ list_for(&node->rn_child, child, x) {
+ if (child->rn_child)
+ mark_nodes(child->rn_child, state, flags);
+ }
+
+ node->rn_state = state;
+ node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
/**
Do a status on a resource node. This takes into account the last time the
status operation was run and selects the highest possible resource depth
@@ -1223,7 +1219,7 @@
rv = res_exec(node, agent_op_str(op), NULL, 0);
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
set_time("start", 0, node);
@@ -1236,14 +1232,43 @@
} else if (me && (op == RS_STATUS)) {
/* Check status before children*/
rv = do_status(node);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ /*
+ If this node's status has failed, all of its
+ dependent children are failed, whether or not this
+ node is independent.
+ */
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If we're an independent subtree, return a flag
+ stating that this section is recoverable apart
+ from siblings in the resource tree. All child
+ resources of this node must be restarted,
+ but siblings of this node are not affected. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
+
}
if (node->rn_child) {
rv = _res_op_by_level(&node, me?NULL:first, ret, op);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If this node is independent of its siblings,
+ the failure of one of its dependent children
+ does not matter: its dependent children must
+ also be independent of this node's siblings. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
}
/* Stop should occur after children have stopped */
@@ -1253,7 +1278,7 @@
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
if (node->rn_state != RES_STOPPED) {
@@ -1292,24 +1317,31 @@
char *type, void * __attribute__((unused))ret, int realop)
{
resource_node_t *node;
- int count = 0, rv;
+ int count = 0, rv = 0;
if (realop == RS_STOP) {
list_for_rev(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
- return rv;
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
}
} else {
list_for(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
+
+ /* If we hit a problem during a 'status' op in an
+ independent subtree, rv will have the
+ FL_RECOVERABLE bit set, but not FL_FAILURE.
+ If we ever hit FL_FAILURE during a status
+ operation, we're *DONE* - even if the subtree
+ is flagged as an independent subtree */
+
+ if (rv & FL_FAILURE)
return rv;
}
}
- return 0;
+
+ return rv;
}
/**
@@ -1378,7 +1410,30 @@
int
res_status(resource_node_t **tree, resource_t *res, void *ret)
{
- return _res_op(tree, res, NULL, ret, RS_STATUS);
+ int rv;
+ rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+ if (rv & FL_FAILURE)
+ return rv;
+
+ clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+ "Attempting inline recovery\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+
+ rv = res_condstop(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+ rv = res_condstart(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+
+ clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 0;
+out_fail:
+ clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 1;
}
--- cluster/rgmanager/src/resources/script.sh 2007/04/05 15:08:20 1.9
+++ cluster/rgmanager/src/resources/script.sh 2007/05/31 19:08:14 1.10
@@ -118,5 +118,5 @@
declare -i rv=$?
if [ $rv -ne 0 ]; then
ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
- return $OCF_ERR_GENERIC
+ exit $OCF_ERR_GENERIC
fi
More information about the Cluster-devel
mailing list