[Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ...

lhh at sourceware.org lhh at sourceware.org
Thu May 31 18:58:47 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-05-31 18:58:46

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c resrules.c restree.c 
	rgmanager/src/resources: script.sh 

Log message:
	Fix bugzilla #229650; implement __independent_subtree feature

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.9&r2=1.31.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.2&r2=1.15.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.5&r2=1.25.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.4&r2=1.16.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.3&r2=1.23.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1

--- cluster/rgmanager/ChangeLog	2007/05/31 18:38:44	1.31.2.9
+++ cluster/rgmanager/ChangeLog	2007/05/31 18:58:46	1.31.2.10
@@ -1,6 +1,8 @@
 2007-05-31 Lon Hohberger <lhh at redhat.com>
 	* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
 	in /usr/share/cluster when processing resource rules
+	* src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: 
+	Implement independent subtrees, per bug #229650
 
 2007-05-22 Lon Hohberger <lhh at redhat.com>
 	* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h	2007/03/23 00:06:34	1.15.2.2
+++ cluster/rgmanager/include/reslist.h	2007/05/31 18:58:46	1.15.2.3
@@ -35,6 +35,8 @@
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
 #define RF_NEEDSTOP	(1<<3)  /** Used when deleting/changing resources */
 #define RF_COMMON	(1<<4)	/** " */
+#define RF_INDEPENDENT	(1<<5)  /** Define this for a resource if it is
+				  otherwise an independent subtree */
 
 #define RES_STOPPED	(0)
 #define RES_STARTED	(1)
@@ -56,10 +58,10 @@
 
 
 typedef struct _resource_attribute {
-	int	ra_flags;
-	/* XXX possible alignment problem on ia64 */
 	char	*ra_name;
 	char	*ra_value;
+	int	ra_flags;
+	int	_pad_;
 } resource_attr_t;
 
 
@@ -78,6 +80,7 @@
 	time_t	ra_last;
 	time_t	ra_interval;
 	int	ra_depth;
+	int 	_pad_;
 } resource_act_t;
 
 
--- cluster/rgmanager/src/daemons/groups.c	2007/05/10 16:23:43	1.25.2.5
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/31 18:58:46	1.25.2.6
@@ -813,6 +813,7 @@
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
+#if 0
 	/*
 	   Do NOT return error codes if we failed to stop for one of these
 	   reasons.  It didn't start, either, so it's safe to assume that
@@ -830,6 +831,7 @@
 			break;
 		}
 	}
+#endif
 
 	return ret;
 }
--- cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:37:50	1.16.2.4
+++ cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:58:46	1.16.2.5
@@ -262,6 +262,7 @@
 		acts[0].ra_depth = depth;
 		acts[0].ra_timeout = timeout;
 		acts[0].ra_interval = interval;
+		acts[0].ra_last = 0;
 		acts[1].ra_name = NULL;
 
 		*actsp = acts;
@@ -271,7 +272,7 @@
 	for (x = 0; acts[x].ra_name; x++) {
 		if (!strcmp(acts[x].ra_name, name) &&
 		    (depth == acts[x].ra_depth || depth == -1)) {
-			printf("Replacing action '%s' depth %d: ",
+			fprintf(stderr, "Replacing action '%s' depth %d: ",
 			       name, acts[x].ra_depth);
 			if (timeout >= 0) {
 				printf("timeout: %d->%d ",
@@ -306,6 +307,7 @@
 	acts[x].ra_depth = depth;
 	acts[x].ra_timeout = timeout;
 	acts[x].ra_interval = interval;
+	acts[x].ra_last = 0;
 
 	acts[x+1].ra_name = NULL;
 
--- cluster/rgmanager/src/daemons/restree.c	2007/05/03 15:14:16	1.23.2.3
+++ cluster/rgmanager/src/daemons/restree.c	2007/05/31 18:58:46	1.23.2.4
@@ -39,6 +39,9 @@
 void malloc_zap_mutex(void);
 #endif
 
+#define FL_FAILURE	0x1
+#define FL_RECOVERABLE	0x2
+
 
 /* XXX from resrules.c */
 int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
 	node->rn_resource = curres;
 	node->rn_state = RES_STOPPED;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+	if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+	if (conf_get(tok, &ref) == 0) {
+#endif
+		if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+			node->rn_flags |= RF_INDEPENDENT;
+		free(ref);
+	}
+
+
 	curres->r_refs++;
 
 	*newnode = node;
@@ -718,7 +734,6 @@
 		    resource_rule_t **rulelist,
 		    resource_t **reslist)
 {
-	resource_rule_t *curr;
 	resource_node_t *root = NULL;
 	char tok[512];
 
@@ -777,6 +792,8 @@
 				printf("NEEDSTART ");
 			if (node->rn_flags & RF_COMMON)
 				printf("COMMON ");
+			if (node->rn_flags & RF_INDEPENDENT)
+				printf("INDEPENDENT ");
 			printf("]");
 		}
 		printf(" {\n");
@@ -838,10 +855,11 @@
 #endif
 
 			/* Do op on all children at our level */
-			rv += _res_op(&node->rn_child, first,
+			rv |= _res_op(&node->rn_child, first,
 			     	     rule->rr_childtypes[x].rc_name, 
 		     		     ret, op);
-			if (rv != 0 && op != RS_STOP)
+
+			if (rv & FL_FAILURE && op != RS_STOP)
 				return rv;
 		}
 
@@ -853,46 +871,6 @@
 }
 
 
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
-			void *ret, int op)
-{
-	resource_node_t *node = *tree;
-	resource_t *res = node->rn_resource;
-	resource_rule_t *rule = res->r_rule;
-	int x, rv = 0, lev;
-
-	for (x = 0; rule->rr_childtypes &&
-	     rule->rr_childtypes[x].rc_name; x++) {
-
-		if(op == RS_STOP)
-			lev = rule->rr_childtypes[x].rc_stoplevel;
-		else
-			lev = rule->rr_childtypes[x].rc_startlevel;
-
-		if (lev)
-			continue;
-
-		/*
-		printf("%s children of %s type %s (default level)\n",
-		       agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       rule->rr_childtypes[x].rc_name);
-		 */
-
-		rv = _res_op(&node->rn_child, first,
-			     rule->rr_childtypes[x].rc_name, 
-			     ret, op);
-		if (rv != 0)
-			return rv;
-	}
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _xx_child_internal(resource_node_t *node, resource_t *first,
 		   resource_node_t *child, void *ret, int op)
@@ -926,13 +904,14 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		list_for(&node->rn_child, child, y) {
-			rv = _xx_child_internal(node, first, child, ret, op);
-			if (rv)
+			rv |= _xx_child_internal(node, first, child, ret, op);
+
+			if (rv & FL_FAILURE)
 				return rv;
 		}
 	} else {
 		list_for_rev(&node->rn_child, child, y) {
-			rv += _xx_child_internal(node, first, child, ret, op);
+			rv |= _xx_child_internal(node, first, child, ret, op);
 		}
 	}
 
@@ -973,7 +952,7 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		rv =  _do_child_levels(tree, first, ret, op);
-	       	if (rv != 0)
+	       	if (rv & FL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
@@ -992,6 +971,22 @@
 }
 
 
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+	int x;
+	resource_node_t *child;
+
+	list_for(&node->rn_child, child, x) {
+		if (child->rn_child)
+			mark_nodes(child->rn_child, state, flags);
+	}
+
+	node->rn_state = state;
+	node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
 /**
    Do a status on a resource node.  This takes into account the last time the
    status operation was run and selects the highest possible resource depth
@@ -1123,130 +1118,6 @@
 			in the subtree).
    @see			_res_op_by_level res_exec
  */
-#if 0
-int
-_res_op(resource_node_t **tree, resource_t *first,
-	char *type, void * __attribute__((unused))ret, int realop)
-{
-	int rv, me;
-	resource_node_t *node;
-	int op;
-
-	list_do(tree, node) {
-
-		/* Restore default operation. */
-		op = realop;
-
-		/* If we're starting by type, do that funky thing. */
-		if (type && strlen(type) &&
-		    strcmp(node->rn_resource->r_rule->rr_type, type))
-			continue;
-
-		/* If the resource is found, all nodes in the subtree must
-		   have the operation performed as well. */
-		me = !first || (node->rn_resource == first);
-
-		/*
-		printf("begin %s: %s %s [0x%x]\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource),
-		       node->rn_flags);
-		 */
-
-		if (me) {
-			/*
-			   If we've been marked as a node which
-			   needs to be started or stopped, clear
-			   that flag and start/stop this resource
-			   and all resource babies.
-
-			   Otherwise, don't do anything; look for
-			   children with RF_NEEDSTART and
-			   RF_NEEDSTOP flags.
-
-			   CONDSTART and CONDSTOP are no-ops if
-			   the appropriate flag is not set.
-			 */
-		       	if ((op == RS_CONDSTART) &&
-			    (node->rn_flags & RF_NEEDSTART)) {
-				/*
-				printf("Node %s:%s - CONDSTART\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_START;
-			}
-
-			if ((op == RS_CONDSTOP) &&
-			    (node->rn_flags & RF_NEEDSTOP)) {
-				/*
-				printf("Node %s:%s - CONDSTOP\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_STOP;
-			}
-		}
-
-		/* Start starts before children */
-		if (me && (op == RS_START)) {
-			node->rn_flags &= ~RF_NEEDSTART;
-
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			set_time("start", 0, node);
-			clear_checks(node);
-
-			if (node->rn_state != RES_STARTED) {
-				++node->rn_resource->r_incarnations;
-				node->rn_state = RES_STARTED;
-			}
-		}
-
-		if (node->rn_child) {
-			rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-			if (rv != 0)
-				return rv;
-		}
-
-		/* Stop/status/etc stops after children have stopped */
-		if (me && (op == RS_STOP)) {
-			node->rn_flags &= ~RF_NEEDSTOP;
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			if (node->rn_state != RES_STOPPED) {
-				--node->rn_resource->r_incarnations;
-				node->rn_state = RES_STOPPED;
-			}
-
-		} else if (me && (op == RS_STATUS)) {
-
-			rv = do_status(node);
-			if (rv != 0)
-				return rv;
-		}
-
-		/*
-		printf("end %s: %s %s\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource));
-		 */
-	} while (!list_done(tree, node));
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _res_op_internal(resource_node_t **tree, resource_t *first,
 		 char *type, void *__attribute__((unused))ret, int realop,
@@ -1309,7 +1180,7 @@
 		rv = res_exec(node, agent_op_str(op), NULL, 0);
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		set_time("start", 0, node);
@@ -1322,14 +1193,43 @@
 	} else if (me && (op == RS_STATUS)) {
 		/* Check status before children*/
 		rv = do_status(node);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			/*
+			   If this node's status has failed, all of its
+			   dependent children are failed, whether or not this
+			   node is independent.
+			 */
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If we're an independent subtree, return a flag
+			   stating that this section is recoverable apart
+			   from siblings in the resource tree.  All child
+			   resources of this node must be restarted,
+			   but siblings of this node are not affected. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
+
 	}
 
 	if (node->rn_child) {
 		rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If this node is independent of its siblings,
+			   the failure of one of its dependent children
+			   does not matter: its dependent children must
+			   also be independent of this node's siblings. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
 	}
 
 	/* Stop should occur after children have stopped */
@@ -1339,7 +1239,7 @@
 
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		if (node->rn_state != RES_STOPPED) {
@@ -1378,24 +1278,31 @@
 	char *type, void * __attribute__((unused))ret, int realop)
 {
   	resource_node_t *node;
- 	int count = 0, rv;
+ 	int count = 0, rv = 0;
  	
  	if (realop == RS_STOP) {
  		list_for_rev(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
- 				return rv;
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
  		}
  	} else {
  		list_for(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
+
+			/* If we hit a problem during a 'status' op in an
+			   independent subtree, rv will have the
+			   FL_RECOVERABLE bit set, but not FL_FAILURE.
+			   If we ever hit FL_FAILURE during a status
+			   operation, we're *DONE* - even if the subtree
+			   is flagged with __independent_subtree */
+			  
+ 			if (rv & FL_FAILURE) 
  				return rv;
  		}
  	}
-	return 0;
+
+	return rv;
 }
 
 /**
@@ -1464,7 +1371,30 @@
 int
 res_status(resource_node_t **tree, resource_t *res, void *ret)
 {
-	return _res_op(tree, res, NULL, ret, RS_STATUS);
+	int rv;
+	rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+	if (rv & FL_FAILURE)
+		return rv;
+
+	clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+	       "Attempting inline recovery\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+
+	rv = res_condstop(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+	rv = res_condstart(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+
+	clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 0;
+out_fail:
+	clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 1;
 }
 
 
--- cluster/rgmanager/src/resources/script.sh	2006/08/18 15:26:23	1.8
+++ cluster/rgmanager/src/resources/script.sh	2007/05/31 18:58:46	1.8.2.1
@@ -115,5 +115,5 @@
 declare -i rv=$?
 if [ $rv -ne 0 ]; then
 	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
-	return $OCF_ERR_GENERIC
+	exit $OCF_ERR_GENERIC
 fi




More information about the Cluster-devel mailing list