[Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ...

lhh at sourceware.org lhh at sourceware.org
Thu May 31 19:08:15 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2007-05-31 19:08:14

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c resrules.c restree.c 
	rgmanager/src/resources: script.sh 

Log message:
	Fix 234249, 229650

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.43&r2=1.44
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.32&r2=1.33
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.30&r2=1.31
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.9&r2=1.10

--- cluster/rgmanager/ChangeLog	2007/05/22 17:01:08	1.43
+++ cluster/rgmanager/ChangeLog	2007/05/31 19:08:13	1.44
@@ -1,3 +1,9 @@
+2007-05-31 Lon Hohberger <lhh at redhat.com>
+	* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
+	in /usr/share/cluster when processing resource rules
+	* src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: 
+	Implement independent subtrees, per bug #229650
+
 2007-05-22 Lon Hohberger <lhh at redhat.com>
 	* src/resources/SAPInstance, SAPDatabase: Add primary attrs
 
--- cluster/rgmanager/include/reslist.h	2007/03/22 23:46:58	1.19
+++ cluster/rgmanager/include/reslist.h	2007/05/31 19:08:14	1.20
@@ -35,6 +35,8 @@
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
 #define RF_NEEDSTOP	(1<<3)  /** Used when deleting/changing resources */
 #define RF_COMMON	(1<<4)	/** " */
+#define RF_INDEPENDENT	(1<<5)  /** Set on a resource node that is the
+				  root of an independent subtree */
 
 #define RES_STOPPED	(0)
 #define RES_STARTED	(1)
--- cluster/rgmanager/src/daemons/groups.c	2007/04/27 18:10:10	1.32
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/31 19:08:14	1.33
@@ -816,6 +816,7 @@
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
+#if 0
 	/*
 	   Do NOT return error codes if we failed to stop for one of these
 	   reasons.  It didn't start, either, so it's safe to assume that
@@ -833,6 +834,7 @@
 			break;
 		}
 	}
+#endif
 
 	return ret;
 }
--- cluster/rgmanager/src/daemons/resrules.c	2007/04/04 19:22:29	1.21
+++ cluster/rgmanager/src/daemons/resrules.c	2007/05/31 19:08:14	1.22
@@ -1025,7 +1025,7 @@
 {
 	DIR *dir;
 	struct dirent *de;
-	char *fn;//, *dot;
+	char *fn, *dot;
 	char path[2048];
 	struct stat st_buf;
 
@@ -1040,10 +1040,23 @@
 		if (!fn)
 			continue;
 		
+		/* Ignore files with common backup extension */
 		if ((fn != NULL) && (strlen(fn) > 0) && 
 			(fn[strlen(fn)-1] == '~')) 
 			continue;
 
+ 		dot = strrchr(fn, '.');
+ 		if (dot) {
+ 			/* Ignore RPM installed save files, patches,
+ 			   diffs, etc. */
+ 			if (!strncasecmp(dot, ".rpm", 4)) {
+ 				fprintf(stderr, "Warning: "
+ 					"Ignoring %s/%s: Bad extension %s\n",
+ 					rpath, de->d_name, dot);
+ 				continue;
+ 			}
+ 		}
+
 		snprintf(path, sizeof(path), "%s/%s",
 			 rpath, de->d_name);
 		
@@ -1053,8 +1066,10 @@
 		if (S_ISDIR(st_buf.st_mode))
 			continue;
 		
-		if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP))
-			load_resource_rulefile(path, rules);
+  		if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP)) {
+  			printf("Loading resource rule from %s\n", path);
+   			load_resource_rulefile(path, rules);
+  		}
 	}
 	xmlCleanupParser();
 
--- cluster/rgmanager/src/daemons/restree.c	2007/05/03 15:15:17	1.30
+++ cluster/rgmanager/src/daemons/restree.c	2007/05/31 19:08:14	1.31
@@ -39,6 +39,9 @@
 void malloc_zap_mutex(void);
 #endif
 
+#define FL_FAILURE	0x1
+#define FL_RECOVERABLE	0x2
+
 
 /* XXX from resrules.c */
 int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
 	node->rn_resource = curres;
 	node->rn_state = RES_STOPPED;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+	if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+	if (conf_get(tok, &ref) == 0) {
+#endif
+		if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+			node->rn_flags |= RF_INDEPENDENT;
+		free(ref);
+	}
+
+
 	curres->r_refs++;
 
 	*newnode = node;
@@ -777,6 +793,8 @@
 				printf("NEEDSTART ");
 			if (node->rn_flags & RF_COMMON)
 				printf("COMMON ");
+			if (node->rn_flags & RF_INDEPENDENT)
+				printf("INDEPENDENT ");
 			printf("]");
 		}
 		printf(" {\n");
@@ -841,10 +859,11 @@
 #endif
 
 			/* Do op on all children at our level */
-			rv += _res_op(&node->rn_child, first,
+			rv |= _res_op(&node->rn_child, first,
 			     	     rule->rr_childtypes[x].rc_name, 
 		     		     ret, op);
-			if (rv != 0 && op != RS_STOP)
+
+			if (rv & FL_FAILURE && op != RS_STOP)
 				return rv;
 		}
 
@@ -856,46 +875,6 @@
 }
 
 
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
-			void *ret, int op)
-{
-	resource_node_t *node = *tree;
-	resource_t *res = node->rn_resource;
-	resource_rule_t *rule = res->r_rule;
-	int x, rv = 0, lev;
-
-	for (x = 0; rule->rr_childtypes &&
-	     rule->rr_childtypes[x].rc_name; x++) {
-
-		if(op == RS_STOP)
-			lev = rule->rr_childtypes[x].rc_stoplevel;
-		else
-			lev = rule->rr_childtypes[x].rc_startlevel;
-
-		if (lev)
-			continue;
-
-		/*
-		printf("%s children of %s type %s (default level)\n",
-		       agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       rule->rr_childtypes[x].rc_name);
-		 */
-
-		rv = _res_op(&node->rn_child, first,
-			     rule->rr_childtypes[x].rc_name, 
-			     ret, op);
-		if (rv != 0)
-			return rv;
-	}
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _xx_child_internal(resource_node_t *node, resource_t *first,
 		   resource_node_t *child, void *ret, int op)
@@ -929,13 +908,14 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		list_for(&node->rn_child, child, y) {
-			rv = _xx_child_internal(node, first, child, ret, op);
-			if (rv)
+			rv |= _xx_child_internal(node, first, child, ret, op);
+
+			if (rv & FL_FAILURE)
 				return rv;
 		}
 	} else {
 		list_for_rev(&node->rn_child, child, y) {
-			rv += _xx_child_internal(node, first, child, ret, op);
+			rv |= _xx_child_internal(node, first, child, ret, op);
 		}
 	}
 
@@ -976,7 +956,7 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		rv =  _do_child_levels(tree, first, ret, op);
-	       	if (rv != 0)
+	       	if (rv & FL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
@@ -995,6 +975,22 @@
 }
 
 
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+	int x;
+	resource_node_t *child;
+
+	list_for(&node->rn_child, child, x) {
+		if (child->rn_child)
+			mark_nodes(child->rn_child, state, flags);
+	}
+
+	node->rn_state = state;
+	node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
 /**
    Do a status on a resource node.  This takes into account the last time the
    status operation was run and selects the highest possible resource depth
@@ -1223,7 +1219,7 @@
 		rv = res_exec(node, agent_op_str(op), NULL, 0);
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		set_time("start", 0, node);
@@ -1236,14 +1232,43 @@
 	} else if (me && (op == RS_STATUS)) {
 		/* Check status before children*/
 		rv = do_status(node);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			/*
+			   If this node's status has failed, all of its
+			   dependent children are failed, regardless of
+			   whether this node is independent.
+			 */
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If we're an independent subtree, return a flag
+			   stating that this section is recoverable apart
+			   from siblings in the resource tree.  All child
+			   resources of this node must be restarted,
+			   but siblings of this node are not affected. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
+
 	}
 
 	if (node->rn_child) {
 		rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If this node is independent of its siblings,
+			   the failure of one of its dependent children
+			   does not matter: its dependent children must
+			   also be independent of this node's siblings. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
 	}
 
 	/* Stop should occur after children have stopped */
@@ -1253,7 +1278,7 @@
 
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		if (node->rn_state != RES_STOPPED) {
@@ -1292,24 +1317,31 @@
 	char *type, void * __attribute__((unused))ret, int realop)
 {
   	resource_node_t *node;
- 	int count = 0, rv;
+ 	int count = 0, rv = 0;
  	
  	if (realop == RS_STOP) {
  		list_for_rev(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
- 				return rv;
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
  		}
  	} else {
  		list_for(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
+
+			/* If we hit a problem during a 'status' op in an
+			   independent subtree, rv will have the
+			   FL_RECOVERABLE bit set, but not FL_FAILURE.
+			   If we ever hit FL_FAILURE during a status
+			   operation, we're *DONE* - even if the subtree
+			   is flagged as an independent subtree */
+			  
+ 			if (rv & FL_FAILURE) 
  				return rv;
  		}
  	}
-	return 0;
+
+	return rv;
 }
 
 /**
@@ -1378,7 +1410,30 @@
 int
 res_status(resource_node_t **tree, resource_t *res, void *ret)
 {
-	return _res_op(tree, res, NULL, ret, RS_STATUS);
+	int rv;
+	rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+	if (rv & FL_FAILURE)
+		return rv;
+
+	clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+	       "Attempting inline recovery\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+
+	rv = res_condstop(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+	rv = res_condstart(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+
+	clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 0;
+out_fail:
+	clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 1;
 }
 
 
--- cluster/rgmanager/src/resources/script.sh	2007/04/05 15:08:20	1.9
+++ cluster/rgmanager/src/resources/script.sh	2007/05/31 19:08:14	1.10
@@ -118,5 +118,5 @@
 declare -i rv=$?
 if [ $rv -ne 0 ]; then
 	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
-	return $OCF_ERR_GENERIC
+	exit $OCF_ERR_GENERIC
 fi




More information about the Cluster-devel mailing list