[Cluster-devel] cluster/fence agents/manual/Makefile fenced/fd.h fenced/main.c fenced/recover.c

lhh at sourceware.org lhh at sourceware.org
Mon Jan 22 23:15:55 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2007-01-22 23:15:54

Modified files:
	fence/agents/manual: Makefile 
	fence/fenced   : fd.h main.c recover.c 

Log message:
	Simple manual override for fenced & example replacement for fence_ack_manual

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/manual/Makefile.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&r1=1.40&r2=1.41
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&r1=1.25&r2=1.26

--- cluster/fence/agents/manual/Makefile	2006/08/11 15:18:08	1.7
+++ cluster/fence/agents/manual/Makefile	2007/01/22 23:15:54	1.8
@@ -36,6 +36,12 @@
 fence_ack_manual: ack.o
 	${CC} -o $@ $^
 
+# To use the manual override script instead of the binary version,
+# enable the rule below.  (The script does not require any configuration
+# params in cluster.conf, but fencing must fail first.)
+#fence_ack_manual: fence_ack_manual.sh
+#	cp $^ $@
+
 ack.o: ack.c
 	$(CC) $(CFLAGS) -c -o $@ $<
 
--- cluster/fence/fenced/fd.h	2006/10/09 10:52:50	1.24
+++ cluster/fence/fenced/fd.h	2007/01/22 23:15:54	1.25
@@ -53,6 +53,7 @@
 #define DEFAULT_POST_JOIN_DELAY	6
 #define DEFAULT_POST_FAIL_DELAY	0
 #define DEFAULT_CLEAN_START	0
+#define DEFAULT_OVERRIDE_PATH	"/var/run/cluster/fenced_override"
 #define FENCED_SOCK_PATH	"fenced_socket"
 
 extern char			*prog_name;
@@ -131,10 +132,12 @@
 {
 	int post_join_delay;
 	int post_fail_delay;
+	char *override_path;
 	int8_t clean_start;
 	int8_t post_join_delay_opt;
 	int8_t post_fail_delay_opt;
 	int8_t clean_start_opt;
+	int8_t override_path_opt;
 };
 
 #define FDFL_RUN        (0)
--- cluster/fence/fenced/main.c	2006/12/01 15:28:11	1.40
+++ cluster/fence/fenced/main.c	2007/01/22 23:15:54	1.41
@@ -15,7 +15,7 @@
 #include "ccs.h"
 #include "copyright.cf"
 
-#define OPTION_STRING			("cj:f:Dn:hVSw")
+#define OPTION_STRING			("cj:f:Dn:O:hVSw")
 #define LOCKFILE_NAME			"/var/run/fenced.pid"
 
 struct client {
@@ -145,6 +145,23 @@
 			free(str);
 	}
 
+	if (comline.override_path_opt == FALSE) {
+		str = NULL;
+		memset(path, 0, 256);
+		sprintf(path, "/cluster/fence_daemon/@override_path");
+
+		error = ccs_get(cd, path, &str);
+		if (!error)
+			/* XXX override_path is not explicitly freed on exit;
+			   if we decide to make fenced handle SIGHUP at a
+			   later time, we will need to free it. */
+			comline.override_path = strdup(str);
+		else
+			comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
+		if (str)
+			free(str);
+	}
+
 	log_debug("delay post_join %ds post_fail %ds",
 		  comline.post_join_delay, comline.post_fail_delay);
 
@@ -500,6 +517,8 @@
 				   DEFAULT_POST_JOIN_DELAY);
 	printf("  -f <secs>	Post-fail fencing delay (default %d)\n",
 				   DEFAULT_POST_FAIL_DELAY);
+	printf("  -O <path>    Override path (default %s)\n",
+	       			   DEFAULT_OVERRIDE_PATH);
 	printf("  -D	       Enable debugging code and don't fork\n");
 	printf("  -h	       Print this help, then exit\n");
 	printf("  -V	       Print program version information, then exit\n");
@@ -547,6 +566,8 @@
 	int cont = TRUE;
 	int optchar;
 
+	comline->override_path_opt = FALSE;
+	comline->override_path = NULL;
 	comline->post_join_delay_opt = FALSE;
 	comline->post_fail_delay_opt = FALSE;
 	comline->clean_start_opt = FALSE;
@@ -571,6 +592,11 @@
 			comline->post_fail_delay_opt = TRUE;
 			break;
 
+		case 'O':
+			comline->override_path = strdup(optarg);
+			comline->override_path_opt = TRUE;
+			break;
+
 		case 'D':
 			daemon_debug_opt = TRUE;
 			break;
--- cluster/fence/fenced/recover.c	2006/10/09 10:52:50	1.25
+++ cluster/fence/fenced/recover.c	2007/01/22 23:15:54	1.26
@@ -13,6 +13,9 @@
 
 #include "fd.h"
 #include "ccs.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
 
 extern int our_nodeid;
 extern commandline_t comline;
@@ -213,6 +216,79 @@
 	return num_victims;
 }
 
+static inline void close_override(int *fd, char *path)
+{
+	unlink(path);
+	if (fd && *fd >= 0)
+		close(*fd);
+	*fd = -1;
+}
+
+static int open_override(char *path)
+{
+	int ret;
+	mode_t om;
+
+	om = umask(077);
+	ret = mkfifo(path, (S_IRUSR | S_IWUSR));
+	umask(om);
+
+	if (ret < 0)
+		return -1;
+        return open(path, O_RDONLY | O_NONBLOCK);
+}
+
+static int check_override(int ofd, char *nodename, int timeout)
+{
+	char buf[128];
+	fd_set rfds;
+	struct timeval tv = {0, 0};
+	int ret, x;
+
+	if (ofd < 0 || !nodename || !strlen(nodename)) {
+		sleep(timeout);
+		return 0;
+	}
+
+	FD_ZERO(&rfds);
+	FD_SET(ofd, &rfds);
+	tv.tv_usec = 0;
+	tv.tv_sec = timeout;
+
+	ret = select(ofd + 1, &rfds, NULL, NULL, &tv);
+	if (ret < 0) {
+		syslog(LOG_ERR, "select: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (ret == 0)
+		return 0;
+
+	memset(buf, 0, sizeof(buf));
+	ret = read(ofd, buf, sizeof(buf) - 1);
+	if (ret < 0) {
+		syslog(LOG_ERR, "read: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* chop off control characters */
+	for (x = 0; x < ret; x++) {
+		if (buf[x] < 0x20) {
+			buf[x] = 0;
+			break;
+		}
+	}
+
+	if (!strcasecmp(nodename, buf)) {
+		/* Case insensitive, but not as nice as, say, name_equal
+		   in the other file... */
+		return 1;
+	}
+
+	return 0;
+}
+
+
 /* If there are victims after a node has joined, it's a good indication that
    they may be joining the cluster shortly.  If we delay a bit they might
    become members and we can avoid fencing them.  This is only really an issue
@@ -283,6 +359,7 @@
 	fd_node_t *node;
 	char *master_name;
 	int master, error, cd;
+	int override = -1;
 
 	master = find_master_nodeid(fd, &master_name);
 
@@ -319,7 +396,22 @@
 			list_del(&node->list);
 			free(node);
 		}
-		sleep(5);
+
+		if (!comline.override_path) {
+			sleep(5);
+			continue;
+		}
+
+		/* Check for manual intervention */
+		override = open_override(comline.override_path);
+		if (check_override(override, node->name, 5) > 0) {
+			syslog(LOG_WARNING, "fence \"%s\" overridden by "
+			       "administrator intervention", node->name);
+
+			list_del(&node->list);
+			free(node);
+		}
+		close_override(&override, comline.override_path);
 	}
 
 	ccs_disconnect(cd);




More information about the Cluster-devel mailing list