[Cluster-devel] cluster/fence agents/manual/Makefile fenced/fd ...
lhh at sourceware.org
lhh at sourceware.org
Mon Jan 22 23:15:55 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-01-22 23:15:54
Modified files:
fence/agents/manual: Makefile
fence/fenced : fd.h main.c recover.c
Log message:
Simple manual override for fenced & example replacement for fence_ack_manual
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/manual/Makefile.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&r1=1.40&r2=1.41
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&r1=1.25&r2=1.26
--- cluster/fence/agents/manual/Makefile 2006/08/11 15:18:08 1.7
+++ cluster/fence/agents/manual/Makefile 2007/01/22 23:15:54 1.8
@@ -36,6 +36,12 @@
fence_ack_manual: ack.o
${CC} -o $@ $^
+# Using manual override instead of the binary version.
+# (this version does not require any configuration params
+# in cluster.conf, but fencing must fail first)
+#fence_ack_manual: fence_ack_manual.sh
+# cp $@ $^
+
ack.o: ack.c
$(CC) $(CFLAGS) -c -o $@ $<
--- cluster/fence/fenced/fd.h 2006/10/09 10:52:50 1.24
+++ cluster/fence/fenced/fd.h 2007/01/22 23:15:54 1.25
@@ -53,6 +53,7 @@
#define DEFAULT_POST_JOIN_DELAY 6
#define DEFAULT_POST_FAIL_DELAY 0
#define DEFAULT_CLEAN_START 0
+#define DEFAULT_OVERRIDE_PATH "/var/run/cluster/fenced_override"
#define FENCED_SOCK_PATH "fenced_socket"
extern char *prog_name;
@@ -131,10 +132,12 @@
{
int post_join_delay;
int post_fail_delay;
+ char *override_path;
int8_t clean_start;
int8_t post_join_delay_opt;
int8_t post_fail_delay_opt;
int8_t clean_start_opt;
+ int8_t override_path_opt;
};
#define FDFL_RUN (0)
--- cluster/fence/fenced/main.c 2006/12/01 15:28:11 1.40
+++ cluster/fence/fenced/main.c 2007/01/22 23:15:54 1.41
@@ -15,7 +15,7 @@
#include "ccs.h"
#include "copyright.cf"
-#define OPTION_STRING ("cj:f:Dn:hVSw")
+#define OPTION_STRING ("cj:f:Dn:O:hVSw")
#define LOCKFILE_NAME "/var/run/fenced.pid"
struct client {
@@ -145,6 +145,23 @@
free(str);
}
+ if (comline.override_path_opt == FALSE) {
+ str = NULL;
+ memset(path, 0, 256);
+ sprintf(path, "/cluster/fence_daemon/@override_path");
+
+ error = ccs_get(cd, path, &str);
+ if (!error)
+ /* XXX These are not explicitly freed on exit; if
+ we decide to make fenced handle SIGHUP at a later
+ time, we will need to free this. */
+ comline.override_path = strdup(str);
+ else
+ comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
+ if (str)
+ free(str);
+ }
+
log_debug("delay post_join %ds post_fail %ds",
comline.post_join_delay, comline.post_fail_delay);
@@ -500,6 +517,8 @@
DEFAULT_POST_JOIN_DELAY);
printf(" -f <secs> Post-fail fencing delay (default %d)\n",
DEFAULT_POST_FAIL_DELAY);
+ printf(" -O <path> Override path (default %s)\n",
+ DEFAULT_OVERRIDE_PATH);
printf(" -D Enable debugging code and don't fork\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
@@ -547,6 +566,8 @@
int cont = TRUE;
int optchar;
+ comline->override_path_opt = FALSE;
+ comline->override_path = NULL;
comline->post_join_delay_opt = FALSE;
comline->post_fail_delay_opt = FALSE;
comline->clean_start_opt = FALSE;
@@ -571,6 +592,11 @@
comline->post_fail_delay_opt = TRUE;
break;
+ case 'O':
+ comline->override_path = strdup(optarg);
+ comline->override_path_opt = TRUE;
+ break;
+
case 'D':
daemon_debug_opt = TRUE;
break;
--- cluster/fence/fenced/recover.c 2006/10/09 10:52:50 1.25
+++ cluster/fence/fenced/recover.c 2007/01/22 23:15:54 1.26
@@ -13,6 +13,9 @@
#include "fd.h"
#include "ccs.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
extern int our_nodeid;
extern commandline_t comline;
@@ -213,6 +216,79 @@
return num_victims;
}
+static inline void close_override(int *fd, char *path)
+{
+ unlink(path);
+ if (fd && *fd >= 0)
+ close(*fd);
+ *fd = -1;
+}
+
+static int open_override(char *path)
+{
+ int ret;
+ mode_t om;
+
+ om = umask(077);
+ ret = mkfifo(path, (S_IRUSR | S_IWUSR));
+ umask(om);
+
+ if (ret < 0)
+ return -1;
+ return open(path, O_RDONLY | O_NONBLOCK);
+}
+
+static int check_override(int ofd, char *nodename, int timeout)
+{
+ char buf[128];
+ fd_set rfds;
+ struct timeval tv = {0, 0};
+ int ret, x;
+
+ if (ofd < 0 || !nodename || !strlen(nodename)) {
+ sleep(timeout);
+ return 0;
+ }
+
+ FD_ZERO(&rfds);
+ FD_SET(ofd, &rfds);
+ tv.tv_usec = 0;
+ tv.tv_sec = timeout;
+
+ ret = select(ofd + 1, &rfds, NULL, NULL, &tv);
+ if (ret < 0) {
+ syslog(LOG_ERR, "select: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (ret == 0)
+ return 0;
+
+ memset(buf, 0, sizeof(buf));
+ ret = read(ofd, buf, sizeof(buf) - 1);
+ if (ret < 0) {
+ syslog(LOG_ERR, "read: %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* chop off control characters */
+ for (x = 0; x < ret; x++) {
+ if (buf[x] < 0x20) {
+ buf[x] = 0;
+ break;
+ }
+ }
+
+ if (!strcasecmp(nodename, buf)) {
+ /* Case insensitive, but not as nice as, say, name_equal
+ in the other file... */
+ return 1;
+ }
+
+ return 0;
+}
+
+
/* If there are victims after a node has joined, it's a good indication that
they may be joining the cluster shortly. If we delay a bit they might
become members and we can avoid fencing them. This is only really an issue
@@ -283,6 +359,7 @@
fd_node_t *node;
char *master_name;
int master, error, cd;
+ int override = -1;
master = find_master_nodeid(fd, &master_name);
@@ -319,7 +396,22 @@
list_del(&node->list);
free(node);
}
- sleep(5);
+
+ if (!comline.override_path) {
+ sleep(5);
+ continue;
+ }
+
+ /* Check for manual intervention */
+ override = open_override(comline.override_path);
+ if (check_override(override, node->name, 5) > 0) {
+ syslog(LOG_WARNING, "fence \"%s\" overridden by "
+ "administrator intervention", node->name);
+
+ list_del(&node->list);
+ free(node);
+ }
+ close_override(&override, comline.override_path);
}
ccs_disconnect(cd);
More information about the Cluster-devel mailing list