[Linux-cluster] [PATCH 0/4] fence

Bastian Blank bastian at waldi.eu.org
Fri Feb 18 12:23:31 UTC 2005


On Fri, Feb 18, 2005 at 10:07:27AM +0100, Bastian Blank wrote:
> > Or, I just thought of another method.  fence_tool's -w handling could
> > read fenced's unix socket and wait until it sees "finish:".  See
> > fence_tool.c:do_monitor().  do_monitor("finish:") would return when it
> > sees a line matching "finish:".
> > We could also use this method to allow "fence_tool leave -w".
> Hmm, let's think about it.

This is the implementation. It looks a bit fancy because it uses several
callbacks to avoid duplicating code.

Bastian

-- 
Our missions are peaceful -- not for conquest.  When we do battle, it
is only because we have no choice.
		-- Kirk, "The Squire of Gothos", stardate 2124.5
-------------- next part --------------
diff -urN -x CVS -x debian cvs-patch05-event/fenced/main.c cvs-patch06-wait/fenced/main.c
--- cvs-patch05-event/fenced/main.c	2005-02-18 11:13:48.000000000 +0100
+++ cvs-patch06-wait/fenced/main.c	2005-02-18 12:28:59.000000000 +0100
@@ -23,7 +23,7 @@
 char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
 
 
-#define OPTION_STRING			("cj:f:Dn:hVSwQ")
+#define OPTION_STRING			("cj:f:Dn:hVSw:Q")
 #define LOCKFILE_NAME			"/var/run/fenced.pid"
 
 
diff -urN -x CVS -x debian cvs-patch05-event/fence_tool/fence_tool.c cvs-patch06-wait/fence_tool/fence_tool.c
--- cvs-patch05-event/fence_tool/fence_tool.c	2005-02-17 18:39:30.000000000 +0100
+++ cvs-patch06-wait/fence_tool/fence_tool.c	2005-02-18 13:20:36.000000000 +0100
@@ -11,7 +11,8 @@
 *******************************************************************************
 ******************************************************************************/
 
-#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
@@ -21,12 +22,15 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
+#include <sys/poll.h>
 #include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
 #include <sys/un.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
 #include <fcntl.h>
-#include <errno.h>
 #include <mntent.h>
 #include <libgen.h>
 
@@ -34,7 +38,7 @@
 #include "ccs.h"
 #include "copyright.cf"
 
-#define OPTION_STRING			("VhScj:f:DwQ")
+#define OPTION_STRING			("VhScj:f:Dw:Q")
 #define LOCKFILE_NAME                   "/var/run/fenced.pid"
 #define FENCED_SOCK_PATH                "fenced_socket"
 
@@ -47,7 +51,7 @@
 bool debug = false;
 int operation;
 bool skip_unfence = false;
-bool child_wait = false;
+int event_wait_timeout = 0;
 bool wait_for_quorum = true;
 int cl_sock;
 char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -215,6 +219,136 @@
 }
 
 /*
+ * Callback types for fenced_socket.
+ * fenced_socket_receive_callback:
+ *   Called for each received event.
+ *   Return:
+ *     true: Break out of the receive loop.
+ *     false: Keep waiting for further events.
+ * fenced_socket_setup_callback:
+ *   Called after the socket setup.
+ *   Return:
+ *     true: Proceed to the receive loop.
+ *     false: Signal an error to the caller.
+ */
+typedef bool fenced_socket_receive_callback(const char *buf, void *user_data);
+typedef bool fenced_socket_setup_callback(int fd, void *user_data);
+
+enum fenced_socket_ret {
+	FENCED_SOCKET_ERROR,
+	FENCED_SOCKET_FINISH,
+	FENCED_SOCKET_TIMEOUT,
+	FENCED_SOCKET_SHUTDOWN,
+};
+
+static enum fenced_socket_ret fenced_socket(int timeout, fenced_socket_receive_callback receive_callback, void *receive_user_data, fenced_socket_setup_callback setup_callback, void *setup_user_data)
+{
+	int sfd, error, rv;
+	struct sockaddr_un addr;
+	socklen_t addrlen;
+	char buf[256];
+	struct timeval act, end;
+	struct pollfd fds[] = {
+		{ -1, POLLIN, 0 },
+	};
+	enum fenced_socket_ret ret = FENCED_SOCKET_ERROR;
+
+	sfd = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (sfd < 0)
+		die("cannot create local socket");
+
+	fds[0].fd = sfd;
+
+	fcntl(sfd, F_SETFD, FD_CLOEXEC);
+	fcntl(sfd, F_SETFL, O_NONBLOCK);
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_LOCAL;
+	strcpy(&addr.sun_path[1], FENCED_SOCK_PATH);
+	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
+
+	error = bind(sfd, (struct sockaddr *) &addr, addrlen);
+	if (error < 0)
+		die("cannot bind to local socket");
+
+	if (setup_callback)
+		if (!setup_callback(sfd, setup_user_data))
+			goto out;
+
+	if (timeout > 0) {
+		gettimeofday(&end, NULL);
+		end.tv_sec += timeout;
+	}
+
+	while (1) {
+		int t = -1;
+		if (timeout > 0) {
+			gettimeofday(&act, NULL);
+			t = (end.tv_sec - act.tv_sec) * 1000;
+			if (t < 0) {
+				ret = FENCED_SOCKET_TIMEOUT;
+				break;
+			}
+		}
+		rv = poll(fds, 1, t);
+		if (rv < 0)
+			die("poll failed");
+		else if (rv == 0) {
+			ret = FENCED_SOCKET_TIMEOUT;
+			break;
+		}
+		rv = recvfrom(sfd, buf, sizeof(buf) - 1, 0, (struct sockaddr *)&addr, &addrlen);
+		if (rv < 0)
+			die("recv failed");
+		else if (rv == 0) {
+			ret = FENCED_SOCKET_SHUTDOWN;
+			break;
+		}
+		buf[rv] = 0;
+
+		if (receive_callback(buf, receive_user_data)) {
+			ret = FENCED_SOCKET_FINISH;
+			break;
+		}
+	}
+
+out:
+	close(sfd);
+	return ret;
+}
+
+/*
+ * Checks if the received event matches the given one.
+ */
+static fenced_socket_receive_callback event_wait_callback;
+static bool event_wait_callback(const char *buf, void *user_data)
+{
+	const char *event = user_data;
+	while (*buf && isdigit(*buf)) buf++;
+	if (*buf++ != ' ')
+		return false;
+	if (strncmp(buf, "event:", strlen("event:")) == 0)
+		if (strncmp(buf + strlen("event:") + 1, event, strlen (event)) == 0)
+			return true;
+	return false;
+}
+
+/*
+ * Wrapper for fenced_socket; maps its result to an exit status for main.
+ */
+static int event_wait(char *event, fenced_socket_setup_callback setup_callback, void *setup_user_data)
+{
+	enum fenced_socket_ret ret = fenced_socket(event_wait_timeout, event_wait_callback, event, setup_callback, setup_user_data);
+	switch (ret)
+	{
+	case FENCED_SOCKET_FINISH:
+		return EXIT_SUCCESS;
+	default:
+		return EXIT_FAILURE;
+	}
+}
+
+/*
  * This is a really lousy way of waiting, which is why I took so long to add
  * it.  I guess it's better than nothing for a lot of people.  The state may
  * not be "run" if we've joined but other nodes are joining/leaving.
@@ -254,8 +388,45 @@
 	return EXIT_SUCCESS;
 }
 
+struct do_join_callback_data
+{
+	int argc;
+	char **argv;
+};
+
+static void do_join_real(struct do_join_callback_data *data)
+{
+	strcpy(data->argv[0], "fenced");
+	data->argv[data->argc - 1] = NULL;
+
+	execvp("fenced", data->argv);
+	die("starting fenced failed");
+}
+
+static fenced_socket_setup_callback do_join_callback;
+static bool do_join_callback(int fd, void *user_data)
+{
+	struct do_join_callback_data *data = user_data;
+
+	pid_t pid = fork();
+	/* parent waits for fenced to join */
+	if (pid > 0) {
+		int status;
+		waitpid(pid, &status, 0);
+		if (WIFEXITED(status) && !WEXITSTATUS(status))
+			return true;
+		return false;
+	}
+
+	do_join_real(data);
+	return false;
+}
+
 static int do_join(int argc, char *argv[])
 {
+	struct do_join_callback_data data = {
+		argc, argv
+	};
 	int cd;
 
 	setup_sock();
@@ -280,33 +451,36 @@
 	if (debug)
 		printf("%s: start fenced\n", prog_name);
 
-	if (!debug && child_wait) {
-		int status;
-		pid_t pid = fork();
-		/* parent waits for fenced to join */
-		if (pid > 0) {
-			waitpid(pid, &status, 0);
-			if (WIFEXITED(status) && !WEXITSTATUS(status))
-				do_wait();
-			exit(EXIT_SUCCESS);
-		}
-		/* child execs fenced */
-	}
+	if (!debug && event_wait_timeout)
+		return event_wait("join:finish", do_join_callback, &data);
 
-	strcpy(argv[0], "fenced");
-	argv[argc - 1] = NULL;
-
-	execvp("fenced", argv);
-	die("starting fenced failed");
+	do_join_real(&data);
 
 	return EXIT_FAILURE;
 }
 
+struct do_leave_callback_data
+{
+	pid_t pid;
+};
+
+static bool do_leave_real(struct do_leave_callback_data *data)
+{
+	return kill(data->pid, SIGTERM) == 0;
+}
+
+static fenced_socket_setup_callback do_leave_callback;
+static bool do_leave_callback(int fd, void *user_data)
+{
+	struct do_leave_callback_data *data = user_data;
+	return do_leave_real(data);
+}
+
 static int do_leave(void)
 {
 	FILE *f;
 	char buf[33] = "";
-	int pid = 0;
+	struct do_leave_callback_data data = { 0 };
 
 	lockfile();
 
@@ -316,7 +490,7 @@
 	if (!f)
 		die("fenced not running - no file %s", LOCKFILE_NAME);
 	fgets(buf, 33, f);
-	sscanf(buf, "%d", &pid);
+	sscanf(buf, "%d", &data.pid);
 	fclose(f);
 
 	check_mounted();
@@ -327,41 +501,28 @@
 
 	close(cl_sock);
 
-	kill(pid, SIGTERM);
+	if (event_wait_timeout)
+		return event_wait("unknown:leavedone", do_leave_callback, &data);
+	return do_leave_real(&data);
+}
 
-	return EXIT_SUCCESS;
+static fenced_socket_receive_callback do_monitor_callback;
+static bool do_monitor_callback(const char *buf, void *user_data)
+{
+	fputs(buf, stdout);
+	return false;
 }
 
 static int do_monitor(void)
 {
-	int sfd, error, rv;
-	struct sockaddr_un addr;
-	socklen_t addrlen;
-	char buf[256];
-
-	sfd = socket(AF_LOCAL, SOCK_DGRAM, 0);
-	if (sfd < 0)
-		die("cannot create local socket");
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sun_family = AF_LOCAL;
-	strcpy(&addr.sun_path[1], FENCED_SOCK_PATH);
-	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
-
-	error = bind(sfd, (struct sockaddr *) &addr, addrlen);
-	if (error < 0)
-		die("cannot bind to local socket");
-
-	for (;;) {
-		memset(buf, 0, 256);
-
-		rv = recvfrom(sfd, buf, 256, 0, (struct sockaddr *)&addr,
-			      &addrlen);
-
-		printf("%s", buf);
+	enum fenced_socket_ret ret = fenced_socket(-1, do_monitor_callback, 0, 0, 0);
+	switch (ret)
+	{
+	case FENCED_SOCKET_SHUTDOWN:
+		return EXIT_SUCCESS;
+	default:
+		return EXIT_FAILURE;
 	}
-
-	return EXIT_SUCCESS;
 }
 
 static void print_usage(void)
@@ -376,7 +537,7 @@
 	printf("  wait             Wait for node to be member of default fence domain\n");
 	printf("\n");
 	printf("Options:\n");
-	printf("  -w               Wait for join to complete\n");
+	printf("  -w <secs>        Wait for join or leave to complete\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("  -h               Print this help, then exit\n");
 	printf("  -S               Skip self unfencing on join\n");
@@ -422,7 +583,7 @@
 			break;
 
 		case 'w':
-			child_wait = true;
+			event_wait_timeout = atoi(optarg);
 			break;
 
 		case 'Q':
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/linux-cluster/attachments/20050218/caed4227/attachment.sig>


More information about the Linux-cluster mailing list