[Linux-cluster] [PATCH 4/4] fence - Wait for the join complete event in fenced

Bastian Blank bastian at waldi.eu.org
Thu Feb 17 18:35:54 UTC 2005


This patch changes the -w option to wait for the join finish event in
fenced. It uses a pipe and waits for its hangup event. It currently lacks
a timeout, which is needed to make it usable in init scripts without
risking a stalled startup.

Bastian

-- 
Wait!  You have not been prepared!
		-- Mr. Atoz, "Tomorrow is Yesterday", stardate 3113.2
-------------- next part --------------
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/fd.h cvs-patch04-wait/fenced/fd.h
--- cvs-patch03-quorum/fenced/fd.h	2005-02-17 18:43:36.000000000 +0100
+++ cvs-patch04-wait/fenced/fd.h	2005-02-17 19:05:48.000000000 +0100
@@ -136,6 +136,7 @@
 	bool post_join_delay_opt;
 	bool post_fail_delay_opt;
 	bool clean_start_opt;
+	bool wait_opt;
 };
 
 #define FDFL_RUN        (0)
@@ -145,6 +146,7 @@
 struct fd {
 	struct commandline	*comline;
 	int			cl_sock;
+	int			wait_fds[2];
 	uint32_t 		our_nodeid;
 	uint32_t 		local_id;	/* local unique fd ID */
 	uint32_t 		global_id;	/* global unique fd ID */
@@ -152,6 +154,7 @@
 	int 			last_stop;
 	int 			last_start;
 	int 			last_finish;
+	int 			last_start_type;
 
 	bool			first_recovery;
 	int 			prev_count;
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/main.c cvs-patch04-wait/fenced/main.c
--- cvs-patch03-quorum/fenced/main.c	2005-02-17 18:34:17.000000000 +0100
+++ cvs-patch04-wait/fenced/main.c	2005-02-17 19:05:04.000000000 +0100
@@ -15,6 +15,7 @@
 #include "ccs.h"
 #include "copyright.cf"
 
+#include <sys/poll.h>
 
 /* static pthread_t recv_thread; */
 static int quit;
@@ -239,6 +240,7 @@
 
 	if (ev->type == SERVICE_EVENT_START) {
 		fd->last_start = ev->event_id;
+		fd->last_start_type = ev->start_type;
 
 		/* space for two extra to be sure it's not too small */
 		n = ev->node_count + 2;
@@ -276,6 +278,11 @@
 	else if (ev->type == SERVICE_EVENT_FINISH) {
 		fd->last_finish = ev->event_id;
 		do_recovery_done(fd);
+		/* Report successful join to parent */
+		if (fd->wait_fds[1] != -1 && fd->last_start_type == SERVICE_START_JOIN) {
+			close(fd->wait_fds[1]);
+			fd->wait_fds[1] = -1;
+		}
 	}
 }
 
@@ -518,6 +525,11 @@
 	INIT_LIST_HEAD(&fd->leaving);
 	INIT_LIST_HEAD(&fd->complete);
 
+	if (!comline->wait_opt)
+		fd->wait_fds[0] = fd->wait_fds[1] = -1;
+	else if (pipe(fd->wait_fds) == -1)
+		die("can't allocate pipe");
+
 	return fd;
 }
 
@@ -559,6 +571,10 @@
 			strncpy(comline->name, optarg, MAX_NAME_LEN);
 			break;
 
+		case 'w':
+			comline->wait_opt = true;
+			break;
+
 		case 'h':
 			print_usage();
 			exit(EXIT_SUCCESS);
@@ -572,7 +588,6 @@
 			break;
 
 		case 'S':
-		case 'w':
 		case 'Q':
 			/* do nothing, this is a fence_tool option that
 			   we ignore when fence_tool starts us */
@@ -639,8 +654,22 @@
 			perror("main: cannot fork");
 			exit(EXIT_FAILURE);
 		}
-		if (pid)
-			exit(EXIT_SUCCESS);
+		else if (pid)
+		{
+			if (comline.wait_opt)
+			{
+				close(fd->wait_fds[1]);
+				struct pollfd fds[] = {
+					{ fd->wait_fds[0], POLLIN, 0 }
+				};
+				int ret = poll(fds, 1, -1);
+				if (ret < 0)
+					return EXIT_FAILURE;
+			}
+			return EXIT_SUCCESS;
+		}
+		if (comline.wait_opt)
+			close(fd->wait_fds[0]);
 		setsid();
 		chdir("/");
 		umask(0);
diff -urN -x CVS -x debian cvs-patch03-quorum/fence_tool/fence_tool.c cvs-patch04-wait/fence_tool/fence_tool.c
--- cvs-patch03-quorum/fence_tool/fence_tool.c	2005-02-17 18:39:30.000000000 +0100
+++ cvs-patch04-wait/fence_tool/fence_tool.c	2005-02-17 18:41:39.000000000 +0100
@@ -47,7 +47,6 @@
 bool debug = false;
 int operation;
 bool skip_unfence = false;
-bool child_wait = false;
 bool wait_for_quorum = true;
 int cl_sock;
 char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -280,19 +279,6 @@
 	if (debug)
 		printf("%s: start fenced\n", prog_name);
 
-	if (!debug && child_wait) {
-		int status;
-		pid_t pid = fork();
-		/* parent waits for fenced to join */
-		if (pid > 0) {
-			waitpid(pid, &status, 0);
-			if (WIFEXITED(status) && !WEXITSTATUS(status))
-				do_wait();
-			exit(EXIT_SUCCESS);
-		}
-		/* child execs fenced */
-	}
-
 	strcpy(argv[0], "fenced");
 	argv[argc - 1] = NULL;
 
@@ -376,7 +362,6 @@
 	printf("  wait             Wait for node to be member of default fence domain\n");
 	printf("\n");
 	printf("Options:\n");
-	printf("  -w               Wait for join to complete\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("  -h               Print this help, then exit\n");
 	printf("  -S               Skip self unfencing on join\n");
@@ -388,6 +373,7 @@
 	printf("  -c               All nodes are in a clean state to start\n");
 	printf("  -j <secs>        Post-join fencing delay\n");
 	printf("  -f <secs>        Post-fail fencing delay\n");
+	printf("  -w               Wait for join to complete\n");
 	printf("\n");
 }
 
@@ -421,10 +407,6 @@
 			debug = true;
 			break;
 
-		case 'w':
-			child_wait = true;
-			break;
-
 		case 'Q':
 			wait_for_quorum = false;
 			break;
@@ -440,8 +422,9 @@
 			break;
 
 		case 'c':
-		case 'j':
 		case 'f':
+		case 'j':
+		case 'w':
 			/* Do nothing, just pass these options on to fenced */
 			break;
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/linux-cluster/attachments/20050217/94f9479d/attachment.sig>


More information about the Linux-cluster mailing list