[Linux-cluster] [PATCH 0/4] fence
Bastian Blank
bastian at waldi.eu.org
Fri Feb 18 12:23:31 UTC 2005
On Fri, Feb 18, 2005 at 10:07:27AM +0100, Bastian Blank wrote:
> > Or, I just thought of another method. fence_tool's -w handling could
> > read fenced's unix socket and wait until it sees "finish:". See
> > fence_tool.c:do_monitor(). do_monitor("finish:") would return when it
> > sees a line matching "finish:".
> > We could also use this method to allow "fence_tool leave -w".
> Hmm, let's think about it.
This is the implementation. It looks a bit fancy because it uses several
callbacks to avoid duplicating code.
Bastian
--
Our missions are peaceful -- not for conquest. When we do battle, it
is only because we have no choice.
-- Kirk, "The Squire of Gothos", stardate 2124.5
-------------- next part --------------
diff -urN -x CVS -x debian cvs-patch05-event/fenced/main.c cvs-patch06-wait/fenced/main.c
--- cvs-patch05-event/fenced/main.c 2005-02-18 11:13:48.000000000 +0100
+++ cvs-patch06-wait/fenced/main.c 2005-02-18 12:28:59.000000000 +0100
@@ -23,7 +23,7 @@
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
-#define OPTION_STRING ("cj:f:Dn:hVSwQ")
+#define OPTION_STRING ("cj:f:Dn:hVSw:Q")
#define LOCKFILE_NAME "/var/run/fenced.pid"
diff -urN -x CVS -x debian cvs-patch05-event/fence_tool/fence_tool.c cvs-patch06-wait/fence_tool/fence_tool.c
--- cvs-patch05-event/fence_tool/fence_tool.c 2005-02-17 18:39:30.000000000 +0100
+++ cvs-patch06-wait/fence_tool/fence_tool.c 2005-02-18 13:20:36.000000000 +0100
@@ -11,7 +11,8 @@
*******************************************************************************
******************************************************************************/
-#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
@@ -21,12 +22,15 @@
#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
+#include <sys/poll.h>
#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
#include <sys/un.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
#include <fcntl.h>
-#include <errno.h>
#include <mntent.h>
#include <libgen.h>
@@ -34,7 +38,7 @@
#include "ccs.h"
#include "copyright.cf"
-#define OPTION_STRING ("VhScj:f:DwQ")
+#define OPTION_STRING ("VhScj:f:Dw:Q")
#define LOCKFILE_NAME "/var/run/fenced.pid"
#define FENCED_SOCK_PATH "fenced_socket"
@@ -47,7 +51,7 @@
bool debug = false;
int operation;
bool skip_unfence = false;
-bool child_wait = false;
+int event_wait_timeout = 0;
bool wait_for_quorum = true;
int cl_sock;
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -215,6 +219,136 @@
}
/*
+ * Callback types for fenced_socket.
+ * fenced_socket_receive_callback:
+ * Called for each received event.
+ * Return:
+ * true: Break out of the loop.
+ * false: Keep waiting for further events.
+ * fenced_socket_setup_callback:
+ * Called after the socket setup.
+ * Return:
+ * true: Go further.
+ * false: Signal an error to the caller.
+ */
+typedef bool fenced_socket_receive_callback(const char *buf, void *user_data);
+typedef bool fenced_socket_setup_callback(int fd, void *user_data);
+
+enum fenced_socket_ret {
+ FENCED_SOCKET_ERROR,
+ FENCED_SOCKET_FINISH,
+ FENCED_SOCKET_TIMEOUT,
+ FENCED_SOCKET_SHUTDOWN,
+};
+
+static enum fenced_socket_ret fenced_socket(int timeout, fenced_socket_receive_callback receive_callback, void *receive_user_data, fenced_socket_setup_callback setup_callback, void *setup_user_data)
+{
+ int sfd, error, rv;
+ struct sockaddr_un addr;
+ socklen_t addrlen;
+ char buf[256];
+ struct timeval act, end;
+ struct pollfd fds[] = {
+ { -1, POLLIN, 0 },
+ };
+ enum fenced_socket_ret ret = FENCED_SOCKET_ERROR;
+
+ sfd = socket(AF_LOCAL, SOCK_DGRAM, 0);
+ if (sfd < 0)
+ die("cannot create local socket");
+
+ fds[0].fd = sfd;
+
+ fcntl(sfd, F_SETFD, FD_CLOEXEC);
+ fcntl(sfd, F_SETFL, O_NONBLOCK);
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_LOCAL;
+ strcpy(&addr.sun_path[1], FENCED_SOCK_PATH);
+ addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
+
+ error = bind(sfd, (struct sockaddr *) &addr, addrlen);
+ if (error < 0)
+ die("cannot bind to local socket");
+
+ if (setup_callback)
+ if (!setup_callback(sfd, setup_user_data))
+ goto out;
+
+ if (timeout > 0) {
+ gettimeofday(&end, NULL);
+ end.tv_sec += timeout;
+ }
+
+ while (1) {
+ int t = -1;
+ if (timeout > 0) {
+ gettimeofday(&act, NULL);
+ t = (end.tv_sec - act.tv_sec) * 1000;
+ if (t < 0) {
+ ret = FENCED_SOCKET_TIMEOUT;
+ break;
+ }
+ }
+ rv = poll(fds, 1, t);
+ if (rv < 0)
+ die("poll failed");
+ else if (rv == 0) {
+ ret = FENCED_SOCKET_TIMEOUT;
+ break;
+ }
+ rv = recvfrom(sfd, buf, sizeof(buf) - 1, 0, (struct sockaddr *)&addr, &addrlen);
+ if (rv < 0)
+ die("recv failed");
+ else if (rv == 0) {
+ ret = FENCED_SOCKET_SHUTDOWN;
+ break;
+ }
+ buf[rv] = 0;
+
+ if (receive_callback(buf, receive_user_data)) {
+ ret = FENCED_SOCKET_FINISH;
+ break;
+ }
+ }
+
+out:
+ close(sfd);
+ return ret;
+}
+
+/*
+ * Checks if the received event matches the given one.
+ */
+static fenced_socket_receive_callback event_wait_callback;
+static bool event_wait_callback(const char *buf, void *user_data)
+{
+ const char *event = user_data;
+ while (*buf && isdigit(*buf)) buf++;
+ if (*buf++ != ' ')
+ return false;
+ if (strncmp(buf, "event:", strlen("event:")) == 0)
+ if (strncmp(buf + strlen("event:") + 1, event, strlen (event)) == 0)
+ return true;
+ return false;
+}
+
+/*
+ * Wrapper for fenced_socket, produces correct return values for main.
+ */
+static int event_wait(char *event, fenced_socket_setup_callback setup_callback, void *setup_user_data)
+{
+ enum fenced_socket_ret ret = fenced_socket(event_wait_timeout, event_wait_callback, event, setup_callback, setup_user_data);
+ switch (ret)
+ {
+ case FENCED_SOCKET_FINISH:
+ return EXIT_SUCCESS;
+ default:
+ return EXIT_FAILURE;
+ }
+}
+
+/*
* This is a really lousy way of waiting, which is why I took so long to add
* it. I guess it's better than nothing for a lot of people. The state may
* not be "run" if we've joined but other nodes are joining/leaving.
@@ -254,8 +388,45 @@
return EXIT_SUCCESS;
}
+struct do_join_callback_data
+{
+ int argc;
+ char **argv;
+};
+
+static void do_join_real(struct do_join_callback_data *data)
+{
+ strcpy(data->argv[0], "fenced");
+ data->argv[data->argc - 1] = NULL;
+
+ execvp("fenced", data->argv);
+ die("starting fenced failed");
+}
+
+static fenced_socket_setup_callback do_join_callback;
+static bool do_join_callback(int fd, void *user_data)
+{
+ struct do_join_callback_data *data = user_data;
+
+ pid_t pid = fork();
+ /* parent waits for fenced to join */
+ if (pid > 0) {
+ int status;
+ waitpid(pid, &status, 0);
+ if (WIFEXITED(status) && !WEXITSTATUS(status))
+ return true;
+ return false;
+ }
+
+ do_join_real(data);
+ return false;
+}
+
static int do_join(int argc, char *argv[])
{
+ struct do_join_callback_data data = {
+ argc, argv
+ };
int cd;
setup_sock();
@@ -280,33 +451,36 @@
if (debug)
printf("%s: start fenced\n", prog_name);
- if (!debug && child_wait) {
- int status;
- pid_t pid = fork();
- /* parent waits for fenced to join */
- if (pid > 0) {
- waitpid(pid, &status, 0);
- if (WIFEXITED(status) && !WEXITSTATUS(status))
- do_wait();
- exit(EXIT_SUCCESS);
- }
- /* child execs fenced */
- }
+ if (!debug && event_wait_timeout)
+ return event_wait("join:finish", do_join_callback, &data);
- strcpy(argv[0], "fenced");
- argv[argc - 1] = NULL;
-
- execvp("fenced", argv);
- die("starting fenced failed");
+ do_join_real(&data);
return EXIT_FAILURE;
}
+struct do_leave_callback_data
+{
+ pid_t pid;
+};
+
+static bool do_leave_real(struct do_leave_callback_data *data)
+{
+ return kill(data->pid, SIGTERM) == 0;
+}
+
+static fenced_socket_setup_callback do_leave_callback;
+static bool do_leave_callback(int fd, void *user_data)
+{
+ struct do_leave_callback_data *data = user_data;
+ return do_leave_real(data);
+}
+
static int do_leave(void)
{
FILE *f;
char buf[33] = "";
- int pid = 0;
+ struct do_leave_callback_data data = { 0 };
lockfile();
@@ -316,7 +490,7 @@
if (!f)
die("fenced not running - no file %s", LOCKFILE_NAME);
fgets(buf, 33, f);
- sscanf(buf, "%d", &pid);
+ sscanf(buf, "%d", &data.pid);
fclose(f);
check_mounted();
@@ -327,41 +501,28 @@
close(cl_sock);
- kill(pid, SIGTERM);
+ if (event_wait_timeout)
+ return event_wait("unknown:leavedone", do_leave_callback, &data);
+ return do_leave_real(&data);
+}
- return EXIT_SUCCESS;
+static fenced_socket_receive_callback do_monitor_callback;
+static bool do_monitor_callback(const char *buf, void *user_data)
+{
+ fputs(buf, stdout);
+ return false;
}
static int do_monitor(void)
{
- int sfd, error, rv;
- struct sockaddr_un addr;
- socklen_t addrlen;
- char buf[256];
-
- sfd = socket(AF_LOCAL, SOCK_DGRAM, 0);
- if (sfd < 0)
- die("cannot create local socket");
-
- memset(&addr, 0, sizeof(addr));
- addr.sun_family = AF_LOCAL;
- strcpy(&addr.sun_path[1], FENCED_SOCK_PATH);
- addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
-
- error = bind(sfd, (struct sockaddr *) &addr, addrlen);
- if (error < 0)
- die("cannot bind to local socket");
-
- for (;;) {
- memset(buf, 0, 256);
-
- rv = recvfrom(sfd, buf, 256, 0, (struct sockaddr *)&addr,
- &addrlen);
-
- printf("%s", buf);
+ enum fenced_socket_ret ret = fenced_socket(-1, do_monitor_callback, 0, 0, 0);
+ switch (ret)
+ {
+ case FENCED_SOCKET_SHUTDOWN:
+ return EXIT_SUCCESS;
+ default:
+ return EXIT_FAILURE;
}
-
- return EXIT_SUCCESS;
}
static void print_usage(void)
@@ -376,7 +537,7 @@
printf(" wait Wait for node to be member of default fence domain\n");
printf("\n");
printf("Options:\n");
- printf(" -w Wait for join to complete\n");
+ printf(" -w <secs> Wait for join or leave to complete\n");
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -S Skip self unfencing on join\n");
@@ -422,7 +583,7 @@
break;
case 'w':
- child_wait = true;
+ event_wait_timeout = atoi(optarg);
break;
case 'Q':
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/linux-cluster/attachments/20050218/caed4227/attachment.sig>
More information about the Linux-cluster
mailing list