[Cluster-devel] cluster/fence/fence_tool fence_tool.c
rpeterso at sourceware.org
rpeterso at sourceware.org
Tue Jan 23 16:54:09 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: rpeterso at sourceware.org 2007-01-23 16:54:09
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Resolves: bz 222933: regression: fence_tool no longer times out
after 300 seconds
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23&r2=1.23.2.1
--- cluster/fence/fence_tool/fence_tool.c 2006/10/13 14:57:55 1.23
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:54:09 1.23.2.1
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -29,6 +29,7 @@
#include "ccs.h"
#include "copyright.cf"
+#include "libcman.h"
#include "libgroup.h"
#ifndef TRUE
@@ -36,7 +37,7 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:w")
+#define OPTION_STRING ("Vhcj:f:t:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
@@ -57,7 +58,10 @@
char *prog_name;
int operation;
int child_wait = FALSE;
+int quorum_wait = TRUE; /* cleared by -Q: fail at once instead of waiting for quorum */
int fenced_start_timeout = 300; /* five minutes */
+/*
+ * Set from the SIGALRM handler and polled by check_quorum().  It has to be
+ * volatile sig_atomic_t so that the asynchronous store is well defined and
+ * the polling loop re-reads the flag on every pass.
+ */
+volatile sig_atomic_t signalled = 0;
+cman_handle_t ch; /* libcman handle used for the quorum query */
static int get_int_arg(char argopt, char *arg)
{
@@ -97,6 +101,11 @@
return 0;
}
+static void sigalarm_handler(int sig) /* installed for SIGALRM; sig is unused */
+{
+ signalled = 1; /* only record the signal; check_quorum() polls this flag */
+}
+
int fenced_connect(void)
{
struct sockaddr_un sun;
@@ -135,6 +144,50 @@
return gdata.member;
}
+/*
+ * We wait for the cluster to be quorate in this program because it's easy to
+ * kill this program if we want to quit waiting. If we just started fenced
+ * without waiting for quorum, fenced's join would then wait for quorum in SM
+ * but we can't kill/cancel it at that point -- we have to wait for it to
+ * complete.
+ *
+ * A second reason to wait for quorum is that the unfencing step involves
+ * cluster.conf lookups through ccs, but ccsd may wait for the cluster to be
+ * quorate before responding to the lookups. There wouldn't be a problem
+ * blocking there per se, but it's cleaner I think to just wait here first.
+ *
+ * In the case where we're leaving, we want to wait for quorum because if we go
+ * ahead and shut down fenced, the fence domain leave will block in SM where it
+ * will wait for quorum before the leave can be processed. We can't
+ * kill/cancel the leave at that point, but we can if we're waiting here.
+ *
+ * Waiting here doesn't guarantee we won't end up blocking in SM on the join or
+ * leave, but it avoids it in some common cases which can be helpful. (Quorum
+ * could easily be lost between the time we wait for it here and then begin the
+ * join/leave process.)
+ */
+
+/*
+ * check_quorum - poll cman once a second until the cluster is quorate.
+ *
+ * Returns TRUE as soon as cman_is_quorate() reports quorum.  Returns FALSE
+ * with errno set to ETIMEDOUT when 'signalled' is set (by the SIGALRM
+ * handler) before quorum is seen, and FALSE with errno cleared when
+ * quorum_wait is off (-Q) and the cluster is not quorate.  The caller tells
+ * the two failures apart by testing errno, so the no-wait path must not
+ * leave a stale value behind.
+ */
+static int check_quorum(void)
+{
+	int rv, secs = 0;
+
+	while (!signalled) {
+		/*
+		 * NOTE(review): any non-zero result is treated as quorate,
+		 * which would include a negative error return if libcman
+		 * produces one -- confirm against libcman before tightening
+		 * this to rv > 0.
+		 */
+		rv = cman_is_quorate(ch);
+		if (rv)
+			return TRUE;
+
+		if (!quorum_wait) {
+			errno = 0;
+			return FALSE;
+		}
+
+		sleep(1);
+
+		/* remind the user every ten seconds, unless the alarm just fired */
+		if (!signalled && !(++secs % 10))
+			printf("%s: waiting for cluster quorum\n", prog_name);
+	}
+
+	errno = ETIMEDOUT;
+	return FALSE;
+}
+
static int do_wait(int joining)
{
int i;
@@ -156,6 +209,22 @@
int i, fd, rv;
char buf[MAXLINE];
+ ch = cman_init(NULL);
+
+ if (fenced_start_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_start_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ cman_finish(ch);
+ return EXIT_FAILURE;
+ }
+ cman_finish(ch);
+
i = 0;
do {
sleep(1);
@@ -253,6 +322,7 @@
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -t Maximum time in seconds to wait\n");
+ printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
printf("Fenced options:\n");
printf(" these are passed on to fenced when it's started\n");
@@ -284,6 +354,10 @@
exit(EXIT_SUCCESS);
break;
+ case 'Q':
+ quorum_wait = FALSE;
+ break;
+
case 'w':
child_wait = TRUE;
break;
More information about the Cluster-devel
mailing list