[Cluster-devel] [patch] cman: Added checkquorum script for self fencing
Chris Feist
cfeist at redhat.com
Tue Feb 1 21:13:11 UTC 2011
cman: Added checkquorum script for self fencing
A checkquorum script has been added which when copied to the
/etc/watchdog.d directory will cause the node to reboot
itself if it has lost quorum for ~60 seconds.
Resolves: rhbz#560700
---
cman/Makefile | 2 +-
cman/man/Makefile | 3 +-
cman/man/checkquorum.8 | 29 ++++++++++++++
cman/scripts/Makefile | 10 +++++
cman/scripts/checkquorum | 97 ++++++++++++++++++++++++++++++++++++++++++++++
make/install.mk | 4 ++
6 files changed, 143 insertions(+), 2 deletions(-)
diff --git a/cman/Makefile b/cman/Makefile
index ead0baa..1cf8bc9 100644
--- a/cman/Makefile
+++ b/cman/Makefile
@@ -1,4 +1,4 @@
include ../make/defines.mk
include $(OBJDIR)/make/passthrough.mk
-SUBDIRS=lib cman_tool daemon qdisk notifyd init.d man
+SUBDIRS=lib cman_tool daemon qdisk notifyd init.d man scripts
diff --git a/cman/man/Makefile b/cman/man/Makefile
index df20abb..f7fbebf 100644
--- a/cman/man/Makefile
+++ b/cman/man/Makefile
@@ -5,7 +5,8 @@ MANTARGET= \
qdiskd.8 \
mkqdisk.8 \
cmannotifyd.8 \
- cman_notify.8
+ cman_notify.8 \
+ checkquorum.8
include ../../make/defines.mk
include $(OBJDIR)/make/install.mk
diff --git a/cman/man/checkquorum.8 b/cman/man/checkquorum.8
new file mode 100644
index 0000000..96f61f0
--- /dev/null
+++ b/cman/man/checkquorum.8
@@ -0,0 +1,29 @@
+.TH "checkquorum" "8" "February 2011" "" "Check Quorum Watchdog Script"
+.SH "NAME"
+checkquorum \- Check Quorum Watchdog Script
+.SH "SYNOPSIS"
+\fBcheckquorum\fP
+.SH "DESCRIPTION"
+.PP
+The \fBcheckquorum\fP watchdog script, when copied to the
+.IR /etc/watchdog.d
+directory and after enabling/starting the watchdog daemon causes the node to reboot if quorum is
+lost and not regained within a user configurable amount of time (default: 60 seconds).
+.SH "OPTIONS"
+The checkquorum script includes several options which can be set by editing
+the script with a text editor.
+.TP
+.BR $wait_time
+Amount of time in seconds to wait after quorum is lost before triggering a reboot
+(Default: 60 seconds).
+.TP
+.BR $hardreboot
+Instantly reboot the machine without cleanly shutting down the system.
+Useful when the machine may hang on reboot. Set to 1 to hard reboot the
+system, 0 to do a normal reboot.
+.SH "NOTES"
+\fBcheckquorum\fP should never be called outside of watchdog except for
+debugging purposes.
+
+.SH "SEE ALSO"
+watchdog(8)
diff --git a/cman/scripts/Makefile b/cman/scripts/Makefile
new file mode 100644
index 0000000..b4866c8
--- /dev/null
+++ b/cman/scripts/Makefile
@@ -0,0 +1,10 @@
+SHAREDIRTEX=checkquorum
+# SHAREDIRTEX files are installed into ${sharedir} with mode 755 by make/install.mk
+include ../../make/defines.mk
+include $(OBJDIR)/make/clean.mk
+include $(OBJDIR)/make/install.mk
+include $(OBJDIR)/make/uninstall.mk
+
+all:
+
+clean: generalclean
diff --git a/cman/scripts/checkquorum b/cman/scripts/checkquorum
new file mode 100755
index 0000000..43cbc6d
--- /dev/null
+++ b/cman/scripts/checkquorum
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+# Quorum detection watchdog script
+#
+# This script will exit with -2 if the node had quorum at one point
+# and then lost it for at least $wait_time seconds
+#
+# Copyright 2011 Red Hat, Inc.
+
+# Seconds the node may stay without quorum before this script self fences
+$wait_time = 60;
+
+# Set to 1 to hard reboot through sysrq (doesn't cleanly shut down the system)
+$hardreboot = 0;
+
+# Timer file that stores the time() at which the loss of quorum was first seen
+$timerfile = "/var/run/cluster/checkquorum-timer";
+
+# Enable debug messages (0 to disable, 1 to enable)
+$debugval = 0;
+
+# If called with 'repair' as first argument, fail (exit 1) without any checks
+if (($#ARGV != -1) && ($ARGV[0] eq "repair")) {
+ debug ("Failing on repair\n");
+ exit 1;
+}
+
+# Fence only when quorum was once held and has now been lost for at least $wait_time seconds.
+if (quorum()) {
+    debug("Quorum exists, no self-fencing should occur.\n");
+    unlink($timerfile);
+} elsif (!has_quorum_already_been_formed()) {
+    debug("This is a new startup no self-fencing will occur.\n");
+    unlink($timerfile);
+} else {
+    debug("Quorum has already existed, node can be self fenced!\n");
+    if (-e $timerfile) {
+        my $lost_at;
+        if (open(my $in, '<', $timerfile)) { $lost_at = <$in>; close($in); }
+        # An empty or unreadable timer file must not count as epoch 0 (instant fence): use its mtime.
+        $lost_at = (stat($timerfile))[9] || time() unless (defined($lost_at) && $lost_at =~ /^\d+$/);
+        my $timediff = time() - $lost_at;
+        if ($timediff >= $wait_time) {
+            self_fence();
+        } else {
+            debug("Time has not exceeded wait time (" . ($wait_time - $timediff) . " seconds remaining).\n");
+        }
+    } elsif (open(my $out, '>', $timerfile)) {
+        debug("Creating timer file...\n");
+        print {$out} time();
+        close($out);
+    } else {
+        debug("Unable to create timer file $timerfile: $!\n");
+    }
+}
+
+# Returns 1 when corosync's runtime.totem.pg.mrp.srp.operational_entered counter
+# is greater than 1 (taken to mean quorum was formed earlier), otherwise 0.
+# Exits 0 when corosync-objctl reports that it cannot reach corosync.
+sub has_quorum_already_been_formed {
+    my $oe = `/usr/sbin/corosync-objctl 2>&1 | grep -E "runtime.totem.pg.mrp.srp.operational_entered|Could not initialize objdb library|Cannot connect to quorum service" `;
+    if ($oe =~ /^(?:Could not|Cannot)/m) {
+        debug("corosync is not running\n");
+        exit 0;
+    }
+    # No "key=<digits>" in the output (e.g. the key is absent) counts as never formed.
+    my ($entered) = $oe =~ /=\s*(\d+)/;
+    return (defined($entered) && $entered > 1) ? 1 : 0;
+}
+
+# Returns 1 when corosync-quorumtool reports "Quorate: Yes", 0 otherwise; exits 0 if corosync is down.
+sub quorum {
+    my $status = `corosync-quorumtool -s 2>&1 | grep -E "Quorate:|Cannot connect to quorum service"`;
+    if (index($status, "Cannot connect to quorum service") >= 0) {
+        debug("corosync is not running\n");
+        exit 0;
+    }
+    $status =~ s/Quorate: *//;
+    chomp($status);
+    return ($status eq "Yes") ? 1 : 0;
+}
+
+sub self_fence { # Clears the timer file, optionally hard reboots, then exits with -2
+ debug("Self fencing commencing...\n");
+ `rm -f $timerfile`; # remove the quorum-loss timer file
+ if ($hardreboot == 1) {
+ `echo 1 > /proc/sys/kernel/sysrq`; # enable sysrq requests before triggering one
+ `echo b > /proc/sysrq-trigger`; # sysrq 'b': immediate reboot, no sync or unmount
+ }
+ exit -2; # failure status; presumably seen as 254 by the calling process - confirm
+}
+
+# Emit a diagnostic message on stdout, but only when debugging is
+# switched on through $debugval.
+sub debug {
+    my $message = $_[-1];
+    print $message if ($debugval);
+}
diff --git a/make/install.mk b/make/install.mk
index 3f23bca..fa6ac92 100644
--- a/make/install.mk
+++ b/make/install.mk
@@ -66,6 +66,10 @@ ifdef PKGCONF
install -d ${pkgconfigdir}
install -m644 ${PKGCONF} ${pkgconfigdir}
endif
+ifdef SHAREDIRTEX
+ install -d ${sharedir}
+ install -m755 ${SHAREDIRTEX} ${sharedir}
+endif
ifdef SHAREDIRT
install -d ${sharedir}
install -m644 ${SHAREDIRT} ${sharedir}
More information about the Cluster-devel
mailing list