[Cluster-devel] [patch] cman: Added checkquorum script for self fencing

Chris Feist cfeist at redhat.com
Tue Feb 1 21:13:11 UTC 2011


cman: Added checkquorum script for self fencing

A checkquorum script has been added which, when copied to the
/etc/watchdog.d directory, will cause the node to reboot
itself if it has lost quorum for ~60 seconds.

Resolves: rhbz#560700
---
 cman/Makefile            |    2 +-
 cman/man/Makefile        |    3 +-
 cman/man/checkquorum.8   |   29 ++++++++++++++
 cman/scripts/Makefile    |   10 +++++
 cman/scripts/checkquorum |   97 ++++++++++++++++++++++++++++++++++++++++++++++
 make/install.mk          |    4 ++
 6 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/cman/Makefile b/cman/Makefile
index ead0baa..1cf8bc9 100644
--- a/cman/Makefile
+++ b/cman/Makefile
@@ -1,4 +1,4 @@
 include ../make/defines.mk
 include $(OBJDIR)/make/passthrough.mk
 
-SUBDIRS=lib cman_tool daemon qdisk notifyd init.d man
+SUBDIRS=lib cman_tool daemon qdisk notifyd init.d man scripts
diff --git a/cman/man/Makefile b/cman/man/Makefile
index df20abb..f7fbebf 100644
--- a/cman/man/Makefile
+++ b/cman/man/Makefile
@@ -5,7 +5,8 @@ MANTARGET= \
 	qdiskd.8 \
 	mkqdisk.8 \
 	cmannotifyd.8 \
-	cman_notify.8
+	cman_notify.8 \
+	checkquorum.8
 
 include ../../make/defines.mk
 include $(OBJDIR)/make/install.mk
diff --git a/cman/man/checkquorum.8 b/cman/man/checkquorum.8
new file mode 100644
index 0000000..96f61f0
--- /dev/null
+++ b/cman/man/checkquorum.8
@@ -0,0 +1,29 @@
+.TH "checkquorum" "8" "February 2011" "" "Check Quorum Watchdog Script"
+.SH "NAME"
+checkquorum \- Check Quorum Watchdog Script
+.SH "SYNOPSIS"
+\fBcheckquorum\fP
+.SH "DESCRIPTION"
+.PP 
+The \fBcheckquorum\fP watchdog script, when copied to the
+.IR /etc/watchdog.d
+directory and after enabling/starting the watchdog daemon causes the node to reboot if quorum is
+lost and not regained within a user configurable amount of time (default: 60 seconds).
+.SH "OPTIONS"
+The checkquorum script includes several options which can be set by editing
+the script with a text editor.
+.TP
+.BR $wait_time
+Amount of time in seconds to wait after quorum is lost before triggering a reboot
+(Default: 60 seconds).
+.TP
+.BR $hardreboot
+Instantly reboot the machine without cleanly shutting down the system.
+Useful when the machine may hang on reboot.  Set to 1 to hard reboot the
+system, 0 to do a normal reboot.
+.SH "NOTES"
+\fBcheckquorum\fP should never be called outside of watchdog except for
+debugging purposes.
+
+.SH "SEE ALSO"
+watchdog(8)
diff --git a/cman/scripts/Makefile b/cman/scripts/Makefile
new file mode 100644
index 0000000..b4866c8
--- /dev/null
+++ b/cman/scripts/Makefile
@@ -0,0 +1,10 @@
+SHAREDIRTEX=checkquorum
+
+include ../../make/defines.mk
+include $(OBJDIR)/make/clean.mk
+include $(OBJDIR)/make/install.mk
+include $(OBJDIR)/make/uninstall.mk
+
+all:
+
+clean: generalclean
diff --git a/cman/scripts/checkquorum b/cman/scripts/checkquorum
new file mode 100755
index 0000000..43cbc6d
--- /dev/null
+++ b/cman/scripts/checkquorum
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+# Quorum detection watchdog script
+#
+# This script will return -2 if the node had quorum at one point
+# and then subsequently lost it
+#
+# Copyright 2011 Red Hat, Inc.
+
+# Seconds quorum must stay lost before self_fence() is called (script fails)
+$wait_time = 60;
+
+# 1: force an immediate sysrq reboot (no clean shutdown); 0: only exit -2
+$hardreboot = 0;
+
+# File holding the time() at which the loss of quorum was first noticed
+$timerfile = "/var/run/cluster/checkquorum-timer";
+
+# Enable debug messages (0 to disable, 1 to enable)
+$debugval = 0;
+
+# If command is called attempting to 'repair' we automatically fail
+if (@ARGV && $ARGV[0] eq "repair") {
+  debug("Failing on repair\n");
+  exit 1;
+}
+# Fence only if quorum was formed earlier and has been lost for $wait_time.
+if (quorum()) {
+  debug("Quorum exists, no self-fencing should occur.\n");
+  unlink($timerfile);
+} elsif (!has_quorum_already_been_formed()) {
+  debug("This is a new startup no self-fencing will occur.\n");
+  unlink($timerfile);
+} else {
+  debug("Quorum has already existed, node can be self fenced!\n");
+  # A missing or garbled timer leaves $remaining undef: restart the countdown.
+  my $remaining;
+  if (open(my $timer_in, '<', $timerfile)) {
+    $remaining = $wait_time - (time() - $1) if ((scalar(<$timer_in>) || '') =~ /^(\d+)\s*$/);
+    close($timer_in);
+  }
+  if (!defined($remaining)) {
+    debug("Creating timer file...\n");
+    if (open(my $timer_out, '>', $timerfile)) {
+      print {$timer_out} time();
+      close($timer_out);
+    } else {
+      warn "checkquorum: cannot create $timerfile: $!\n";
+    }
+  } elsif ($remaining <= 0) {
+    self_fence();
+  } else {
+    debug("Time has not exceeded wait time ($remaining seconds remaining).\n");
+  }
+}
+
+# Return 1 when the operational_entered value reported by corosync-objctl is
+# greater than 1, else 0.  Exits with status 0 if corosync cannot be reached.
+sub has_quorum_already_been_formed {
+   my $oe = `/usr/sbin/corosync-objctl 2>&1 | grep -E "runtime.totem.pg.mrp.srp.operational_entered|Could not initialize objdb library|Cannot connect to quorum service" `;
+   if ($oe =~ /^Could not/ || $oe =~ /^Cannot/) {
+      debug("corosync is not running\n");
+      exit 0;
+   }
+   # Pull the number out explicitly: empty or unexpected output now means
+   # "not formed" instead of a non-numeric comparison (a warning under -w).
+   return 0 unless ($oe =~ /operational_entered\s*=\s*(\d+)/);
+   return ($1 > 1) ? 1 : 0;
+}
+
+# Return 1 when corosync-quorumtool reports "Quorate: Yes", 0 otherwise.
+# Exits the script with status 0 when the quorum service cannot be reached.
+sub quorum {
+  my $status = `corosync-quorumtool -s 2>&1 | grep -E "Quorate:|Cannot connect to quorum service"`;
+  if ($status =~ /Cannot connect to quorum service/) {
+    debug("corosync is not running\n");
+    exit 0;
+  }
+  # Match the value directly so stray whitespace cannot defeat the comparison.
+  return ($status =~ /^Quorate:\s*Yes\s*$/m) ? 1 : 0;
+}
+
+# Drop the timer file, optionally force a sysrq reboot, then exit with -2.
+sub self_fence {
+  debug("Self fencing commencing...\n");
+  `rm -f $timerfile`;
+  exit -2 unless ($hardreboot == 1);
+  `echo 1 > /proc/sys/kernel/sysrq`;
+  `echo b > /proc/sysrq-trigger`;
+  exit -2;
+}
+
+# Print the message passed as the last argument, only if $debugval is true.
+sub debug {
+  my $message = pop(@_);
+  return unless ($debugval);
+  print $message;
+}
diff --git a/make/install.mk b/make/install.mk
index 3f23bca..fa6ac92 100644
--- a/make/install.mk
+++ b/make/install.mk
@@ -66,6 +66,10 @@ ifdef PKGCONF
 	install -d ${pkgconfigdir}
 	install -m644 ${PKGCONF} ${pkgconfigdir}
 endif
+ifdef SHAREDIRTEX
+	install -d ${sharedir}
+	install -m755 ${SHAREDIRTEX} ${sharedir}
+endif
 ifdef SHAREDIRT
 	install -d ${sharedir}
 	install -m644 ${SHAREDIRT} ${sharedir}




More information about the Cluster-devel mailing list