[Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id.
sbradley at redhat.com
sbradley at redhat.com
Thu Jan 31 14:41:30 UTC 2013
From: Shane Bradley <sbradley at redhat.com>
The status of the cluster nodes is now captured and written to a file, using the
command that matches the installed cluster stack: cman_tool nodes or
corosync-quorumtool -l. Two new configuration variables, NODE_NAME and NODE_ID
(the clusternode name and id), are appended to hostinformation.txt.
Signed-off-by: Shane Bradley <sbradley at redhat.com>
---
gfs2/scripts/gfs2_lockcapture | 102 +++++++++++++++++++++++++++++++-----------
1 file changed, 76 insertions(+), 26 deletions(-)
diff --git a/gfs2/scripts/gfs2_lockcapture b/gfs2/scripts/gfs2_lockcapture
index 2b3421c..6a63fc8 100644
--- a/gfs2/scripts/gfs2_lockcapture
+++ b/gfs2/scripts/gfs2_lockcapture
@@ -45,12 +45,15 @@ class ClusterNode:
"""
This class represents a cluster node that is a current memeber in a cluster.
"""
- def __init__(self, clusternodeName, clusterName, mapOfMountedFilesystemLabels):
+ def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
"""
@param clusternodeName: The name of the cluster node.
@type clusternodeName: String
@param clusterName: The name of the cluster that this cluster node is a
member of.
+ @param clusternodeID: The id of the cluster node.
+ @type clusternodeID: Int
+ @param clusterName: The name of the cluster that this cluster node is a
@type clusterName: String
@param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
a mounted filesystem. The value is the line for the matching mounted
@@ -58,6 +61,7 @@ class ClusterNode:
@type mapOfMountedFilesystemLabels: Dict
"""
self.__clusternodeName = clusternodeName
+ self.__clusternodeID = clusternodeID
self.__clusterName = clusterName
self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels
@@ -69,7 +73,7 @@ class ClusterNode:
@rtype: String
"""
rString = ""
- rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName())
+ rString += "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
fsLabels = self.__mapOfMountedFilesystemLabels.keys()
fsLabels.sort()
for fsLabel in fsLabels:
@@ -85,6 +89,14 @@ class ClusterNode:
"""
return self.__clusternodeName
+ def getClusterNodeID(self):
+ """
+ Returns the id of the cluster node.
+ @return: Returns the id of the cluster node.
+ @rtype: String
+ """
+ return self.__clusternodeID
+
def getClusterName(self):
"""
Returns the name of cluster that this cluster node is a member of.
@@ -539,6 +551,7 @@ def getClusterNode(listOfGFS2Names):
# in the output, else return None.
clusterName = ""
clusternodeName = ""
+ clusternodeID = ""
if (runCommand("which", ["cman_tool"])):
stdout = runCommandOutput("cman_tool", ["status"])
if (not stdout == None):
@@ -550,6 +563,8 @@ def getClusterNode(listOfGFS2Names):
clusterName = line.split("Cluster Name:")[1].strip().rstrip()
if (line.startswith("Node name: ")):
clusternodeName = line.split("Node name:")[1].strip().rstrip()
+ if (line.startswith("Node ID: ")):
+ clusternodeID = line.split("Node ID: ")[1].strip().rstrip()
elif (runCommand("which", ["corosync-cmapctl"])):
# Another way to get the local cluster node is: $ crm_node -i; crm_node -l
# Get the name of the cluster.
@@ -559,14 +574,14 @@ def getClusterNode(listOfGFS2Names):
if (len(stdoutSplit) == 2):
clusterName = stdoutSplit[1].strip().rstrip()
# Get the id of the local cluster node so we can get the clusternode name
- thisNodeID = ""
+ clusternodeID = ""
stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
if (not stdout == None):
stdoutSplit = stdout.split("=")
if (len(stdoutSplit) == 2):
- thisNodeID = stdoutSplit[1].strip().rstrip()
+ clusternodeID = stdoutSplit[1].strip().rstrip()
# Now that we the nodeid then we can get the clusternode name.
- if (len(thisNodeID) > 0):
+ if (len(clusternodeID) > 0):
stdout = runCommandOutput("corosync-quorumtool", ["-l"])
if (not stdout == None):
for line in stdout.split("\n"):
@@ -588,7 +603,15 @@ def getClusterNode(listOfGFS2Names):
break
if ((not foundMatch) and (mapOfMountedFilesystemLabels.has_key(label))):
del(mapOfMountedFilesystemLabels[label])
- return ClusterNode(clusternodeName, clusterName, mapOfMountedFilesystemLabels)
+ # Cast the node id to an int, and default is 0 if node is not found or
+ # not castable.
+ clusternodeIDInt = 0
+ if (clusternodeID.isalnum()):
+ try:
+ clusternodeIDInt = int(clusternodeID)
+ except(ValueError):
+ pass
+ return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels)
else:
return None
@@ -701,6 +724,28 @@ def gatherGeneralInformation(pathToDSTDir):
message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
logging.getLogger(MAIN_LOGGER_NAME).error(message)
+ # Write the status of all the nodes in the cluster out.
+ if (runCommand("which", ["cman_tool"])):
+ command = "cman_tool"
+ pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool_status")
+ try:
+ fout = open(pathToCommandOutput, "w")
+ runCommand(command, ["status"], standardOut=fout)
+ fout.close()
+ except IOError:
+ message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+ logging.getLogger(MAIN_LOGGER_NAME).error(message)
+ elif (runCommand("which", ["corosync-cmapctl"])):
+ command = "corosync-quorumtool"
+ pathToCommandOutput = os.path.join(pathToDSTDir, "corosync-quorumtool_l")
+ try:
+ fout = open(pathToCommandOutput, "w")
+ runCommand(command, ["-l"], standardOut=fout)
+ fout.close()
+ except IOError:
+ message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+ logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
def isProcPidStackEnabled(pathToPidData):
"""
@@ -1067,26 +1112,6 @@ if __name__ == "__main__":
# script running.
writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
# #######################################################################
- # Verify they want to continue because this script will trigger sysrq events.
- # #######################################################################
- if (not cmdLineOpts.disableQuestions):
- valid = {"yes":True, "y":True, "no":False, "n":False}
- question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
- prompt = " [y/n] "
- while True:
- sys.stdout.write(question + prompt)
- choice = raw_input().lower()
- if (choice in valid):
- if (valid.get(choice)):
- # If yes, or y then exit loop and continue.
- break
- else:
- message = "The script will not continue since you chose not to continue."
- logging.getLogger(MAIN_LOGGER_NAME).error(message)
- exitScript(removePidFile=True, errorCode=1)
- else:
- sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
- # #######################################################################
# Get the clusternode name and verify that mounted GFS2 filesystems were
# found.
# #######################################################################
@@ -1110,6 +1135,26 @@ if __name__ == "__main__":
print clusternode
exitScript()
# #######################################################################
+ # Verify they want to continue because this script will trigger sysrq events.
+ # #######################################################################
+ if (not cmdLineOpts.disableQuestions):
+ valid = {"yes":True, "y":True, "no":False, "n":False}
+ question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
+ prompt = " [y/n] "
+ while True:
+ sys.stdout.write(question + prompt)
+ choice = raw_input().lower()
+ if (choice in valid):
+ if (valid.get(choice)):
+ # If yes, or y then exit loop and continue.
+ break
+ else:
+ message = "The script will not continue since you chose not to continue."
+ logging.getLogger(MAIN_LOGGER_NAME).error(message)
+ exitScript(removePidFile=True, errorCode=1)
+ else:
+ sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
+ # #######################################################################
# Create the output directory to verify it can be created before
# proceeding unless it is already created from a previous run data needs
# to be analyzed. Probably could add more debugging on if file or dir.
@@ -1178,6 +1223,11 @@ if __name__ == "__main__":
message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
gatherGeneralInformation(pathToOutputRunDir)
+ # Write the clusternode name and id to the general information file.
+ writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
+ "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
+ appendToFile=True, createFile=True)
+
# Going to sleep for 2 seconds, so that TIMESTAMP should be in the
# past in the logs so that capturing sysrq data will be guaranteed.
time.sleep(2)
--
1.8.0.2
More information about the Cluster-devel
mailing list