[Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id.
Steven Whitehouse
swhiteho at redhat.com
Tue Feb 5 13:23:08 UTC 2013
Hi,
Looks ok to me, so long as it does what you want it to do,
Steve.
On Thu, 2013-01-31 at 09:41 -0500, sbradley at redhat.com wrote:
> From: Shane Bradley <sbradley at redhat.com>
>
> The status of the cluster nodes is now captured and written to a file, using
> the command that matches the cluster version: cman_tool nodes or
> corosync-quorumtool -l. Two new configuration variables were also added to
> hostinformation.txt for the clusternode name and id.
>
> Signed-off-by: Shane Bradley <sbradley at redhat.com>
> ---
> gfs2/scripts/gfs2_lockcapture | 102 +++++++++++++++++++++++++++++++-----------
> 1 file changed, 76 insertions(+), 26 deletions(-)
>
> diff --git a/gfs2/scripts/gfs2_lockcapture b/gfs2/scripts/gfs2_lockcapture
> index 2b3421c..6a63fc8 100644
> --- a/gfs2/scripts/gfs2_lockcapture
> +++ b/gfs2/scripts/gfs2_lockcapture
> @@ -45,12 +45,15 @@ class ClusterNode:
> """
> This class represents a cluster node that is a current memeber in a cluster.
> """
> - def __init__(self, clusternodeName, clusterName, mapOfMountedFilesystemLabels):
> + def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
> """
> @param clusternodeName: The name of the cluster node.
> @type clusternodeName: String
> @param clusterName: The name of the cluster that this cluster node is a
> member of.
> + @param clusternodeID: The id of the cluster node.
> + @type clusternodeID: Int
> + @param clusterName: The name of the cluster that this cluster node is a
> @type clusterName: String
> @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
> a mounted filesystem. The value is the line for the matching mounted
> @@ -58,6 +61,7 @@ class ClusterNode:
> @type mapOfMountedFilesystemLabels: Dict
> """
> self.__clusternodeName = clusternodeName
> + self.__clusternodeID = clusternodeID
> self.__clusterName = clusterName
> self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels
>
> @@ -69,7 +73,7 @@ class ClusterNode:
> @rtype: String
> """
> rString = ""
> - rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName())
> + rString += "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
> fsLabels = self.__mapOfMountedFilesystemLabels.keys()
> fsLabels.sort()
> for fsLabel in fsLabels:
> @@ -85,6 +89,14 @@ class ClusterNode:
> """
> return self.__clusternodeName
>
> + def getClusterNodeID(self):
> + """
> + Returns the id of the cluster node.
> + @return: Returns the id of the cluster node.
> + @rtype: Int
> + """
> + return self.__clusternodeID
> +
> def getClusterName(self):
> """
> Returns the name of cluster that this cluster node is a member of.
> @@ -539,6 +551,7 @@ def getClusterNode(listOfGFS2Names):
> # in the output, else return None.
> clusterName = ""
> clusternodeName = ""
> + clusternodeID = ""
> if (runCommand("which", ["cman_tool"])):
> stdout = runCommandOutput("cman_tool", ["status"])
> if (not stdout == None):
> @@ -550,6 +563,8 @@ def getClusterNode(listOfGFS2Names):
> clusterName = line.split("Cluster Name:")[1].strip().rstrip()
> if (line.startswith("Node name: ")):
> clusternodeName = line.split("Node name:")[1].strip().rstrip()
> + if (line.startswith("Node ID: ")):
> + clusternodeID = line.split("Node ID: ")[1].strip().rstrip()
> elif (runCommand("which", ["corosync-cmapctl"])):
> # Another way to get the local cluster node is: $ crm_node -i; crm_node -l
> # Get the name of the cluster.
> @@ -559,14 +574,14 @@ def getClusterNode(listOfGFS2Names):
> if (len(stdoutSplit) == 2):
> clusterName = stdoutSplit[1].strip().rstrip()
> # Get the id of the local cluster node so we can get the clusternode name
> - thisNodeID = ""
> + clusternodeID = ""
> stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
> if (not stdout == None):
> stdoutSplit = stdout.split("=")
> if (len(stdoutSplit) == 2):
> - thisNodeID = stdoutSplit[1].strip().rstrip()
> + clusternodeID = stdoutSplit[1].strip().rstrip()
> # Now that we the nodeid then we can get the clusternode name.
> - if (len(thisNodeID) > 0):
> + if (len(clusternodeID) > 0):
> stdout = runCommandOutput("corosync-quorumtool", ["-l"])
> if (not stdout == None):
> for line in stdout.split("\n"):
> @@ -588,7 +603,15 @@ def getClusterNode(listOfGFS2Names):
> break
> if ((not foundMatch) and (mapOfMountedFilesystemLabels.has_key(label))):
> del(mapOfMountedFilesystemLabels[label])
> - return ClusterNode(clusternodeName, clusterName, mapOfMountedFilesystemLabels)
> + # Cast the node id to an int, and default is 0 if node is not found or
> + # not castable.
> + clusternodeIDInt = 0
> + if (clusternodeID.isalnum()):
> + try:
> + clusternodeIDInt = int(clusternodeID)
> + except(ValueError):
> + pass
> + return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels)
> else:
> return None
>
> @@ -701,6 +724,28 @@ def gatherGeneralInformation(pathToDSTDir):
> message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
> logging.getLogger(MAIN_LOGGER_NAME).error(message)
>
> + # Write the status of all the nodes in the cluster out.
> + if (runCommand("which", ["cman_tool"])):
> + command = "cman_tool"
> + pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool_status")
> + try:
> + fout = open(pathToCommandOutput, "w")
> + runCommand(command, ["status"], standardOut=fout)
> + fout.close()
> + except IOError:
> + message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
> + logging.getLogger(MAIN_LOGGER_NAME).error(message)
> + elif (runCommand("which", ["corosync-cmapctl"])):
> + command = "corosync-quorumtool"
> + pathToCommandOutput = os.path.join(pathToDSTDir, "corosync-quorumtool_l")
> + try:
> + fout = open(pathToCommandOutput, "w")
> + runCommand(command, ["-l"], standardOut=fout)
> + fout.close()
> + except IOError:
> + message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
> + logging.getLogger(MAIN_LOGGER_NAME).error(message)
> +
>
> def isProcPidStackEnabled(pathToPidData):
> """
> @@ -1067,26 +1112,6 @@ if __name__ == "__main__":
> # script running.
> writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
> # #######################################################################
> - # Verify they want to continue because this script will trigger sysrq events.
> - # #######################################################################
> - if (not cmdLineOpts.disableQuestions):
> - valid = {"yes":True, "y":True, "no":False, "n":False}
> - question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
> - prompt = " [y/n] "
> - while True:
> - sys.stdout.write(question + prompt)
> - choice = raw_input().lower()
> - if (choice in valid):
> - if (valid.get(choice)):
> - # If yes, or y then exit loop and continue.
> - break
> - else:
> - message = "The script will not continue since you chose not to continue."
> - logging.getLogger(MAIN_LOGGER_NAME).error(message)
> - exitScript(removePidFile=True, errorCode=1)
> - else:
> - sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
> - # #######################################################################
> # Get the clusternode name and verify that mounted GFS2 filesystems were
> # found.
> # #######################################################################
> @@ -1110,6 +1135,26 @@ if __name__ == "__main__":
> print clusternode
> exitScript()
> # #######################################################################
> + # Verify they want to continue because this script will trigger sysrq events.
> + # #######################################################################
> + if (not cmdLineOpts.disableQuestions):
> + valid = {"yes":True, "y":True, "no":False, "n":False}
> + question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
> + prompt = " [y/n] "
> + while True:
> + sys.stdout.write(question + prompt)
> + choice = raw_input().lower()
> + if (choice in valid):
> + if (valid.get(choice)):
> + # If yes, or y then exit loop and continue.
> + break
> + else:
> + message = "The script will not continue since you chose not to continue."
> + logging.getLogger(MAIN_LOGGER_NAME).error(message)
> + exitScript(removePidFile=True, errorCode=1)
> + else:
> + sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
> + # #######################################################################
> # Create the output directory to verify it can be created before
> # proceeding unless it is already created from a previous run data needs
> # to be analyzed. Probably could add more debugging on if file or dir.
> @@ -1178,6 +1223,11 @@ if __name__ == "__main__":
> message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns)
> logging.getLogger(MAIN_LOGGER_NAME).debug(message)
> gatherGeneralInformation(pathToOutputRunDir)
> + # Write the clusternode name and id to the general information file.
> + writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
> + "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
> + appendToFile=True, createFile=True)
> +
> # Going to sleep for 2 seconds, so that TIMESTAMP should be in the
> # past in the logs so that capturing sysrq data will be guaranteed.
> time.sleep(2)
More information about the Cluster-devel
mailing list