[Cluster-devel] cluster/cman cman_tool/cman_tool.h cman_tool/join.c daemon/ais.c man/cman_tool.8

pcaulfield at sourceware.org pcaulfield at sourceware.org
Wed Jan 30 15:46:42 UTC 2008


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2008-01-30 15:46:41

Modified files:
	cman/cman_tool : cman_tool.h join.c 
	cman/daemon    : ais.c 
	cman/man       : cman_tool.8 

Log message:
	Improve startup error checking and logging.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/cman_tool.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/join.c.diff?cvsroot=cluster&r1=1.53&r2=1.54
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.59&r2=1.60
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/cman_tool.8.diff?cvsroot=cluster&r1=1.15&r2=1.16

--- cluster/cman/cman_tool/cman_tool.h	2007/11/29 11:19:12	1.14
+++ cluster/cman/cman_tool/cman_tool.h	2008/01/30 15:46:41	1.15
@@ -56,6 +56,8 @@
 #define MAX_MCAST_NAME_LEN 256
 #define MAX_PATH_LEN 256
 
+#define DEBUG_STARTUP_ONLY 32
+
 enum format_opt
 {
 	FMT_NONE,
--- cluster/cman/cman_tool/join.c	2008/01/10 10:39:16	1.53
+++ cluster/cman/cman_tool/join.c	2008/01/30 15:46:41	1.54
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -51,22 +51,21 @@
 	setsid();
 }
 
-
 int join(commandline_t *comline)
 {
 	int i;
 	int envptr = 0;
+	int argvptr = 0;
 	char scratch[1024];
 	cman_handle_t h;
+	int status;
 	pid_t aisexec_pid;
 	int ctree;
 	int p[2];
 
-	if (!comline->noccs_opt)
-	{
+	if (!comline->noccs_opt) {
 		ctree = ccs_force_connect(NULL, 1);
-		if (ctree < 0)
-		{
+		if (ctree < 0) {
 			die("ccsd is not running\n");
 		}
 		ccs_disconnect(ctree);
@@ -79,7 +78,6 @@
 	if (h)
 		die("Node is already active");
 
-
 	/* Set up environment variables for override */
 	if (comline->multicast_addr) {
 		snprintf(scratch, sizeof(scratch), "CMAN_MCAST_ADDR=%s", comline->multicast_addr);
@@ -117,27 +115,29 @@
 		snprintf(scratch, sizeof(scratch), "CMAN_2NODE=true");
 		envp[envptr++] = strdup(scratch);
 	}
-	if (comline->verbose) {
+	if (comline->verbose ^ DEBUG_STARTUP_ONLY) {
 		snprintf(scratch, sizeof(scratch), "CMAN_DEBUGLOG=%d", comline->verbose);
 		envp[envptr++] = strdup(scratch);
 	}
 	if (comline->noccs_opt) {
-		snprintf(scratch, sizeof(scratch), "CMAN_NOCCS=TRUE");
-		envp[envptr++] = strdup(scratch);
+		envp[envptr++] = strdup("CMAN_NOCCS=true");
+		envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanpreconfig");
+	}
+	else {
+		envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig");
 	}
-
-	/* Use cman to configure services */
-	envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig");
 
 	/* Create a pipe to monitor cman startup progress */
 	pipe(p);
 	fcntl(p[1], F_SETFD, 0); /* Don't close on exec */
 	snprintf(scratch, sizeof(scratch), "CMAN_PIPE=%d", p[1]);
 	envp[envptr++] = strdup(scratch);
-
 	envp[envptr++] = NULL;
 
 	argv[0] = "aisexec";
+	if (comline->verbose & ~DEBUG_STARTUP_ONLY)
+		argv[++argvptr] = "-f";
+	argv[++argvptr] = NULL;
 
 	/* Fork/exec cman */
 	switch ( (aisexec_pid = fork()) )
@@ -145,18 +145,28 @@
 	case -1:
 		die("fork of aisexec daemon failed: %s", strerror(errno));
 
-	case 0: // child
+	case 0: /* child */
 		close(p[0]);
-		be_daemon(!comline->verbose);
+		if (comline->verbose & DEBUG_STARTUP_ONLY) {
+			fprintf(stderr, "Starting %s", AISEXECBIN);
+			for (i=0; i< argvptr; i++) {
+				fprintf(stderr, " %s", argv[i]);
+			}
+			fprintf(stderr, "\n");
+			for (i=0; i<envptr-1; i++) {
+				fprintf(stderr, "%s\n", envp[i]);
+			}
+		}
+		be_daemon(!(comline->verbose & ~DEBUG_STARTUP_ONLY));
 		execve(AISEXECBIN, argv, envp);
 
-		// exec failed - tell the parent process */
+		/* exec failed - tell the parent process */
 		sprintf(scratch, "execve of " AISEXECBIN " failed: %s", strerror(errno));
 		write(p[1], scratch, strlen(scratch));
 		exit(1);
 		break;
 
-	default: //parent
+	default: /* parent */
 		break;
 
 	}
@@ -164,10 +174,12 @@
 	/* Give the daemon a chance to start up, and monitor the pipe FD for messages */
 	i = 0;
 	close(p[1]);
+
+	/* Wait for the process to start or die */
+	sleep(1);
 	do {
 		fd_set fds;
 		struct timeval tv={1, 0};
-		int status;
 		char message[1024];
 
 		FD_ZERO(&fds);
@@ -177,31 +189,69 @@
 
 		/* Did we get an error? */
 		if (status == 1) {
-			if (read(p[0], message, sizeof(message)) != 0) {
-				fprintf(stderr, "cman not started: %s\n", message);
+			int len;
+			if ((len = read(p[0], message, sizeof(message)) > 0)) {
+
+				/* Success! get the new PID of double-forked aisexec */
+				if (sscanf(message, "SUCCESS: %d", &aisexec_pid) == 1) {
+					if (comline->verbose & DEBUG_STARTUP_ONLY)
+						fprintf(stderr, "aisexec running, process ID is %d\n", aisexec_pid);
+					status = 0;
+				}
+				else {
+					fprintf(stderr, "cman not started: %s\n", message);
+				}
 				break;
 			}
-			else {
+			else if (len < 0 && errno == EINTR) {
+				continue;
+			}
+			else { /* Error or EOF - check the child status */
 				int pidstatus;
-				if (waitpid(aisexec_pid, &pidstatus, WNOHANG) == 0 && pidstatus != 0)
-					fprintf(stderr, "cman died with status: %d\n", WEXITSTATUS(pidstatus));
-				else
+				status = waitpid(aisexec_pid, &pidstatus, WNOHANG);
+				if (status == -1 && errno == ECHILD) {
+					fprintf(stderr, "cman not started\n");
+					break;
+				}
+				if (status == 0 && pidstatus != 0) {
+					if (WIFEXITED(pidstatus))
+						fprintf(stderr, "aisexec died with status: %d\n", WEXITSTATUS(pidstatus));
+					if (WIFSIGNALED(pidstatus))
+						fprintf(stderr, "aisexec died with signal: %d\n", WTERMSIG(pidstatus));
+					status = -1;
+					break;
+				}
+				else {
 					status = 0; /* Try to connect */
+				}
 			}
 		}
-		if (status == 0) {
-			h = cman_admin_init(NULL);
-			if (!h && comline->verbose)
-			{
-				fprintf(stderr, "waiting for aisexec to start\n");
+
+	} while (status != 0);
+	close(p[0]);
+
+	/* If aisexec has started, try to connect to cman ... if it's still there */
+	if (status == 0) {
+		do {
+			if (status == 0) {
+				if (kill(aisexec_pid, 0) < 0) {
+					die("aisexec died during startup\n");
+				}
+
+				h = cman_admin_init(NULL);
+				if (!h && comline->verbose & DEBUG_STARTUP_ONLY)
+				{
+					fprintf(stderr, "waiting for aisexec to start\n");
+				}
 			}
-		}
-	} while (!h && ++i < 100);
+			sleep (1);
+		} while (!h && ++i < 100);
+	}
 
 	if (!h)
 		die("aisexec daemon didn't start");
 
-	if (comline->verbose && !cman_is_active(h))
+	if ((comline->verbose & DEBUG_STARTUP_ONLY) && !cman_is_active(h))
 		fprintf(stderr, "aisexec started, but not joined the cluster yet.\n");
 
 	cman_finish(h);
--- cluster/cman/daemon/ais.c	2008/01/02 16:35:44	1.59
+++ cluster/cman/daemon/ais.c	2008/01/30 15:46:41	1.60
@@ -249,6 +249,7 @@
 static int cman_exec_init_fn(struct objdb_iface_ver0 *objdb)
 {
 	unsigned int object_handle;
+	char pipe_msg[256];
 
 	/* We can only work if our config interface was run first */
 	if (!config_run)
@@ -273,7 +274,9 @@
 	/* Open local sockets and initialise I/O queues */
 	cman_init();
 
-	/* Let cman_tool know we are running */
+	/* Let cman_tool know we are running and our PID */
+	sprintf(pipe_msg,"SUCCESS: %d", getpid());
+	write_cman_pipe(pipe_msg);
 	close(startup_pipe);
 	startup_pipe = 0;
 
--- cluster/cman/man/cman_tool.8	2007/11/29 11:19:12	1.15
+++ cluster/cman/man/cman_tool.8	2008/01/30 15:46:41	1.16
@@ -290,6 +290,8 @@
 .br
 16 Interaction with OpenAIS
 .br
+32 Startup debugging (cman_tool join operations only)
+.br
 .SH NOTES
 .br
 the 




More information about the Cluster-devel mailing list