[Cluster-devel] [PATCH] qdiskd: (RHEL56) Don't write evictions if allow_kill is off

Lon Hohberger lhh at redhat.com
Wed Nov 10 14:39:26 UTC 2010


Previously, the qdisk master would write an eviction notice to
disk for a hung qdisk node even if allow_kill was off, causing
the hung node to reboot itself once it resumed and read the
notice.

This patch causes the qdisk master to write S_NONE as the state
of hung nodes on-disk when allow_kill is off instead of S_EVICT.

So, when the node wakes up, it will read the S_NONE state and
restart qdiskd, instead of reading S_EVICT and rebooting.

Because there is so much internal qdiskd state which would need
to be fixed on a node which is in this state (including rejoining
the qdisk membership), the only clean method to continue
operations is to restart qdiskd.

Resolves: rhbz#602731

Signed-off-by: Lon Hohberger <lhh at redhat.com>
---
 cman/qdisk/main.c |   80 +++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 153b190..1eb10a6 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -109,17 +109,36 @@ node_info_init(node_info_t *ni, int max)
 }
 
 
+/* Re-exec this process's own binary (from /proc/<pid>/exe); exits if that fails. */
+static void
+reincarnate(void)
+{
+	char buf[PATH_MAX];
+	char cmd[PATH_MAX] = { 0 };	/* readlink() does not NUL-terminate */
+
+	clulog(LOG_CRIT, "Attempting to restart\n");
+	snprintf(buf, sizeof(buf), "/proc/%d/exe", getpid());
+	if (readlink(buf, cmd, sizeof(cmd) - 1) < 0)	/* keep final '\0' */
+		goto out_die;
+
+	execlp(cmd, cmd, (char *)NULL);	/* returns only on failure */
+out_die:
+	clulog(LOG_CRIT, "Unable to restart; dying.\n");
+	exit(-1);
+}
+
+
 /**
   Check to see if someone tried to evict us but we were out to lunch.
   Rare case; usually other nodes would put up the 'Undead' message and
   re-evict us.
  */
-void
+static int
 check_self(qd_ctx *ctx, status_block_t *sb)
 {
 	if (!sb->ps_updatenode ||
 	    (sb->ps_updatenode == ctx->qc_my_id)) {
-		return;
+		return 0;
 	}
 
 	/* I did not update this??! */
@@ -127,10 +146,16 @@ check_self(qd_ctx *ctx, status_block_t *sb)
 	case S_EVICT:
 		/* Someone told us to die. */
 		reboot(RB_AUTOBOOT);
+	case S_NONE:
+		return -1;
 	default:
-		clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state);
-		raise(SIGSTOP);
+		break;
 	}
+
+	clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state);
+	raise(SIGSTOP);
+
+	return -1;
 }
 
 
@@ -160,9 +185,11 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
 		swab_status_block_t(sb);
 
 		if (sb->ps_nodeid == ctx->qc_my_id) {
-			check_self(ctx, sb);
+			if (check_self(ctx, sb) < 0)
+				reincarnate();
 			continue;
 		} 
+
 		/* message. */
 		memcpy(&(ni[x].ni_last_msg), &(ni[x].ni_msg),
 		       sizeof(ni[x].ni_last_msg));
@@ -278,17 +305,26 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
 			   Write eviction notice if we're the master.
 			 */
 			if (ctx->qc_status == S_MASTER) {
-				clulog(LOG_NOTICE,
-				       "Writing eviction notice for node %d\n",
-				       ni[x].ni_status.ps_nodeid);
-				qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
-						S_EVICT, NULL, NULL, NULL);
+
 				if (ctx->qc_flags & RF_ALLOW_KILL) {
+					clulog(LOG_NOTICE,
+					       "Writing eviction notice for node %d\n",
+					       ni[x].ni_status.ps_nodeid);
+					qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+							S_EVICT, NULL, NULL, NULL);
 					clulog(LOG_DEBUG, "Telling CMAN to "
 						"kill the node\n");
 					cman_kill_node(ctx->qc_ch,
 						ni[x].ni_status.ps_nodeid);
+				} else {
+					clulog(LOG_NOTICE,
+					       "Node %d should be evicted, but "
+					       "allow_kill is off\n",
+					       ni[x].ni_status.ps_nodeid);
+					qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+							S_NONE, NULL, NULL, NULL);
 				}
+
 			}
 
 			/* Clear our master mask for the node after eviction */
@@ -313,20 +349,28 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
 			clulog(LOG_CRIT, "Node %d is undead.\n",
 			       ni[x].ni_status.ps_nodeid);
 
-			clulog(LOG_ALERT,
-			       "Writing eviction notice (again) for node %d\n",
-			       ni[x].ni_status.ps_nodeid);
-			qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
-					S_EVICT, NULL, NULL, NULL);
-			ni[x].ni_status.ps_state = S_EVICT;
-
-			/* XXX Need to fence it again */
 			if (ctx->qc_flags & RF_ALLOW_KILL) {
+				clulog(LOG_ALERT,
+				       "Writing eviction notice (again) for node %d\n",
+				       ni[x].ni_status.ps_nodeid);
+				qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+						S_EVICT, NULL, NULL, NULL);
+				ni[x].ni_status.ps_state = S_EVICT;
+
+				/* XXX Need to fence it again */
 				clulog(LOG_DEBUG, "Telling CMAN to "
 					"kill the node\n");
 				cman_kill_node(ctx->qc_ch,
 					ni[x].ni_status.ps_nodeid);
+			} else {
+				/* administrator doesn't care */
+				clulog(LOG_DEBUG,
+				       "Ignoring zombie node %d since "
+				       "allow_kill is off\n",
+				       ni[x].ni_status.ps_nodeid);
+				ni[x].ni_evil_incarnation = 0;
 			}
+
 			continue;
 		}
 
-- 
1.7.2.3




More information about the Cluster-devel mailing list