[Cluster-devel] [PATCH] qdiskd: (RHEL56) Don't write evictions if allow_kill is off
Lon Hohberger
lhh at redhat.com
Wed Nov 10 14:39:26 UTC 2010
Previously, the qdisk master would write an eviction notice to disk
for a hung qdisk node even if allow_kill was off, causing the
hung node to reboot as soon as it woke up and read the notice.
This patch causes the qdisk master to write S_NONE as the state
of hung nodes on-disk when allow_kill is off instead of S_EVICT.
So, when the node wakes up, it will read the S_NONE state and
restart qdiskd instead of reading S_EVICT and rebooting the
machine.
Because so much internal qdiskd state would need to be repaired
on a node that wakes up in this situation (including rejoining
the qdisk membership), the only clean way to continue
operating is to restart qdiskd.
Resolves: rhbz#602731
Signed-off-by: Lon Hohberger <lhh at redhat.com>
---
cman/qdisk/main.c | 80 +++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 153b190..1eb10a6 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -109,17 +109,36 @@ node_info_init(node_info_t *ni, int max)
}
+static void
+reincarnate(void)
+{
+ char buf[PATH_MAX];
+ char cmd[PATH_MAX];
+
+ clulog(LOG_CRIT, "Attempting to restart\n");
+
+ snprintf(buf, sizeof(buf), "/proc/%d/exe", getpid());
+ if (readlink(buf, cmd, sizeof(cmd)) < 0)
+ goto out_die;
+
+ execlp(cmd, cmd, NULL);
+out_die:
+ clulog(LOG_CRIT, "Unable to restart; dying.\n");
+ exit(-1);
+}
+
+
/**
Check to see if someone tried to evict us but we were out to lunch.
Rare case; usually other nodes would put up the 'Undead' message and
re-evict us.
*/
-void
+static int
check_self(qd_ctx *ctx, status_block_t *sb)
{
if (!sb->ps_updatenode ||
(sb->ps_updatenode == ctx->qc_my_id)) {
- return;
+ return 0;
}
/* I did not update this??! */
@@ -127,10 +146,16 @@ check_self(qd_ctx *ctx, status_block_t *sb)
case S_EVICT:
/* Someone told us to die. */
reboot(RB_AUTOBOOT);
+ case S_NONE:
+ return -1;
default:
- clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state);
- raise(SIGSTOP);
+ break;
}
+
+ clulog(LOG_EMERG, "Unhandled state: %d\n", sb->ps_state);
+ raise(SIGSTOP);
+
+ return -1;
}
@@ -160,9 +185,11 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
swab_status_block_t(sb);
if (sb->ps_nodeid == ctx->qc_my_id) {
- check_self(ctx, sb);
+ if (check_self(ctx, sb) < 0)
+ reincarnate();
continue;
}
+
/* message. */
memcpy(&(ni[x].ni_last_msg), &(ni[x].ni_msg),
sizeof(ni[x].ni_last_msg));
@@ -278,17 +305,26 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
Write eviction notice if we're the master.
*/
if (ctx->qc_status == S_MASTER) {
- clulog(LOG_NOTICE,
- "Writing eviction notice for node %d\n",
- ni[x].ni_status.ps_nodeid);
- qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
- S_EVICT, NULL, NULL, NULL);
+
if (ctx->qc_flags & RF_ALLOW_KILL) {
+ clulog(LOG_NOTICE,
+ "Writing eviction notice for node %d\n",
+ ni[x].ni_status.ps_nodeid);
+ qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+ S_EVICT, NULL, NULL, NULL);
clulog(LOG_DEBUG, "Telling CMAN to "
"kill the node\n");
cman_kill_node(ctx->qc_ch,
ni[x].ni_status.ps_nodeid);
+ } else {
+ clulog(LOG_NOTICE,
+ "Node %d should be evicted, but "
+ "allow_kill is off\n",
+ ni[x].ni_status.ps_nodeid);
+ qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+ S_NONE, NULL, NULL, NULL);
}
+
}
/* Clear our master mask for the node after eviction */
@@ -313,20 +349,28 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
clulog(LOG_CRIT, "Node %d is undead.\n",
ni[x].ni_status.ps_nodeid);
- clulog(LOG_ALERT,
- "Writing eviction notice (again) for node %d\n",
- ni[x].ni_status.ps_nodeid);
- qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
- S_EVICT, NULL, NULL, NULL);
- ni[x].ni_status.ps_state = S_EVICT;
-
- /* XXX Need to fence it again */
if (ctx->qc_flags & RF_ALLOW_KILL) {
+ clulog(LOG_ALERT,
+ "Writing eviction notice (again) for node %d\n",
+ ni[x].ni_status.ps_nodeid);
+ qd_write_status(ctx, ni[x].ni_status.ps_nodeid,
+ S_EVICT, NULL, NULL, NULL);
+ ni[x].ni_status.ps_state = S_EVICT;
+
+ /* XXX Need to fence it again */
clulog(LOG_DEBUG, "Telling CMAN to "
"kill the node\n");
cman_kill_node(ctx->qc_ch,
ni[x].ni_status.ps_nodeid);
+ } else {
+ /* administrator doesn't care */
+ clulog(LOG_DEBUG,
+ "Ignoring zombie node %d since "
+ "allow_kill is off\n",
+ ni[x].ni_status.ps_nodeid);
+ ni[x].ni_evil_incarnation = 0;
}
+
continue;
}
--
1.7.2.3
More information about the Cluster-devel
mailing list