[Cluster-devel] cluster/rgmanager ChangeLog include/resgroup.h ...
lhh at sourceware.org
Wed Jun 27 14:03:53 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-06-27 14:03:52
Modified files:
rgmanager : ChangeLog
rgmanager/include: resgroup.h reslist.h
rgmanager/src/clulib: rg_strings.c
rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c
rg_state.c rg_thread.c test.c
rgmanager/src/resources: vm.sh
Log message:
Merge from RHEL5 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.48&r2=1.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.20&r2=1.21
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.39&r2=1.40
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.35&r2=1.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=1.4&r2=1.5
--- cluster/rgmanager/ChangeLog 2007/06/21 18:39:08 1.48
+++ cluster/rgmanager/ChangeLog 2007/06/27 14:03:51 1.49
@@ -1,3 +1,30 @@
+2007-06-27 Lon Hohberger <lhh at redhat.com>
+ * Merge from RHEL5 branch.
+ * src/resources/vm.sh: Un-break migrate (#231692). Make status
+ checks happen every 30 seconds instead of 30 minutes.
+ * include/resgroup.h: Move inline recovery flags to a header file,
+ add RG_STATUS_INQUIRY for locating virtual machines which may have
+ migrated.
+ * include/reslist.h: Change res_exec() back to using agent_op_str()
+ inline so we can squelch errors while performing RG_STATUS_INQUIRY.
+ * src/clulib/rg_strings.c: Add new strings for the new error code /
+ request types.
+ * src/daemons/groups.c: Change group_migrate() to use the correct
+ calling semantics.
+ * src/daemons/main.c, nodeevent.c: Clean up cases which could cause
+ #244143.
+ * src/daemons/resrules.c: Clear up noise.
+ * src/daemons/restree.c: Squelch errors during RG_STATUS_INQUIRY.
+ Patch up inline service recovery (#229650).
+ * src/daemons/rg_state.c: Don't let migrations or relocations to a
+ node running exclusive services occur in the first place; return a
+ useful error (goes with #237144). Locate virtual machines (or,
+ generally, services with the 'migrate' ability) elsewhere in the
+ cluster prior to trying to start one. Detect if someone migrates
+ such a service without using the cluster tools (#232300).
+ * src/daemons/test.c: Make rg_test do the right thing for migrate
+ operations.
+
2007-06-21 Fabio M. Di Nitto <fabbione at ubuntu.com>
* rgmanager/src/clulib/alloc.c: Undefine DEBUG when building on IA64.
The __builtin_address functionality should be taken from libunwind
--- cluster/rgmanager/include/resgroup.h 2007/06/14 19:08:57 1.22
+++ cluster/rgmanager/include/resgroup.h 2007/06/27 14:03:51 1.23
@@ -98,6 +98,7 @@
#define RG_MIGRATE 22
#define RG_FREEZE 23
#define RG_UNFREEZE 24
+#define RG_STATUS_INQUIRY 25
#define RG_NONE 999
const char *rg_req_str(int req);
@@ -143,6 +144,7 @@
int svc_start(char *svcName, int req);
int svc_stop(char *svcName, int error);
int svc_status(char *svcName);
+int svc_status_inquiry(char *svcName);
int svc_disable(char *svcName);
int svc_fail(char *svcName);
int svc_freeze(char *svcName);
@@ -188,6 +190,8 @@
int my_id(void);
/* Return codes */
+#define RG_EFENCE -13 /* Fencing operation pending */
+#define RG_ENODE -12 /* Node is dead/nonexistent */
#define RG_EFROZEN -11 /* Service is frozen */
#define RG_ERUN -10 /* Service is already running */
#define RG_EQUORUM -9 /* Operation requires quorum */
@@ -221,6 +225,12 @@
#define FOD_RESTRICTED (1<<1)
#define FOD_NOFAILBACK (1<<2)
+/*
+ Status tree flags
+ */
+#define SFL_FAILURE (1<<0)
+#define SFL_RECOVERABLE (1<<1)
+
//#define DEBUG
#ifdef DEBUG
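The SFL_* bits replace the file-private FL_* flags that restree.c used
to define (see the restree.c diff below) and travel up the resource
tree as a bitmask, so code outside restree.c can tell a hard failure
from one confined to an independent subtree. A minimal sketch of how a
caller interprets the mask, mirroring handle_started_status() in the
rg_state.c diff below:

    int rv = group_op(svcName, RG_STATUS);

    if (rv & SFL_FAILURE) {
        /* Hard failure: recover the whole service, or first
           check whether the VM simply migrated elsewhere. */
    } else if (rv & SFL_RECOVERABLE) {
        /* Only independent subtrees failed: a conditional
           stop + start restarts just those subtrees. */
    }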
--- cluster/rgmanager/include/reslist.h 2007/05/31 19:08:14 1.20
+++ cluster/rgmanager/include/reslist.h 2007/06/27 14:03:51 1.21
@@ -144,7 +144,7 @@
int res_status(resource_node_t **tree, resource_t *res, void *ret);
int res_condstart(resource_node_t **tree, resource_t *res, void *ret);
int res_condstop(resource_node_t **tree, resource_t *res, void *ret);
-int res_exec(resource_node_t *node, const char *op, const char *arg, int depth);
+int res_exec(resource_node_t *node, int op, const char *arg, int depth);
/*int res_resinfo(resource_node_t **tree, resource_t *res, void *ret);*/
int expand_time(char *val);
int store_action(resource_act_t **actsp, char *name, int depth, int timeout, int interval);
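With res_exec() taking the numeric RS_* operation, call sites drop
their agent_op_str() calls and res_exec() derives the string itself,
which is what lets it squelch errors based on the numeric op. The
before/after at a typical call site, as seen in the groups.c diff
below:

    /* before: operation passed as a string */
    ret = res_exec(rn, agent_op_str(RS_MIGRATE), tgt_name, 0);

    /* after: operation passed as an int; res_exec() resolves
       the string via agent_op_str() internally */
    ret = res_exec(rn, RS_MIGRATE, tgt_name, 0);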
--- cluster/rgmanager/src/clulib/rg_strings.c 2007/04/27 18:10:10 1.8
+++ cluster/rgmanager/src/clulib/rg_strings.c 2007/06/27 14:03:51 1.9
@@ -26,6 +26,8 @@
const struct string_val rg_error_strings[] = {
+ { RG_EFENCE, "Fencing operation pending; try again later" },
+ { RG_ENODE, "Target node dead / nonexistent" },
{ RG_ERUN, "Service is already running" },
{ RG_EQUORUM, "Operation requires quorum" },
{ RG_EINVAL, "Invalid operation for resource" },
@@ -68,6 +70,7 @@
{RG_UNLOCK, "unlocking"},
{RG_QUERY_LOCK, "lock status inquiry"},
{RG_MIGRATE, "migrate"},
+ {RG_STATUS_INQUIRY, "out of band service status inquiry"},
{RG_NONE, "none"},
{0, NULL}
};
@@ -182,5 +185,6 @@
const char *
agent_op_str(int val)
{
+ printf("searching agent_ops for %d\n", val);
return rg_search_table(agent_ops, val);
}
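For context, rg_search_table() is a linear scan over {value, string}
pairs terminated by the {0, NULL} sentinel visible above; roughly (a
sketch with assumed field names, not the exact implementation):

    const char *
    rg_search_table(struct string_val *table, int val)
    {
        int x;

        for (x = 0; table[x].str; x++) {
            if (table[x].val == val)
                return table[x].str;
        }
        return "Unknown";
    }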
--- cluster/rgmanager/src/daemons/groups.c 2007/05/31 19:08:14 1.33
+++ cluster/rgmanager/src/daemons/groups.c 2007/06/27 14:03:51 1.34
@@ -896,7 +896,7 @@
}
clulog(LOG_NOTICE, "Migrating %s to %s\n", groupname, tgt_name);
- ret = res_exec(rn, agent_op_str(RS_MIGRATE), tgt_name, 0);
+ ret = res_exec(rn, RS_MIGRATE, tgt_name, 0);
if (ret == 0) {
clulog(LOG_NOTICE,
"Migration of %s to %s completed\n",
--- cluster/rgmanager/src/daemons/main.c 2007/06/14 15:06:51 1.39
+++ cluster/rgmanager/src/daemons/main.c 2007/06/27 14:03:51 1.40
@@ -617,10 +617,12 @@
clulog(LOG_WARNING, "#67: Shutting down uncleanly\n");
rg_set_inquorate();
rg_doall(RG_INIT, 1, "Emergency stop of %s");
+ rg_set_uninitialized();
#if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
/* cman_replyto_shutdown() */
#endif
- exit(0);
+ running = 0;
+ break;
}
return ret;
@@ -700,6 +702,9 @@
}
}
+ if (!running)
+ return 0;
+
if (need_reconfigure || check_config_update()) {
need_reconfigure = 0;
configure_logging(-1, 0);
@@ -985,7 +990,8 @@
}
}
- cleanup(cluster_ctx);
+ if (rg_initialized())
+ cleanup(cluster_ctx);
clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
clu_lock_finished(rgmanager_lsname);
cman_finish(clu);
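The net effect of the main.c hunks: an emergency shutdown no longer
calls exit(0) from inside the event handler. It flags the main loop to
stop instead, and cleanup() now runs only if resource groups were
actually initialized. The resulting control flow, sketched below (loop
body heavily simplified; only running, rg_initialized() and cleanup()
are names from this patch):

    while (running) {
        /* process cluster and IPC events; an emergency
           stop sets running = 0 */
        if (!running)
            break;
        /* reconfiguration, status checks, ... */
    }

    if (rg_initialized())
        cleanup(cluster_ctx);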
--- cluster/rgmanager/src/daemons/nodeevent.c 2007/03/27 19:33:20 1.6
+++ cluster/rgmanager/src/daemons/nodeevent.c 2007/06/27 14:03:51 1.7
@@ -72,8 +72,10 @@
if (local) {
/* Local Node Event */
- if (nodeStatus == 0)
+ if (nodeStatus == 0) {
+ clulog(LOG_ERR, "Exiting uncleanly\n");
hard_exit();
+ }
if (!rg_initialized()) {
if (init_resource_groups(0) != 0) {
--- cluster/rgmanager/src/daemons/restree.c 2007/06/13 20:32:41 1.33
+++ cluster/rgmanager/src/daemons/restree.c 2007/06/27 14:03:51 1.34
@@ -39,10 +39,6 @@
void malloc_zap_mutex(void);
#endif
-#define FL_FAILURE 0x1
-#define FL_RECOVERABLE 0x2
-
-
/* XXX from resrules.c */
int store_childtype(resource_child_t **childp, char *name, int start,
int stop, int forbid, int flags);
@@ -335,12 +331,13 @@
@see build_env
*/
int
-res_exec(resource_node_t *node, const char *op, const char *arg, int depth)
+res_exec(resource_node_t *node, int op, const char *arg, int depth)
{
int childpid, pid;
int ret = 0;
char **env = NULL;
resource_t *res = node->rn_resource;
+ const char *op_str = agent_op_str(op);
char fullpath[2048];
if (!res->r_rule->rr_agent)
@@ -354,7 +351,7 @@
#ifdef NO_CCS
if (_no_op_mode_) {
- printf("[%s] %s:%s\n", op, res->r_rule->rr_type,
+ printf("[%s] %s:%s\n", op_str, res->r_rule->rr_type,
res->r_attrs->ra_value);
return 0;
}
@@ -392,9 +389,9 @@
restore_signals();
if (arg)
- execle(fullpath, fullpath, op, arg, NULL, env);
+ execle(fullpath, fullpath, op_str, arg, NULL, env);
else
- execle(fullpath, fullpath, op, NULL, env);
+ execle(fullpath, fullpath, op_str, NULL, env);
}
#ifdef DEBUG
@@ -411,10 +408,16 @@
ret = WEXITSTATUS(ret);
+#ifndef NO_CCS
+ if ((op == RS_STATUS &&
+ node->rn_state == RES_STARTED && ret) ||
+ (op != RS_STATUS && ret)) {
+#else
if (ret) {
+#endif
clulog(LOG_NOTICE,
"%s on %s \"%s\" returned %d (%s)\n",
- op, res->r_rule->rr_type,
+ op_str, res->r_rule->rr_type,
res->r_attrs->ra_value, ret,
ocf_strerror(ret));
}
@@ -864,7 +867,7 @@
rule->rr_childtypes[x].rc_name,
ret, op);
- if (rv & FL_FAILURE && op != RS_STOP)
+ if (rv & SFL_FAILURE && op != RS_STOP)
return rv;
}
@@ -911,7 +914,7 @@
list_for(&node->rn_child, child, y) {
rv |= _xx_child_internal(node, first, child, ret, op);
- if (rv & FL_FAILURE)
+ if (rv & SFL_FAILURE)
return rv;
}
} else {
@@ -957,7 +960,7 @@
if (op == RS_START || op == RS_STATUS) {
rv = _do_child_levels(tree, first, ret, op);
- if (rv & FL_FAILURE)
+ if (rv & SFL_FAILURE)
return rv;
/* Start default level after specified ones */
@@ -1016,12 +1019,6 @@
if (strcmp(node->rn_actions[x].ra_name, "status"))
continue;
- /* If a status check has never been done, reset its status. */
- if (!node->rn_actions[x].ra_last) {
- node->rn_actions[x].ra_last = now;
- continue;
- }
-
delta = now - node->rn_actions[x].ra_last;
/*
@@ -1067,7 +1064,8 @@
node->rn_actions[idx].ra_depth,
(int)node->rn_actions[idx].ra_interval);*/
- if ((x = res_exec(node, agent_op_str(RS_STATUS), NULL,
+ node->rn_actions[idx].ra_last = now;
+ if ((x = res_exec(node, RS_STATUS, NULL,
node->rn_actions[idx].ra_depth)) == 0)
return 0;
@@ -1075,7 +1073,7 @@
return x;
/* Strange/failed status. Try to recover inline. */
- if ((x = res_exec(node, agent_op_str(RS_RECOVER), NULL, 0)) == 0)
+ if ((x = res_exec(node, RS_RECOVER, NULL, 0)) == 0)
return 0;
return x;
@@ -1163,7 +1161,7 @@
char *type, void *__attribute__((unused))ret, int realop,
resource_node_t *node)
{
- int rv, me, op;
+ int rv = 0, me, op;
/* Restore default operation. */
op = realop;
@@ -1217,10 +1215,10 @@
if (me && (op == RS_START)) {
node->rn_flags &= ~RF_NEEDSTART;
- rv = res_exec(node, agent_op_str(op), NULL, 0);
+ rv = res_exec(node, op, NULL, 0);
if (rv != 0) {
node->rn_state = RES_FAILED;
- return FL_FAILURE;
+ return SFL_FAILURE;
}
set_time("start", 0, node);
@@ -1248,9 +1246,9 @@
resources of this node must be restarted,
but siblings of this node are not affected. */
if (node->rn_flags & RF_INDEPENDENT)
- return FL_RECOVERABLE;
+ return SFL_RECOVERABLE;
- return FL_FAILURE;
+ return SFL_FAILURE;
}
}
@@ -1266,20 +1264,20 @@
does not matter: its dependent children must
also be independent of this node's siblings. */
if (node->rn_flags & RF_INDEPENDENT)
- return FL_RECOVERABLE;
+ return SFL_RECOVERABLE;
- return FL_FAILURE;
+ return SFL_FAILURE;
}
}
/* Stop should occur after children have stopped */
if (me && (op == RS_STOP)) {
node->rn_flags &= ~RF_NEEDSTOP;
- rv = res_exec(node, agent_op_str(op), NULL, 0);
+ rv = res_exec(node, op, NULL, 0);
if (rv != 0) {
node->rn_state = RES_FAILED;
- return FL_FAILURE;
+ return SFL_FAILURE;
}
if (node->rn_state != RES_STOPPED) {
@@ -1292,7 +1290,7 @@
//node->rn_resource->r_rule->rr_type,
//primary_attr_value(node->rn_resource));
- return 0;
+ return rv;
}
@@ -1332,12 +1330,12 @@
/* If we hit a problem during a 'status' op in an
independent subtree, rv will have the
- FL_RECOVERABLE bit set, but not FL_FAILURE.
- If we ever hit FL_FAILURE during a status
+ SFL_RECOVERABLE bit set, but not SFL_FAILURE.
+ If we ever hit SFL_FAILURE during a status
operation, we're *DONE* - even if the subtree
is flagged w/ indy-subtree */
- if (rv & FL_FAILURE)
+ if (rv & SFL_FAILURE)
return rv;
}
}
@@ -1411,33 +1409,7 @@
int
res_status(resource_node_t **tree, resource_t *res, void *ret)
{
- int rv;
- rv = _res_op(tree, res, NULL, ret, RS_STATUS);
-
- if (rv == 0)
- return 0;
-
- if (rv & FL_FAILURE)
- return rv;
-
- clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
- "Attempting inline recovery\n",
- res->r_rule->rr_type, res->r_attrs->ra_value);
-
- rv = res_condstop(tree, res, ret);
- if (rv & FL_FAILURE)
- goto out_fail;
- rv = res_condstart(tree, res, ret);
- if (rv & FL_FAILURE)
- goto out_fail;
-
- clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
- res->r_rule->rr_type, res->r_attrs->ra_value);
- return 0;
-out_fail:
- clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
- res->r_rule->rr_type, res->r_attrs->ra_value);
- return 1;
+ return _res_op(tree, res, NULL, ret, RS_STATUS);
}
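One subtlety in the res_exec() hunk above: in the daemon build (NO_CCS
unset), a nonzero agent return is now logged only if the operation was
not a status check or the resource is in the RES_STARTED state.
Factored into a single predicate, the new logging condition is
equivalent to:

    int should_log = ret &&
        (op != RS_STATUS || node->rn_state == RES_STARTED);

which is what keeps RG_STATUS_INQUIRY probes against services that are
not running locally from filling the logs.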
--- cluster/rgmanager/src/daemons/rg_state.c 2007/06/25 16:49:28 1.35
+++ cluster/rgmanager/src/daemons/rg_state.c 2007/06/27 14:03:51 1.36
@@ -36,6 +36,10 @@
#include <rg_queue.h>
#include <msgsimple.h>
+/* XXX - copied :( */
+#define cn_svccount cn_address.cna_address[0] /* These are uint8_t size */
+#define cn_svcexcl cn_address.cna_address[1]
+
int node_should_start_safe(uint32_t, cluster_member_list_t *, char *);
int next_node_id(cluster_member_list_t *membership, int me);
@@ -50,6 +54,10 @@
int group_migratory(char *servicename, int lock);
int have_exclusive_resources(void);
int check_exclusive_resources(cluster_member_list_t *membership, char *svcName);
+static int msvc_check_cluster(char *svcName);
+static inline int handle_started_status(char *svcName, int ret, rg_state_t *svcStatus);
+static inline int handle_migrate_status(char *svcName, int ret, rg_state_t *svcStatus);
+int count_resource_groups_local(cman_node_t *mp);
int
@@ -837,10 +845,27 @@
struct dlm_lksb lockp;
rg_state_t svcStatus;
int ret;
+ cluster_member_list_t *membership;
+ cman_node_t *m;
if (!group_migratory(svcName, 1))
return RG_EINVAL;
+ membership = member_list();
+ m = memb_id_to_p(membership, target);
+ if (!m) {
+ free_member_list(membership);
+ return RG_EINVAL;
+ }
+
+ count_resource_groups_local(m);
+ if (m->cn_svcexcl) {
+ free_member_list(membership);
+ return RG_EDEPEND;
+ }
+ free_member_list(membership);
+
+
if (rg_lock(svcName, &lockp) < 0) {
clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n",
strerror(errno));
@@ -905,6 +930,129 @@
/**
+ * Ask the other nodes if they've seen this service. This makes it
+ * possible for users to migrate a virtual machine to another node
+ * in the cluster with non-rgmanager tools.
+ *
+ * Returns the node ID of the new owner, if any. -1 if no one in the
+ * cluster has seen the service.
+ */
+int
+get_new_owner(char *svcName)
+{
+ SmMessageSt msgp, response;
+ msgctx_t ctx;
+ cluster_member_list_t *membership;
+ int x, ret = -1, me = my_id();
+
+ /* Build message */
+ msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
+ msgp.sm_hdr.gh_command = RG_ACTION_REQUEST;
+ msgp.sm_hdr.gh_arg1 = RG_STATUS_INQUIRY;
+ msgp.sm_hdr.gh_length = sizeof(msgp);
+ msgp.sm_data.d_action = RG_STATUS_INQUIRY;
+ strncpy(msgp.sm_data.d_svcName, svcName,
+ sizeof(msgp.sm_data.d_svcName));
+ msgp.sm_data.d_svcOwner = 0;
+ msgp.sm_data.d_ret = 0;
+
+ swab_SmMessageSt(&msgp);
+
+ membership = member_list();
+ for (x = 0; x < membership->cml_count && ret < 0; x++) {
+
+ /* don't query down members */
+ if (!membership->cml_members[x].cn_member)
+ continue;
+ /* don't query self */
+ if (membership->cml_members[x].cn_nodeid == me)
+ continue;
+
+ if (msg_open(MSG_CLUSTER, membership->cml_members[x].cn_nodeid,
+ RG_PORT, &ctx, 2) < 0) {
+ /* failed to open: better to claim false successful
+ status rather than claim a failure and possibly
+ end up with a service on >1 node */
+ goto out;
+ }
+
+ msg_send(&ctx, &msgp, sizeof(msgp));
+ msg_receive(&ctx, &response, sizeof (response), 5);
+
+ swab_SmMessageSt(&response);
+ if (response.sm_data.d_ret == RG_SUCCESS)
+ ret = response.sm_data.d_svcOwner;
+ else
+ ret = -1;
+
+ msg_close(&ctx);
+ }
+
+out:
+ free_member_list(membership);
+
+ return ret;
+}
+
+
+/**
+ If a service is 'migratory' - that is, it has the 'migratory' attribute
+ and has no children - this will query other nodes in the cluster, checking
+ to see if the service has migrated to that node using a status inquiry
+ message. Note that this is a very inefficient thing to do; it would be
+ much, much better to simply use the cluster tools to migrate rather than
+ using the standard management tools for the service/virtual machine.
+ */
+static int
+msvc_check_cluster(char *svcName)
+{
+ struct dlm_lksb lockp;
+ int newowner;
+ rg_state_t svcStatus;
+
+ if (!group_migratory(svcName, 1))
+ return -1;
+
+ newowner = get_new_owner(svcName);
+ if (newowner < 0) {
+ clulog(LOG_DEBUG, "No other nodes have seen %s\n", svcName);
+ return -1;
+ }
+
+ /* New owner found */
+ clulog(LOG_NOTICE, "Migration: %s is running on %d\n", svcName, newowner);
+
+ /* If the check succeeds (returns 0), then flip the state back to
+ 'started' - with a new owner */
+ if (rg_lock(svcName, &lockp) < 0) {
+ clulog(LOG_ERR, "#451: Unable to obtain cluster lock: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ if (get_rg_state(svcName, &svcStatus) != 0) {
+ rg_unlock(&lockp);
+ clulog(LOG_ERR, "#452: Failed getting status for RG %s\n",
+ svcName);
+ return -1;
+ }
+
+ svcStatus.rs_state = RG_STATE_STARTED;
+ svcStatus.rs_owner = newowner;
+
+ if (set_rg_state(svcName, &svcStatus) != 0) {
+ rg_unlock(&lockp);
+ clulog(LOG_ERR, "#453: Failed setting status for RG %s\n",
+ svcName);
+ return -1;
+ }
+ rg_unlock(&lockp);
+
+ return newowner;
+}
+
+
+/**
* Check status of a cluster service
*
* @param svcName Service name to check.
@@ -946,14 +1094,58 @@
ret = group_op(svcName, RG_STATUS);
- /* For running services, just check the return code */
+ /* For running services, if the return code is 0, we're done */
if (svcStatus.rs_state == RG_STATE_STARTED)
- return ret;
+ return handle_started_status(svcName, ret, &svcStatus);
+
+ return handle_migrate_status(svcName, ret, &svcStatus);
+}
+
+
+static inline int
+handle_started_status(char *svcName, int ret, rg_state_t *svcStatus)
+{
+ if (ret & SFL_FAILURE) {
+ ret = msvc_check_cluster(svcName);
+ if (ret >= 0)
+ return 1;
+ }
+
+ /* Ok, we have a recoverable service. Try to perform
+ inline recovery */
+ if (ret & SFL_RECOVERABLE) {
+
+ clulog(LOG_WARNING, "Some independent resources in %s failed; "
+ "Attempting inline recovery\n", svcName);
+ ret = group_op(svcName, RG_CONDSTOP);
+ if (!(ret & SFL_FAILURE)) {
+ ret = group_op(svcName, RG_CONDSTART);
+ }
+
+ if (ret) {
+ clulog(LOG_WARNING, "Inline recovery of %s failed\n",
+ svcName);
+ } else {
+ clulog(LOG_NOTICE,
+ "Inline recovery of %s succeeded\n",
+ svcName);
+ return 0;
+ }
+ }
+
+ return ret;
+}
+
+
+static inline int
+handle_migrate_status(char *svcName, int ret, rg_state_t *svcStatus)
+{
+ struct dlm_lksb lockp;
/* For service(s) migrating to the local node, ignore invalid
return codes.
XXX Should put a timeout on migrating services */
- if (ret < 0)
+ if (ret != 0)
return 0;
/* If the check succeeds (returns 0), then flip the state back to
@@ -964,8 +1156,8 @@
return RG_EFAIL;
}
- svcStatus.rs_state = RG_STATE_STARTED;
- if (set_rg_state(svcName, &svcStatus) != 0) {
+ svcStatus->rs_state = RG_STATE_STARTED;
+ if (set_rg_state(svcName, svcStatus) != 0) {
rg_unlock(&lockp);
clulog(LOG_ERR, "#46: Failed getting status for RG %s\n",
svcName);
@@ -1417,8 +1609,10 @@
int *new_owner)
{
cluster_member_list_t *allowed_nodes, *backup = NULL;
+ cman_node_t *m;
int target = preferred_target, me = my_id();
int ret, x;
+ rg_state_t svcStatus;
/*
* Stop the service - if we haven't already done so.
@@ -1436,9 +1630,22 @@
return RG_EFORWARD;
}
- if (preferred_target >= 0) {
+ if (preferred_target > 0) {
allowed_nodes = member_list();
+ m = memb_id_to_p(allowed_nodes, preferred_target);
+ if (!m) {
+ free_member_list(allowed_nodes);
+ return RG_EINVAL;
+ }
+
+ /* Avoid even bothering the other node if we can */
+ count_resource_groups_local(m);
+ if (m->cn_svcexcl) {
+ free_member_list(allowed_nodes);
+ return RG_EDEPEND;
+ }
+
/*
Mark everyone except me and the preferred target DOWN for now
If we can't start it on the preferred target, then we'll try
@@ -1472,7 +1679,6 @@
if (target == me && me != preferred_target)
goto exhausted;
-
if (target == me) {
/*
Relocate to self. Don't send a network request
@@ -1508,7 +1714,7 @@
//count_resource_groups(allowed_nodes);
}
- if (preferred_target >= 0)
+ if (preferred_target > 0)
memb_mark_down(allowed_nodes, preferred_target);
memb_mark_down(allowed_nodes, me);
@@ -1517,7 +1723,16 @@
if (target == me)
goto exhausted;
- switch (relocate_service(svcName, request, target)) {
+ ret = relocate_service(svcName, request, target);
+ switch (ret) {
+ case RG_ERUN:
+ /* Someone stole the service while we were
+ trying to relo it */
+ get_rg_state_local(svcName, &svcStatus);
+ *new_owner = svcStatus.rs_owner;
+ free_member_list(allowed_nodes);
+ return 0;
+ case RG_EDEPEND:
case RG_EFAIL:
memb_mark_down(allowed_nodes, target);
continue;
@@ -1525,12 +1740,17 @@
svc_report_failure(svcName);
free_member_list(allowed_nodes);
return RG_EFAIL;
+ default:
+ /* deliberate fallthrough */
+ clulog(LOG_ERR,
+ "#61: Invalid reply from member %d during"
+ " relocate operation!\n", target);
case RG_NO:
/* state uncertain */
free_member_list(allowed_nodes);
- clulog(LOG_DEBUG, "State Uncertain: svc:%s "
- "nid:%08x req:%d\n", svcName,
- target, request);
+ clulog(LOG_CRIT, "State Uncertain: svc:%s "
+ "nid:%d req:%s ret:%d\n", svcName,
+ target, rg_req_str(request), ret);
return 0;
case 0:
*new_owner = target;
@@ -1538,10 +1758,6 @@
"on member %d\n", svcName, (int)target);
free_member_list(allowed_nodes);
return 0;
- default:
- clulog(LOG_ERR,
- "#61: Invalid reply from member %d during"
- " relocate operation!\n", target);
}
}
free_member_list(allowed_nodes);
@@ -1592,8 +1808,20 @@
handle_start_req(char *svcName, int req, int *new_owner)
{
int ret, tolerance = FOD_BEST;
- cluster_member_list_t *membership = member_list();
- int need_check = have_exclusive_resources();
+ cluster_member_list_t *membership;
+ int need_check, actual_failure = 0;
+
+ /* When we get an enable req. for a migratory service,
+ check other nodes to see if they are already running
+ said service - and ignore failover domain constraints
+ */
+ if ((ret = msvc_check_cluster(svcName)) >= 0) {
+ *new_owner = ret;
+ return RG_SUCCESS;
+ }
+
+ need_check = have_exclusive_resources();
+ membership = member_list();
/*
* When a service request is from a user application (eg, clusvcadm),
@@ -1672,14 +1900,16 @@
*/
return RG_EABORT;
}
+ actual_failure = 1;
relocate:
/*
* OK, it failed to start - but succeeded to stop. Now,
* we should relocate the service.
*/
- clulog(LOG_WARNING, "#71: Relocating failed service %s\n",
- svcName);
+ if (actual_failure)
+ clulog(LOG_WARNING, "#71: Relocating failed service %s\n",
+ svcName);
ret = handle_relocate_req(svcName, RG_START_RECOVER, -1, new_owner);
/* If we leave the service stopped, instead of disabled, someone
@@ -1780,46 +2010,56 @@
return handle_start_req(svcName, RG_START_RECOVER, new_owner);
}
+
int
handle_fd_start_req(char *svcName, int request, int *new_owner)
{
- cluster_member_list_t *allowed_nodes;
- int target, me = my_id();
- int ret;
-
- allowed_nodes = member_list();
-
- while (memb_count(allowed_nodes)) {
- target = best_target_node(allowed_nodes, -1,
- svcName, 1);
- if (target == me) {
- ret = handle_start_remote_req(svcName, request);
- } else if (target < 0) {
- free_member_list(allowed_nodes);
- return RG_EFAIL;
- } else {
- ret = relocate_service(svcName, request, target);
- }
-
- switch(ret) {
- case RG_ESUCCESS:
- return RG_ESUCCESS;
- case RG_ERUN:
- return RG_ERUN;
- case RG_EFAIL:
- memb_mark_down(allowed_nodes, target);
- continue;
- case RG_EABORT:
- svc_report_failure(svcName);
- free_member_list(allowed_nodes);
- return RG_EFAIL;
- default:
- clulog(LOG_ERR,
- "#6X: Invalid reply [%d] from member %d during"
- " relocate operation!\n", ret, target);
- }
- }
+ cluster_member_list_t *allowed_nodes;
+ int target, me = my_id();
+ int ret = RG_EFAIL;
+
+ /* When we get an enable req. for a migratory service,
+ check other nodes to see if they are already running
+ said service - and ignore failover domain constraints
+ */
+ if ((ret = msvc_check_cluster(svcName)) >= 0) {
+ *new_owner = ret;
+ return RG_SUCCESS;
+ }
+
+ allowed_nodes = member_list();
- free_member_list(allowed_nodes);
- return RG_EFAIL;
+ while (memb_count(allowed_nodes)) {
+ target = best_target_node(allowed_nodes, -1,
+ svcName, 1);
+ if (target == me) {
+ ret = handle_start_remote_req(svcName, request);
+ } else if (target < 0) {
+ free_member_list(allowed_nodes);
+ return RG_EFAIL;
+ } else {
+ ret = relocate_service(svcName, request, target);
+ }
+
+ switch(ret) {
+ case RG_ESUCCESS:
+ return RG_ESUCCESS;
+ case RG_ERUN:
+ return RG_ERUN;
+ case RG_EFAIL:
+ memb_mark_down(allowed_nodes, target);
+ continue;
+ case RG_EABORT:
+ svc_report_failure(svcName);
+ free_member_list(allowed_nodes);
+ return RG_EFAIL;
+ default:
+ clulog(LOG_ERR,
+ "#6X: Invalid reply [%d] from member %d during"
+ " relocate operation!\n", ret, target);
+ }
+ }
+
+ free_member_list(allowed_nodes);
+ return RG_EFAIL;
}
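Taken together, the rg_state.c additions implement a request/response
probe over the cluster message layer: get_new_owner() asks each live
peer about the service, and (per the rg_thread.c diff below) a peer
that can see the service running answers with its own node ID.
Roughly:

    node A (get_new_owner)             node B (rg_thread.c)
    ----------------------             --------------------
    msg_open(MSG_CLUSTER, B, RG_PORT)
    msg_send(RG_STATUS_INQUIRY)  ----> svc_status_inquiry(name)
    msg_receive(&response)      <----- d_ret, d_svcOwner
    if d_ret == RG_SUCCESS:
            new owner = d_svcOwner

msvc_check_cluster() then flips the local service state back to
RG_STATE_STARTED with the discovered owner, which is how a migration
performed outside the cluster tools (e.g. with xm directly) gets
reconciled with cluster state.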
--- cluster/rgmanager/src/daemons/rg_thread.c 2007/06/14 15:06:52 1.21
+++ cluster/rgmanager/src/daemons/rg_thread.c 2007/06/27 14:03:51 1.22
@@ -441,6 +441,19 @@
ret = RG_EFAIL;
break;
+ case RG_STATUS_INQUIRY:
+ error = svc_status_inquiry(myname);
+
+ if (error == 0) {
+ ret = RG_SUCCESS;
+ newowner = my_id();
+ } else {
+ ret = RG_EFAIL;
+ newowner = -1;
+ }
+
+ break;
+
default:
printf("Unhandled request %d\n", req->rr_request);
ret = RG_NONE;
--- cluster/rgmanager/src/daemons/test.c 2007/03/22 23:46:58 1.9
+++ cluster/rgmanager/src/daemons/test.c 2007/06/27 14:03:51 1.10
@@ -217,7 +217,7 @@
}
#endif
- if (res_exec(rn, "migrate", argv[4], 0)) {
+ if (res_exec(rn, RS_MIGRATE, argv[4], 0)) {
ret = -1;
goto out;
}
@@ -226,9 +226,9 @@
} else if (!strcmp(argv[1], "status")) {
printf("Checking status of %s...\n", argv[3]);
- if (res_status(&tree, curres, NULL)) {
+ ret = res_status(&tree, curres, NULL);
+ if (ret) {
printf("Status check of %s failed\n", argv[3]);
- ret = -1;
goto out;
}
printf("Status of %s is good\n", argv[3]);
@@ -391,5 +391,5 @@
out:
xmlCleanupParser();
malloc_dump_table();
- return 0;
+ return ret;
}
--- cluster/rgmanager/src/resources/vm.sh 2007/04/19 17:53:05 1.4
+++ cluster/rgmanager/src/resources/vm.sh 2007/06/27 14:03:51 1.5
@@ -182,9 +182,8 @@
<action name="start" timeout="20"/>
<action name="stop" timeout="120"/>
- <!-- No-ops. Groups are abstract resource types. -->
- <action name="status" timeout="10" interval="30m"/>
- <action name="monitor" timeout="10" interval="30m"/>
+ <action name="status" timeout="10" interval="30"/>
+ <action name="monitor" timeout="10" interval="30"/>
<!-- reconfigure - reconfigure with new OCF parameters.
NOT OCF COMPATIBLE AT ALL -->
@@ -273,13 +272,15 @@
# Start a virtual machine given the parameters from
# the environment.
#
-start()
+do_start()
{
# Use /dev/null for the configuration file, if xmdefconfig
# doesn't exist...
#
declare cmdline
+ do_status && return 0
+
cmdline="`build_xm_cmdline`"
echo "# xm command line: $cmdline"
@@ -293,7 +294,7 @@
# Stop a VM. Try to shut it down. Wait a bit, and if it
# doesn't shut down, destroy it.
#
-stop()
+do_stop()
{
declare -i timeout=60
declare -i ret=1
@@ -307,7 +308,7 @@
while [ $timeout -gt 0 ]; do
sleep 5
((timeout -= 5))
- status || return 0
+ do_status || return 0
while read dom state; do
#
# State is "stopped". Kill it.
@@ -346,10 +347,27 @@
# Simple status check: Find the VM in the list of running
# VMs
#
-status()
+do_status()
{
declare line
+ xm list $OCF_RESKEY_name &> /dev/null
+ if [ $? -eq 0 ]; then
+ return $OCF_SUCCESS
+ fi
+ xm list migrating-$OCF_RESKEY_name &> /dev/null
+ if [ $? -eq 1 ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ return $OCF_ERR_GENERIC
+
+### NOT REACHED ###
+
+ # virsh doesn't handle migrating domains right now
+ # When this gets fixed, we need to revisit this status
+ # function.
+
line=$(virsh domstate $OCF_RESKEY_name)
if [ "$line" = "" ]; then
return $OCF_NOT_RUNNING
@@ -400,26 +418,26 @@
case $1 in
start)
- start
+ do_start
exit $?
;;
stop)
- stop shutdown destroy
+ do_stop shutdown destroy
exit $?
;;
kill)
- stop destroy
+ do_stop destroy
exit $?
;;
recover|restart)
exit 0
;;
status|monitor)
- status
+ do_status
exit $?
;;
migrate)
- migrate $2 # Send VM to this node
+ do_migrate $2 # Send VM to this node
exit $?
;;
reload)
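A note on the vm.sh changes: a bare interval value in the action
metadata is in seconds, so "30" restores the intended 30-second check
(the old "30m" meant 30 minutes). The new do_status() also has
three-way semantics:

    xm list <name>            exits 0 -> $OCF_SUCCESS (running here)
    xm list migrating-<name>  exits 1 -> $OCF_NOT_RUNNING (gone)
    otherwise                         -> $OCF_ERR_GENERIC (domain is
                                         mid-migration, or xm failed)

The virsh-based path below the "NOT REACHED" marker is kept for the
day virsh can report migrating domains.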