[Cluster-devel] cluster/rgmanager ChangeLog include/message.h ...

lhh at sourceware.org
Mon Oct 23 22:47:05 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-10-23 22:47:01

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: message.h vf.h 
	rgmanager/src/clulib: lock.c message.c msg_cluster.c vft.c 
	rgmanager/src/daemons: groups.c main.c rg_forward.c rg_state.c 
	rgmanager/src/resources: Makefile 
Added files:
	rgmanager/src/resources: vm.sh 
Removed files:
	rgmanager/src/resources: xenvm.sh 

Log message:
	Fix #211701 (rgmanager + clustat hangs), #211933 (xenvm rename -> vm)

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.29&r2=1.30
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/message.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/vf.h.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/lock.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/message.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msg_cluster.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.23&r2=1.24
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.12&r2=1.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/xenvm.sh.diff?cvsroot=cluster&r1=1.4&r2=NONE

--- cluster/rgmanager/ChangeLog	2006/10/06 21:23:40	1.29
+++ cluster/rgmanager/ChangeLog	2006/10/23 22:47:00	1.30
@@ -1,3 +1,8 @@
+2006-10-23 Lon Hohberger <lhh at redhat.com>
+	* src/resources/xenvm.sh: Gone; replaced with vm.sh.
+	* The C code parts: Fix deadlocks incurred while testing
+	rgmanager on larger node counts. #211701
+
 2006-10-06 Lon Hohberger <lhh at redhat.com>
 	* src/daemons/main.c: Fix #202492: provide rgmanager's view of
 	who is running rgmanager to clustat.
--- cluster/rgmanager/include/message.h	2006/08/07 22:05:01	1.3
+++ cluster/rgmanager/include/message.h	2006/10/23 22:47:00	1.4
@@ -74,6 +74,7 @@
 	msgctx_type_t type;
 	int flags;
 	/* XXX todo make this opaque */
+	void *sp;
 	union {
 		struct {
 			msg_q_t *queue;
--- cluster/rgmanager/include/vf.h	2006/09/01 19:02:21	1.6
+++ cluster/rgmanager/include/vf.h	2006/10/23 22:47:00	1.7
@@ -148,6 +148,7 @@
 
 /* Return codes for vf_handle_msg... */
 #define VFR_ERROR	100
+#define VFR_TIMEOUT	101
 #define VFR_OK		0
 #define VFR_YES		VFR_OK
 #define VFR_NO		1
--- cluster/rgmanager/src/clulib/lock.c	2006/07/11 23:52:41	1.2
+++ cluster/rgmanager/src/clulib/lock.c	2006/10/23 22:47:00	1.3
@@ -176,6 +176,12 @@
 
 	block = !(options & LKF_NOQUEUE);
 
+	errno = EINVAL;
+	if (!lksb)
+		return -1;
+
+	memset(lksb, 0, sizeof(struct dlm_lksb));
+
 	/*
 	   Try to use a conversion lock mechanism when possible
 	   If the caller calls explicitly with a NULL lock, then
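The hunk above hardens the lock wrapper: it rejects a NULL lock status block and zeroes it before the DLM request is issued, so a request that fails early can never hand stale status back to the caller. A minimal sketch of that defensive pattern, with illustrative names (not libdlm's API):

    #include <errno.h>
    #include <string.h>

    struct status_block {
        int status;      /* filled in asynchronously on completion */
        void *astarg;
    };

    int start_lock_op(struct status_block *sb)
    {
        errno = EINVAL;
        if (!sb)
            return -1;                /* reject NULL up front */

        /* Zero the block so a request that fails before completion
           cannot leave stale status from a prior operation behind. */
        memset(sb, 0, sizeof(*sb));

        /* ... issue the asynchronous lock request here ... */
        return 0;
    }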
--- cluster/rgmanager/src/clulib/message.c	2006/08/07 22:05:01	1.3
+++ cluster/rgmanager/src/clulib/message.c	2006/10/23 22:47:00	1.4
@@ -184,6 +184,9 @@
 		return -1;
 	}
 
+	/* Record where this was called, in case we have to debug */
+	ctx->sp = __builtin_return_address(0);
+
 	if (ctx->ops && ctx->ops->mo_open)
 		return ctx->ops->mo_open(ctx->type, nodeid, port, ctx, timeout);
 	errno = ENOSYS;
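Together with the new sp member added to message.h above, this records the call site that opened each context. A sketch of the technique, assuming GCC (the builtin is GCC-specific); everything except __builtin_return_address is an illustrative name:

    #include <stdio.h>

    struct handle {
        void *sp;   /* return address of whoever opened us */
    };

    int handle_open(struct handle *h)
    {
        /* __builtin_return_address(0) yields the address this call
           returns to, i.e. the caller; a debugger or addr2line can
           later map a stuck or leaked handle back to its opener. */
        h->sp = __builtin_return_address(0);
        return 0;
    }

    int main(void)
    {
        struct handle h;
        handle_open(&h);
        printf("opened from %p\n", h.sp);
        return 0;
    }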
--- cluster/rgmanager/src/clulib/msg_cluster.c	2006/08/08 15:01:27	1.3
+++ cluster/rgmanager/src/clulib/msg_cluster.c	2006/10/23 22:47:00	1.4
@@ -33,6 +33,7 @@
 #include <signals.h>
 #include <gettid.h>
 #include <cman-private.h>
+#include <clulog.h>
 
 /* Ripped from ccsd's setup_local_socket */
 
@@ -711,9 +712,10 @@
 {
 	msg_q_t *node;
 
-	if (ctx->type == MSG_NONE) {
-		printf("Queue_for_context called w/o valid context\n");
-		raise(SIGSEGV);
+	if (ctx->type != MSG_CLUSTER) {
+		clulog(LOG_WARNING, "%s called on invalid context %p\n",
+		       __FUNCTION__, ctx);
+		return;
 	}
 
 	while ((node = malloc(sizeof(*node))) == NULL) {
@@ -826,8 +828,15 @@
 			}
 		}
 #endif
-		
-		queue_for_context(contexts[m->dest_ctx], buf, len);
+		if (m->msg_control == M_CLOSE &&
+		    contexts[m->dest_ctx]->type != MSG_CLUSTER) {
+			/* XXX Work around bug where M_CLOSE is called
+			   on a context which has been destroyed */
+			clulog(LOG_WARNING, "Ignoring M_CLOSE for destroyed "
+			       "context %d\n", m->dest_ctx);
+		} else {
+			queue_for_context(contexts[m->dest_ctx], buf, len);
+		}
 	}
 	/* If none of the above, then we msg for something we've already
 	   detached from our list.  No big deal, just ignore. */
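These hunks replace hard failure with graceful degradation: a message arriving for a stale or destroyed context is now logged and dropped instead of raising SIGSEGV, which matters for the #211701 hang since a late M_CLOSE could otherwise take down the daemon. A sketch of the pattern with illustrative types:

    #include <stdio.h>

    enum ctx_type { CTX_NONE, CTX_CLUSTER };

    struct msgctx_demo {
        enum ctx_type type;
    };

    void queue_for_context_demo(struct msgctx_demo *ctx)
    {
        if (ctx->type != CTX_CLUSTER) {
            /* A late M_CLOSE can arrive for a context that was
               already destroyed; warn and ignore rather than
               crash the whole daemon. */
            fprintf(stderr, "invalid context %p, dropping\n",
                    (void *)ctx);
            return;
        }
        /* ... enqueue the message for this context ... */
    }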
--- cluster/rgmanager/src/clulib/vft.c	2006/09/01 19:02:22	1.16
+++ cluster/rgmanager/src/clulib/vft.c	2006/10/23 22:47:00	1.17
@@ -301,24 +301,15 @@
 	     int timeout)
 {
 	generic_msg_hdr response;
-	struct timeval tv;
 	int x;
 
-	/* Set up for the select */
-	tv.tv_sec = timeout;
-	tv.tv_usec = 0;
-
-	/*
-	 * Wait for activity
-	 */
-	
 	/*
 	 * Flag hosts which we received messages from so we don't
 	 * read a second message.
 	 */
 	while (remain && timeout) {
 
-		if (msg_wait(mcast_ctx, 5) <= 0) {
+		if (msg_wait(mcast_ctx, 1) <= 0) {
 			--timeout;
 			continue;
 		}
@@ -355,7 +346,7 @@
 #ifdef DEBUG
 			printf("VF: Abort: someone voted NO\n");
 #endif
-			return 0;
+			return VFR_ABORT;
 		}
 
 #ifdef DEBUG
@@ -368,14 +359,14 @@
 #ifdef DEBUG
 		printf("VF: Timed out waiting for %d responses\n", remain);
 #endif
-		return 0;
+		return VFR_TIMEOUT;
 	}
 		
 
 	/*
 	 * Whohoooooooo!
 	 */
-	return 1;
+	return VFR_OK;
 }
 
 
@@ -884,6 +875,7 @@
 	}
 
 	msg_close(ctx);
+	msg_free_ctx(ctx);
 	return NULL;
 }
 
@@ -910,7 +902,7 @@
 		sleep(1);
 
 	if (msg_open(MSG_CLUSTER, 0, my_port, ctx, 1) < 0) {
-		free(ctx);	
+		msg_free_ctx(ctx);	
 		free(args);
 		return -1;
 	}
@@ -975,10 +967,6 @@
 int
 vf_shutdown(void)
 {
-	key_node_t *c_key;
-	view_node_t *c_jv;
-	commit_node_t *c_cn;
-
 	pthread_mutex_lock(&vf_mutex);
 	vf_thread_ready = 0;
 	pthread_cancel(vf_thread);
@@ -1138,7 +1126,6 @@
 	if (!data || !datalen || !keyid || !strlen(keyid) || !membership)
 		return -1;
 
-
 	pthread_mutex_lock(&vf_mutex);
 	if (!trans) {
 		trans = _node_id << 16;
@@ -1231,7 +1218,7 @@
 	 * See if we have a consensus =)
 	 */
 	if ((rv = (vf_unanimous(&everyone, trans, remain,
-				5)))) {
+				5))) == VFR_OK) {
 		vf_send_commit(&everyone, trans);
 #ifdef DEBUG
 		printf("VF: Consensus reached!\n");
@@ -1253,7 +1240,7 @@
 	pthread_mutex_unlock(&vf_mutex);
 
 #ifdef DEBUG
-	if (rv) {
+	if (rv == VFR_OK) {
 		getuptime(&end);
 
 		dif.tv_usec = end.tv_usec - start.tv_usec;
@@ -1269,7 +1256,7 @@
 	}
 #endif
 
-	return (rv?0:-1);
+	return rv;
 }
 
 
@@ -1595,7 +1582,7 @@
 			VFR_OK : VFR_ERROR;
 
 	swab_vf_msg_t(msg);
-	ret = (msg_send(ctx, msg, totallen) != -1)?VFR_OK:VFR_ERROR;
+	ret = (msg_send(ctx, msg, totallen) >= 0)?VFR_OK:VFR_ERROR;
 	free(msg);
 	return ret;
 }
@@ -1697,14 +1684,15 @@
 		       //msg->vm_msg.vf_keyid,
 		       //(int)membership->cml_members[x].cn_nodeid);
 
-		if (msg_send(&ctx, msg, sizeof(*msg)) != sizeof(*msg)) {
+		if (msg_send(&ctx, msg, sizeof(*msg)) < sizeof(*msg)) {
 			printf("Couldn't send entire message\n");
+			msg_close(&ctx);
 			continue;
 		}
 
 		gh = NULL;
 		if ((n = msg_receive_simple(&ctx, (generic_msg_hdr **)&gh, 10))
-		    == -1) {
+		    < 0) {
 			if (gh)
 				free(gh);
 			msg_close(&ctx);
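Several vft.c hunks tighten resource handling: every msg_close() is now paired with msg_free_ctx(), the bare free() of a context is gone, and a partial send closes the context before continuing. A self-contained sketch of the pairing, with illustrative stand-ins for the real message API:

    #include <stdlib.h>

    struct msgctx_demo { int fd; };

    static struct msgctx_demo *msg_new_ctx_demo(void)
    {
        return calloc(1, sizeof(struct msgctx_demo));
    }

    static int msg_open_demo(struct msgctx_demo *ctx)
    {
        ctx->fd = -1;   /* stand-in for real transport setup */
        return 0;
    }

    static void msg_close_demo(struct msgctx_demo *ctx)
    {
        (void)ctx;      /* shuts down I/O; does NOT free the context */
    }

    static void msg_free_ctx_demo(struct msgctx_demo *ctx)
    {
        free(ctx);
    }

    int do_exchange(void)
    {
        struct msgctx_demo *ctx = msg_new_ctx_demo();

        if (!ctx)
            return -1;
        if (msg_open_demo(ctx) < 0) {
            msg_free_ctx_demo(ctx);   /* never opened: just free */
            return -1;
        }
        /* ... send / receive ... */
        msg_close_demo(ctx);          /* close AND free, or the context
                                         (and anything behind it) leaks */
        msg_free_ctx_demo(ctx);
        return 0;
    }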
--- cluster/rgmanager/src/daemons/groups.c	2006/10/06 21:22:27	1.24
+++ cluster/rgmanager/src/daemons/groups.c	2006/10/23 22:47:01	1.25
@@ -79,6 +79,7 @@
 count_resource_groups(cluster_member_list_t *ml)
 {
 	resource_t *res;
+	resource_node_t *node;
 	char rgname[64], *val;
 	int x;
 	rg_state_t st;
@@ -92,7 +93,9 @@
 
 	pthread_rwlock_rdlock(&resource_lock);
 
-	list_do(&_resources, res) {
+	list_do(&_tree, node) {
+
+		res = node->rn_resource;
 
 		res_build_name(rgname, sizeof(rgname), res);
 
@@ -128,7 +131,7 @@
 			++mp->cn_svcexcl;
 		}
 
-	} while (!list_done(&_resources, res));
+	} while (!list_done(&_tree, node));
 
 	pthread_rwlock_unlock(&resource_lock);
 	return 0;
@@ -527,7 +530,7 @@
 	int depend;
 
 	if (rg_locked()) {
-		clulog(LOG_NOTICE,
+		clulog(LOG_DEBUG,
 			"Resource groups locked; not evaluating\n");
 		return -EAGAIN;
 	}
@@ -1090,8 +1093,20 @@
 		if (curr->rn_resource->r_flags & RF_NEEDSTART)
 			need_init = 1;
 
-		if (get_rg_state_local(rg, &svcblk) < 0)
-			continue;
+		if (!need_init) {
+			if (get_rg_state_local(rg, &svcblk) < 0)
+				continue;
+		} else {
+			if (rg_lock(rg, &lockp) != 0)
+				continue;
+
+			if (get_rg_state(rg, &svcblk) < 0) {
+				rg_unlock(&lockp);
+				continue;
+			}
+
+			rg_unlock(&lockp);
+		}
 
 		if (!need_init && svcblk.rs_owner != my_id())
 			continue;
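The last hunk splits the state read into two paths: a service whose state is already settled can use the cheap local copy, while one flagged RF_NEEDSTART reads the authoritative copy under the cluster lock. A sketch of that shape, with pthread mutexes standing in for rgmanager's DLM lock and all names illustrative:

    #include <pthread.h>

    struct rg_state_demo { int rs_owner; };

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct rg_state_demo authoritative;
    static struct rg_state_demo local_cache;

    /* Cheap path: local cache, valid when we know it is current. */
    static int get_state_local(struct rg_state_demo *out)
    {
        *out = local_cache;
        return 0;
    }

    /* Authoritative path: take the lock, read, release. */
    static int get_state_locked(struct rg_state_demo *out)
    {
        pthread_mutex_lock(&state_lock);
        *out = authoritative;
        pthread_mutex_unlock(&state_lock);
        return 0;
    }

    int read_state(int need_init, struct rg_state_demo *out)
    {
        /* A service that still needs starting must see the
           authoritative copy; others can trust the cache. */
        return need_init ? get_state_locked(out)
                         : get_state_local(out);
    }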
--- cluster/rgmanager/src/daemons/main.c	2006/10/06 21:22:27	1.33
+++ cluster/rgmanager/src/daemons/main.c	2006/10/23 22:47:01	1.34
@@ -36,6 +36,7 @@
 #include <malloc.h>
 #include <cman-private.h>
 
+#define L_SHUTDOWN (1<<2)
 #define L_SYS (1<<1)
 #define L_USER (1<<0)
 
@@ -59,6 +60,7 @@
 char debug = 0; /* XXX* */
 static int signalled = 0;
 static int port = RG_PORT;
+static char *rgmanager_lsname = "rgmanager"; /* XXX default */
 
 int next_node_id(cluster_member_list_t *membership, int me);
 int rg_event_q(char *svcName, uint32_t state, int owner);
@@ -479,7 +481,7 @@
 			/* No such service! */
 			swab_SmMessageSt(msg_sm);
 
-			if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) !=
+			if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) <
 		    	    sizeof (SmMessageSt))
 				clulog(LOG_ERR, "#40: Error replying to "
 				       "action request.\n");
@@ -832,7 +834,7 @@
 void *
 shutdown_thread(void *arg)
 {
-	rg_lockall(L_SYS);
+	rg_lockall(L_SYS|L_SHUTDOWN);
 	rg_doall(RG_STOP_EXITING, 1, NULL);
 	running = 0;
 
@@ -886,7 +888,7 @@
 		return -1;
 	}
 
-	if (clu_lock_init("rgmanager") != 0) {
+	if (clu_lock_init(rgmanager_lsname) != 0) {
 		printf("Locks not working!\n");
 		return -1;
 	}
@@ -982,6 +984,7 @@
 
 	cleanup(cluster_ctx);
 	clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
+	clu_lock_finished(rgmanager_lsname);
 	cman_finish(clu);
 	
 	/*malloc_dump_table(); */ /* Only works if alloc.c us used */
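main.c adds a third lock reason, L_SHUTDOWN, alongside L_SYS and L_USER, and asserts both system bits during shutdown. A sketch of the bitmask scheme; the flag values come from the diff, the demo helpers are illustrative:

    #define L_USER     (1<<0)
    #define L_SYS      (1<<1)
    #define L_SHUTDOWN (1<<2)

    static int lock_level;

    /* Each reason for a lockdown owns one bit, so shutdown can assert
       its own reason without clobbering a user-requested lock. */
    void lockall_demo(int why)   { lock_level |= why; }
    void unlockall_demo(int why) { lock_level &= ~why; }
    int  locked_demo(void)       { return lock_level != 0; }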
--- cluster/rgmanager/src/daemons/rg_forward.c	2006/09/27 18:58:53	1.7
+++ cluster/rgmanager/src/daemons/rg_forward.c	2006/10/23 22:47:01	1.8
@@ -48,22 +48,15 @@
 	rg_state_t rgs;
 	request_t *req = (request_t *)arg;
 	struct dlm_lksb lockp;
-	msgctx_t ctx;
+	msgctx_t *ctx = NULL;
 	SmMessageSt msg;
 
-	if (rg_lock(req->rr_group, &lockp) != 0) {
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	if (rg_lock(req->rr_group, &lockp) != 0)
+		goto out_fail;
 
 	if (get_rg_state(req->rr_group, &rgs) != 0) {
 		rg_unlock(&lockp);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
+		goto out_fail;
 	}
 
 	rg_unlock(&lockp);
@@ -84,35 +77,32 @@
 	clulog(LOG_DEBUG, "Forwarding %s request to %d\n",
 	       rg_req_str(req->rr_request), rgs.rs_owner);
 
-	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, &ctx, 10) < 0)  {
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	while ((ctx = msg_new_ctx()) == NULL)
+		sleep(1);
 
-	if (msg_send(&ctx, &msg, sizeof(msg)) != sizeof(msg)) {
-		msg_close(&ctx);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
+	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0)
+		goto out_fail;
+	if (msg_send(ctx, &msg, sizeof(msg)) < sizeof(msg))
+		goto out_fail;
+	if (msg_receive(ctx, &msg, sizeof(msg), 600) < sizeof(msg))
+		goto out_fail;
 
-	if (msg_receive(&ctx, &msg, sizeof(msg), 600) != sizeof(msg)) {
-		msg_close(&ctx);
-		msg_close(req->rr_resp_ctx);
-		msg_free_ctx(req->rr_resp_ctx);
-		rq_free(req);
-		pthread_exit(NULL);
-	}
-	msg_close(&ctx);
+	msg_close(ctx);
+	msg_free_ctx(ctx);
 
 	swab_SmMessageSt(&msg);
 	send_response(msg.sm_data.d_ret, req);
-
 	rq_free(req);
-
+	pthread_exit(NULL);
+	
+out_fail: /* Failure path */
+	if (ctx) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+	msg_close(req->rr_resp_ctx);
+	msg_free_ctx(req->rr_resp_ctx);
+	rq_free(req);
 	pthread_exit(NULL);
 }
 
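The rg_forward.c rewrite collapses five duplicated teardown sequences into a single out_fail label, the common kernel-style cleanup idiom. A self-contained sketch of the shape, with illustrative names:

    #include <stdlib.h>

    struct res { int dummy; };

    static struct res *acquire(void)   { return calloc(1, sizeof(struct res)); }
    static int step(struct res *r)     { (void)r; return 0; }
    static void release(struct res *r) { free(r); }

    int forward_demo(void)
    {
        struct res *r = NULL;

        if ((r = acquire()) == NULL)
            goto out_fail;
        if (step(r) < 0)
            goto out_fail;
        if (step(r) < 0)
            goto out_fail;

        release(r);
        return 0;

    out_fail:
        /* One exit path releases whatever was acquired, instead of
           repeating the teardown at every early return. */
        if (r)
            release(r);
        return -1;
    }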
--- cluster/rgmanager/src/daemons/rg_state.c	2006/09/28 20:01:56	1.23
+++ cluster/rgmanager/src/daemons/rg_state.c	2006/10/23 22:47:01	1.24
@@ -245,7 +245,7 @@
 	char res[256];
 #ifndef OPENAIS
 	cluster_member_list_t *membership;
-	int ret;
+	int ret, tries = 0;
 #endif
 
 	if (name)
@@ -257,11 +257,16 @@
 		return -1;
 	return 0;
 #else
-	membership = member_list();
-	ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
-       		       sizeof(*svcblk));
-	free_member_list(membership);
-	return ret;
+	do {
+		/* Retry up to 3 times just in case members transition
+		   while we're trying to commit something */
+		membership = member_list();
+		ret = vf_write(membership, VFF_IGN_CONN_ERRORS, res, svcblk,
+       		       	       sizeof(*svcblk));
+		free_member_list(membership);
+	} while (ret == VFR_TIMEOUT && ++tries < 3);
+
+	return (ret==VFR_OK?0:-1);
 #endif
 }
 
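The loop above retries the distributed write up to three times when it times out, which the distinct VFR_TIMEOUT code from the vf.h hunk makes possible; a plain 0/-1 return could not tell a retryable timeout from a final failure. A reduced sketch, with vf_commit_demo standing in for the member_list() / vf_write() / free_member_list() sequence:

    #define VFR_OK      0     /* from vf.h */
    #define VFR_TIMEOUT 101   /* from vf.h */

    static int vf_commit_demo(void)
    {
        /* stand-in for one attempt at the distributed commit */
        return VFR_OK;
    }

    int set_state_demo(void)
    {
        int ret, tries = 0;

        /* A timeout may just mean members were transitioning while
           we tried to commit; retry.  Other failures are final. */
        do {
            ret = vf_commit_demo();
        } while (ret == VFR_TIMEOUT && ++tries < 3);

        return (ret == VFR_OK) ? 0 : -1;
    }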
@@ -1193,7 +1198,7 @@
 	swab_SmMessageSt(&msg_relo);
 
 	/* Send relocate message to the other node */
-	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) !=
+	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) < 
 	    sizeof (SmMessageSt)) {
 		clulog(LOG_ERR,
 		       "#59: Error sending relocate request to member #%d\n",
/cvs/cluster/cluster/rgmanager/src/resources/vm.sh,v  -->  standard output
revision 1.1
--- cluster/rgmanager/src/resources/vm.sh
+++ -	2006-10-23 22:47:04.631911000 +0000
@@ -0,0 +1,422 @@
+#!/bin/bash
+#
+#  Copyright Red Hat Inc., 2005-2006
+#
+#  This program is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by the
+#  Free Software Foundation; either version 2, or (at your option) any
+#  later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; see the file COPYING.  If not, write to the
+#  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+#  MA 02139, USA.
+#
+
+PATH=/bin:/sbin:/usr/bin:/usr/sbin
+
+export PATH
+
+#
+# Virtual Machine start/stop script (requires the xm command)
+#
+
+meta_data()
+{
+    cat <<EOT
+<?xml version="1.0"?>
+<resource-agent version="rgmanager 2.0" name="vm">
+    <version>1.0</version>
+
+    <longdesc lang="en">
+	Defines a Virtual Machine
+    </longdesc>
+    <shortdesc lang="en">
+        Defines a Virtual Machine
+    </shortdesc>
+
+    <parameters>
+        <parameter name="name" primary="1">
+            <longdesc lang="en">
+                This is the name of the virtual machine.
+            </longdesc>
+            <shortdesc lang="en">
+                Name
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+    
+        <parameter name="domain">
+            <longdesc lang="en">
+                Fail over domains define lists of cluster members
+                to try in the event that the host of the virtual machine
+		fails.
+            </longdesc>
+            <shortdesc lang="en">
+                Cluster Fail Over Domain
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+        <parameter name="autostart">
+            <longdesc lang="en">
+	    	If set to yes, this resource group will automatically be started
+		after the cluster forms a quorum.  If set to no, this virtual
+		machine will start in the 'disabled' state after the cluster
+		forms a quorum.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Automatic start after quorum formation
+            </shortdesc>
+            <content type="boolean"/>
+        </parameter>
+
+        <parameter name="recovery" reconfig="1">
+            <longdesc lang="en">
+	        This currently has three possible options: "restart" tries
+		to restart this virtual machine locally before
+		attempting to relocate (default); "relocate" does not bother
+		trying to restart the VM locally; "disable" disables
+		the VM if it fails.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Failure recovery policy
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="memory" reconfig="1">
+	    <longdesc lang="en">
+		Memory size.  This can be reconfigured on the fly.
+	    </longdesc>
+	    <shortdesc lang="en">
+		Memory Size
+	    </shortdesc>
+            <content type="integer"/>
+        </parameter>
+
+	<parameter name="bootloader">
+	    <longdesc lang="en">
+		Boot loader that can start the VM from physical image
+	    </longdesc>
+	    <shortdesc lang="en">
+		Boot loader that can start the VM from physical image
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="path">
+	    <longdesc lang="en">
+	    	Path specification 'xm create' will search for the specified
+		VM configuration file
+	    </longdesc>
+	    <shortdesc lang="en">
+	    	Path to virtual machine configuration files
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+	<parameter name="rootdisk_physical" unique="1">
+	    <longdesc lang="en">
+		Root disk for the virtual machine.  (physical, on the host)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Root disk (physical)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+        
+	<parameter name="rootdisk_virtual">
+	    <longdesc lang="en">
+		Root disk for the virtual machine.  (as presented to the VM)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Root disk (virtual)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+	<parameter name="swapdisk_physical" unique="1">
+	    <longdesc lang="en">
+		Swap disk for the virtual machine.  (physical, on the host)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Swap disk (physical)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+        
+	<parameter name="swapdisk_virtual">
+	    <longdesc lang="en">
+		Swap disk for the virtual machine.  (as presented to the VM)
+	    </longdesc>
+	    <shortdesc lang="en">
+		Swap disk (virtual)
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+	<parameter name="vif">
+	    <longdesc lang="en">
+		Virtual interface MAC address
+	    </longdesc>
+	    <shortdesc lang="en">
+		Virtual interface MAC address
+	    </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+
+    </parameters>
+
+    <actions>
+        <action name="start" timeout="20"/>
+        <action name="stop" timeout="120"/>
+	
+	<!-- No-ops.  Groups are abstract resource types.  -->
+        <action name="status" timeout="10" interval="30m"/>
+        <action name="monitor" timeout="10" interval="30m"/>
+
+	<!-- reconfigure - reconfigure with new OCF parameters.
+	     NOT OCF COMPATIBLE AT ALL -->
+	<action name="reconfig" timeout="10"/>
+
+	<!-- Suspend: if available, suspend this resource instead of
+	     doing a full stop. -->
+	<!-- <action name="suspend" timeout="10m"/> -->
+	<action name="migrate" timeout="10m"/>
+
+        <action name="meta-data" timeout="5"/>
+        <action name="verify-all" timeout="5"/>
+
+    </actions>
+    
+    <special tag="rgmanager">
+        <attributes maxinstances="1"/>
+    </special>
+</resource-agent>
+EOT
+}
+
+
+build_xm_cmdline()
+{
+	#
+	# Virtual domains should never restart themselves when 
+	# controlled externally; the external monitoring app
+	# should.
+	#
+	declare cmdline="restart=\"never\""
+	declare varp val temp
+
+	#
+	# Transliterate the OCF_RESKEY_* to something the xm
+	# command can recognize.
+	#
+	for var in ${!OCF_RESKEY_*}; do
+		varp=${var/OCF_RESKEY_/}
+		val=`eval "echo \\$$var"`
+
+		case $varp in
+		bootloader)
+			cmdline="$cmdline bootloader=\"$val\""
+			;;
+		rootdisk_physical)
+			[ -n "$OCF_RESKEY_rootdisk_virtual" ] || exit 2
+			cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_rootdisk_virtual,w\""
+			;;
+		swapdisk_physical)
+			[ -n "$OCF_RESKEY_swapdisk_virtual" ] || exit 2
+			cmdline="$cmdline disk=\"phy:$val,$OCF_RESKEY_swapdisk_virtual,w\""
+			;;
+		vif)
+			cmdline="$cmdline vif=\"mac=$val\""
+			;;
+		recovery|autostart|domain)
+			;;
+		memory)
+			cmdline="$cmdline $varp=$val"
+			;;
+		swapdisk_virtual)
+			;;
+		rootdisk_virtual)
+			;;
+		name)	# Do nothing with name; add it later
+			;;
+		path)
+			cmdline="$cmdline --path=\"$val\""
+			;;
+		*)
+			cmdline="$cmdline $varp=\"$val\""
+			;;
+		esac
+	done
+
+	if [ -n "$OCF_RESKEY_name" ]; then
+		cmdline="$OCF_RESKEY_name $cmdline"
+	fi
+
+	echo $cmdline
+}
+
+
+#
+# Start a virtual machine given the parameters from
+# the environment.
+#
+start()
+{
+	# Use /dev/null for the configuration file, if xmdefconfig
+	# doesn't exist...
+	#
+	declare cmdline
+
+	cmdline="`build_xm_cmdline`"
+
+	echo "# xm command line: $cmdline"
+
+	eval xm create $cmdline
+	return $?
+}
+
+
+#
+# Stop a VM.  Try to shut it down.  Wait a bit, and if it
+# doesn't shut down, destroy it.
+#
+stop()
+{
+	declare -i timeout=60
+	declare -i ret=1
+	declare st
+
+	for op in $*; do
+		echo xm $op $OCF_RESKEY_name ...
+		xm $op $OCF_RESKEY_name
+
+		timeout=60
+		while [ $timeout -gt 0 ]; do
+			sleep 5
+			((timeout -= 5))
+			status || return 0
+			while read dom state; do
+				#
+				# State is "stopped".  Kill it.
+				#
+				if [ "$dom" != "$OCF_RESKEY_name" ]; then
+					continue
+				fi
+				if [ "$state" != "---s-" ]; then
+					continue
+				fi
+				xm destroy $OCF_RESKEY_name
+			done < <(xm list | awk '{print $1, $5}')
+		done
+	done
+
+	return 1
+}
+
+
+#
+# Reconfigure a running VM.  Currently, all we support is
+# memory ballooning.
+#
+reconfigure()
+{
+	if [ -n "$OCF_RESKEY_memory" ]; then
+		echo "xm balloon $OCF_RESKEY_name $OCF_RESKEY_memory"
+		xm balloon $OCF_RESKEY_name $OCF_RESKEY_memory
+		return $?
+	fi
+	return 0
+}
+
+
+#
+# Simple status check: Find the VM in the list of running
+# VMs
+#
+status()
+{
+	xm list $OCF_RESKEY_name &> /dev/null
+	return $?
+}
+
+
+verify_all()
+{
+	declare errors=0
+
+	if [ -n "$OCF_RESKEY_bootloader" ] && \
+	   ! [ -x "$OCF_RESKEY_bootloader" ]; then
+		echo "$OCF_RESKEY_bootloader is not executable"
+		((errors++))
+	fi
+}
+
+
+migrate()
+{
+	declare target=$1
+
+	# XXX TODO
+	return 1
+}
+
+#
+# A Resource group is abstract, but the OCF RA API doesn't allow for abstract
+# resources, so here it is.
+#
+case $1 in
+	start)
+		start
+		exit $?
+		;;
+	stop)
+		stop shutdown destroy
+		exit $?
+		;;
+	kill)
+		stop destroy
+		exit $?
+		;;
+	recover|restart)
+		exit 0
+		;;
+	status|monitor)
+		status
+		exit $?
+		;;
+	migrate)
+		migrate $2 # Send VM to this node
+		exit $?
+		;;
+	reload)
+		exit 0
+		;;
+	reconfig)
+		echo "$0 RECONFIGURING $OCF_RESKEY_memory"
+		reconfigure
+		exit $?
+		;;
+	meta-data)
+		meta_data
+		exit 0
+		;;
+	verify-all)
+		verify_all
+		exit $?
+		;;
+	*)
+		echo "usage: $0 {start|stop|restart|status|reload|reconfig|meta-data|verify-all}"
+		exit 1
+		;;
+esac
--- cluster/rgmanager/src/resources/Makefile	2006/07/12 15:43:56	1.12
+++ cluster/rgmanager/src/resources/Makefile	2006/10/23 22:47:01	1.13
@@ -18,7 +18,7 @@
 INCLUDE += -I $(top_srcdir)/include
 
 RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
-	script.sh netfs.sh clusterfs.sh smb.sh xenvm.sh
+	script.sh netfs.sh clusterfs.sh smb.sh vm.sh
 
 TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
 
