[Cluster-devel] cluster/group/dlm_controld Makefile deadlock.c ...

teigland at sourceware.org teigland at sourceware.org
Mon Aug 6 21:50:28 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland at sourceware.org	2007-08-06 21:50:27

Modified files:
	group/dlm_controld: Makefile deadlock.c dlm_daemon.h main.c 

Log message:
	fill in a couple more bits related to canceling the chosen lock

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/dlm_controld/Makefile.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/dlm_controld/deadlock.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/dlm_controld/dlm_daemon.h.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/dlm_controld/main.c.diff?cvsroot=cluster&r1=1.12&r2=1.13

--- cluster/group/dlm_controld/Makefile	2007/07/24 18:15:43	1.8
+++ cluster/group/dlm_controld/Makefile	2007/08/06 21:50:26	1.9
@@ -26,7 +26,7 @@
 CFLAGS += -I../../group/lib/ -I../include/ -I../../dlm/lib/
 CFLAGS += -I${incdir}
 
-LDFLAGS += -L${ccslibdir} -L${cmanlibdir} -L${openaislibdir} -lccs -lcman -lcpg -lSaCkpt
+LDFLAGS += -L${ccslibdir} -L${cmanlibdir} -L${openaislibdir} -lccs -lcman -lcpg -lSaCkpt -ldlm
 LDFLAGS += -L../lib -lgroup
 
 all: depends ${TARGET}
--- cluster/group/dlm_controld/deadlock.c	2007/07/24 18:15:43	1.1
+++ cluster/group/dlm_controld/deadlock.c	2007/08/06 21:50:26	1.2
@@ -11,8 +11,9 @@
 ******************************************************************************/
 
 #include "dlm_daemon.h"
+#include "libdlm.h"
 
-int deadlock_enabled;
+int deadlock_enabled = 0;
 
 extern struct list_head lockspaces;
 extern int our_nodeid;
@@ -60,41 +61,46 @@
 	struct list_head        list;       /* r->locks */
 	struct pack_lock	lock;       /* data from debugfs/checkpoint */
 	unsigned int		time;       /* waiting time read from debugfs */
-	int			from;       /* node that checkpointed the lock */
+	int			from;       /* node that checkpointed the lock*/
 	struct dlm_rsb		*rsb;       /* lock against this resource */
 	struct trans		*trans;     /* lock owned by this transaction */
 	struct list_head	trans_list; /* tr->locks */
 };
 
-#define TR_NALLOC 4               /* waitfor pointers alloc'ed 4 at at time */
+/* waitfor pointers alloc'ed 4 at a time */
+#define TR_NALLOC		4
 
 struct trans {
-	struct list_head list;
-	struct list_head locks;
-	uint64_t xid;
-	int others_waiting_on_us; /* count of trans's pointing to us in waitfor */
-	int waitfor_alloc;
-	int waitfor_count;        /* count of in-use waitfor slots */
-	struct trans **waitfor;   /* waitfor_alloc trans pointers */
+	struct list_head	list;
+	struct list_head	locks;
+	uint64_t		xid;
+	int			others_waiting_on_us; /* count of trans's
+							 pointing to us in
+							 waitfor */
+	int			waitfor_alloc;
+	int			waitfor_count;        /* count of in-use
+							 waitfor slots */
+	struct trans		**waitfor;	      /* waitfor_alloc trans
+							 pointers */
 };
 
-#define DLM_HEADER_MAJOR 1
-#define DLM_HEADER_MINOR 0
-#define DLM_HEADER_PATCH 0
+#define DLM_HEADER_MAJOR	1
+#define DLM_HEADER_MINOR	0
+#define DLM_HEADER_PATCH	0
 
-#define DLM_MSG_CYCLE_START 1
+#define DLM_MSG_CYCLE_START	 1
 #define DLM_MSG_CHECKPOINT_READY 2
-#define DLM_MSG_CANCEL_LOCK 3
+#define DLM_MSG_CANCEL_LOCK	 3
 
 struct dlm_header {
-	uint16_t	version[3];
-	uint16_t	type; /* MSG_ */
-	uint32_t	nodeid; /* sender */
-	uint32_t	to_nodeid; /* 0 if to all */
-	uint32_t	global_id;
-	uint32_t	lock_id;
-	uint32_t	pad;
-	char		name[MAXNAME];
+	uint16_t		version[3];
+	uint16_t		type; /* MSG_ */
+	uint32_t		nodeid; /* sender */
+	uint32_t		to_nodeid; /* 0 if to all */
+	uint32_t		global_id;
+	uint32_t		lkid;
+	uint32_t		pad;
+	char			name[MAXNAME];
 };
 
 static const int __dlm_compat_matrix[8][8] = {
@@ -836,19 +842,14 @@
 	send_message(ls, DLM_MSG_CYCLE_START);
 }
 
-/* FIXME: where to send this?  we want to do the cancel on the node
-   where the transaction lives, which isn't always the master node that
-   sent us the info.  look at lkb->from and lkb->lock.nodeid, use
-   remid if sending to a process copy node */
-
 static void send_cancel_lock(struct lockspace *ls, struct trans *tr,
 			     struct dlm_lkb *lkb)
 {
 	struct dlm_header *hd;
 	int len;
 	char *buf;
-
-	log_group(ls, "send_cancel_lock");
+	int to_nodeid;
+	uint32_t lkid;
 
 	len = sizeof(struct dlm_header);
 	buf = malloc(len);
@@ -859,13 +860,28 @@
 	}
 	memset(buf, 0, len);
 
+	if (lkb->lock.nodeid) {
+		/* this was MSTCPY lkb from master node */
+		to_nodeid = lkb->lock.nodeid;
+		lkid = lkb->lock.remid;
+	} else {
+		/* process-copy lkb from node where lock is held */
+		to_nodeid = lkb->from;
+		lkid = lkb->lock.id;
+	}
+
+	log_group(ls, "send_cancel_lock to %x lkid %d, id %x remid %x "
+		  "nodeid %d from %d", to_nodeid, lkid,
+		  lkb->lock.id, lkb->lock.remid, lkb->lock.nodeid, lkb->from);
+
 	hd = (struct dlm_header *)buf;
 	hd->version[0]  = cpu_to_le16(DLM_HEADER_MAJOR);
 	hd->version[1]  = cpu_to_le16(DLM_HEADER_MINOR);
 	hd->version[2]  = cpu_to_le16(DLM_HEADER_PATCH);
 	hd->type	= cpu_to_le16(DLM_MSG_CANCEL_LOCK);
 	hd->nodeid      = cpu_to_le32(our_nodeid);
-	hd->to_nodeid   = 0;
+	hd->to_nodeid   = cpu_to_le32(to_nodeid);
+	hd->lkid        = cpu_to_le32(lkid);
 	hd->global_id   = cpu_to_le32(ls->global_id);
 	memcpy(hd->name, ls->name, strlen(ls->name));
 
@@ -926,6 +942,32 @@
 	send_checkpoint_ready(ls);
 }
 
+static void receive_cancel_lock(struct lockspace *ls, int nodeid, uint32_t lkid)
+{
+	dlm_lshandle_t h;
+	int rv;
+
+	if (nodeid != our_nodeid)
+		return;
+
+	h = dlm_open_lockspace(ls->name);
+	if (!h) {
+		log_error("deadlock cancel %x from %d can't open lockspace %s",
+			  lkid, nodeid, ls->name);
+		return;
+	}
+
+	log_group(ls, "receive_cancel_lock %x from %d", lkid, nodeid);
+
+	rv = dlm_ls_deadlock_cancel(h, lkid, 0);
+	if (rv < 0) {
+		log_error("deadlock cancel %x from %x lib cancel error %d",
+			  lkid, nodeid, rv);
+	}
+
+	dlm_close_lockspace(h);
+}
+
 static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
 		uint32_t nodeid, uint32_t pid, void *data, int data_len)
 {
@@ -959,6 +1001,9 @@
 	case DLM_MSG_CHECKPOINT_READY:
 		receive_checkpoint_ready(ls, hd->nodeid);
 		break;
+	case DLM_MSG_CANCEL_LOCK:
+		receive_cancel_lock(ls, hd->nodeid, hd->lkid);
+		break;
 	default:
 		log_error("unknown message type %d from %d",
 			  hd->type, hd->nodeid);
@@ -1391,27 +1436,6 @@
 	return "?";
 }
 
-static char *mode_str(int mode)
-{
-	switch (mode) {
-	case DLM_LOCK_IV:
-		return "IV";
-	case DLM_LOCK_NL:
-		return "NL";
-	case DLM_LOCK_CR:
-		return "CR";
-	case DLM_LOCK_CW:
-		return "CW";
-	case DLM_LOCK_PR:
-		return "PR";
-	case DLM_LOCK_PW:
-		return "PW";
-	case DLM_LOCK_EX:
-		return "EX";
-	}
-	return "??";
-}
-
 static void dump_trans(struct lockspace *ls, struct trans *tr)
 {
 	struct dlm_lkb *lkb;
@@ -1428,8 +1452,8 @@
 		log_group(ls, "  %s: id %08x gr %s rq %s pid %u \"%s\"",
 			  status_str(lkb->lock.status),
 			  lkb->lock.id,
-			  mode_str(lkb->lock.grmode),
-			  mode_str(lkb->lock.rqmode),
+			  dlm_mode_str(lkb->lock.grmode),
+			  dlm_mode_str(lkb->lock.rqmode),
 			  lkb->lock.ownpid,
 			  lkb->rsb->name);
 	}
--- cluster/group/dlm_controld/dlm_daemon.h	2007/07/24 18:15:43	1.10
+++ cluster/group/dlm_controld/dlm_daemon.h	2007/08/06 21:50:26	1.11
@@ -41,10 +41,6 @@
 #include <sched.h>
 #include <signal.h>
 #include <sys/time.h>
-#include <linux/netlink.h>
-#include <linux/genetlink.h>
-#include <linux/dlm.h>
-#include <linux/dlm_netlink.h>
 
 #include <openais/saAis.h>
 #include <openais/saCkpt.h>
@@ -129,6 +125,7 @@
 struct lockspace *get_client_lockspace(int ci);
 struct lockspace *create_ls(char *name);
 struct lockspace *find_ls(char *name);
+char *dlm_mode_str(int mode);
 
 /* member_cman.c */
 int is_cman_member(int nodeid);
--- cluster/group/dlm_controld/main.c	2007/07/24 18:15:43	1.12
+++ cluster/group/dlm_controld/main.c	2007/08/06 21:50:26	1.13
@@ -12,6 +12,11 @@
 
 #include "dlm_daemon.h"
 
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
 #define OPTION_STRING			"KDhVd:"
 #define LOCKFILE_NAME			"/var/run/dlm_controld.pid"
 
@@ -22,7 +27,7 @@
 struct list_head lockspaces;
 
 extern group_handle_t gh;
-extern deadlock_enabled = 0;
+extern int deadlock_enabled;
 
 static int daemon_quit;
 static int client_maxi;
@@ -184,6 +189,26 @@
 	return rp;
 }
 
+char *dlm_mode_str(int mode)
+{
+	switch (mode) {
+	case DLM_LOCK_IV:
+		return "IV";
+	case DLM_LOCK_NL:
+		return "NL";
+	case DLM_LOCK_CR:
+		return "CR";
+	case DLM_LOCK_CW:
+		return "CW";
+	case DLM_LOCK_PR:
+		return "PR";
+	case DLM_LOCK_PW:
+		return "PW";
+	case DLM_LOCK_EX:
+		return "EX";
+	}
+	return "??";
+}
 
 /* recv "online" (join) and "offline" (leave) 
    messages from dlm via uevents and pass them on to groupd */




More information about the Cluster-devel mailing list