[Cluster-devel] Cluster Project branch, master, updated. gfs-kernel_0_1_22-201-g96078c8

teigland at sourceware.org teigland at sourceware.org
Wed Apr 23 20:41:24 UTC 2008


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=96078c8382cb000b85424d5826f6ac83d49ce2ee

The branch, master has been updated
       via  96078c8382cb000b85424d5826f6ac83d49ce2ee (commit)
      from  74315c485a9c134caa9d4a2cb77715c32c7493c5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 96078c8382cb000b85424d5826f6ac83d49ce2ee
Author: David Teigland <teigland at redhat.com>
Date:   Wed Apr 23 15:34:02 2008 -0500

    fenced: more new devel
    
    New stuff still under development, lots of various things fixed,
    reworked, changed.
    
    Signed-off-by: David Teigland <teigland at redhat.com>

-----------------------------------------------------------------------

Summary of changes:
 fence/fenced/config.c                          |    2 +-
 fence/fenced/cpg.c                             |  168 +++++++++++++++++-------
 fence/fenced/fd.h                              |    4 +-
 ccs/ccs_tool/update.h => fence/fenced/fenced.h |   12 +-
 fence/fenced/main.c                            |   34 ++---
 fence/fenced/recover.c                         |   10 ++-
 group/dlm_controld/config.c                    |    2 +-
 group/dlm_controld/cpg.c                       |    2 +-
 8 files changed, 157 insertions(+), 77 deletions(-)
 copy ccs/ccs_tool/update.h => fence/fenced/fenced.h (73%)

diff --git a/fence/fenced/config.c b/fence/fenced/config.c
index 85f0252..e34ac24 100644
--- a/fence/fenced/config.c
+++ b/fence/fenced/config.c
@@ -49,7 +49,7 @@ static void read_ccs_int(int cd, char *path, int *config_val)
 }
 
 #define OUR_NAME_PATH "/cluster/clusternodes/clusternode[@name=\"%s\"]/@name"
-#define GROUPD_COMPAT_PATH "/cluster/fence_daemon/@groupd_compat"
+#define GROUPD_COMPAT_PATH "/cluster/group/@groupd_compat"
 #define CLEAN_START_PATH "/cluster/fence_daemon/@clean_start"
 #define POST_JOIN_DELAY_PATH "/cluster/fence_daemon/@post_join_delay"
 #define POST_FAIL_DELAY_PATH "/cluster/fence_daemon/@post_fail_delay"
diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
index 2b6933b..e0b9685 100644
--- a/fence/fenced/cpg.c
+++ b/fence/fenced/cpg.c
@@ -31,6 +31,8 @@ static char *msg_name(int type)
 		return "start";
 	case FD_MSG_VICTIM_DONE:
 		return "victim_done";
+	case FD_MSG_COMPLETE:
+		return "complete";
 	case FD_MSG_EXTERNAL:
 		return "external";
 	default:
@@ -366,7 +368,7 @@ void send_victim_done(struct fd *fd, int victim, int how)
 
 static void receive_victim_done(struct fd *fd, struct fd_header *hd, int len)
 {
-	struct node *node, *safe;
+	struct node *node;
 	uint32_t seq = hd->msgdata;
 	int victim, how, remaining, found;
 	int *nums;
@@ -376,26 +378,18 @@ static void receive_victim_done(struct fd *fd, struct fd_header *hd, int len)
 
 	/* check that hd->nodeids is fd->master ? */
 
-	nums = (int *)((char *)hd + sizeof(struct fd_header));
-
-	victim = le32_to_cpu(nums[0]);
-	how = le32_to_cpu(nums[1]);
-	remaining = le32_to_cpu(nums[2]);
-
 	/* I don't think there's any problem with the master removing the
 	   victim when it's done instead of waiting to remove it when it
 	   receives its own victim_done message, like the other nodes do */
 
 	if (hd->nodeid == our_nodeid)
-		goto out;
+		return;
 
-	/* if a domain membership change involves no victims, the master sends
-	   a victim_done message with victim nodeid -1 and remaining 0; new nodes
-	   are interested in seeing the remaining 0 so they can clear their
-	   init_victims */
+	nums = (int *)((char *)hd + sizeof(struct fd_header));
 
-	if (victim == -1)
-		goto out;
+	victim = le32_to_cpu(nums[0]);
+	how = le32_to_cpu(nums[1]);
+	remaining = le32_to_cpu(nums[2]);
 
 	found = 0;
 
@@ -412,21 +406,99 @@ static void receive_victim_done(struct fd *fd, struct fd_header *hd, int len)
 	}
 
 	if (!found)
-		log_error("receive_victim_done victim %d not found from %d",
+		log_debug("receive_victim_done victim %d not found from %d",
 			  victim, hd->nodeid);
+}
 
- out:
-	if (!fd->init_complete && !remaining) {
-		log_debug("receive_victim_done init_complete");
-		fd->init_complete = 1;
+/* same content as a start message; a new (incomplete) node will look for
+   a complete message that shows it as a member; when it sees one it can
+   clear any init_victims and set init_complete for future cycles */
 
-		/* we may have victims from init which we can clear now */
-		list_for_each_entry_safe(node, safe, &fd->victims, list) {
-			log_debug("receive_victim_done clear victim %d init %d",
-				  node->nodeid, node->init_victim);
-			list_del(&node->list);
-			free(node);
-		}
+static void send_complete(struct fd *fd)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct fd_header *hd;
+	struct member *memb;
+	int n_ints, len, *p, i;
+	char *buf;
+
+	n_ints = 4 + cg->member_count;
+	len = sizeof(struct fd_header) + (n_ints * sizeof(int));
+
+	buf = malloc(len);
+	if (!buf) {
+		return;
+	}
+	memset(buf, 0, len);
+
+	hd = (struct fd_header *)buf;
+	hd->type = FD_MSG_COMPLETE;
+	hd->msgdata = cg->seq;
+
+	p = (int *)(buf + sizeof(struct fd_header));
+
+	p[0] = cpu_to_le32(cg->member_count);
+	p[1] = cpu_to_le32(cg->joined_count);
+	p[2] = cpu_to_le32(cg->remove_count);
+	p[3] = cpu_to_le32(cg->failed_count);
+
+	i = 4;
+	list_for_each_entry(memb, &cg->members, list)
+		p[i++] = cpu_to_le32(memb->nodeid);
+
+	log_debug("send_complete %u counts %d %d %d %d", cg->seq,
+		  cg->member_count, cg->joined_count,
+		  cg->remove_count, cg->failed_count);
+
+	fd_send_message(fd, buf, len);
+
+	free(buf);
+}
+
+static void receive_complete(struct fd *fd, struct fd_header *hd, int len)
+{
+	int member_count, joined_count, remove_count, failed_count;
+	int i, n_ints, *nums;
+	uint32_t seq = hd->msgdata;
+	struct node *node, *safe;
+
+	log_debug("receive_complete %d:%u len %d", hd->nodeid, seq, len);
+
+	if (fd->init_complete)
+		return;
+
+	nums = (int *)((char *)hd + sizeof(struct fd_header));
+
+	member_count = le32_to_cpu(nums[0]);
+	joined_count = le32_to_cpu(nums[1]);
+	remove_count = le32_to_cpu(nums[2]);
+	failed_count = le32_to_cpu(nums[3]);
+
+	n_ints = 4 + member_count;
+	if (len < (sizeof(struct fd_header) + (n_ints * sizeof(int)))) {
+		log_debug("receive_complete %d:%u bad len %d nums %s",
+			  hd->nodeid, seq, len, str_nums(nums, n_ints));
+		return;
+	}
+
+	for (i = 0; i < member_count; i++) {
+		if (our_nodeid == le32_to_cpu(nums[4+i]))
+			break;
+	}
+	if (i == member_count) {
+		log_debug("receive_complete %d:%u we are not in members",
+			  hd->nodeid, seq);
+		return;
+	}
+
+	fd->init_complete = 1;
+
+	/* we may have victims from init which we can clear now */
+	list_for_each_entry_safe(node, safe, &fd->victims, list) {
+		log_debug("receive_complete clear victim %d init %d",
+			  node->nodeid, node->init_victim);
+		list_del(&node->list);
+		free(node);
 	}
 }
 
@@ -546,7 +618,7 @@ static int match_change(struct fd *fd, struct change *cg,
 	failed_count = le32_to_cpu(nums[3]);
 
 	n_ints = 4 + member_count;
-	if (len != (sizeof(struct fd_header) + (n_ints * sizeof(int)))) {
+	if (len < (sizeof(struct fd_header) + (n_ints * sizeof(int)))) {
 		log_debug("match_change fail %d:%u bad len %d nums %s",
 			  hd->nodeid, seq, len, str_nums(nums, n_ints));
 		return 0;
@@ -734,6 +806,8 @@ static void send_start(struct fd *fd)
 	free(buf);
 }
 
+/* FIXME: better to just look in victims list for any nodes with init_victim? */
+
 static int nodes_added(struct fd *fd)
 {
 	struct change *cg;
@@ -771,14 +845,14 @@ static void add_victims(struct fd *fd, struct change *cg)
 /* with start messages from all members, we can pick which one should be master
    and do the fencing (low nodeid with state, "COMPLETE").  as the master
    successfully fences each victim, it sends a status message such that all
-   members remove the node from their victims list.  the status message also
-   indicates the number of remaining victims.
+   members remove the node from their victims list.
 
-   when a node sees via status message that there are no more outstanding
-   victims, it sets fd->init_complete.  if a node is going from !complete to
-   complete, it may still have entries on its victims list at this point from
-   startup init; it can clear them out.  this node will volunteer to be master
-   in the next round of start messages by setting COMPLETE flag.
+   after all victims have been dealt with following a change (or set of changes),
+   the master sends a complete message that indicates the members of the group
+   for the change it has completed processing.  when a joining node sees this
+   complete message and sees itself as a member, it knows it can clear all
+   init_victims from startup init, and it sets init_complete so it will
+   volunteer to be master in the next round by setting COMPLETE flag.
 
    once the master begins fencing victims, it won't process any new changes
    until it's done.  the non-master members will process changes while the
@@ -806,12 +880,9 @@ static void apply_changes(struct fd *fd)
 		if (wait_messages_done(fd)) {
 			set_master(fd);
 			if (fd->master == our_nodeid) {
-				if (!list_empty(&fd->victims)) {
-					delay_fencing(fd, nodes_added(fd));
-					fence_victims(fd);
-				} else {
-					send_victim_done(fd, -1, 0);
-				}
+				delay_fencing(fd, nodes_added(fd));
+				fence_victims(fd);
+				send_complete(fd);
 			} else {
 				defer_fencing(fd);
 			}
@@ -939,13 +1010,13 @@ static int is_victim(struct fd *fd, int nodeid)
 	return 0;
 }
 
+/* add a victim for each node in complete list (represents all nodes in
+   cluster.conf) that is not a cman member (and not already a victim) */
+
 static void add_victims_init(struct fd *fd, struct change *cg)
 {
 	struct node *node, *safe;
 
-	/* add a victim for each node in complete list that is not
-	   a cman member (and not already a victim) */
-
 	list_for_each_entry_safe(node, safe, &fd->complete, list) {
 		list_del(&node->list);
 
@@ -1009,10 +1080,10 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
 
 	add_victims(fd, cg);
 
-	/* We need to assume non-member nodes are already victims;
-	   these initial victims are cleared when we get a status
-	   with zero remaining victims from the master.  But, if
-	   we're the master, we do end up fencing these init nodes. */
+	/* As a joining domain member with no previous state, we need to
+	   assume non-member nodes are already victims; these initial victims
+	   are cleared if we get a "complete" message from the master.
+	   But, if we're the master, we do end up fencing these init nodes. */
 
 	if (cg->we_joined)
 		add_victims_init(fd, cg);
@@ -1062,6 +1133,9 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
 	case FD_MSG_VICTIM_DONE:
 		receive_victim_done(fd, hd, len);
 		break;
+	case FD_MSG_COMPLETE:
+		receive_complete(fd, hd, len);
+		break;
 	case FD_MSG_EXTERNAL:
 		receive_external(fd, hd, len);
 		break;
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 581a206..3765db1 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -42,6 +42,7 @@
 #include "list.h"
 #include "linux_endian.h"
 #include "libfence.h"
+#include "fenced.h"
 
 /* Max name length for a group, pointless since we only ever create the
    "default" group.  Regardless, set arbitrary max to match dlm's
@@ -130,7 +131,8 @@ extern struct commandline comline;
 
 #define FD_MSG_START		1
 #define FD_MSG_VICTIM_DONE	2
-#define FD_MSG_EXTERNAL		3
+#define FD_MSG_COMPLETE		3
+#define FD_MSG_EXTERNAL		4
 
 #define FD_MFLG_JOINING		1  /* accompanies start, we are joining */
 #define FD_MFLG_COMPLETE	2  /* accompanies start, we have complete info */
diff --git a/ccs/ccs_tool/update.h b/fence/fenced/fenced.h
similarity index 73%
copy from ccs/ccs_tool/update.h
copy to fence/fenced/fenced.h
index 19fd7cb..737c3cd 100644
--- a/ccs/ccs_tool/update.h
+++ b/fence/fenced/fenced.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -10,9 +10,11 @@
 *******************************************************************************
 ******************************************************************************/
 
-#ifndef __UPDATE_DOT_H__
-#define __UPDATE_DOT_H__
+#ifndef __FENCED_DOT_H__
+#define __FENCED_DOT_H__
 
-int update(char *location);
+#define FENCED_SOCK_PATH	"fenced_socket"
+#define FENCED_MSGLEN		256
+
+#endif
 
-#endif /* __UPDATE_DOT_H__ */
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index 05b0923..811ad7c 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -14,7 +14,6 @@
 #include "fd.h"
 #include "copyright.cf"
 
-#define FENCED_SOCK_PATH	"fenced_socket"
 #define LOCKFILE_NAME		"/var/run/fenced.pid"
 #define CLIENT_NALLOC		32
 
@@ -282,8 +281,6 @@ static void make_args(char *buf, int *argc, char **argv, char sep)
 	*argc = i;
 }
 
-#define FENCED_MSGLEN 256
-
 static void process_connection(int ci)
 {
 	char buf[FENCED_MSGLEN];
@@ -524,21 +521,13 @@ static void print_usage(void)
 	printf("\n");
 }
 
-#define OPTION_STRING	"gcj:f:Dn:O:T:hVS"
+#define OPTION_STRING	"g:cj:f:Dn:O:T:hVS"
 
 static void read_arguments(int argc, char **argv)
 {
 	int cont = 1;
 	int optchar;
 
-	comline.override_path_opt = 0;
-	comline.override_path = NULL;
-	comline.post_join_delay_opt = 0;
-	comline.post_fail_delay_opt = 0;
-	comline.clean_start_opt = 0;
-	comline.override_time_opt = 0;
-	comline.override_time = 5;	/* default */
-
 	while (cont) {
 		optchar = getopt(argc, argv, OPTION_STRING);
 
@@ -703,19 +692,24 @@ struct commandline comline;
 
    /* tell fenced that an external program has fenced a node, e.g. fence_node;
       fenced will try to suppress its own fencing of this node a second time */
-   fenced_external(char *domain, int nodeid);
+   fenced_external(int nodeid);
 
    /* fenced gives info about a single node */
-   fenced_node_info(char *domain, int nodeid, char *name,
-		    struct fenced_node *info);
+   fenced_node_info(int nodeid, char *name, struct fenced_node *info);
 
    /* fenced gives info about the domain */
-   fenced_domain_info(char *domain, struct fenced_domain *info);
+   fenced_domain_info(struct fenced_domain *info);
 
    /* fenced copies a node struct for each member */
-   fenced_domain_members(char *domain, int num, struct fenced_node **info);
+   fenced_domain_members(int num, struct fenced_node **info);
+
+   fenced_debug_dump(char **buf, int len);
+   fenced_join(void);
+   fenced_leave(void);
+
+   for all of these, libfenced connects to fenced, writes a structure that
+   defines the type, then for some, reads back data, copies data into
+   buffers provided by caller, disconnects from fenced
 
-   fenced_debug_dump();
-   fenced_join();
-   fenced_leave();
 #endif
+
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index 430f778..4774b5a 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -162,6 +162,9 @@ void delay_fencing(struct fd *fd, int node_join)
 	struct node *node;
 	char *delay_type;
 
+	if (list_empty(&fd->victims))
+		return;
+
 	if (node_join) {
 		delay = comline.post_join_delay;
 		delay_type = "post_join_delay";
@@ -216,7 +219,12 @@ void delay_fencing(struct fd *fd, int node_join)
 
 void defer_fencing(struct fd *fd)
 {
-	char *master_name = nodeid_to_name(fd->master);
+	char *master_name;
+
+	if (list_empty(&fd->victims))
+		return;
+
+	master_name = nodeid_to_name(fd->master);
 
 	log_debug("defer fencing to %d %s", fd->master, master_name);
 	syslog(LOG_INFO, "fencing deferred to %s", master_name);
diff --git a/group/dlm_controld/config.c b/group/dlm_controld/config.c
index 6a4fa73..4709186 100644
--- a/group/dlm_controld/config.c
+++ b/group/dlm_controld/config.c
@@ -237,7 +237,7 @@ static void read_ccs_protocol(int cd, char *path, int *config_val)
 #define DEBUG_PATH "/cluster/dlm/@log_debug"
 #define TIMEWARN_PATH "/cluster/dlm/@timewarn"
 #define PROTOCOL_PATH "/cluster/dlm/@protocol"
-#define GROUPD_COMPAT_PATH "/cluster/dlm/@groupd_compat"
+#define GROUPD_COMPAT_PATH "/cluster/group/@groupd_compat"
 #define ENABLE_DEADLK_PATH "/cluster/dlm/@enable_deadlk"
 #define ENABLE_PLOCK_PATH "/cluster/dlm/@enable_plock"
 #define PLOCK_DEBUG_PATH "/cluster/dlm/@plock_debug"
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index afe12bd..e531b65 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -667,7 +667,7 @@ static int match_change(struct lockspace *ls, struct change *cg,
 	failed_count = le32_to_cpu(nums[3]);
 
 	n_ints = 4 + member_count;
-	if (len != (sizeof(struct dlm_header) + (n_ints * sizeof(int)))) {
+	if (len < (sizeof(struct dlm_header) + (n_ints * sizeof(int)))) {
 		log_group(ls, "match_change fail %d:%u bad len %d nums %s",
 			  hd->nodeid, seq, len, str_nums(nums, n_ints));
 		return 0;


hooks/post-receive
--
Cluster Project




More information about the Cluster-devel mailing list