[Cluster-devel] [PATCH] cman: improve cman/qdisk interactions

Fabio M. Di Nitto fdinitto at redhat.com
Wed Sep 7 13:10:25 UTC 2011


- libcman/cman: add new quorum API call to update name and votes of a quorum device
- cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
- cman: improve logging and error reporting of quorum API usage
- cman: use strdup instead of malloc+strcpy (code is more readable)
- libcman: perform better error checking in register_quorum_device/update_quorum_device

- Allow qdisk to update device name in cman using a new libcman quorum API call
- Perform slightly better error checking of some update operations

Resolves: rhbz#735917

Signed-off-by: Fabio M. Di Nitto <fdinitto at redhat.com>
---
 cman/daemon/cnxman-socket.h |    1 +
 cman/daemon/commands.c      |  138 +++++++++++++++++++++++++++++++++----------
 cman/lib/libcman.c          |   19 +++++-
 cman/lib/libcman.h          |    4 +
 cman/qdisk/main.c           |   28 ++++++++-
 5 files changed, 153 insertions(+), 37 deletions(-)

diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index e8b7378..d243b40 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -32,6 +32,7 @@
 #define CMAN_CMD_REG_QUORUMDEV      0x800000b5
 #define CMAN_CMD_UNREG_QUORUMDEV    0x800000b6
 #define CMAN_CMD_POLL_QUORUMDEV     0x800000b7
+#define CMAN_CMD_UPDATE_QUORUMDEV   0x800000b8
 #define CMAN_CMD_TRY_SHUTDOWN       0x800000bb
 #define CMAN_CMD_SHUTDOWN_REPLY     0x000000bc
 #define CMAN_CMD_UPDATE_FENCE_INFO  0x800000bd
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 2948952..567ff96 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1080,27 +1080,69 @@ static int do_cmd_try_shutdown(struct connection *con, char *cmdbuf)
 	return 0;
 }
 
+static void free_quorum_device(void)
+{
+	if (!quorum_device)
+		return;
+
+	if (quorum_device->name)
+		free(quorum_device->name);
+
+	free(quorum_device);
+
+	quorum_device = NULL;
+
+	return;
+}
+
+
+static void quorum_device_update_votes(int votes)
+{
+	int oldvotes;
+
+	/* Update votes even if it existed before */
+	oldvotes = quorum_device->votes;
+	quorum_device->votes = votes;
+
+	/* If it is a member and votes decreased, recalculate quorum */
+	if (quorum_device->state == NODESTATE_MEMBER &&
+	    oldvotes != votes) {
+		recalculate_quorum(1, 0);
+	}
+}
+
 static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
 {
 	int votes;
-	int oldvotes;
 	char *name = cmdbuf+sizeof(int);
 
-	if (!ais_running)
+	if (!ais_running) {
+		log_printf(LOG_ERR, "unable to register quorum device: corosync is not running\n");
 		return -ENOTCONN;
+	}
 
-	if (!we_are_a_cluster_member)
+	if (!we_are_a_cluster_member) {
+		log_printf(LOG_ERR, "unable to register quorum device: this node is not part of a cluster\n");
 		return -ENOENT;
+	}
 
-	if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+	if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) {
+		log_printf(LOG_ERR, "unable to register quorum device: name is too long\n");
+		/* this should probably return -E2BIG? */
 		return -EINVAL;
+	}
 
 	/* Allow re-registering of a quorum device if the name is the same */
-	if (quorum_device && strcmp(name, quorum_device->name))
-                return -EBUSY;
+	if (quorum_device && strcmp(name, quorum_device->name)) {
+		log_printf(LOG_ERR, "unable to re-register quorum device: device names do not match\n");
+		log_printf(LOG_DEBUG, "memb: old name: %s new name: %s\n", quorum_device->name, name);
+		return -EBUSY;
+	}
 
-	if (find_node_by_name(name))
-                return -EALREADY;
+	if (find_node_by_name(name)) {
+		log_printf(LOG_ERR, "unable to register quorum device: a node with the same name (%s) already exists\n", name);
+		return -EALREADY;
+	}
 
 	memcpy(&votes, cmdbuf, sizeof(int));
 
@@ -1108,18 +1150,19 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
 	if (!quorum_device)
 	{
 		quorum_device = malloc(sizeof(struct cluster_node));
-		if (!quorum_device)
+		if (!quorum_device) {
+			log_printf(LOG_ERR, "unable to register quorum device: not enough memory\n");
 			return -ENOMEM;
+		}
 		memset(quorum_device, 0, sizeof(struct cluster_node));
 
-		quorum_device->name = malloc(strlen(name) + 1);
+		quorum_device->name = strdup(name);
 		if (!quorum_device->name) {
-			free(quorum_device);
-			quorum_device = NULL;
+			log_printf(LOG_ERR, "unable to register quorum device: not enough memory\n");
+			free_quorum_device();
 			return -ENOMEM;
 		}
 
-		strcpy(quorum_device->name, name);
 		quorum_device->state = NODESTATE_DEAD;
 		gettimeofday(&quorum_device->join_time, NULL);
 
@@ -1132,34 +1175,63 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
 		log_printf(LOG_INFO, "quorum device re-registered\n");
 	}
 
-	/* Update votes even if it existed before */
-	oldvotes = quorum_device->votes;
-        quorum_device->votes = votes;
+	quorum_device_update_votes(votes);
 
-	/* If it is a member and votes decreased, recalculate quorum */
-	if (quorum_device->state == NODESTATE_MEMBER &&
-	    oldvotes != votes) {
-		recalculate_quorum(1, 0);
+	return 0;
+}
+
+static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+{
+	if (!quorum_device) {
+		log_printf(LOG_DEBUG, "memb: failed to unregister a non existing quorum device\n");
+		return -EINVAL;
 	}
 
-        return 0;
+	if (quorum_device->state == NODESTATE_MEMBER) {
+		log_printf(LOG_DEBUG, "memb: failed to unregister: quorum device still active.\n");
+		return -EBUSY;
+	}
+
+	free_quorum_device();
+
+	log_printf(LOG_INFO, "quorum device unregistered\n");
+	return 0;
 }
 
-static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+static int do_cmd_update_quorum_device(char *cmdbuf, int *retlen)
 {
-        if (!quorum_device)
-                return -EINVAL;
+	int votes, ret = 0;
+	char *name = cmdbuf+sizeof(int);
 
-        if (quorum_device->state == NODESTATE_MEMBER)
-                return -EBUSY;
+	if (!quorum_device) {
+		log_printf(LOG_DEBUG, "memb: failed to update a non-existing quorum device\n");
+		return -EINVAL;
+	}
 
-	free(quorum_device->name);
-	free(quorum_device);
+	memcpy(&votes, cmdbuf, sizeof(int));
 
-        quorum_device = NULL;
+	/* allow name change of the quorum device */
+	if (quorum_device && strcmp(name, quorum_device->name)) {
+		char *newname = NULL;
+		char *oldname = NULL;
 
-	log_printf(LOG_INFO, "quorum device unregistered\n");
-        return 0;
+		log_printf(LOG_DEBUG, "memb: old name: %s new name: %s\n", quorum_device->name, name);
+		newname = strdup(name);
+		if (!newname) {
+			log_printf(LOG_ERR, "memb: unable to update quorum device name: out of memory\n");
+			ret = -ENOMEM;
+			goto out;
+		}
+		log_printf(LOG_INFO, "quorum device name changed to %s\n", name);
+		oldname = quorum_device->name;
+		quorum_device->name = newname;
+		free(oldname);
+	}
+
+out:
+	quorum_device_update_votes(votes);
+
+	return ret;
 }
 
 static int reload_config(int new_version, int should_broadcast)
@@ -1560,6 +1632,10 @@ int process_command(struct connection *con, int cmd, char *cmdbuf,
 		err = do_cmd_unregister_quorum_device(cmdbuf, retlen);
 		break;
 
+	case CMAN_CMD_UPDATE_QUORUMDEV:
+		err = do_cmd_update_quorum_device(cmdbuf, retlen);
+		break;
+
 	case CMAN_CMD_POLL_QUORUMDEV:
 		err = do_cmd_poll_quorum_device(cmdbuf, retlen);
 		break;
diff --git a/cman/lib/libcman.c b/cman/lib/libcman.c
index daaad07..a89c731 100644
--- a/cman/lib/libcman.c
+++ b/cman/lib/libcman.c
@@ -1002,14 +1002,15 @@ int cman_replyto_shutdown(cman_handle_t handle, int yesno)
 	return 0;
 }
 
-
-int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+static int cman_set_quorum_device(cman_handle_t handle,
+				     int ops,
+				     char *name, int votes)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
 	char buf[strlen(name)+1 + sizeof(int)];
 	VALIDATE_HANDLE(h);
 
-	if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+	if ((!name) || (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) || (votes < 0))
 	{
 		errno = EINVAL;
 		return -1;
@@ -1017,7 +1018,12 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
 
 	memcpy(buf, &votes, sizeof(int));
 	strcpy(buf+sizeof(int), name);
-	return info_call(h, CMAN_CMD_REG_QUORUMDEV, buf, strlen(name)+1+sizeof(int), NULL, 0);
+	return info_call(h, ops, buf, strlen(name)+1+sizeof(int), NULL, 0);
+}
+
+int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+	return cman_set_quorum_device(handle, CMAN_CMD_REG_QUORUMDEV, name, votes);
 }
 
 int cman_unregister_quorum_device(cman_handle_t handle)
@@ -1053,6 +1059,11 @@ int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info)
 	return ret;
 }
 
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+	return cman_set_quorum_device(handle, CMAN_CMD_UPDATE_QUORUMDEV, name, votes);
+}
+
 int cman_get_fenceinfo(cman_handle_t handle, int nodeid, uint64_t *time, int *fenced, char *agent)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h
index feb10a2..9f97875 100644
--- a/cman/lib/libcman.h
+++ b/cman/lib/libcman.h
@@ -420,6 +420,9 @@ int cman_barrier_delete(cman_handle_t handle, const char *name);
 /*
  * Add your own quorum device here, needs an admin socket
  *
+ * register_quorum and update_quorum arguments are mandatory.
+ * name has to be a valid null-terminated string and votes >= 0.
+ *
  * After creating a quorum device you will need to call 'poll_quorum_device'
  * at least once every (default) 10 seconds (this can be changed in CCS)
  * otherwise it will time-out and the cluster will lose its vote.
@@ -428,6 +431,7 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes);
 int cman_unregister_quorum_device(cman_handle_t handle);
 int cman_poll_quorum_device(cman_handle_t handle, int isavailable);
 int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info);
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes);
 
 /*
  * Sets the dirty bit inside cman. This indicates that the node has
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index c1598fa..2f0c2ca 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -690,6 +690,17 @@ register_device(qd_ctx *ctx)
 				ctx->qc_votes : 0);
 }
 
+static int
+update_device(qd_ctx *ctx)
+{
+	return cman_update_quorum_device(
+			ctx->qc_cman_admin,
+			(ctx->qc_flags&RF_CMAN_LABEL) ?
+				ctx->qc_cman_label : ctx->qc_device,
+			(!(ctx->qc_flags & RF_MASTER_WINS) ||
+			 ctx->qc_status == S_MASTER) ?
+				ctx->qc_votes : 0);
+}
 
 static int 
 adjust_votes(qd_ctx *ctx)
@@ -2119,9 +2130,22 @@ main(int argc, char **argv)
 
 	if (!_running)
 		goto out;
-	
+
 	/* This registers the quorum device */
-	register_device(&ctx);
+	ret = register_device(&ctx);
+	if (ret) {
+		if (errno == EBUSY) {
+			logt_print(LOG_NOTICE, "quorum device is already registered, updating\n");
+			ret = update_device(&ctx);
+			if (ret) {
+				logt_print(LOG_ERR, "DEBUG: unable to update quorum device info\n");
+				goto out;
+			}
+		} else {
+			logt_print(LOG_ERR, "Unable to register quorum device!\n");
+			goto out;
+		}
+	}
 
 	io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);
 
-- 
1.7.4.4




More information about the Cluster-devel mailing list