[Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m ...

lhh at sourceware.org lhh at sourceware.org
Tue Dec 4 20:24:45 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-12-04 20:24:43

Modified files:
	cman/qdisk     : disk.c disk.h disk_util.c main.c mkqdisk.c 
	                 proc.c 

Log message:
	Make qdiskd work with sector sizes other than 512 bytes.  Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if none are specified in cluster.conf.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.4.1&r2=1.4.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.3&r2=1.4.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.4.2&r2=1.2.4.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.6&r2=1.4.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3.4.1&r2=1.3.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.4.1

--- cluster/cman/qdisk/disk.c	2007/10/29 17:54:25	1.4.4.1
+++ cluster/cman/qdisk/disk.c	2007/12/04 20:24:43	1.4.4.2
@@ -43,8 +43,9 @@
 #include <platform.h>
 #include <unistd.h>
 #include <time.h>
+#include <linux/fs.h>
 
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
 uint32_t clu_crc32(const char *data, size_t count);
 
 
@@ -211,49 +212,58 @@
  * Returns - (the file descriptor), a value >= 0 on success.
  */
 int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
 {
-	int fd;
-	int retval;
+	int ret;
+	unsigned long ssz;
 
 	/*
 	 * Open for synchronous writes to insure all writes go directly
 	 * to disk.
 	 */
-	fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
-	if (fd < 0) {
-		return fd;
-	}
+	disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+	if (disk->d_fd < 0)
+		return disk->d_fd;
+
+	disk->d_blksz = 512;
+	ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+	if (ret < 0)
+		perror("qdisk_open: ioctl(BLKSSZGET)");
+	else
+		/* Sorry, no sector sizes >4GB please */
+		disk->d_blksz = (uint32_t)ssz;
 
-	/* Check to verify that the partition is large enough.*/
-	retval = lseek(fd, END_OF_DISK, SEEK_SET);
+	disk->d_pagesz = sysconf(_SC_PAGESIZE);
 
-	if (retval < 0) {
+	/* Check to verify that the partition is large enough.*/
+	ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+	if (ret < 0) {
 		perror("open_partition: seek");
 		return -1;
 	}
 
-	if (retval < END_OF_DISK) {
+	if (ret < END_OF_DISK(disk->d_blksz)) {
 		fprintf(stderr, "Partition %s too small\n", name);
 		errno = EINVAL;
 		return -1;
 	}
 
 	/* Set close-on-exec bit */
-        retval = fcntl(fd, F_GETFD, 0);
-        if (retval < 0) {
-                close(fd);
+        ret = fcntl(disk->d_fd, F_GETFD, 0);
+        if (ret < 0) {
+		perror("open_partition: fcntl(F_GETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-        retval |= FD_CLOEXEC;
-        if (fcntl(fd, F_SETFD, retval) < 0) {
-		perror("open_partition: fcntl");
-                close(fd);
+        ret |= FD_CLOEXEC;
+        if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+		perror("open_partition: fcntl(F_SETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-	return fd;
+	return 0;
 }
 
 
@@ -263,17 +273,17 @@
  * Returns - value from close syscall.
  */
 int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
 {
 	int retval;
 
-	if (!fd || *fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
 
-	retval = close(*fd);
-	*fd = -1;
+	retval = close(disk->d_fd);
+	disk->d_fd = -1;
 
 	return retval;
 }
@@ -288,7 +298,7 @@
 qdisk_validate(char *name)
 {
 	struct stat stat_st, *stat_ptr;
-	int fd;
+	target_info_t disk;
 	stat_ptr = &stat_st;
 
 	if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
 	/*
 	 * Verify read/write permission.
 	 */
-	fd = qdisk_open(name);
-	if (fd < 0) {
+	if (qdisk_open(name, &disk) < 0) {
 		fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
 			__FUNCTION__, name, strerror(errno));
 		return -1;
 	}
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 	return 0;
 }
 
 
 static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
 {
 	int ret;
 	shared_header_t *hdrp;
 	char *data;
 	int datalen;
 
-	ret = lseek(fd, readOffset, SEEK_SET);
+	ret = lseek(disk->d_fd, readOffset, SEEK_SET);
 	if (ret != readOffset) {
 #if 0
 		fprintf(stderr,
@@ -340,7 +349,7 @@
 		return -1;
 	}
 
-	ret = diskRawRead(fd, buf, len);
+	ret = diskRawRead(disk, buf, len);
 	if (ret != len) {
 #if 0
 		fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int readret;
@@ -383,21 +392,24 @@
 	int readlen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	
+	/* was 0x3ff, which is ((512 << 1) - 1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
 
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (read(fd, buf, len));
+		return (read(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
 			"diskRawRead: not setup for reads larger than %d.\n",
-		       512);
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 	/*
@@ -406,8 +418,8 @@
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	readlen = len;
@@ -415,18 +427,18 @@
 		readlen += extraLength;
 	}
 
-	readret = posix_memalign((void **)&alignedBuf, 512, 512);
+	readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
 	if (readret < 0) {
 		return -1;
 	}
 
-	readret = read(fd, alignedBuf, readlen);
+	readret = read(disk->d_fd, alignedBuf, readlen);
 	if (readret > 0) {
 		if (readret > len) {
-			bcopy(alignedBuf, buf, len);
+			memcpy(alignedBuf, buf, len);
 			readret = len;
 		} else {
-			bcopy(alignedBuf, buf, readret);
+			memcpy(alignedBuf, buf, readret);
 		}
 	}
 
@@ -445,7 +457,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int ret;
@@ -453,31 +465,33 @@
 	int writelen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	/* was 0x3ff, which is ((512 << 1) - 1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
+
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (write(fd, buf, len));
+		return (write(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
-		       "diskRawWrite: not setup for larger than %d.\n",
-		       512);
+			"diskRawRead: not setup for reads larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
-
 	/*
 	 * All IOs must be of size which is a multiple of 512.  Here we
 	 * just add in enough extra to accommodate.
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	writelen = len;
@@ -485,13 +499,20 @@
 		writelen += extraLength;
 	}
 
-	ret = posix_memalign((void **)&alignedBuf, 512,512);
+	ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
 	if (ret < 0) {
+		return -1;
+	}
+
+	if (len > disk->d_blksz) {
+		fprintf(stderr,
+		       "diskRawWrite: not setup for larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 
-	bcopy(buf, alignedBuf, len);
-	ret = write(fd, alignedBuf, writelen);
+	memcpy(buf, alignedBuf, len);
+	ret = write(disk->d_fd, alignedBuf, writelen);
 	if (ret > len) {
 		ret = len;
 	}
@@ -507,7 +528,7 @@
 
 
 static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
 {
 	off_t retval_seek;
 	ssize_t retval_write;
@@ -519,7 +540,7 @@
 		return (-1);
 	}
 
-	retval_seek = lseek(fd, writeOffset, SEEK_SET);
+	retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
 	if (retval_seek != writeOffset) {
 		fprintf(stderr,
 		       "diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
 		return (-1);
 	}
 
-	retval_write = diskRawWrite(fd, buf, len);
+	retval_write = diskRawWrite(disk, buf, len);
 	if (retval_write != len) {
 		if (retval_write == -1) {
 			fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
 
 
 int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
 {
 	shared_header_t *hdrp;
 	char *data;
@@ -556,15 +577,15 @@
 	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
-	if (total < 512)
-		total = 512;
+	if (total < disk->d_blksz)
+		total = disk->d_blksz;
 
 	/* Round it up */
-	if (total % 512) 
-		total = total + (512 * !!(total % 512)) - (total % 512);
+	if (total % disk->d_blksz) 
+		total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
 	if (rv < 0)
 		return -1;
 
@@ -573,7 +594,7 @@
 
 	data = (char *)hdrp + sizeof(shared_header_t);
 
-	rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+	rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
 	
 	if (rv == -1) {
 		return -1;
@@ -594,12 +615,12 @@
 
 
 int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
 {
 	size_t maxsize;
 	shared_header_t *hdrp;
 	char *data;
-	size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+	size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
 
 	maxsize = psz - (sizeof(shared_header_t));
 	if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
 
 	/*
 	 * Calculate the total length of the buffer, including the header.
-	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
 	if (total < psz)
@@ -622,7 +642,7 @@
 		total = total + (psz * !!(total % psz)) - (total % psz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
 	if (rv < 0) {
 		perror("posix_memalign");
 		return -1;
@@ -645,7 +665,7 @@
 	 * about locking here.
 	 */
 	if (total == psz)
-		rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+		rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
 
 	if (rv == -1)
 		perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
 
 
 static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
 {
 	quorum_header_t qh;
 
-	if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+	if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
 		swab_quorum_header_t(&qh);
 		if (qh.qh_magic == HEADER_MAGIC_OLD) {
 			printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
 	/* Copy in the cluster/label name */
 	snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
 
+	qh.qh_version = VERSION_MAGIC_V2;
 	if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
 		perror("time");
 		return -1;
 	}
 
 	qh.qh_magic = HEADER_MAGIC_NUMBER;
+	qh.qh_blksz = disk->d_blksz;
+	qh.qh_pad = 0;
+
 	swab_quorum_header_t(&qh);
-	if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+	if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
 		return -1;
 	}
 
@@ -699,24 +723,24 @@
 int
 qdisk_init(char *partname, char *label)
 {
-	int fd;
+	target_info_t disk;
 	status_block_t ps, wps;
-	int nid;
+	int nid, ret;
 	time_t t;
 
-	fd = qdisk_validate(partname);
-	if (fd < 0) {
+	ret = qdisk_validate(partname);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(partname);
-	if (fd < 0) {
+	ret = qdisk_open(partname, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (header_init(fd, label) < 0) {
+	if (header_init(&disk, label) < 0) {
 		return -1;
 	}
 
@@ -744,14 +768,14 @@
 		wps = ps;
 		swab_status_block_t(&wps);
 
-		if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+		if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
 			printf("Error writing node ID block %d\n", nid);
-			qdisk_close(&fd);
+			qdisk_close(&disk);
 			return -1;
 		}
 	}
 
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 
 	return 0;
 }
--- cluster/cman/qdisk/disk.h	2007/02/21 20:22:53	1.4.2.3
+++ cluster/cman/qdisk/disk.h	2007/12/04 20:24:43	1.4.2.4
@@ -72,7 +72,8 @@
 	RF_DEBUG = 0x4,
 	RF_PARANOID = 0x8,
 	RF_ALLOW_KILL = 0x10,
-	RF_UPTIME = 0x20
+	RF_UPTIME = 0x20,
+	RF_CMAN_LABEL = 0x40
 } run_flag_t;
 
 
@@ -86,6 +87,9 @@
 #define STATE_MAGIC_NUMBER	0x47bacef8	/* Status block */
 #define SHARED_HEADER_MAGIC	0x00DEBB1E	/* Per-block headeer */
 
+/* Version magic. */
+#define VERSION_MAGIC_V2	0x389fabc4
+
 
 typedef struct __attribute__ ((packed)) {
 	uint32_t	ps_magic;
@@ -152,16 +156,21 @@
  */
 typedef struct __attribute__ ((packed)) {
 	uint32_t	qh_magic;
-	uint32_t	qh_align;	   // 64-bit-ism: alignment fixer.
+	uint32_t	qh_version;	   // 
 	uint64_t	qh_timestamp;	   // time of last update
 	char 		qh_updatehost[128];// Hostname who put this here...
-	char		qh_cluster[128];   // Cluster name
+	char		qh_cluster[120];   // Cluster name; CMAN only 
+					   // supports 16 chars.
+	uint32_t	qh_blksz;          // Known block size @ creation
+	uint32_t	qh_pad;
 } quorum_header_t;
 
 #define swab_quorum_header_t(ptr) \
 {\
 	swab32((ptr)->qh_magic); \
-	swab32((ptr)->qh_align); \
+	swab32((ptr)->qh_version); \
+	swab32((ptr)->qh_blksz); \
+	swab32((ptr)->qh_pad); \
 	swab64((ptr)->qh_timestamp); \
 }
 
@@ -196,31 +205,35 @@
 
 /* Offsets from RHCM 1.2.x */
 #define OFFSET_HEADER	0
-#define HEADER_SIZE	4096		/* Page size for now */
+#define HEADER_SIZE(ssz)		(ssz<4096?4096:ssz)
 
-#define OFFSET_FIRST_STATUS_BLOCK	(OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK		4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz)	(OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz)	(ssz<4096?4096:ssz)
 #define STATUS_BLOCK_COUNT		MAX_NODES_DISK
 
-#define SPACE_PER_MESSAGE_BLOCK		(4096)
-#define	MESSAGE_BLOCK_COUNT		MAX_NODES_DISK
-
-#define END_OF_DISK			(OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz)		(OFFSET_FIRST_STATUS_BLOCK(ssz) + \
 					 (MAX_NODES_DISK + 1) * \
-					 SPACE_PER_STATUS_BLOCK) \
+					 SPACE_PER_STATUS_BLOCK(ssz)) \
 
 
+typedef struct {
+	int d_fd;
+	int _pad_;
+	size_t d_blksz;
+	size_t d_pagesz;
+} target_info_t;
+
 
 /* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
 int qdisk_init(char *name, char *clustername);
 int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
 
-#define qdisk_nodeid_offset(nodeid) \
-	(OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+	(OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
 
 /* From disk_utils.c */
 #define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
 	uint16_t pad0;
 } disk_msg_t;
 
+
 typedef struct {
 	uint64_t qc_incarnation;
 	struct timeval qc_average;
 	struct timeval qc_last[HISTORY_LENGTH];
-	int qc_fd;
+	target_info_t qc_disk;
 	int qc_my_id;
 	int qc_writes;
 	int qc_interval;
@@ -250,12 +264,14 @@
 	disk_node_state_t qc_disk_status;
 	disk_node_state_t qc_status;
 	int qc_master;		/* Master?! */
-	int _pad_;
+	int qc_status_sock;
 	run_flag_t qc_flags;
 	cman_handle_t qc_ch;
 	char *qc_device;
 	char *qc_label;
 	char *qc_status_file;
+	char *qc_cman_label;
+	char *qc_status_sockname;
 } qd_ctx;
 
 typedef struct {
@@ -272,14 +288,15 @@
 
 int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
 		    disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
 int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
 void qd_destroy(qd_ctx *ctx);
 
 /* proc.c */
 int find_partitions(const char *partfile, const char *label,
 		    char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+		 int flags);
 
 
 #endif
--- cluster/cman/qdisk/disk_util.c	2007/01/26 14:34:55	1.2.4.2
+++ cluster/cman/qdisk/disk_util.c	2007/12/04 20:24:43	1.2.4.3
@@ -201,8 +201,9 @@
 	if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
 		utime_ok = 0;
 	swab_status_block_t(&ps);
-	if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
-			sizeof(ps)) < 0) {
+	if (qdisk_write(&ctx->qc_disk,
+			qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+			&ps, sizeof(ps)) < 0) {
 		printf("Error writing node ID block %d\n", nid);
 		return -1;
 	}
@@ -223,12 +224,12 @@
 
 
 int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
 {
 	int x;
 
 	printf("Data @ offset %d:\n",
-	       (int)qdisk_nodeid_offset(ps->ps_nodeid));
+	       (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
 	printf("status_block_t {\n");
 	printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
 	printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
 
 
 int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
 {
 	status_block_t ps;
 
-	if (fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
@@ -275,13 +276,13 @@
 		return -1;
 	}
 
-	if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+	if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
 			sizeof(ps)) < 0) {
 		printf("Error reading node ID block %d\n", nid);
 		return -1;
 	}
 	swab_status_block_t(&ps);
-	qd_print_status(&ps);
+	qd_print_status(disk, &ps);
 
 	return 0;
 }
@@ -322,6 +323,7 @@
 	ctx->qc_incarnation = generate_token();
 	ctx->qc_ch = ch;
 	ctx->qc_my_id = me;
+	ctx->qc_status_sock = -1;
 
 	return 0;
 }
@@ -339,6 +341,5 @@
 		free(ctx->qc_device);
 		ctx->qc_device = NULL;
 	}
-	close(ctx->qc_fd);
-	ctx->qc_fd = -1;
+	qdisk_close(&ctx->qc_disk);
 }
--- cluster/cman/qdisk/main.c	2007/03/20 19:37:04	1.4.2.6
+++ cluster/cman/qdisk/main.c	2007/12/04 20:24:43	1.4.2.7
@@ -36,6 +36,7 @@
 #include <time.h>
 #include <sys/reboot.h>
 #include <sys/time.h>
+#include <sys/un.h>
 #include <linux/reboot.h>
 #include <sched.h>
 #include <signal.h>
@@ -147,7 +148,8 @@
 
 		sb = &ni[x].ni_status;
 
-		if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+		if (qdisk_read(&ctx->qc_disk,
+			       qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
 			       sb, sizeof(*sb)) < 0) {
 			clulog(LOG_WARNING,"Error reading node ID block %d\n",
 			       x+1);
@@ -452,6 +454,10 @@
 quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
 {
 	int x = 0, score, maxscore, score_req;
+	char buf[64];
+#if 0
+	struct sockaddr_un sun;
+#endif
 
 	clulog(LOG_INFO, "Quorum Daemon Initializing\n");
 	
@@ -462,12 +468,28 @@
 	if (qdisk_validate(ctx->qc_device) < 0)
 		return -1;
 
-	ctx->qc_fd = qdisk_open(ctx->qc_device);
-	if (ctx->qc_fd < 0) {
+	if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
 		clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
 		       strerror(errno));
 		return -1;
 	}
+
+	if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+		if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+			ctx->qc_cman_label = strdup(ctx->qc_label);
+		} else {
+			snprintf(buf, sizeof(buf), "QDisk[%d]",
+			 	(int)strlen(ctx->qc_device));
+			ctx->qc_cman_label = strdup(buf);
+		}
+
+		ctx->qc_flags |= RF_CMAN_LABEL;
+		clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+			ctx->qc_cman_label);
+	}
+
+	clulog(LOG_DEBUG, "I/O Size: %d  Page Size: %d\n",
+	       ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
 	
 	if (h && maxh) {
 		start_score_thread(ctx, h, maxh);
@@ -484,6 +506,42 @@
 		return -1;
 	}
 
+#if 0
+	if (ctx->qc_status_sockname) {
+		ctx->qc_status_sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+
+		if (ctx->qc_status_sockname < 0) {
+			clulog(LOG_ERR,
+			       "Could not create local socket %s: %s\n",
+			       qc->qc_status_sockname, strerror(errno));
+			free(qc->qc_status_sockname);
+			qc->qc_status_sockname = NULL;
+		} else {
+			sun.sun_family = PF_LOCAL;
+			snprintf(sun.sun_path, sizeof(sun.sun_path),
+				 qc->qc_status_sockname);
+			unlink(qc->qc_status_sockname);
+			if (bind(ctx->qc_status_sock,
+				 (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+				clulog(LOG_ERR, "Could not bind to local "
+				       "socket %s: %s\n",
+				       qc->qc_status_sockname,
+				       strerror(errno));
+				free(qc->qc_status_sockname);
+				qc->qc_status_sockname = NULL;
+				close(qc->qc_status_sock);
+				qc->qc_status_sock = -1;
+			}
+		}
+	} else {
+		qc->qc_status_sock = -1;
+	}
+
+	if (qc->qc_status_sock >= 0) {
+		listen(qc->qc_status_sock, 5);
+	}
+#endif
+
 	while (++x <= ctx->qc_tko && _running) {
 		read_node_blocks(ctx, ni, max);
 		check_transitions(ctx, ni, max, NULL);
@@ -622,23 +680,7 @@
 
 
 char *
-state_str(disk_node_state_t s)
-{
-	switch (s) {
-	case S_NONE:
-		return "None";
-	case S_EVICT:
-		return "Evicted";
-	case S_INIT:
-		return "Initializing";
-	case S_RUN:
-		return "Running";
-	case S_MASTER:
-		return "Master";
-	default:
-		return "ILLEGAL";
-	}
-}
+state_str(disk_node_state_t s);
 
 
 void
@@ -1237,6 +1279,12 @@
 		ctx->qc_status_file = val;
 	}
 
+	/* Get status socket */
+	snprintf(query, sizeof(query), "/cluster/quorumd/@status_sock");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ctx->qc_status_sockname = val;
+	}
+
 	/* Get min score */
 	snprintf(query, sizeof(query), "/cluster/quorumd/@min_score");
 	if (ccs_get(ccsfd, query, &val) == 0) {
@@ -1285,6 +1333,15 @@
 			ctx->qc_flags &= ~RF_REBOOT;
 		free(val);
 	}
+
+	/* Get cman_label */
+	snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (strlen(val) > 0 && strlen(val) <= 15) {
+			ctx->qc_flags |= RF_CMAN_LABEL;
+			ctx->qc_cman_label = val;
+		}
+	}
 	
 	/*
 	 * Get flag to see if we're supposed to kill cman if qdisk is not 
@@ -1384,21 +1441,25 @@
 main(int argc, char **argv)
 {
 	cman_node_t me;
-	int cfh, rv, forked = 0, nfd = -1;
+	int cfh, rv, forked = 0, nfd = -1, ret = -1;
+#if 0
+	int status_run = 0;
+#endif
 	qd_ctx ctx;
-	cman_handle_t ch;
+	cman_handle_t ch = NULL;
 	node_info_t ni[MAX_NODES_DISK];
 	struct h_data h[10];
 	char debug = 0, foreground = 0;
 	char device[128];
 	pid_t pid;
+	quorum_header_t qh;
 
 	if (check_process_running(argv[0], &pid) && pid !=getpid()) {
 		printf("QDisk services already running\n");
 		return 0;
 	}
 	
-	while ((rv = getopt(argc, argv, "fdQ")) != EOF) {
+	while ((rv = getopt(argc, argv, "fdQs")) != EOF) {
 		switch (rv) {
 		case 'd':
 			debug = 1;
@@ -1418,11 +1479,15 @@
 			dup2(nfd, 2);
 			close(nfd);
 			break;
+#if 0
+		case 's':
+			status_run = 1;
+#endif
 		default:
 			break;
 		}
 	}
-	
+
 #if (defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2)
 	ch = cman_admin_init(NULL);
 #else
@@ -1431,7 +1496,7 @@
 	if (!ch) {
 		if (!foreground && !forked) {
 			if (daemon_init(argv[0]) < 0)
-				return -1;
+				goto out;
 			else
 				forked = 1;
 		}
@@ -1452,7 +1517,7 @@
 	while (cman_get_node(ch, CMAN_NODEID_US, &me) < 0) {
 		if (!foreground && !forked) {
 			if (daemon_init(argv[0]) < 0)
-				return -1;
+				goto out;
 			else
 				forked = 1;
 		}
@@ -1472,7 +1537,7 @@
 	if (get_config_data(NULL, &ctx, h, 10, &cfh, debug) < 0) {
 		clulog_and_print(LOG_CRIT, "Configuration failed\n");
 		check_stop_cman(&ctx);
-		return -1;
+		goto out;
 	}
 	
 	if (ctx.qc_label) {
@@ -1483,7 +1548,7 @@
 					 " '%s' to any device\n",
 					 ctx.qc_label);
 			check_stop_cman(&ctx);
-			return -1;
+			goto out;
 		}
 
 		if (ctx.qc_device)
@@ -1494,18 +1559,29 @@
 		clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
 		       ctx.qc_device, ctx.qc_label);
 	} else if (ctx.qc_device) {
-		if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+		if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
 			clulog(LOG_CRIT,
 			       "Specified partition %s does not have a "
 			       "qdisk label\n", ctx.qc_device);
 			check_stop_cman(&ctx);
-			return -1;
+			goto out;
+		}
+
+		if (qh.qh_version == VERSION_MAGIC_V2 &&
+                    qh.qh_blksz != rv) {
+			clulog(LOG_CRIT,
+			       "Specified device %s does match kernel's "
+			       "reported sector size (%d != %d)\n",
+			       ctx.qc_device,
+			       ctx.qc_disk.d_blksz, rv);
+			check_stop_cman(&ctx);
+			goto out;
 		}
 	}
 
 	if (!foreground && !forked) {
                 if (daemon_init(argv[0]) < 0)
-			return -1;
+			goto out;
 	}
 	
 	set_priority(ctx.qc_sched, ctx.qc_sched_prio);
@@ -1513,13 +1589,19 @@
 	if (quorum_init(&ctx, ni, MAX_NODES_DISK, h, cfh) < 0) {
 		clulog_and_print(LOG_CRIT, "Initialization failed\n");
 		check_stop_cman(&ctx);
-		return -1;
+		goto out;
 	}
 
+	ret = 0;
+
 	if (!_running)
-		return 0;
+		goto out;
 	
-	cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+	cman_register_quorum_device(ctx.qc_ch,
+				    (ctx.qc_flags&RF_CMAN_LABEL)? 
+				        ctx.qc_cman_label:
+                                        ctx.qc_device,
+				    ctx.qc_votes);
 	/*
 		XXX this always returns -1 / EBUSY even when it works?!!!
 		
@@ -1529,16 +1611,18 @@
 				 "Could not register %s with CMAN; "
 				 "return = %d; error = %s\n",
 				 ctx.qc_device, rv, strerror(errno));
-		return -1;
+		goto out;
 	}
 	*/
-
 	if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0)
 		cman_unregister_quorum_device(ctx.qc_ch);
 
 	quorum_logout(&ctx);
+	/* free cman handle to avoid leak in cman */
+out:
+	cman_finish(ctx.qc_ch);
 	qd_destroy(&ctx);
 
-	return 0;
+	return ret;
 }
 
--- cluster/cman/qdisk/mkqdisk.c	2006/11/21 14:50:30	1.3.4.1
+++ cluster/cman/qdisk/mkqdisk.c	2007/12/04 20:24:43	1.3.4.2
@@ -37,23 +37,26 @@
 {
 	char device[128];
 	char *newdev = NULL, *newlabel = NULL;
-	int rv;
+	int rv, debug_level = 1;
 
-	printf("mkqdisk v0.5.1\n");
+	printf("mkqdisk v0.5.2\n");
 
-	while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
+	while ((rv = getopt(argc, argv, "Ldf:c:l:h")) != EOF) {
 		switch (rv) {
+		case 'd':
+			++debug_level;
+			break;
 		case 'L':
 			/* List */
 			close(2);
 			return find_partitions("/proc/partitions",
-					       NULL, NULL, 0, 1);
+					       NULL, NULL, 0, debug_level);
 			break;
 		case 'f':
 			close(2);
 			return find_partitions("/proc/partitions",
 					       optarg, device,
-					       sizeof(device), 1);
+					       sizeof(device), debug_level);
 		case 'c':
 			newdev = optarg;
 			break;
--- cluster/cman/qdisk/proc.c	2006/06/23 16:05:33	1.2
+++ cluster/cman/qdisk/proc.c	2007/12/04 20:24:43	1.2.4.1
@@ -32,27 +32,33 @@
 
 
 int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+	     int flags)
 {
-	int fd = -1, ret = -1;
+	int ret = -1;
 	quorum_header_t qh_local;
+	target_info_t disk;
 
 	if (!qh)
 		qh = &qh_local;
 
-	fd = qdisk_validate(device);
-	if (fd < 0) {
+	ret = qdisk_validate(device);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(device);
-	if (fd < 0) {
+	ret = qdisk_open(device, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+	if (ssz) 
+		*ssz = disk.d_blksz;
+
+	ret = -1;
+	if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
 		swab_quorum_header_t(qh);
                 if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
 			if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,12 +67,91 @@
                 }
         }
 
-	qdisk_close(&fd);
+	/* only flag now is 'strict device check'; i.e.,
+	  "block size recorded must match kernel's reported size" */
+	if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+            disk.d_blksz != qh->qh_blksz) {
+		ret = -1;
+	}
+
+	qdisk_close(&disk);
 
 	return ret;
 }
 
 
+char *
+state_str(disk_node_state_t s)
+{
+	switch (s) {
+	case S_NONE:
+		return "None";
+	case S_EVICT:
+		return "Evicted";
+	case S_INIT:
+		return "Initializing";
+	case S_RUN:
+		return "Running";
+	case S_MASTER:
+		return "Master";
+	default:
+		return "ILLEGAL";
+	}
+}
+
+
+void
+print_status_block(status_block_t *sb)
+{
+	if (sb->ps_state == S_NONE)
+		return;
+	printf("Status block for node %d\n", sb->ps_nodeid);
+	printf("\tLast updated by node %d\n", sb->ps_updatenode);
+	printf("\tLast updated on %s", ctime((time_t *)&sb->ps_timestamp));
+	printf("\tState: %s\n", state_str(sb->ps_state));
+	printf("\tFlags: %04x\n", sb->ps_flags);
+	printf("\tScore: %d/%d\n", sb->ps_score, sb->ps_scoremax);
+	printf("\tAverage Cycle speed: %d.%06d seconds\n", 
+		sb->ps_ca_sec, sb->ps_ca_usec);
+	printf("\tLast Cycle speed: %d.%06d seconds\n", 
+		sb->ps_lc_sec, sb->ps_lc_usec);
+	printf("\tIncarnation: %08x%08x\n",
+		(int)(sb->ps_incarnation>>32&0xffffffff),
+		(int)(sb->ps_incarnation&0xffffffff));
+
+}
+
+
+void
+read_info(char *dev)
+{
+	target_info_t ti;
+	int x;
+	status_block_t sb;
+
+	if (qdisk_open(dev, &ti) < 0) {
+		printf("Could not read from %s: %s\n",
+		       dev, strerror(errno));
+		return;
+	}
+
+	for (x = 0; x < MAX_NODES_DISK; x++) {
+
+		if (qdisk_read(&ti,
+			       qdisk_nodeid_offset(x+1, ti.d_blksz),
+			       &sb, sizeof(sb)) < 0) {
+			printf("Error reading node ID block %d\n",
+			       x+1);
+			continue;
+		}
+		swab_status_block_t(&sb);
+		print_status_block(&sb);
+	}
+
+	qdisk_close(&ti);
+}
+
+
 int
 find_partitions(const char *partfile, const char *label,
 	        char *devname, size_t devlen, int print)
@@ -78,6 +163,7 @@
 	char device[128];
 	char realdev[256];
 	quorum_header_t qh;
+	int ssz;
 
 	fp = fopen(partfile, "r");
 	if (!fp)
@@ -96,16 +182,35 @@
 		if (strlen(device)) {
 			snprintf(realdev, sizeof(realdev),
 				 "/dev/%s", device);
-			if (check_device(realdev, (char *)label, &qh) != 0)
+
+			/* If we're not "just printing", then 
+			   reject devices which don't match
+			   the recorded sector size */
+			if (check_device(realdev, (char *)label, &ssz,
+					 &qh, !print) != 0)
 				continue;
 
 			if (print) {
 				printf("%s:\n", realdev);
-				printf("\tMagic:   %08x\n", qh.qh_magic);
-				printf("\tLabel:   %s\n", qh.qh_cluster);
-				printf("\tCreated: %s",
+				printf("\tMagic:                %08x\n", qh.qh_magic);
+				printf("\tLabel:                %s\n", qh.qh_cluster);
+				printf("\tCreated:              %s",
 				       ctime((time_t *)&qh.qh_timestamp));
-				printf("\tHost:    %s\n\n", qh.qh_updatehost);
+				printf("\tHost:                 %s\n", qh.qh_updatehost);
+				printf("\tKernel Sector Size:   %d\n", ssz);
+				if (qh.qh_version == VERSION_MAGIC_V2) {
+					printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+					if (qh.qh_blksz != ssz) {
+						printf("WARNING: Sector size mismatch: Header: %d  Kernel: %d\n",
+							(int)qh.qh_blksz, ssz);
+					}
+				} else
+					printf("\n");
+			}
+
+			if (print >= 2) {
+				/* Print node stuff */
+				read_info(realdev);
 			}
 
 			if (devname && devlen) {




More information about the Cluster-devel mailing list