[Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m ...

lhh at sourceware.org lhh at sourceware.org
Tue Dec 4 20:40:55 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	lhh at sourceware.org	2007-12-04 20:40:55

Modified files:
	cman/qdisk     : disk.c disk.h disk_util.c main.c mkqdisk.c 
	                 proc.c 

Log message:
	Make qdiskd work with sector sizes other than 512 bytes.  Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if there's none specified in cluster.conf

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2

--- cluster/cman/qdisk/disk.c	2007/10/29 20:38:12	1.1.2.4
+++ cluster/cman/qdisk/disk.c	2007/12/04 20:40:54	1.1.2.5
@@ -43,8 +43,9 @@
 #include <platform.h>
 #include <unistd.h>
 #include <time.h>
+#include <linux/fs.h>
 
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
 uint32_t clu_crc32(const char *data, size_t count);
 
 
@@ -211,49 +212,58 @@
- * Returns - (the file descriptor), a value >= 0 on success.
+ * Returns - 0 on success (descriptor in disk->d_fd), < 0 on failure.
  */
 int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
 {
-	int fd;
-	int retval;
+	int ret;
+	int ssz;
 
 	/*
 	 * Open for synchronous writes to insure all writes go directly
 	 * to disk.
 	 */
-	fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
-	if (fd < 0) {
-		return fd;
-	}
+	disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+	if (disk->d_fd < 0)
+		return disk->d_fd;
+
+	disk->d_blksz = 512;
+	ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+	if (ret < 0)
+		perror("qdisk_open: ioctl(BLKSSZGET)");
+	else
+		/* BLKSSZGET stores an int; keep the 512 default on error */
+		disk->d_blksz = (uint32_t)ssz;
 
-	/* Check to verify that the partition is large enough.*/
-	retval = lseek(fd, END_OF_DISK, SEEK_SET);
+	disk->d_pagesz = sysconf(_SC_PAGESIZE);
 
-	if (retval < 0) {
+	/* Check to verify that the partition is large enough.*/
+	ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+	if (ret < 0) {
 		perror("open_partition: seek");
 		return -1;
 	}
 
-	if (retval < END_OF_DISK) {
+	if (ret < END_OF_DISK(disk->d_blksz)) {
 		fprintf(stderr, "Partition %s too small\n", name);
 		errno = EINVAL;
 		return -1;
 	}
 
 	/* Set close-on-exec bit */
-        retval = fcntl(fd, F_GETFD, 0);
-        if (retval < 0) {
-                close(fd);
+        ret = fcntl(disk->d_fd, F_GETFD, 0);
+        if (ret < 0) {
+		perror("open_partition: fcntl(F_GETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-        retval |= FD_CLOEXEC;
-        if (fcntl(fd, F_SETFD, retval) < 0) {
-		perror("open_partition: fcntl");
-                close(fd);
+        ret |= FD_CLOEXEC;
+        if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+		perror("open_partition: fcntl(F_SETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-	return fd;
+	return 0;
 }
 
 
@@ -263,17 +273,17 @@
  * Returns - value from close syscall.
  */
 int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
 {
 	int retval;
 
-	if (!fd || *fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
 
-	retval = close(*fd);
-	*fd = -1;
+	retval = close(disk->d_fd);
+	disk->d_fd = -1;
 
 	return retval;
 }
@@ -288,7 +298,7 @@
 qdisk_validate(char *name)
 {
 	struct stat stat_st, *stat_ptr;
-	int fd;
+	target_info_t disk;
 	stat_ptr = &stat_st;
 
 	if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
 	/*
 	 * Verify read/write permission.
 	 */
-	fd = qdisk_open(name);
-	if (fd < 0) {
+	if (qdisk_open(name, &disk) < 0) {
 		fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
 			__FUNCTION__, name, strerror(errno));
 		return -1;
 	}
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 	return 0;
 }
 
 
 static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
 {
 	int ret;
 	shared_header_t *hdrp;
 	char *data;
 	int datalen;
 
-	ret = lseek(fd, readOffset, SEEK_SET);
+	ret = lseek(disk->d_fd, readOffset, SEEK_SET);
 	if (ret != readOffset) {
 #if 0
 		fprintf(stderr,
@@ -340,7 +349,7 @@
 		return -1;
 	}
 
-	ret = diskRawRead(fd, buf, len);
+	ret = diskRawRead(disk, buf, len);
 	if (ret != len) {
 #if 0
 		fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int readret;
@@ -383,21 +392,24 @@
 	int readlen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	
+	/* was 0x3ff, which is ((512 << 1) - 1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
 
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (read(fd, buf, len));
+		return (read(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
 			"diskRawRead: not setup for reads larger than %d.\n",
-		       512);
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 	/*
@@ -406,8 +418,8 @@
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	readlen = len;
@@ -415,18 +427,18 @@
 		readlen += extraLength;
 	}
 
-	readret = posix_memalign((void **)&alignedBuf, 512, 512);
-	if (readret < 0) {
+	readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
+	if (readret != 0) {
 		return -1;
 	}
 
-	readret = read(fd, alignedBuf, readlen);
+	readret = read(disk->d_fd, alignedBuf, readlen);
 	if (readret > 0) {
 		if (readret > len) {
-			bcopy(alignedBuf, buf, len);
+			memcpy(buf, alignedBuf, len);
 			readret = len;
 		} else {
-			bcopy(alignedBuf, buf, readret);
+			memcpy(buf, alignedBuf, readret);
 		}
 	}
 
@@ -445,7 +457,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int ret;
@@ -453,31 +465,33 @@
 	int writelen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	/* was 0x3ff, which is ((512 << 1) - 1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
+
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (write(fd, buf, len));
+		return (write(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
-		       "diskRawWrite: not setup for larger than %d.\n",
-		       512);
+		       "diskRawWrite: not setup for larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
-
 	/*
 	 * All IOs must be of size which is a multiple of 512.  Here we
 	 * just add in enough extra to accommodate.
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	writelen = len;
@@ -485,13 +499,20 @@
 		writelen += extraLength;
 	}
 
-	ret = posix_memalign((void **)&alignedBuf, 512,512);
-	if (ret < 0) {
+	ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
+	if (ret != 0) {
+		return -1;
+	}
+
+	if (len > disk->d_blksz) {
+		fprintf(stderr,
+		       "diskRawWrite: not setup for larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 
-	bcopy(buf, alignedBuf, len);
-	ret = write(fd, alignedBuf, writelen);
+	memcpy(alignedBuf, buf, len);
+	ret = write(disk->d_fd, alignedBuf, writelen);
 	if (ret > len) {
 		ret = len;
 	}
@@ -507,7 +528,7 @@
 
 
 static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
 {
 	off_t retval_seek;
 	ssize_t retval_write;
@@ -519,7 +540,7 @@
 		return (-1);
 	}
 
-	retval_seek = lseek(fd, writeOffset, SEEK_SET);
+	retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
 	if (retval_seek != writeOffset) {
 		fprintf(stderr,
 		       "diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
 		return (-1);
 	}
 
-	retval_write = diskRawWrite(fd, buf, len);
+	retval_write = diskRawWrite(disk, buf, len);
 	if (retval_write != len) {
 		if (retval_write == -1) {
 			fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
 
 
 int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
 {
 	shared_header_t *hdrp;
 	char *data;
@@ -556,15 +577,15 @@
 	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
-	if (total < 512)
-		total = 512;
+	if (total < disk->d_blksz)
+		total = disk->d_blksz;
 
 	/* Round it up */
-	if (total % 512) 
-		total = total + (512 * !!(total % 512)) - (total % 512);
+	if (total % disk->d_blksz) 
+		total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
 	if (rv < 0)
 		return -1;
 
@@ -573,7 +594,7 @@
 
 	data = (char *)hdrp + sizeof(shared_header_t);
 
-	rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+	rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
 	
 	if (rv == -1) {
 		return -1;
@@ -594,12 +615,12 @@
 
 
 int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
 {
 	size_t maxsize;
 	shared_header_t *hdrp;
 	char *data;
-	size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+	size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
 
 	maxsize = psz - (sizeof(shared_header_t));
 	if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
 
 	/*
 	 * Calculate the total length of the buffer, including the header.
-	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
 	if (total < psz)
@@ -622,7 +642,7 @@
 		total = total + (psz * !!(total % psz)) - (total % psz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
 	if (rv < 0) {
 		perror("posix_memalign");
 		return -1;
@@ -645,7 +665,7 @@
 	 * about locking here.
 	 */
 	if (total == psz)
-		rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+		rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
 
 	if (rv == -1)
 		perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
 
 
 static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
 {
 	quorum_header_t qh;
 
-	if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+	if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
 		swab_quorum_header_t(&qh);
 		if (qh.qh_magic == HEADER_MAGIC_OLD) {
 			printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
 	/* Copy in the cluster/label name */
 	snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
 
+	qh.qh_version = VERSION_MAGIC_V2;
 	if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
 		perror("time");
 		return -1;
 	}
 
 	qh.qh_magic = HEADER_MAGIC_NUMBER;
+	qh.qh_blksz = disk->d_blksz;
+	qh.qh_pad = 0;
+
 	swab_quorum_header_t(&qh);
-	if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+	if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
 		return -1;
 	}
 
@@ -699,24 +723,24 @@
 int
 qdisk_init(char *partname, char *label)
 {
-	int fd;
+	target_info_t disk;
 	status_block_t ps, wps;
-	int nid;
+	int nid, ret;
 	time_t t;
 
-	fd = qdisk_validate(partname);
-	if (fd < 0) {
+	ret = qdisk_validate(partname);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(partname);
-	if (fd < 0) {
+	ret = qdisk_open(partname, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (header_init(fd, label) < 0) {
+	if (header_init(&disk, label) < 0) {
 		return -1;
 	}
 
@@ -744,14 +768,14 @@
 		wps = ps;
 		swab_status_block_t(&wps);
 
-		if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+		if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
 			printf("Error writing node ID block %d\n", nid);
-			qdisk_close(&fd);
+			qdisk_close(&disk);
 			return -1;
 		}
 	}
 
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 
 	return 0;
 }
--- cluster/cman/qdisk/disk.h	2007/02/21 20:19:43	1.1.2.6
+++ cluster/cman/qdisk/disk.h	2007/12/04 20:40:54	1.1.2.7
@@ -72,7 +72,8 @@
 	RF_DEBUG = 0x4,
 	RF_PARANOID = 0x8,
 	RF_ALLOW_KILL = 0x10,
-	RF_UPTIME = 0x20
+	RF_UPTIME = 0x20,
+	RF_CMAN_LABEL = 0x40
 } run_flag_t;
 
 
@@ -86,6 +87,9 @@
 #define STATE_MAGIC_NUMBER	0x47bacef8	/* Status block */
 #define SHARED_HEADER_MAGIC	0x00DEBB1E	/* Per-block headeer */
 
+/* Version magic. */
+#define VERSION_MAGIC_V2	0x389fabc4
+
 
 typedef struct __attribute__ ((packed)) {
 	uint32_t	ps_magic;
@@ -152,16 +156,21 @@
  */
 typedef struct __attribute__ ((packed)) {
 	uint32_t	qh_magic;
-	uint32_t	qh_align;	   // 64-bit-ism: alignment fixer.
+	uint32_t	qh_version;	   // 
 	uint64_t	qh_timestamp;	   // time of last update
 	char 		qh_updatehost[128];// Hostname who put this here...
-	char		qh_cluster[128];   // Cluster name
+	char		qh_cluster[120];   // Cluster name; CMAN only 
+					   // supports 16 chars.
+	uint32_t	qh_blksz;          // Known block size @ creation
+	uint32_t	qh_pad;
 } quorum_header_t;
 
 #define swab_quorum_header_t(ptr) \
 {\
 	swab32((ptr)->qh_magic); \
-	swab32((ptr)->qh_align); \
+	swab32((ptr)->qh_version); \
+	swab32((ptr)->qh_blksz); \
+	swab32((ptr)->qh_pad); \
 	swab64((ptr)->qh_timestamp); \
 }
 
@@ -196,31 +205,35 @@
 
 /* Offsets from RHCM 1.2.x */
 #define OFFSET_HEADER	0
-#define HEADER_SIZE	4096		/* Page size for now */
+#define HEADER_SIZE(ssz)		(ssz<4096?4096:ssz)
 
-#define OFFSET_FIRST_STATUS_BLOCK	(OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK		4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz)	(OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz)	(ssz<4096?4096:ssz)
 #define STATUS_BLOCK_COUNT		MAX_NODES_DISK
 
-#define SPACE_PER_MESSAGE_BLOCK		(4096)
-#define	MESSAGE_BLOCK_COUNT		MAX_NODES_DISK
-
-#define END_OF_DISK			(OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz)		(OFFSET_FIRST_STATUS_BLOCK(ssz) + \
 					 (MAX_NODES_DISK + 1) * \
-					 SPACE_PER_STATUS_BLOCK) \
+					 SPACE_PER_STATUS_BLOCK(ssz)) \
+
 
+typedef struct {
+	int d_fd;
+	int _pad_;
+	size_t d_blksz;
+	size_t d_pagesz;
+} target_info_t;
 
 
 /* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
 int qdisk_init(char *name, char *clustername);
 int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
 
-#define qdisk_nodeid_offset(nodeid) \
-	(OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+	(OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
 
 /* From disk_utils.c */
 #define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
 	uint16_t pad0;
 } disk_msg_t;
 
+
 typedef struct {
 	uint64_t qc_incarnation;
 	struct timeval qc_average;
 	struct timeval qc_last[HISTORY_LENGTH];
-	int qc_fd;
+	target_info_t qc_disk;
 	int qc_my_id;
 	int qc_writes;
 	int qc_interval;
@@ -256,6 +270,7 @@
 	char *qc_device;
 	char *qc_label;
 	char *qc_status_file;
+	char *qc_cman_label;
 } qd_ctx;
 
 typedef struct {
@@ -272,14 +287,15 @@
 
 int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
 		    disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
 int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
 void qd_destroy(qd_ctx *ctx);
 
 /* proc.c */
 int find_partitions(const char *partfile, const char *label,
 		    char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+		 int flags);
 
 
 #endif
--- cluster/cman/qdisk/disk_util.c	2007/01/26 14:34:26	1.1.2.3
+++ cluster/cman/qdisk/disk_util.c	2007/12/04 20:40:54	1.1.2.4
@@ -201,8 +201,9 @@
 	if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
 		utime_ok = 0;
 	swab_status_block_t(&ps);
-	if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
-			sizeof(ps)) < 0) {
+	if (qdisk_write(&ctx->qc_disk,
+			qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+			&ps, sizeof(ps)) < 0) {
 		printf("Error writing node ID block %d\n", nid);
 		return -1;
 	}
@@ -223,12 +224,12 @@
 
 
 int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
 {
 	int x;
 
 	printf("Data @ offset %d:\n",
-	       (int)qdisk_nodeid_offset(ps->ps_nodeid));
+	       (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
 	printf("status_block_t {\n");
 	printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
 	printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
 
 
 int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
 {
 	status_block_t ps;
 
-	if (fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
@@ -275,13 +276,13 @@
 		return -1;
 	}
 
-	if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+	if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
 			sizeof(ps)) < 0) {
 		printf("Error reading node ID block %d\n", nid);
 		return -1;
 	}
 	swab_status_block_t(&ps);
-	qd_print_status(&ps);
+	qd_print_status(disk, &ps);
 
 	return 0;
 }
@@ -339,6 +340,5 @@
 		free(ctx->qc_device);
 		ctx->qc_device = NULL;
 	}
-	close(ctx->qc_fd);
-	ctx->qc_fd = -1;
+	qdisk_close(&ctx->qc_disk);
 }
--- cluster/cman/qdisk/main.c	2007/03/20 19:36:14	1.1.2.8
+++ cluster/cman/qdisk/main.c	2007/12/04 20:40:54	1.1.2.9
@@ -147,7 +147,8 @@
 
 		sb = &ni[x].ni_status;
 
-		if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+		if (qdisk_read(&ctx->qc_disk,
+			       qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
 			       sb, sizeof(*sb)) < 0) {
 			clulog(LOG_WARNING,"Error reading node ID block %d\n",
 			       x+1);
@@ -452,6 +453,7 @@
 quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
 {
 	int x = 0, score, maxscore, score_req;
+	char buf[64];
 
 	clulog(LOG_INFO, "Quorum Daemon Initializing\n");
 	
@@ -462,12 +464,28 @@
 	if (qdisk_validate(ctx->qc_device) < 0)
 		return -1;
 
-	ctx->qc_fd = qdisk_open(ctx->qc_device);
-	if (ctx->qc_fd < 0) {
+	if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
 		clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
 		       strerror(errno));
 		return -1;
 	}
+
+	if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+		if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+			ctx->qc_cman_label = strdup(ctx->qc_label);
+		} else {
+			snprintf(buf, sizeof(buf), "QDisk[%d]",
+			 	(int)strlen(ctx->qc_device));
+			ctx->qc_cman_label = strdup(buf);
+		}
+
+		ctx->qc_flags |= RF_CMAN_LABEL;
+		clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+			ctx->qc_cman_label);
+	}
+
+	clulog(LOG_DEBUG, "I/O Size: %d  Page Size: %d\n",
+	       (int)ctx->qc_disk.d_blksz, (int)ctx->qc_disk.d_pagesz);
 	
 	if (h && maxh) {
 		start_score_thread(ctx, h, maxh);
@@ -1209,14 +1227,30 @@
 	}
 	if (ctx->qc_master_wait <= ctx->qc_tko_up)
 		ctx->qc_master_wait = ctx->qc_tko_up + 1;
-		
+
 	/* Get votes */
+
+	/* check if votes is set in cluster.conf */
 	snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
 	if (ccs_get(ccsfd, query, &val) == 0) {
 		ctx->qc_votes = atoi(val);
 		free(val);
 		if (ctx->qc_votes < 0)
 			ctx->qc_votes = 0;
+	} else { /* if votes is not set, default to node_num - 1 */
+		int nodes = 0, error;
+		for (;;) {
+			error = ccs_get_list(ccsfd, "/cluster/clusternodes/child::*", &val);
+			if (error || !val)
+				break;
+
+			nodes++;
+		}
+		nodes--;
+		if (nodes < 0)
+			nodes = 0;
+
+		ctx->qc_votes = nodes;
 	}
 
 	/* Get device */
@@ -1285,6 +1319,15 @@
 			ctx->qc_flags &= ~RF_REBOOT;
 		free(val);
 	}
+
+	/* Get cman_label */
+	snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (strlen(val) > 0 && strlen(val) <= 15) {
+			ctx->qc_flags |= RF_CMAN_LABEL;
+			ctx->qc_cman_label = val;
+		}
+	}
 	
 	/*
 	 * Get flag to see if we're supposed to kill cman if qdisk is not 
@@ -1347,8 +1390,9 @@
 	*cfh = configure_heuristics(ccsfd, h, maxh);
 
 	clulog(LOG_DEBUG,
-	       "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n",
-	       *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
+	       "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes,"
+	       " flags=%08x\n",
+	       *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes, ctx->qc_flags);
 
 	ccs_disconnect(ccsfd);
 
@@ -1391,6 +1435,7 @@
 	char debug = 0, foreground = 0;
 	char device[128];
 	pid_t pid;
+	quorum_header_t qh;
 
 	if (check_process_running(argv[0], &pid) && pid !=getpid()) {
 		printf("QDisk services already running\n");
@@ -1493,13 +1538,24 @@
 		clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
 		       ctx.qc_device, ctx.qc_label);
 	} else if (ctx.qc_device) {
-		if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+		if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
 			clulog(LOG_CRIT,
 			       "Specified partition %s does not have a "
 			       "qdisk label\n", ctx.qc_device);
 			check_stop_cman(&ctx);
 			return -1;
 		}
+
+		if (qh.qh_version == VERSION_MAGIC_V2 &&
+                    qh.qh_blksz != rv) {
+			clulog(LOG_CRIT,
+			       "Specified device %s does not match kernel's "
+			       "reported sector size (%d != %d)\n",
+			       ctx.qc_device,
+			       (int)qh.qh_blksz, rv);
+			check_stop_cman(&ctx);
+			return -1;
+		}
 	}
 
 	if (!foreground && !forked) {
@@ -1518,7 +1574,11 @@
 	if (!_running)
 		return 0;
 	
-	cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+	cman_register_quorum_device(ctx.qc_ch,
+				    (ctx.qc_flags&RF_CMAN_LABEL)? 
+				        ctx.qc_cman_label:
+                                        ctx.qc_device,
+				    ctx.qc_votes);
 	/*
 		XXX this always returns -1 / EBUSY even when it works?!!!
 		
--- cluster/cman/qdisk/mkqdisk.c	2006/11/21 14:50:01	1.1.2.3
+++ cluster/cman/qdisk/mkqdisk.c	2007/12/04 20:40:54	1.1.2.4
@@ -39,7 +39,7 @@
 	char *newdev = NULL, *newlabel = NULL;
 	int rv;
 
-	printf("mkqdisk v0.5.1\n");
+	printf("mkqdisk v0.5.2\n");
 
 	while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
 		switch (rv) {
--- cluster/cman/qdisk/proc.c	2006/06/23 16:01:02	1.1.2.1
+++ cluster/cman/qdisk/proc.c	2007/12/04 20:40:54	1.1.2.2
@@ -32,27 +32,33 @@
 
 
 int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+	     int flags)
 {
-	int fd = -1, ret = -1;
+	int ret = -1;
 	quorum_header_t qh_local;
+	target_info_t disk;
 
 	if (!qh)
 		qh = &qh_local;
 
-	fd = qdisk_validate(device);
-	if (fd < 0) {
+	ret = qdisk_validate(device);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(device);
-	if (fd < 0) {
+	ret = qdisk_open(device, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+	if (ssz) 
+		*ssz = disk.d_blksz;
+
+	ret = -1;
+	if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
 		swab_quorum_header_t(qh);
                 if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
 			if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,7 +67,14 @@
                 }
         }
 
-	qdisk_close(&fd);
+	/* only flag now is 'strict device check'; i.e.,
+	  "block size recorded must match kernel's reported size" */
+	if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+            disk.d_blksz != qh->qh_blksz) {
+		ret = -1;
+	}
+
+	qdisk_close(&disk);
 
 	return ret;
 }
@@ -78,6 +91,7 @@
 	char device[128];
 	char realdev[256];
 	quorum_header_t qh;
+	int ssz;
 
 	fp = fopen(partfile, "r");
 	if (!fp)
@@ -96,16 +110,30 @@
 		if (strlen(device)) {
 			snprintf(realdev, sizeof(realdev),
 				 "/dev/%s", device);
-			if (check_device(realdev, (char *)label, &qh) != 0)
+
+			/* If we're not "just printing", then
+			   reject devices which don't match
+			   the recorded sector size */
+			if (check_device(realdev, (char *)label, &ssz,
+					 &qh, !print) != 0)
 				continue;
 
 			if (print) {
 				printf("%s:\n", realdev);
-				printf("\tMagic:   %08x\n", qh.qh_magic);
-				printf("\tLabel:   %s\n", qh.qh_cluster);
-				printf("\tCreated: %s",
+				printf("\tMagic:                %08x\n", qh.qh_magic);
+				printf("\tLabel:                %s\n", qh.qh_cluster);
+				printf("\tCreated:              %s",
 				       ctime((time_t *)&qh.qh_timestamp));
-				printf("\tHost:    %s\n\n", qh.qh_updatehost);
+				printf("\tHost:                 %s\n", qh.qh_updatehost);
+				printf("\tKernel Sector Size:   %d\n", ssz);
+				if (qh.qh_version == VERSION_MAGIC_V2) {
+					printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+					if (qh.qh_blksz != ssz) {
+						printf("WARNING: Sector size mismatch: Header: %d  Kernel: %d\n",
+							(int)qh.qh_blksz, ssz);
+					}
+				} else
+					printf("\n");
 			}
 
 			if (devname && devlen) {




More information about the Cluster-devel mailing list