[Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c main.c mkqdisk.c proc.c
lhh at sourceware.org
lhh at sourceware.org
Tue Dec 4 20:40:55 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-12-04 20:40:55
Modified files:
cman/qdisk : disk.c disk.h disk_util.c main.c mkqdisk.c
proc.c
Log message:
Make qdiskd work with sector sizes other than 512 bytes. Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if none is specified in cluster.conf.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2
--- cluster/cman/qdisk/disk.c 2007/10/29 20:38:12 1.1.2.4
+++ cluster/cman/qdisk/disk.c 2007/12/04 20:40:54 1.1.2.5
@@ -43,8 +43,9 @@
#include <platform.h>
#include <unistd.h>
#include <time.h>
+#include <linux/fs.h>
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
uint32_t clu_crc32(const char *data, size_t count);
@@ -211,49 +212,58 @@
* Returns - (the file descriptor), a value >= 0 on success.
*/
int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
{
- int fd;
- int retval;
+ int ret;
+ unsigned long ssz;
/*
* Open for synchronous writes to insure all writes go directly
* to disk.
*/
- fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
- if (fd < 0) {
- return fd;
- }
+ disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+ if (disk->d_fd < 0)
+ return disk->d_fd;
+
+ disk->d_blksz = 512;
+ ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+ if (ret < 0)
+ perror("qdisk_open: ioctl(BLKSSZGET)");
+ else
+ /* Sorry, no sector sizes >4GB please */
+ disk->d_blksz = (uint32_t)ssz;
- /* Check to verify that the partition is large enough.*/
- retval = lseek(fd, END_OF_DISK, SEEK_SET);
+ disk->d_pagesz = sysconf(_SC_PAGESIZE);
- if (retval < 0) {
+ /* Check to verify that the partition is large enough.*/
+ ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+ if (ret < 0) {
perror("open_partition: seek");
return -1;
}
- if (retval < END_OF_DISK) {
+ if (ret < END_OF_DISK(disk->d_blksz)) {
fprintf(stderr, "Partition %s too small\n", name);
errno = EINVAL;
return -1;
}
/* Set close-on-exec bit */
- retval = fcntl(fd, F_GETFD, 0);
- if (retval < 0) {
- close(fd);
+ ret = fcntl(disk->d_fd, F_GETFD, 0);
+ if (ret < 0) {
+ perror("open_partition: fcntl(F_GETFD)");
+ close(disk->d_fd);
return -1;
}
- retval |= FD_CLOEXEC;
- if (fcntl(fd, F_SETFD, retval) < 0) {
- perror("open_partition: fcntl");
- close(fd);
+ ret |= FD_CLOEXEC;
+ if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+ perror("open_partition: fcntl(F_SETFD)");
+ close(disk->d_fd);
return -1;
}
- return fd;
+ return 0;
}
@@ -263,17 +273,17 @@
* Returns - value from close syscall.
*/
int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
{
int retval;
- if (!fd || *fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
- retval = close(*fd);
- *fd = -1;
+ retval = close(disk->d_fd);
+ disk->d_fd = -1;
return retval;
}
@@ -288,7 +298,7 @@
qdisk_validate(char *name)
{
struct stat stat_st, *stat_ptr;
- int fd;
+ target_info_t disk;
stat_ptr = &stat_st;
if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
/*
* Verify read/write permission.
*/
- fd = qdisk_open(name);
- if (fd < 0) {
+ if (qdisk_open(name, &disk) < 0) {
fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
__FUNCTION__, name, strerror(errno));
return -1;
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
{
int ret;
shared_header_t *hdrp;
char *data;
int datalen;
- ret = lseek(fd, readOffset, SEEK_SET);
+ ret = lseek(disk->d_fd, readOffset, SEEK_SET);
if (ret != readOffset) {
#if 0
fprintf(stderr,
@@ -340,7 +349,7 @@
return -1;
}
- ret = diskRawRead(fd, buf, len);
+ ret = diskRawRead(disk, buf, len);
if (ret != len) {
#if 0
fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int readret;
@@ -383,21 +392,24 @@
int readlen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+
+ /* was 3ff, which is (512<<1-1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (read(fd, buf, len));
+ return (read(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
"diskRawRead: not setup for reads larger than %d.\n",
- 512);
+ (int)disk->d_blksz);
return (-1);
}
/*
@@ -406,8 +418,8 @@
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
readlen = len;
@@ -415,18 +427,18 @@
readlen += extraLength;
}
- readret = posix_memalign((void **)&alignedBuf, 512, 512);
+ readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (readret < 0) {
return -1;
}
- readret = read(fd, alignedBuf, readlen);
+ readret = read(disk->d_fd, alignedBuf, readlen);
if (readret > 0) {
if (readret > len) {
- bcopy(alignedBuf, buf, len);
+ memcpy(alignedBuf, buf, len);
readret = len;
} else {
- bcopy(alignedBuf, buf, readret);
+ memcpy(alignedBuf, buf, readret);
}
}
@@ -445,7 +457,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int ret;
@@ -453,31 +465,33 @@
int writelen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+ /* was 3ff, which is (512<<1-1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
+
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (write(fd, buf, len));
+ return (write(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
- "diskRawWrite: not setup for larger than %d.\n",
- 512);
+ "diskRawRead: not setup for reads larger than %d.\n",
+ (int)disk->d_blksz);
return (-1);
}
-
/*
* All IOs must be of size which is a multiple of 512. Here we
* just add in enough extra to accommodate.
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
writelen = len;
@@ -485,13 +499,20 @@
writelen += extraLength;
}
- ret = posix_memalign((void **)&alignedBuf, 512,512);
+ ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (ret < 0) {
+ return -1;
+ }
+
+ if (len > disk->d_blksz) {
+ fprintf(stderr,
+ "diskRawWrite: not setup for larger than %d.\n",
+ (int)disk->d_blksz);
return (-1);
}
- bcopy(buf, alignedBuf, len);
- ret = write(fd, alignedBuf, writelen);
+ memcpy(buf, alignedBuf, len);
+ ret = write(disk->d_fd, alignedBuf, writelen);
if (ret > len) {
ret = len;
}
@@ -507,7 +528,7 @@
static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
{
off_t retval_seek;
ssize_t retval_write;
@@ -519,7 +540,7 @@
return (-1);
}
- retval_seek = lseek(fd, writeOffset, SEEK_SET);
+ retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
if (retval_seek != writeOffset) {
fprintf(stderr,
"diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
return (-1);
}
- retval_write = diskRawWrite(fd, buf, len);
+ retval_write = diskRawWrite(disk, buf, len);
if (retval_write != len) {
if (retval_write == -1) {
fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
{
shared_header_t *hdrp;
char *data;
@@ -556,15 +577,15 @@
* Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
- if (total < 512)
- total = 512;
+ if (total < disk->d_blksz)
+ total = disk->d_blksz;
/* Round it up */
- if (total % 512)
- total = total + (512 * !!(total % 512)) - (total % 512);
+ if (total % disk->d_blksz)
+ total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
if (rv < 0)
return -1;
@@ -573,7 +594,7 @@
data = (char *)hdrp + sizeof(shared_header_t);
- rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+ rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
if (rv == -1) {
return -1;
@@ -594,12 +615,12 @@
int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
{
size_t maxsize;
shared_header_t *hdrp;
char *data;
- size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+ size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
maxsize = psz - (sizeof(shared_header_t));
if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
/*
* Calculate the total length of the buffer, including the header.
- * Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
if (total < psz)
@@ -622,7 +642,7 @@
total = total + (psz * !!(total % psz)) - (total % psz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
if (rv < 0) {
perror("posix_memalign");
return -1;
@@ -645,7 +665,7 @@
* about locking here.
*/
if (total == psz)
- rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+ rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
if (rv == -1)
perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
{
quorum_header_t qh;
- if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+ if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
swab_quorum_header_t(&qh);
if (qh.qh_magic == HEADER_MAGIC_OLD) {
printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
/* Copy in the cluster/label name */
snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
+ qh.qh_version = VERSION_MAGIC_V2;
if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
perror("time");
return -1;
}
qh.qh_magic = HEADER_MAGIC_NUMBER;
+ qh.qh_blksz = disk->d_blksz;
+ qh.qh_pad = 0;
+
swab_quorum_header_t(&qh);
- if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+ if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
return -1;
}
@@ -699,24 +723,24 @@
int
qdisk_init(char *partname, char *label)
{
- int fd;
+ target_info_t disk;
status_block_t ps, wps;
- int nid;
+ int nid, ret;
time_t t;
- fd = qdisk_validate(partname);
- if (fd < 0) {
+ ret = qdisk_validate(partname);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(partname);
- if (fd < 0) {
+ ret = qdisk_open(partname, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (header_init(fd, label) < 0) {
+ if (header_init(&disk, label) < 0) {
return -1;
}
@@ -744,14 +768,14 @@
wps = ps;
swab_status_block_t(&wps);
- if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+ if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
printf("Error writing node ID block %d\n", nid);
- qdisk_close(&fd);
+ qdisk_close(&disk);
return -1;
}
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
--- cluster/cman/qdisk/disk.h 2007/02/21 20:19:43 1.1.2.6
+++ cluster/cman/qdisk/disk.h 2007/12/04 20:40:54 1.1.2.7
@@ -72,7 +72,8 @@
RF_DEBUG = 0x4,
RF_PARANOID = 0x8,
RF_ALLOW_KILL = 0x10,
- RF_UPTIME = 0x20
+ RF_UPTIME = 0x20,
+ RF_CMAN_LABEL = 0x40
} run_flag_t;
@@ -86,6 +87,9 @@
#define STATE_MAGIC_NUMBER 0x47bacef8 /* Status block */
#define SHARED_HEADER_MAGIC 0x00DEBB1E /* Per-block headeer */
+/* Version magic. */
+#define VERSION_MAGIC_V2 0x389fabc4
+
typedef struct __attribute__ ((packed)) {
uint32_t ps_magic;
@@ -152,16 +156,21 @@
*/
typedef struct __attribute__ ((packed)) {
uint32_t qh_magic;
- uint32_t qh_align; // 64-bit-ism: alignment fixer.
+ uint32_t qh_version; //
uint64_t qh_timestamp; // time of last update
char qh_updatehost[128];// Hostname who put this here...
- char qh_cluster[128]; // Cluster name
+ char qh_cluster[120]; // Cluster name; CMAN only
+ // supports 16 chars.
+ uint32_t qh_blksz; // Known block size @ creation
+ uint32_t qh_pad;
} quorum_header_t;
#define swab_quorum_header_t(ptr) \
{\
swab32((ptr)->qh_magic); \
- swab32((ptr)->qh_align); \
+ swab32((ptr)->qh_version); \
+ swab32((ptr)->qh_blksz); \
+ swab32((ptr)->qh_pad); \
swab64((ptr)->qh_timestamp); \
}
@@ -196,31 +205,35 @@
/* Offsets from RHCM 1.2.x */
#define OFFSET_HEADER 0
-#define HEADER_SIZE 4096 /* Page size for now */
+#define HEADER_SIZE(ssz) (ssz<4096?4096:ssz)
-#define OFFSET_FIRST_STATUS_BLOCK (OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK 4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz) (OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz) (ssz<4096?4096:ssz)
#define STATUS_BLOCK_COUNT MAX_NODES_DISK
-#define SPACE_PER_MESSAGE_BLOCK (4096)
-#define MESSAGE_BLOCK_COUNT MAX_NODES_DISK
-
-#define END_OF_DISK (OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz) (OFFSET_FIRST_STATUS_BLOCK(ssz) + \
(MAX_NODES_DISK + 1) * \
- SPACE_PER_STATUS_BLOCK) \
+ SPACE_PER_STATUS_BLOCK(ssz)) \
+
+typedef struct {
+ int d_fd;
+ int _pad_;
+ size_t d_blksz;
+ size_t d_pagesz;
+} target_info_t;
/* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
int qdisk_init(char *name, char *clustername);
int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
-#define qdisk_nodeid_offset(nodeid) \
- (OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+ (OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
/* From disk_utils.c */
#define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
uint16_t pad0;
} disk_msg_t;
+
typedef struct {
uint64_t qc_incarnation;
struct timeval qc_average;
struct timeval qc_last[HISTORY_LENGTH];
- int qc_fd;
+ target_info_t qc_disk;
int qc_my_id;
int qc_writes;
int qc_interval;
@@ -256,6 +270,7 @@
char *qc_device;
char *qc_label;
char *qc_status_file;
+ char *qc_cman_label;
} qd_ctx;
typedef struct {
@@ -272,14 +287,15 @@
int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
void qd_destroy(qd_ctx *ctx);
/* proc.c */
int find_partitions(const char *partfile, const char *label,
char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags);
#endif
--- cluster/cman/qdisk/disk_util.c 2007/01/26 14:34:26 1.1.2.3
+++ cluster/cman/qdisk/disk_util.c 2007/12/04 20:40:54 1.1.2.4
@@ -201,8 +201,9 @@
if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
utime_ok = 0;
swab_status_block_t(&ps);
- if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
- sizeof(ps)) < 0) {
+ if (qdisk_write(&ctx->qc_disk,
+ qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+ &ps, sizeof(ps)) < 0) {
printf("Error writing node ID block %d\n", nid);
return -1;
}
@@ -223,12 +224,12 @@
int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
{
int x;
printf("Data @ offset %d:\n",
- (int)qdisk_nodeid_offset(ps->ps_nodeid));
+ (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
printf("status_block_t {\n");
printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
{
status_block_t ps;
- if (fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
@@ -275,13 +276,13 @@
return -1;
}
- if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+ if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
sizeof(ps)) < 0) {
printf("Error reading node ID block %d\n", nid);
return -1;
}
swab_status_block_t(&ps);
- qd_print_status(&ps);
+ qd_print_status(disk, &ps);
return 0;
}
@@ -339,6 +340,5 @@
free(ctx->qc_device);
ctx->qc_device = NULL;
}
- close(ctx->qc_fd);
- ctx->qc_fd = -1;
+ qdisk_close(&ctx->qc_disk);
}
--- cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8
+++ cluster/cman/qdisk/main.c 2007/12/04 20:40:54 1.1.2.9
@@ -147,7 +147,8 @@
sb = &ni[x].ni_status;
- if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+ if (qdisk_read(&ctx->qc_disk,
+ qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
sb, sizeof(*sb)) < 0) {
clulog(LOG_WARNING,"Error reading node ID block %d\n",
x+1);
@@ -452,6 +453,7 @@
quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
{
int x = 0, score, maxscore, score_req;
+ char buf[64];
clulog(LOG_INFO, "Quorum Daemon Initializing\n");
@@ -462,12 +464,28 @@
if (qdisk_validate(ctx->qc_device) < 0)
return -1;
- ctx->qc_fd = qdisk_open(ctx->qc_device);
- if (ctx->qc_fd < 0) {
+ if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
strerror(errno));
return -1;
}
+
+ if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+ if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+ ctx->qc_cman_label = strdup(ctx->qc_label);
+ } else {
+ snprintf(buf, sizeof(buf), "QDisk[%d]",
+ strlen(ctx->qc_device));
+ ctx->qc_cman_label = strdup(buf);
+ }
+
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+ ctx->qc_cman_label);
+ }
+
+ clulog(LOG_DEBUG, "I/O Size: %d Page Size: %d\n",
+ ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
if (h && maxh) {
start_score_thread(ctx, h, maxh);
@@ -1209,14 +1227,30 @@
}
if (ctx->qc_master_wait <= ctx->qc_tko_up)
ctx->qc_master_wait = ctx->qc_tko_up + 1;
-
+
/* Get votes */
+
+ /* check if votes is set in cluster.conf */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_votes = atoi(val);
free(val);
if (ctx->qc_votes < 0)
ctx->qc_votes = 0;
+ } else { /* if votes is not set, default to node_num - 1 */
+ int nodes = 0, error;
+ for (;;) {
+ error = ccs_get_list(ccsfd, "/cluster/clusternodes/child::*", &val);
+ if (error || !val)
+ break;
+
+ nodes++;
+ }
+ nodes--;
+ if (nodes < 0)
+ nodes = 0;
+
+ ctx->qc_votes = nodes;
}
/* Get device */
@@ -1285,6 +1319,15 @@
ctx->qc_flags &= ~RF_REBOOT;
free(val);
}
+
+ /* Get cman_label */
+ snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ if (strlen(val) > 0 && strlen(val) <= 15) {
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ ctx->qc_cman_label = val;
+ }
+ }
/*
* Get flag to see if we're supposed to kill cman if qdisk is not
@@ -1347,8 +1390,9 @@
*cfh = configure_heuristics(ccsfd, h, maxh);
clulog(LOG_DEBUG,
- "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n",
- *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
+ "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes,"
+ " flags=%08x\n",
+ *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes, ctx->qc_flags);
ccs_disconnect(ccsfd);
@@ -1391,6 +1435,7 @@
char debug = 0, foreground = 0;
char device[128];
pid_t pid;
+ quorum_header_t qh;
if (check_process_running(argv[0], &pid) && pid !=getpid()) {
printf("QDisk services already running\n");
@@ -1493,13 +1538,24 @@
clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
ctx.qc_device, ctx.qc_label);
} else if (ctx.qc_device) {
- if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+ if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
clulog(LOG_CRIT,
"Specified partition %s does not have a "
"qdisk label\n", ctx.qc_device);
check_stop_cman(&ctx);
return -1;
}
+
+ if (qh.qh_version == VERSION_MAGIC_V2 &&
+ qh.qh_blksz != rv) {
+ clulog(LOG_CRIT,
+ "Specified device %s does match kernel's "
+ "reported sector size (%d != %d)\n",
+ ctx.qc_device,
+ ctx.qc_disk.d_blksz, rv);
+ check_stop_cman(&ctx);
+ return -1;
+ }
}
if (!foreground && !forked) {
@@ -1518,7 +1574,11 @@
if (!_running)
return 0;
- cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+ cman_register_quorum_device(ctx.qc_ch,
+ (ctx.qc_flags&RF_CMAN_LABEL)?
+ ctx.qc_cman_label:
+ ctx.qc_device,
+ ctx.qc_votes);
/*
XXX this always returns -1 / EBUSY even when it works?!!!
--- cluster/cman/qdisk/mkqdisk.c 2006/11/21 14:50:01 1.1.2.3
+++ cluster/cman/qdisk/mkqdisk.c 2007/12/04 20:40:54 1.1.2.4
@@ -39,7 +39,7 @@
char *newdev = NULL, *newlabel = NULL;
int rv;
- printf("mkqdisk v0.5.1\n");
+ printf("mkqdisk v0.5.2\n");
while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
switch (rv) {
--- cluster/cman/qdisk/proc.c 2006/06/23 16:01:02 1.1.2.1
+++ cluster/cman/qdisk/proc.c 2007/12/04 20:40:54 1.1.2.2
@@ -32,27 +32,33 @@
int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags)
{
- int fd = -1, ret = -1;
+ int ret = -1;
quorum_header_t qh_local;
+ target_info_t disk;
if (!qh)
qh = &qh_local;
- fd = qdisk_validate(device);
- if (fd < 0) {
+ ret = qdisk_validate(device);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(device);
- if (fd < 0) {
+ ret = qdisk_open(device, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+ if (ssz)
+ *ssz = disk.d_blksz;
+
+ ret = -1;
+ if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
swab_quorum_header_t(qh);
if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,7 +67,14 @@
}
}
- qdisk_close(&fd);
+ /* only flag now is 'strict device check'; i.e.,
+ "block size recorded must match kernel's reported size" */
+ if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+ disk.d_blksz != qh->qh_blksz) {
+ ret = -1;
+ }
+
+ qdisk_close(&disk);
return ret;
}
@@ -78,6 +91,7 @@
char device[128];
char realdev[256];
quorum_header_t qh;
+ int ssz;
fp = fopen(partfile, "r");
if (!fp)
@@ -96,16 +110,30 @@
if (strlen(device)) {
snprintf(realdev, sizeof(realdev),
"/dev/%s", device);
- if (check_device(realdev, (char *)label, &qh) != 0)
+
+ /* If we're not "just printing", then
+ then reject devices which don't match
+ the recorded sector size */
+ if (check_device(realdev, (char *)label, &ssz,
+ &qh, !print) != 0)
continue;
if (print) {
printf("%s:\n", realdev);
- printf("\tMagic: %08x\n", qh.qh_magic);
- printf("\tLabel: %s\n", qh.qh_cluster);
- printf("\tCreated: %s",
+ printf("\tMagic: %08x\n", qh.qh_magic);
+ printf("\tLabel: %s\n", qh.qh_cluster);
+ printf("\tCreated: %s",
ctime((time_t *)&qh.qh_timestamp));
- printf("\tHost: %s\n\n", qh.qh_updatehost);
+ printf("\tHost: %s\n", qh.qh_updatehost);
+ printf("\tKernel Sector Size: %d\n", ssz);
+ if (qh.qh_version == VERSION_MAGIC_V2) {
+ printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+ if (qh.qh_blksz != ssz) {
+ printf("WARNING: Sector size mismatch: Header: %d Kernel: %d\n",
+ (int)qh.qh_blksz, ssz);
+ }
+ } else
+ printf("\n");
}
if (devname && devlen) {
More information about the Cluster-devel
mailing list