[Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m ...
lhh at sourceware.org
lhh at sourceware.org
Tue Dec 4 20:24:45 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-12-04 20:24:43
Modified files:
cman/qdisk : disk.c disk.h disk_util.c main.c mkqdisk.c
proc.c
Log message:
Make qdiskd work with sector sizes other than 512 bytes. Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if none are specified in cluster.conf.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.4.1&r2=1.4.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.3&r2=1.4.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.4.2&r2=1.2.4.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.6&r2=1.4.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3.4.1&r2=1.3.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.4.1
--- cluster/cman/qdisk/disk.c 2007/10/29 17:54:25 1.4.4.1
+++ cluster/cman/qdisk/disk.c 2007/12/04 20:24:43 1.4.4.2
@@ -43,8 +43,9 @@
#include <platform.h>
#include <unistd.h>
#include <time.h>
+#include <linux/fs.h>
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
uint32_t clu_crc32(const char *data, size_t count);
@@ -211,49 +212,58 @@
* Returns - (the file descriptor), a value >= 0 on success.
*/
int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
{
- int fd;
- int retval;
+ int ret;
+ int ssz = 0;
/*
* Open for synchronous writes to insure all writes go directly
* to disk.
*/
- fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
- if (fd < 0) {
- return fd;
- }
+ disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+ if (disk->d_fd < 0)
+ return disk->d_fd;
+
+ disk->d_blksz = 512;
+ ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+ if (ret < 0)
+ perror("qdisk_open: ioctl(BLKSSZGET)");
+ else
+ /* BLKSSZGET stores the logical sector size through an int pointer */
+ disk->d_blksz = (uint32_t)ssz;
- /* Check to verify that the partition is large enough.*/
- retval = lseek(fd, END_OF_DISK, SEEK_SET);
+ disk->d_pagesz = sysconf(_SC_PAGESIZE);
- if (retval < 0) {
+ /* Check to verify that the partition is large enough.*/
+ ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+ if (ret < 0) {
perror("open_partition: seek");
return -1;
}
- if (retval < END_OF_DISK) {
+ if (ret < END_OF_DISK(disk->d_blksz)) {
fprintf(stderr, "Partition %s too small\n", name);
errno = EINVAL;
return -1;
}
/* Set close-on-exec bit */
- retval = fcntl(fd, F_GETFD, 0);
- if (retval < 0) {
- close(fd);
+ ret = fcntl(disk->d_fd, F_GETFD, 0);
+ if (ret < 0) {
+ perror("open_partition: fcntl(F_GETFD)");
+ close(disk->d_fd);
return -1;
}
- retval |= FD_CLOEXEC;
- if (fcntl(fd, F_SETFD, retval) < 0) {
- perror("open_partition: fcntl");
- close(fd);
+ ret |= FD_CLOEXEC;
+ if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+ perror("open_partition: fcntl(F_SETFD)");
+ close(disk->d_fd);
return -1;
}
- return fd;
+ return 0;
}
@@ -263,17 +273,17 @@
* Returns - value from close syscall.
*/
int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
{
int retval;
- if (!fd || *fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
- retval = close(*fd);
- *fd = -1;
+ retval = close(disk->d_fd);
+ disk->d_fd = -1;
return retval;
}
@@ -288,7 +298,7 @@
qdisk_validate(char *name)
{
struct stat stat_st, *stat_ptr;
- int fd;
+ target_info_t disk;
stat_ptr = &stat_st;
if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
/*
* Verify read/write permission.
*/
- fd = qdisk_open(name);
- if (fd < 0) {
+ if (qdisk_open(name, &disk) < 0) {
fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
__FUNCTION__, name, strerror(errno));
return -1;
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
{
int ret;
shared_header_t *hdrp;
char *data;
int datalen;
- ret = lseek(fd, readOffset, SEEK_SET);
+ ret = lseek(disk->d_fd, readOffset, SEEK_SET);
if (ret != readOffset) {
#if 0
fprintf(stderr,
@@ -340,7 +349,7 @@
return -1;
}
- ret = diskRawRead(fd, buf, len);
+ ret = diskRawRead(disk, buf, len);
if (ret != len) {
#if 0
fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int readret;
@@ -383,21 +392,24 @@
int readlen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+
+ /* was 3ff, which is (512<<1-1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (read(fd, buf, len));
+ return (read(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
"diskRawRead: not setup for reads larger than %d.\n",
- 512);
+ (int)disk->d_blksz);
return (-1);
}
/*
@@ -406,8 +418,8 @@
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
readlen = len;
@@ -415,18 +427,18 @@
readlen += extraLength;
}
- readret = posix_memalign((void **)&alignedBuf, 512, 512);
+ readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (readret < 0) {
return -1;
}
- readret = read(fd, alignedBuf, readlen);
+ readret = read(disk->d_fd, alignedBuf, readlen);
if (readret > 0) {
if (readret > len) {
- bcopy(alignedBuf, buf, len);
+ memcpy(buf, alignedBuf, len); /* memcpy is (dst, src): bounce buffer -> caller */
readret = len;
} else {
- bcopy(alignedBuf, buf, readret);
+ memcpy(buf, alignedBuf, readret); /* memcpy is (dst, src): bounce buffer -> caller */
}
}
@@ -445,7 +457,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int ret;
@@ -453,31 +465,33 @@
int writelen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+ /* was 3ff, which is (512<<1-1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
+
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (write(fd, buf, len));
+ return (write(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
"diskRawWrite: not setup for larger than %d.\n",
- 512);
+ (int)disk->d_blksz);
return (-1);
}
-
/*
* All IOs must be of size which is a multiple of 512. Here we
* just add in enough extra to accommodate.
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
writelen = len;
@@ -485,13 +499,20 @@
writelen += extraLength;
}
- ret = posix_memalign((void **)&alignedBuf, 512,512);
+ ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (ret < 0) {
+ return -1;
+ }
+
+ if (len > disk->d_blksz) {
+ fprintf(stderr,
+ "diskRawWrite: not setup for larger than %d.\n",
+ (int)disk->d_blksz);
return (-1);
}
- bcopy(buf, alignedBuf, len);
- ret = write(fd, alignedBuf, writelen);
+ memcpy(alignedBuf, buf, len); /* memcpy is (dst, src): caller -> bounce buffer */
+ ret = write(disk->d_fd, alignedBuf, writelen);
if (ret > len) {
ret = len;
}
@@ -507,7 +528,7 @@
static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
{
off_t retval_seek;
ssize_t retval_write;
@@ -519,7 +540,7 @@
return (-1);
}
- retval_seek = lseek(fd, writeOffset, SEEK_SET);
+ retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
if (retval_seek != writeOffset) {
fprintf(stderr,
"diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
return (-1);
}
- retval_write = diskRawWrite(fd, buf, len);
+ retval_write = diskRawWrite(disk, buf, len);
if (retval_write != len) {
if (retval_write == -1) {
fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
{
shared_header_t *hdrp;
char *data;
@@ -556,15 +577,15 @@
* Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
- if (total < 512)
- total = 512;
+ if (total < disk->d_blksz)
+ total = disk->d_blksz;
/* Round it up */
- if (total % 512)
- total = total + (512 * !!(total % 512)) - (total % 512);
+ if (total % disk->d_blksz)
+ total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
if (rv < 0)
return -1;
@@ -573,7 +594,7 @@
data = (char *)hdrp + sizeof(shared_header_t);
- rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+ rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
if (rv == -1) {
return -1;
@@ -594,12 +615,12 @@
int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
{
size_t maxsize;
shared_header_t *hdrp;
char *data;
- size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+ size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
maxsize = psz - (sizeof(shared_header_t));
if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
/*
* Calculate the total length of the buffer, including the header.
- * Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
if (total < psz)
@@ -622,7 +642,7 @@
total = total + (psz * !!(total % psz)) - (total % psz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
if (rv < 0) {
perror("posix_memalign");
return -1;
@@ -645,7 +665,7 @@
* about locking here.
*/
if (total == psz)
- rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+ rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
if (rv == -1)
perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
{
quorum_header_t qh;
- if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+ if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
swab_quorum_header_t(&qh);
if (qh.qh_magic == HEADER_MAGIC_OLD) {
printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
/* Copy in the cluster/label name */
snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
+ qh.qh_version = VERSION_MAGIC_V2;
if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
perror("time");
return -1;
}
qh.qh_magic = HEADER_MAGIC_NUMBER;
+ qh.qh_blksz = disk->d_blksz;
+ qh.qh_pad = 0;
+
swab_quorum_header_t(&qh);
- if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+ if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
return -1;
}
@@ -699,24 +723,24 @@
int
qdisk_init(char *partname, char *label)
{
- int fd;
+ target_info_t disk;
status_block_t ps, wps;
- int nid;
+ int nid, ret;
time_t t;
- fd = qdisk_validate(partname);
- if (fd < 0) {
+ ret = qdisk_validate(partname);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(partname);
- if (fd < 0) {
+ ret = qdisk_open(partname, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (header_init(fd, label) < 0) {
+ if (header_init(&disk, label) < 0) {
return -1;
}
@@ -744,14 +768,14 @@
wps = ps;
swab_status_block_t(&wps);
- if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+ if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
printf("Error writing node ID block %d\n", nid);
- qdisk_close(&fd);
+ qdisk_close(&disk);
return -1;
}
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
--- cluster/cman/qdisk/disk.h 2007/02/21 20:22:53 1.4.2.3
+++ cluster/cman/qdisk/disk.h 2007/12/04 20:24:43 1.4.2.4
@@ -72,7 +72,8 @@
RF_DEBUG = 0x4,
RF_PARANOID = 0x8,
RF_ALLOW_KILL = 0x10,
- RF_UPTIME = 0x20
+ RF_UPTIME = 0x20,
+ RF_CMAN_LABEL = 0x40
} run_flag_t;
@@ -86,6 +87,9 @@
#define STATE_MAGIC_NUMBER 0x47bacef8 /* Status block */
#define SHARED_HEADER_MAGIC 0x00DEBB1E /* Per-block headeer */
+/* Version magic. */
+#define VERSION_MAGIC_V2 0x389fabc4
+
typedef struct __attribute__ ((packed)) {
uint32_t ps_magic;
@@ -152,16 +156,21 @@
*/
typedef struct __attribute__ ((packed)) {
uint32_t qh_magic;
- uint32_t qh_align; // 64-bit-ism: alignment fixer.
+ uint32_t qh_version; //
uint64_t qh_timestamp; // time of last update
char qh_updatehost[128];// Hostname who put this here...
- char qh_cluster[128]; // Cluster name
+ char qh_cluster[120]; // Cluster name; CMAN only
+ // supports 16 chars.
+ uint32_t qh_blksz; // Known block size @ creation
+ uint32_t qh_pad;
} quorum_header_t;
#define swab_quorum_header_t(ptr) \
{\
swab32((ptr)->qh_magic); \
- swab32((ptr)->qh_align); \
+ swab32((ptr)->qh_version); \
+ swab32((ptr)->qh_blksz); \
+ swab32((ptr)->qh_pad); \
swab64((ptr)->qh_timestamp); \
}
@@ -196,31 +205,35 @@
/* Offsets from RHCM 1.2.x */
#define OFFSET_HEADER 0
-#define HEADER_SIZE 4096 /* Page size for now */
+#define HEADER_SIZE(ssz) (ssz<4096?4096:ssz)
-#define OFFSET_FIRST_STATUS_BLOCK (OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK 4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz) (OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz) (ssz<4096?4096:ssz)
#define STATUS_BLOCK_COUNT MAX_NODES_DISK
-#define SPACE_PER_MESSAGE_BLOCK (4096)
-#define MESSAGE_BLOCK_COUNT MAX_NODES_DISK
-
-#define END_OF_DISK (OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz) (OFFSET_FIRST_STATUS_BLOCK(ssz) + \
(MAX_NODES_DISK + 1) * \
- SPACE_PER_STATUS_BLOCK) \
+ SPACE_PER_STATUS_BLOCK(ssz)) \
+typedef struct {
+ int d_fd;
+ int _pad_;
+ size_t d_blksz;
+ size_t d_pagesz;
+} target_info_t;
+
/* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
int qdisk_init(char *name, char *clustername);
int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
-#define qdisk_nodeid_offset(nodeid) \
- (OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+ (OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
/* From disk_utils.c */
#define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
uint16_t pad0;
} disk_msg_t;
+
typedef struct {
uint64_t qc_incarnation;
struct timeval qc_average;
struct timeval qc_last[HISTORY_LENGTH];
- int qc_fd;
+ target_info_t qc_disk;
int qc_my_id;
int qc_writes;
int qc_interval;
@@ -250,12 +264,14 @@
disk_node_state_t qc_disk_status;
disk_node_state_t qc_status;
int qc_master; /* Master?! */
- int _pad_;
+ int qc_status_sock;
run_flag_t qc_flags;
cman_handle_t qc_ch;
char *qc_device;
char *qc_label;
char *qc_status_file;
+ char *qc_cman_label;
+ char *qc_status_sockname;
} qd_ctx;
typedef struct {
@@ -272,14 +288,15 @@
int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
void qd_destroy(qd_ctx *ctx);
/* proc.c */
int find_partitions(const char *partfile, const char *label,
char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags);
#endif
--- cluster/cman/qdisk/disk_util.c 2007/01/26 14:34:55 1.2.4.2
+++ cluster/cman/qdisk/disk_util.c 2007/12/04 20:24:43 1.2.4.3
@@ -201,8 +201,9 @@
if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
utime_ok = 0;
swab_status_block_t(&ps);
- if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
- sizeof(ps)) < 0) {
+ if (qdisk_write(&ctx->qc_disk,
+ qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+ &ps, sizeof(ps)) < 0) {
printf("Error writing node ID block %d\n", nid);
return -1;
}
@@ -223,12 +224,12 @@
int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
{
int x;
printf("Data @ offset %d:\n",
- (int)qdisk_nodeid_offset(ps->ps_nodeid));
+ (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
printf("status_block_t {\n");
printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
{
status_block_t ps;
- if (fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
@@ -275,13 +276,13 @@
return -1;
}
- if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+ if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
sizeof(ps)) < 0) {
printf("Error reading node ID block %d\n", nid);
return -1;
}
swab_status_block_t(&ps);
- qd_print_status(&ps);
+ qd_print_status(disk, &ps);
return 0;
}
@@ -322,6 +323,7 @@
ctx->qc_incarnation = generate_token();
ctx->qc_ch = ch;
ctx->qc_my_id = me;
+ ctx->qc_status_sock = -1;
return 0;
}
@@ -339,6 +341,5 @@
free(ctx->qc_device);
ctx->qc_device = NULL;
}
- close(ctx->qc_fd);
- ctx->qc_fd = -1;
+ qdisk_close(&ctx->qc_disk);
}
--- cluster/cman/qdisk/main.c 2007/03/20 19:37:04 1.4.2.6
+++ cluster/cman/qdisk/main.c 2007/12/04 20:24:43 1.4.2.7
@@ -36,6 +36,7 @@
#include <time.h>
#include <sys/reboot.h>
#include <sys/time.h>
+#include <sys/un.h>
#include <linux/reboot.h>
#include <sched.h>
#include <signal.h>
@@ -147,7 +148,8 @@
sb = &ni[x].ni_status;
- if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+ if (qdisk_read(&ctx->qc_disk,
+ qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
sb, sizeof(*sb)) < 0) {
clulog(LOG_WARNING,"Error reading node ID block %d\n",
x+1);
@@ -452,6 +454,10 @@
quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
{
int x = 0, score, maxscore, score_req;
+ char buf[64];
+#if 0
+ struct sockaddr_un sun;
+#endif
clulog(LOG_INFO, "Quorum Daemon Initializing\n");
@@ -462,12 +468,28 @@
if (qdisk_validate(ctx->qc_device) < 0)
return -1;
- ctx->qc_fd = qdisk_open(ctx->qc_device);
- if (ctx->qc_fd < 0) {
+ if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
strerror(errno));
return -1;
}
+
+ if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+ if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+ ctx->qc_cman_label = strdup(ctx->qc_label);
+ } else {
+ snprintf(buf, sizeof(buf), "QDisk[%d]",
+ (int)strlen(ctx->qc_device));
+ ctx->qc_cman_label = strdup(buf);
+ }
+
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+ ctx->qc_cman_label);
+ }
+
+ clulog(LOG_DEBUG, "I/O Size: %d Page Size: %d\n",
+ ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
if (h && maxh) {
start_score_thread(ctx, h, maxh);
@@ -484,6 +506,42 @@
return -1;
}
+#if 0
+ if (ctx->qc_status_sockname) {
+ ctx->qc_status_sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+
+ if (ctx->qc_status_sockname < 0) {
+ clulog(LOG_ERR,
+ "Could not create local socket %s: %s\n",
+ qc->qc_status_sockname, strerror(errno));
+ free(qc->qc_status_sockname);
+ qc->qc_status_sockname = NULL;
+ } else {
+ sun.sun_family = PF_LOCAL;
+ snprintf(sun.sun_path, sizeof(sun.sun_path),
+ qc->qc_status_sockname);
+ unlink(qc->qc_status_sockname);
+ if (bind(ctx->qc_status_sock,
+ (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+ clulog(LOG_ERR, "Could not bind to local "
+ "socket %s: %s\n",
+ qc->qc_status_sockname,
+ strerror(errno));
+ free(qc->qc_status_sockname);
+ qc->qc_status_sockname = NULL;
+ close(qc->qc_status_sock);
+ qc->qc_status_sock = -1;
+ }
+ }
+ } else {
+ qc->qc_status_sock = -1;
+ }
+
+ if (qc->qc_status_sock >= 0) {
+ listen(qc->qc_status_sock, 5);
+ }
+#endif
+
while (++x <= ctx->qc_tko && _running) {
read_node_blocks(ctx, ni, max);
check_transitions(ctx, ni, max, NULL);
@@ -622,23 +680,7 @@
char *
-state_str(disk_node_state_t s)
-{
- switch (s) {
- case S_NONE:
- return "None";
- case S_EVICT:
- return "Evicted";
- case S_INIT:
- return "Initializing";
- case S_RUN:
- return "Running";
- case S_MASTER:
- return "Master";
- default:
- return "ILLEGAL";
- }
-}
+state_str(disk_node_state_t s);
void
@@ -1237,6 +1279,12 @@
ctx->qc_status_file = val;
}
+ /* Get status socket */
+ snprintf(query, sizeof(query), "/cluster/quorumd/@status_sock");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ ctx->qc_status_sockname = val;
+ }
+
/* Get min score */
snprintf(query, sizeof(query), "/cluster/quorumd/@min_score");
if (ccs_get(ccsfd, query, &val) == 0) {
@@ -1285,6 +1333,15 @@
ctx->qc_flags &= ~RF_REBOOT;
free(val);
}
+
+ /* Get cman_label */
+ snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ if (strlen(val) > 0 && strlen(val) <= 15) {
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ ctx->qc_cman_label = val;
+ }
+ }
/*
* Get flag to see if we're supposed to kill cman if qdisk is not
@@ -1384,21 +1441,25 @@
main(int argc, char **argv)
{
cman_node_t me;
- int cfh, rv, forked = 0, nfd = -1;
+ int cfh, rv, forked = 0, nfd = -1, ret = -1;
+#if 0
+ int status_run = 0;
+#endif
qd_ctx ctx;
- cman_handle_t ch;
+ cman_handle_t ch = NULL;
node_info_t ni[MAX_NODES_DISK];
struct h_data h[10];
char debug = 0, foreground = 0;
char device[128];
pid_t pid;
+ quorum_header_t qh;
if (check_process_running(argv[0], &pid) && pid !=getpid()) {
printf("QDisk services already running\n");
return 0;
}
- while ((rv = getopt(argc, argv, "fdQ")) != EOF) {
+ while ((rv = getopt(argc, argv, "fdQs")) != EOF) {
switch (rv) {
case 'd':
debug = 1;
@@ -1418,11 +1479,15 @@
dup2(nfd, 2);
close(nfd);
break;
+#if 0
+ case 's':
+ status_run = 1;
+#endif
default:
break;
}
}
-
+
#if (defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2)
ch = cman_admin_init(NULL);
#else
@@ -1431,7 +1496,7 @@
if (!ch) {
if (!foreground && !forked) {
if (daemon_init(argv[0]) < 0)
- return -1;
+ goto out;
else
forked = 1;
}
@@ -1452,7 +1517,7 @@
while (cman_get_node(ch, CMAN_NODEID_US, &me) < 0) {
if (!foreground && !forked) {
if (daemon_init(argv[0]) < 0)
- return -1;
+ goto out;
else
forked = 1;
}
@@ -1472,7 +1537,7 @@
if (get_config_data(NULL, &ctx, h, 10, &cfh, debug) < 0) {
clulog_and_print(LOG_CRIT, "Configuration failed\n");
check_stop_cman(&ctx);
- return -1;
+ goto out;
}
if (ctx.qc_label) {
@@ -1483,7 +1548,7 @@
" '%s' to any device\n",
ctx.qc_label);
check_stop_cman(&ctx);
- return -1;
+ goto out;
}
if (ctx.qc_device)
@@ -1494,18 +1559,29 @@
clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
ctx.qc_device, ctx.qc_label);
} else if (ctx.qc_device) {
- if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+ if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
clulog(LOG_CRIT,
"Specified partition %s does not have a "
"qdisk label\n", ctx.qc_device);
check_stop_cman(&ctx);
- return -1;
+ goto out;
+ }
+
+ if (qh.qh_version == VERSION_MAGIC_V2 &&
+ qh.qh_blksz != rv) {
+ clulog(LOG_CRIT,
+ "Specified device %s does not match kernel's "
+ "reported sector size (%d != %d)\n",
+ ctx.qc_device,
+ (int)qh.qh_blksz, rv); /* recorded size vs. kernel-reported size */
+ check_stop_cman(&ctx);
+ goto out;
}
}
if (!foreground && !forked) {
if (daemon_init(argv[0]) < 0)
- return -1;
+ goto out;
}
set_priority(ctx.qc_sched, ctx.qc_sched_prio);
@@ -1513,13 +1589,19 @@
if (quorum_init(&ctx, ni, MAX_NODES_DISK, h, cfh) < 0) {
clulog_and_print(LOG_CRIT, "Initialization failed\n");
check_stop_cman(&ctx);
- return -1;
+ goto out;
}
+ ret = 0;
+
if (!_running)
- return 0;
+ goto out;
- cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+ cman_register_quorum_device(ctx.qc_ch,
+ (ctx.qc_flags&RF_CMAN_LABEL)?
+ ctx.qc_cman_label:
+ ctx.qc_device,
+ ctx.qc_votes);
/*
XXX this always returns -1 / EBUSY even when it works?!!!
@@ -1529,16 +1611,18 @@
"Could not register %s with CMAN; "
"return = %d; error = %s\n",
ctx.qc_device, rv, strerror(errno));
- return -1;
+ goto out;
}
*/
-
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0)
cman_unregister_quorum_device(ctx.qc_ch);
quorum_logout(&ctx);
+ /* free cman handle to avoid leak in cman */
+out:
+ cman_finish(ctx.qc_ch);
qd_destroy(&ctx);
- return 0;
+ return ret;
}
--- cluster/cman/qdisk/mkqdisk.c 2006/11/21 14:50:30 1.3.4.1
+++ cluster/cman/qdisk/mkqdisk.c 2007/12/04 20:24:43 1.3.4.2
@@ -37,23 +37,26 @@
{
char device[128];
char *newdev = NULL, *newlabel = NULL;
- int rv;
+ int rv, debug_level = 1;
- printf("mkqdisk v0.5.1\n");
+ printf("mkqdisk v0.5.2\n");
- while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
+ while ((rv = getopt(argc, argv, "Ldf:c:l:h")) != EOF) {
switch (rv) {
+ case 'd':
+ ++debug_level;
+ break;
case 'L':
/* List */
close(2);
return find_partitions("/proc/partitions",
- NULL, NULL, 0, 1);
+ NULL, NULL, 0, debug_level);
break;
case 'f':
close(2);
return find_partitions("/proc/partitions",
optarg, device,
- sizeof(device), 1);
+ sizeof(device), debug_level);
case 'c':
newdev = optarg;
break;
--- cluster/cman/qdisk/proc.c 2006/06/23 16:05:33 1.2
+++ cluster/cman/qdisk/proc.c 2007/12/04 20:24:43 1.2.4.1
@@ -32,27 +32,33 @@
int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags)
{
- int fd = -1, ret = -1;
+ int ret = -1;
quorum_header_t qh_local;
+ target_info_t disk;
if (!qh)
qh = &qh_local;
- fd = qdisk_validate(device);
- if (fd < 0) {
+ ret = qdisk_validate(device);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(device);
- if (fd < 0) {
+ ret = qdisk_open(device, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+ if (ssz)
+ *ssz = disk.d_blksz;
+
+ ret = -1;
+ if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
swab_quorum_header_t(qh);
if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,12 +67,91 @@
}
}
- qdisk_close(&fd);
+ /* only flag now is 'strict device check'; i.e.,
+ "block size recorded must match kernel's reported size" */
+ if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+ disk.d_blksz != qh->qh_blksz) {
+ ret = -1;
+ }
+
+ qdisk_close(&disk);
return ret;
}
+char *
+state_str(disk_node_state_t s)
+{
+ switch (s) {
+ case S_NONE:
+ return "None";
+ case S_EVICT:
+ return "Evicted";
+ case S_INIT:
+ return "Initializing";
+ case S_RUN:
+ return "Running";
+ case S_MASTER:
+ return "Master";
+ default:
+ return "ILLEGAL";
+ }
+}
+
+
+void
+print_status_block(status_block_t *sb)
+{
+ if (sb->ps_state == S_NONE)
+ return;
+ printf("Status block for node %d\n", sb->ps_nodeid);
+ printf("\tLast updated by node %d\n", sb->ps_updatenode);
+ printf("\tLast updated on %s", ctime((time_t *)&sb->ps_timestamp));
+ printf("\tState: %s\n", state_str(sb->ps_state));
+ printf("\tFlags: %04x\n", sb->ps_flags);
+ printf("\tScore: %d/%d\n", sb->ps_score, sb->ps_scoremax);
+ printf("\tAverage Cycle speed: %d.%06d seconds\n",
+ sb->ps_ca_sec, sb->ps_ca_usec);
+ printf("\tLast Cycle speed: %d.%06d seconds\n",
+ sb->ps_lc_sec, sb->ps_lc_usec);
+ printf("\tIncarnation: %08x%08x\n",
+ (int)(sb->ps_incarnation>>32&0xffffffff),
+ (int)(sb->ps_incarnation&0xffffffff));
+
+}
+
+
+void
+read_info(char *dev)
+{
+ target_info_t ti;
+ int x;
+ status_block_t sb;
+
+ if (qdisk_open(dev, &ti) < 0) {
+ printf("Could not read from %s: %s\n",
+ dev, strerror(errno));
+ return;
+ }
+
+ for (x = 0; x < MAX_NODES_DISK; x++) {
+
+ if (qdisk_read(&ti,
+ qdisk_nodeid_offset(x+1, ti.d_blksz),
+ &sb, sizeof(sb)) < 0) {
+ printf("Error reading node ID block %d\n",
+ x+1);
+ continue;
+ }
+ swab_status_block_t(&sb);
+ print_status_block(&sb);
+ }
+
+ qdisk_close(&ti);
+}
+
+
int
find_partitions(const char *partfile, const char *label,
char *devname, size_t devlen, int print)
@@ -78,6 +163,7 @@
char device[128];
char realdev[256];
quorum_header_t qh;
+ int ssz;
fp = fopen(partfile, "r");
if (!fp)
@@ -96,16 +182,35 @@
if (strlen(device)) {
snprintf(realdev, sizeof(realdev),
"/dev/%s", device);
- if (check_device(realdev, (char *)label, &qh) != 0)
+
+ /* If we're not "just printing", then
+ then reject devices which don't match
+ the recorded sector size */
+ if (check_device(realdev, (char *)label, &ssz,
+ &qh, !print) != 0)
continue;
if (print) {
printf("%s:\n", realdev);
- printf("\tMagic: %08x\n", qh.qh_magic);
- printf("\tLabel: %s\n", qh.qh_cluster);
- printf("\tCreated: %s",
+ printf("\tMagic: %08x\n", qh.qh_magic);
+ printf("\tLabel: %s\n", qh.qh_cluster);
+ printf("\tCreated: %s",
ctime((time_t *)&qh.qh_timestamp));
- printf("\tHost: %s\n\n", qh.qh_updatehost);
+ printf("\tHost: %s\n", qh.qh_updatehost);
+ printf("\tKernel Sector Size: %d\n", ssz);
+ if (qh.qh_version == VERSION_MAGIC_V2) {
+ printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+ if (qh.qh_blksz != ssz) {
+ printf("WARNING: Sector size mismatch: Header: %d Kernel: %d\n",
+ (int)qh.qh_blksz, ssz);
+ }
+ } else
+ printf("\n");
+ }
+
+ if (print >= 2) {
+ /* Print node stuff */
+ read_info(realdev);
}
if (devname && devlen) {
More information about the Cluster-devel
mailing list