public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* Cluster Project branch, STABLE, updated. rgmanager_1_9_66-13-g703bdf8
@ 2008-04-17 14:28 lhh
  0 siblings, 0 replies; only message in thread
From: lhh @ 2008-04-17 14:28 UTC (permalink / raw)
  To: cluster-cvs, cluster-devel

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=703bdf832679cdcad3137017cd07640483bed767

The branch, STABLE has been updated
       via  703bdf832679cdcad3137017cd07640483bed767 (commit)
      from  5ee43bab4b9c3048e58eb5f1de61d57b1a28e045 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit 703bdf832679cdcad3137017cd07640483bed767
Author: Lon Hohberger <lhh@redhat.com>
Date:   Thu Apr 17 10:28:27 2008 -0400

    [cman] Allow disk devices with sector sizes != 512 bytes
    
    Merged from RHEL4 branch.

-----------------------------------------------------------------------

Summary of changes:
 cman/qdisk/clulog.c    |    2 +-
 cman/qdisk/disk.c      |  191 +++++++++++++++++++++++++++---------------------
 cman/qdisk/disk.h      |   58 +++++++++-----
 cman/qdisk/disk_util.c |   20 +++---
 cman/qdisk/main.c      |   61 +++++++++++++--
 cman/qdisk/mkqdisk.c   |    2 +-
 cman/qdisk/proc.c      |   54 ++++++++++---
 cman/qdisk/score.c     |   19 +++++
 8 files changed, 269 insertions(+), 138 deletions(-)

diff --git a/cman/qdisk/clulog.c b/cman/qdisk/clulog.c
index 9ed0eef..6b998f1 100644
--- a/cman/qdisk/clulog.c
+++ b/cman/qdisk/clulog.c
@@ -273,7 +273,7 @@ do_clulog(int        severity,
 		fprintf(stdout, "%s", printmsg);
 	}
 
-	syslog(severity, logmsg);
+	syslog(severity, "%s", logmsg);
 
 	pthread_mutex_unlock(&log_mutex);
 
diff --git a/cman/qdisk/disk.c b/cman/qdisk/disk.c
index ad22b7b..cd9a462 100644
--- a/cman/qdisk/disk.c
+++ b/cman/qdisk/disk.c
@@ -43,8 +43,9 @@
 #include <platform.h>
 #include <unistd.h>
 #include <time.h>
+#include <linux/fs.h>
 
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
 uint32_t clu_crc32(const char *data, size_t count);
 
 
@@ -211,49 +212,57 @@ header_verify(shared_header_t *hdr, const char *data, size_t count)
  * Returns - (the file descriptor), a value >= 0 on success.
  */
 int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
 {
-	int fd;
-	int retval;
+	int ret;
+	int ssz;
 
 	/*
 	 * Open for synchronous writes to insure all writes go directly
 	 * to disk.
 	 */
-	fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
-	if (fd < 0) {
-		return fd;
+	disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+	if (disk->d_fd < 0)
+		return disk->d_fd;
+
+	ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+	if (ret < 0) {
+		perror("qdisk_open: ioctl(BLKSSZGET)");
+		return -1;
 	}
 
-	/* Check to verify that the partition is large enough.*/
-	retval = lseek(fd, END_OF_DISK, SEEK_SET);
+	disk->d_blksz = ssz;
+	disk->d_pagesz = sysconf(_SC_PAGESIZE);
 
-	if (retval < 0) {
+	/* Check to verify that the partition is large enough.*/
+	ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+	if (ret < 0) {
 		perror("open_partition: seek");
 		return -1;
 	}
 
-	if (retval < END_OF_DISK) {
+	if (ret < END_OF_DISK(disk->d_blksz)) {
 		fprintf(stderr, "Partition %s too small\n", name);
 		errno = EINVAL;
 		return -1;
 	}
 
 	/* Set close-on-exec bit */
-        retval = fcntl(fd, F_GETFD, 0);
-        if (retval < 0) {
-                close(fd);
+        ret = fcntl(disk->d_fd, F_GETFD, 0);
+        if (ret < 0) {
+		perror("open_partition: fcntl(F_GETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-        retval |= FD_CLOEXEC;
-        if (fcntl(fd, F_SETFD, retval) < 0) {
-		perror("open_partition: fcntl");
-                close(fd);
+        ret |= FD_CLOEXEC;
+        if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+		perror("open_partition: fcntl(F_SETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-	return fd;
+	return 0;
 }
 
 
@@ -263,17 +272,17 @@ qdisk_open(char *name)
  * Returns - value from close syscall.
  */
 int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
 {
 	int retval;
 
-	if (!fd || *fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
 
-	retval = close(*fd);
-	*fd = -1;
+	retval = close(disk->d_fd);
+	disk->d_fd = -1;
 
 	return retval;
 }
@@ -288,7 +297,7 @@ int
 qdisk_validate(char *name)
 {
 	struct stat stat_st, *stat_ptr;
-	int fd;
+	target_info_t disk;
 	stat_ptr = &stat_st;
 
 	if (stat(name, stat_ptr) < 0) {
@@ -310,26 +319,25 @@ qdisk_validate(char *name)
 	/*
 	 * Verify read/write permission.
 	 */
-	fd = qdisk_open(name);
-	if (fd < 0) {
+	if (qdisk_open(name, &disk) < 0) {
 		fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
 			__FUNCTION__, name, strerror(errno));
 		return -1;
 	}
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 	return 0;
 }
 
 
 static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
 {
 	int ret;
 	shared_header_t *hdrp;
 	char *data;
 	int datalen;
 
-	ret = lseek(fd, readOffset, SEEK_SET);
+	ret = lseek(disk->d_fd, readOffset, SEEK_SET);
 	if (ret != readOffset) {
 #if 0
 		fprintf(stderr,
@@ -340,7 +348,7 @@ diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
 		return -1;
 	}
 
-	ret = diskRawRead(fd, buf, len);
+	ret = diskRawRead(disk, buf, len);
 	if (ret != len) {
 #if 0
 		fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +383,7 @@ diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int readret;
@@ -383,21 +391,24 @@ diskRawRead(int fd, char *buf, int len)
 	int readlen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	
+	/* was 0x3ff, which is ((512<<1)-1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
 
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (read(fd, buf, len));
+		return (read(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
 			"diskRawRead: not setup for reads larger than %d.\n",
-		       512);
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 	/*
@@ -406,8 +417,8 @@ diskRawRead(int fd, char *buf, int len)
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	readlen = len;
@@ -415,18 +426,18 @@ diskRawRead(int fd, char *buf, int len)
 		readlen += extraLength;
 	}
 
-	readret = posix_memalign((void **)&alignedBuf, 512, 512);
+	readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
 	if (readret < 0) {
 		return -1;
 	}
 
-	readret = read(fd, alignedBuf, readlen);
+	readret = read(disk->d_fd, alignedBuf, readlen);
 	if (readret > 0) {
 		if (readret > len) {
-			bcopy(alignedBuf, buf, len);
+			memcpy(alignedBuf, buf, len);
 			readret = len;
 		} else {
-			bcopy(alignedBuf, buf, readret);
+			memcpy(alignedBuf, buf, readret);
 		}
 	}
 
@@ -445,7 +456,7 @@ diskRawRead(int fd, char *buf, int len)
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
 {
 	char *alignedBuf;
 	int ret;
@@ -453,31 +464,33 @@ diskRawWrite(int fd, char *buf, int len)
 	int writelen;
 	int bounceNeeded = 1;
 
-	if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-	    ((len % 512) == 0)) {
+	/* was 0x3ff, which is ((512<<1)-1) */
+	if ((((unsigned long) buf &
+	      (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+	    ((len % (disk->d_blksz)) == 0)) {
 		bounceNeeded = 0;
 	}
+
 	if (bounceNeeded == 0) {
 		/* Already aligned and even multiple of 512, no bounceio
 		 * required. */
-		return (write(fd, buf, len));
+		return (write(disk->d_fd, buf, len));
 	}
 
-	if (len > 512) {
+	if (len > disk->d_blksz) {
 		fprintf(stderr,
-		       "diskRawWrite: not setup for larger than %d.\n",
-		       512);
+			"diskRawRead: not setup for reads larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
-
 	/*
 	 * All IOs must be of size which is a multiple of 512.  Here we
 	 * just add in enough extra to accommodate.
 	 * XXX - if the on-disk offsets don't provide enough room we're cooked!
 	 */
 	extraLength = 0;
-	if (len % 512) {
-		extraLength = 512 - (len % 512);
+	if (len % disk->d_blksz) {
+		extraLength = disk->d_blksz - (len % disk->d_blksz);
 	}
 
 	writelen = len;
@@ -485,13 +498,20 @@ diskRawWrite(int fd, char *buf, int len)
 		writelen += extraLength;
 	}
 
-	ret = posix_memalign((void **)&alignedBuf, 512,512);
+	ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
 	if (ret < 0) {
+		return -1;
+	}
+
+	if (len > disk->d_blksz) {
+		fprintf(stderr,
+		       "diskRawWrite: not setup for larger than %d.\n",
+		       (int)disk->d_blksz);
 		return (-1);
 	}
 
-	bcopy(buf, alignedBuf, len);
-	ret = write(fd, alignedBuf, writelen);
+	memcpy(buf, alignedBuf, len);
+	ret = write(disk->d_fd, alignedBuf, writelen);
 	if (ret > len) {
 		ret = len;
 	}
@@ -507,7 +527,7 @@ diskRawWrite(int fd, char *buf, int len)
 
 
 static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
 {
 	off_t retval_seek;
 	ssize_t retval_write;
@@ -519,7 +539,7 @@ diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
 		return (-1);
 	}
 
-	retval_seek = lseek(fd, writeOffset, SEEK_SET);
+	retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
 	if (retval_seek != writeOffset) {
 		fprintf(stderr,
 		       "diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +547,7 @@ diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
 		return (-1);
 	}
 
-	retval_write = diskRawWrite(fd, buf, len);
+	retval_write = diskRawWrite(disk, buf, len);
 	if (retval_write != len) {
 		if (retval_write == -1) {
 			fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +564,7 @@ diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
 
 
 int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
 {
 	shared_header_t *hdrp;
 	char *data;
@@ -556,15 +576,15 @@ qdisk_read(int fd, __off64_t offset, void *buf, int count)
 	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
-	if (total < 512)
-		total = 512;
+	if (total < disk->d_blksz)
+		total = disk->d_blksz;
 
 	/* Round it up */
-	if (total % 512) 
-		total = total + (512 * !!(total % 512)) - (total % 512);
+	if (total % disk->d_blksz) 
+		total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
 	if (rv < 0)
 		return -1;
 
@@ -573,7 +593,7 @@ qdisk_read(int fd, __off64_t offset, void *buf, int count)
 
 	data = (char *)hdrp + sizeof(shared_header_t);
 
-	rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+	rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
 	
 	if (rv == -1) {
 		return -1;
@@ -594,12 +614,12 @@ qdisk_read(int fd, __off64_t offset, void *buf, int count)
 
 
 int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
 {
 	size_t maxsize;
 	shared_header_t *hdrp;
 	char *data;
-	size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+	size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
 
 	maxsize = psz - (sizeof(shared_header_t));
 	if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +631,6 @@ qdisk_write(int fd, __off64_t offset, const void *buf, int count)
 
 	/*
 	 * Calculate the total length of the buffer, including the header.
-	 * Raw blocks are 512 byte aligned.
 	 */
 	total = count + sizeof(shared_header_t);
 	if (total < psz)
@@ -622,7 +641,7 @@ qdisk_write(int fd, __off64_t offset, const void *buf, int count)
 		total = total + (psz * !!(total % psz)) - (total % psz);
 
 	hdrp = NULL;
-	rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+	rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
 	if (rv < 0) {
 		perror("posix_memalign");
 		return -1;
@@ -645,7 +664,7 @@ qdisk_write(int fd, __off64_t offset, const void *buf, int count)
 	 * about locking here.
 	 */
 	if (total == psz)
-		rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+		rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
 
 	if (rv == -1)
 		perror("diskRawWriteShadow");
@@ -658,11 +677,11 @@ qdisk_write(int fd, __off64_t offset, const void *buf, int count)
 
 
 static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
 {
 	quorum_header_t qh;
 
-	if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+	if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
 		swab_quorum_header_t(&qh);
 		if (qh.qh_magic == HEADER_MAGIC_OLD) {
 			printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -679,16 +698,20 @@ header_init(int fd, char *label)
 	}
 
 	/* Copy in the cluster/label name */
-	snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, label);
+	snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
 
+	qh.qh_version = VERSION_MAGIC_V2;
 	if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
 		perror("time");
 		return -1;
 	}
 
 	qh.qh_magic = HEADER_MAGIC_NUMBER;
+	qh.qh_blksz = disk->d_blksz;
+	qh.qh_pad = 0;
+
 	swab_quorum_header_t(&qh);
-	if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+	if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
 		return -1;
 	}
 
@@ -699,24 +722,24 @@ header_init(int fd, char *label)
 int
 qdisk_init(char *partname, char *label)
 {
-	int fd;
+	target_info_t disk;
 	status_block_t ps, wps;
-	int nid;
+	int nid, ret;
 	time_t t;
 
-	fd = qdisk_validate(partname);
-	if (fd < 0) {
+	ret = qdisk_validate(partname);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(partname);
-	if (fd < 0) {
+	ret = qdisk_open(partname, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (header_init(fd, label) < 0) {
+	if (header_init(&disk, label) < 0) {
 		return -1;
 	}
 
@@ -744,14 +767,14 @@ qdisk_init(char *partname, char *label)
 		wps = ps;
 		swab_status_block_t(&wps);
 
-		if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+		if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
 			printf("Error writing node ID block %d\n", nid);
-			qdisk_close(&fd);
+			qdisk_close(&disk);
 			return -1;
 		}
 	}
 
-	qdisk_close(&fd);
+	qdisk_close(&disk);
 
 	return 0;
 }
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index 277005a..abc870e 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -72,7 +72,8 @@ typedef enum {
 	RF_DEBUG = 0x4,
 	RF_PARANOID = 0x8,
 	RF_ALLOW_KILL = 0x10,
-	RF_UPTIME = 0x20
+	RF_UPTIME = 0x20,
+	RF_CMAN_LABEL = 0x40
 } run_flag_t;
 
 
@@ -86,6 +87,9 @@ typedef enum {
 #define STATE_MAGIC_NUMBER	0x47bacef8	/* Status block */
 #define SHARED_HEADER_MAGIC	0x00DEBB1E	/* Per-block headeer */
 
+/* Version magic. */
+#define VERSION_MAGIC_V2	0x389fabc4
+
 
 typedef struct __attribute__ ((packed)) {
 	uint32_t	ps_magic;
@@ -152,16 +156,21 @@ typedef struct __attribute__ ((packed)) {
  */
 typedef struct __attribute__ ((packed)) {
 	uint32_t	qh_magic;
-	uint32_t	qh_align;	   // 64-bit-ism: alignment fixer.
+	uint32_t	qh_version;	   // 
 	uint64_t	qh_timestamp;	   // time of last update
 	char 		qh_updatehost[128];// Hostname who put this here...
-	char		qh_cluster[128];   // Cluster name
+	char		qh_cluster[120];   // Cluster name; CMAN only 
+					   // supports 16 chars.
+	uint32_t	qh_blksz;          // Known block size @ creation
+	uint32_t	qh_pad;
 } quorum_header_t;
 
 #define swab_quorum_header_t(ptr) \
 {\
 	swab32((ptr)->qh_magic); \
-	swab32((ptr)->qh_align); \
+	swab32((ptr)->qh_version); \
+	swab32((ptr)->qh_blksz); \
+	swab32((ptr)->qh_pad); \
 	swab64((ptr)->qh_timestamp); \
 }
 
@@ -196,31 +205,35 @@ typedef struct __attribute__ ((packed)) {
 
 /* Offsets from RHCM 1.2.x */
 #define OFFSET_HEADER	0
-#define HEADER_SIZE	4096		/* Page size for now */
+#define HEADER_SIZE(ssz)		(ssz<4096?4096:ssz)
 
-#define OFFSET_FIRST_STATUS_BLOCK	(OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK		4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz)	(OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz)	(ssz<4096?4096:ssz)
 #define STATUS_BLOCK_COUNT		MAX_NODES_DISK
 
-#define SPACE_PER_MESSAGE_BLOCK		(4096)
-#define	MESSAGE_BLOCK_COUNT		MAX_NODES_DISK
-
-#define END_OF_DISK			(OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz)		(OFFSET_FIRST_STATUS_BLOCK(ssz) + \
 					 (MAX_NODES_DISK + 1) * \
-					 SPACE_PER_STATUS_BLOCK) \
+					 SPACE_PER_STATUS_BLOCK(ssz)) \
+
 
+typedef struct {
+	int d_fd;
+	int _pad_;
+	size_t d_blksz;
+	size_t d_pagesz;
+} target_info_t;
 
 
 /* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
 int qdisk_init(char *name, char *clustername);
 int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
 
-#define qdisk_nodeid_offset(nodeid) \
-	(OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+	(OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
 
 /* From disk_utils.c */
 #define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@ typedef struct {
 	uint16_t pad0;
 } disk_msg_t;
 
+
 typedef struct {
 	uint64_t qc_incarnation;
 	struct timeval qc_average;
 	struct timeval qc_last[HISTORY_LENGTH];
-	int qc_fd;
+	target_info_t qc_disk;
 	int qc_my_id;
 	int qc_writes;
 	int qc_interval;
@@ -256,6 +270,7 @@ typedef struct {
 	char *qc_device;
 	char *qc_label;
 	char *qc_status_file;
+	char *qc_cman_label;
 } qd_ctx;
 
 typedef struct {
@@ -272,14 +287,15 @@ typedef struct {
 
 int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
 		    disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
 int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
 void qd_destroy(qd_ctx *ctx);
 
 /* proc.c */
 int find_partitions(const char *partfile, const char *label,
 		    char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+		 int flags);
 
 
 #endif
diff --git a/cman/qdisk/disk_util.c b/cman/qdisk/disk_util.c
index 80c211d..b36f8d7 100644
--- a/cman/qdisk/disk_util.c
+++ b/cman/qdisk/disk_util.c
@@ -201,8 +201,9 @@ qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
 	if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
 		utime_ok = 0;
 	swab_status_block_t(&ps);
-	if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
-			sizeof(ps)) < 0) {
+	if (qdisk_write(&ctx->qc_disk,
+			qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+			&ps, sizeof(ps)) < 0) {
 		printf("Error writing node ID block %d\n", nid);
 		return -1;
 	}
@@ -223,12 +224,12 @@ qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
 
 
 int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
 {
 	int x;
 
 	printf("Data @ offset %d:\n",
-	       (int)qdisk_nodeid_offset(ps->ps_nodeid));
+	       (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
 	printf("status_block_t {\n");
 	printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
 	printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@ qd_print_status(status_block_t *ps)
 
 
 int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
 {
 	status_block_t ps;
 
-	if (fd < 0) {
+	if (!disk || disk->d_fd < 0) {
 		errno = EINVAL;
 		return -1;
 	}
@@ -275,13 +276,13 @@ qd_read_print_status(int fd, int nid)
 		return -1;
 	}
 
-	if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+	if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
 			sizeof(ps)) < 0) {
 		printf("Error reading node ID block %d\n", nid);
 		return -1;
 	}
 	swab_status_block_t(&ps);
-	qd_print_status(&ps);
+	qd_print_status(disk, &ps);
 
 	return 0;
 }
@@ -339,6 +340,5 @@ qd_destroy(qd_ctx *ctx)
 		free(ctx->qc_device);
 		ctx->qc_device = NULL;
 	}
-	close(ctx->qc_fd);
-	ctx->qc_fd = -1;
+	qdisk_close(&ctx->qc_disk);
 }
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index cb20590..a29a5d0 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -147,7 +147,8 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
 
 		sb = &ni[x].ni_status;
 
-		if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+		if (qdisk_read(&ctx->qc_disk,
+			       qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
 			       sb, sizeof(*sb)) < 0) {
 			clulog(LOG_WARNING,"Error reading node ID block %d\n",
 			       x+1);
@@ -462,12 +463,14 @@ quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
 	if (qdisk_validate(ctx->qc_device) < 0)
 		return -1;
 
-	ctx->qc_fd = qdisk_open(ctx->qc_device);
-	if (ctx->qc_fd < 0) {
+	if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
 		clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
 		       strerror(errno));
 		return -1;
 	}
+
+	clulog(LOG_DEBUG, "I/O Size: %d  Page Size: %d\n",
+	       ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
 	
 	if (h && maxh) {
 		start_score_thread(ctx, h, maxh);
@@ -1210,14 +1213,30 @@ get_config_data(char *cluster_name, qd_ctx *ctx, struct h_data *h, int maxh,
 	}
 	if (ctx->qc_master_wait <= ctx->qc_tko_up)
 		ctx->qc_master_wait = ctx->qc_tko_up + 1;
-		
+
 	/* Get votes */
+
+	/* check if votes is set in cluster.conf */
 	snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
 	if (ccs_get(ccsfd, query, &val) == 0) {
 		ctx->qc_votes = atoi(val);
 		free(val);
 		if (ctx->qc_votes < 0)
 			ctx->qc_votes = 0;
+	} else { /* if votes is not set, default to node_num - 1 */
+		int nodes = 0, error;
+		for (;;) {
+			error = ccs_get_list(ccsfd, "/cluster/clusternodes/child::*", &val);
+			if (error || !val)
+				break;
+
+			nodes++;
+		}
+		nodes--;
+		if (nodes < 0)
+			nodes = 0;
+
+		ctx->qc_votes = nodes;
 	}
 
 	/* Get device */
@@ -1286,6 +1305,15 @@ get_config_data(char *cluster_name, qd_ctx *ctx, struct h_data *h, int maxh,
 			ctx->qc_flags &= ~RF_REBOOT;
 		free(val);
 	}
+
+	/* Get cman_label */
+	snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		if (strlen(val) > 0) {
+			ctx->qc_flags |= RF_CMAN_LABEL;
+			ctx->qc_cman_label = val;
+		}
+	}
 	
 	/*
 	 * Get flag to see if we're supposed to kill cman if qdisk is not 
@@ -1348,8 +1376,9 @@ get_config_data(char *cluster_name, qd_ctx *ctx, struct h_data *h, int maxh,
 	*cfh = configure_heuristics(ccsfd, h, maxh);
 
 	clulog(LOG_DEBUG,
-	       "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n",
-	       *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
+	       "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes,"
+	       " flags=%08x\n",
+	       *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes, ctx->qc_flags);
 
 	ccs_disconnect(ccsfd);
 
@@ -1392,6 +1421,7 @@ main(int argc, char **argv)
 	char debug = 0, foreground = 0;
 	char device[128];
 	pid_t pid;
+	quorum_header_t qh;
 
 	if (check_process_running(argv[0], &pid) && pid !=getpid()) {
 		printf("QDisk services already running\n");
@@ -1494,13 +1524,24 @@ main(int argc, char **argv)
 		clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
 		       ctx.qc_device, ctx.qc_label);
 	} else if (ctx.qc_device) {
-		if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+		if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
 			clulog(LOG_CRIT,
 			       "Specified partition %s does not have a "
 			       "qdisk label\n", ctx.qc_device);
 			check_stop_cman(&ctx);
 			return -1;
 		}
+
+		if (qh.qh_version == VERSION_MAGIC_V2 &&
+                    qh.qh_blksz != rv) {
+			clulog(LOG_CRIT,
+			       "Specified device %s does match kernel's "
+			       "reported sector size (%d != %d)\n",
+			       ctx.qc_device,
+			       ctx.qc_disk.d_blksz, rv);
+			check_stop_cman(&ctx);
+			return -1;
+		}
 	}
 
 	if (!foreground && !forked) {
@@ -1519,7 +1560,11 @@ main(int argc, char **argv)
 	if (!_running)
 		return 0;
 	
-	cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+	cman_register_quorum_device(ctx.qc_ch,
+				    (ctx.qc_flags&RF_CMAN_LABEL)? 
+				        ctx.qc_cman_label:
+                                        ctx.qc_device,
+				    ctx.qc_votes);
 	/*
 		XXX this always returns -1 / EBUSY even when it works?!!!
 		
diff --git a/cman/qdisk/mkqdisk.c b/cman/qdisk/mkqdisk.c
index 76fa26d..057193a 100644
--- a/cman/qdisk/mkqdisk.c
+++ b/cman/qdisk/mkqdisk.c
@@ -39,7 +39,7 @@ main(int argc, char **argv)
 	char *newdev = NULL, *newlabel = NULL;
 	int rv;
 
-	printf("mkqdisk v0.5.1\n");
+	printf("mkqdisk v0.5.2\n");
 
 	while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
 		switch (rv) {
diff --git a/cman/qdisk/proc.c b/cman/qdisk/proc.c
index dc0cdf1..8868466 100644
--- a/cman/qdisk/proc.c
+++ b/cman/qdisk/proc.c
@@ -32,27 +32,33 @@
 
 
 int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+	     int flags)
 {
-	int fd = -1, ret = -1;
+	int ret = -1;
 	quorum_header_t qh_local;
+	target_info_t disk;
 
 	if (!qh)
 		qh = &qh_local;
 
-	fd = qdisk_validate(device);
-	if (fd < 0) {
+	ret = qdisk_validate(device);
+	if (ret < 0) {
 		perror("qdisk_verify");
 		return -1;
 	}
 
-	fd = qdisk_open(device);
-	if (fd < 0) {
+	ret = qdisk_open(device, &disk);
+	if (ret < 0) {
 		perror("qdisk_open");
 		return -1;
 	}
 
-	if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+	if (ssz) 
+		*ssz = disk.d_blksz;
+
+	ret = -1;
+	if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
 		swab_quorum_header_t(qh);
                 if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
 			if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,7 +67,14 @@ check_device(char *device, char *label, quorum_header_t *qh)
                 }
         }
 
-	qdisk_close(&fd);
+	/* only flag now is 'strict device check'; i.e.,
+	  "block size recorded must match kernel's reported size" */
+	if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+            disk.d_blksz != qh->qh_blksz) {
+		ret = -1;
+	}
+
+	qdisk_close(&disk);
 
 	return ret;
 }
@@ -78,6 +91,7 @@ find_partitions(const char *partfile, const char *label,
 	char device[128];
 	char realdev[256];
 	quorum_header_t qh;
+	int ssz;
 
 	fp = fopen(partfile, "r");
 	if (!fp)
@@ -96,16 +110,30 @@ find_partitions(const char *partfile, const char *label,
 		if (strlen(device)) {
 			snprintf(realdev, sizeof(realdev),
 				 "/dev/%s", device);
-			if (check_device(realdev, (char *)label, &qh) != 0)
+
+			/* If we're not "just printing", then
+			   reject devices which don't match
+			   the recorded sector size */
+			if (check_device(realdev, (char *)label, &ssz,
+					 &qh, !print) != 0)
 				continue;
 
 			if (print) {
 				printf("%s:\n", realdev);
-				printf("\tMagic:   %08x\n", qh.qh_magic);
-				printf("\tLabel:   %s\n", qh.qh_cluster);
-				printf("\tCreated: %s",
+				printf("\tMagic:                %08x\n", qh.qh_magic);
+				printf("\tLabel:                %s\n", qh.qh_cluster);
+				printf("\tCreated:              %s",
 				       ctime((time_t *)&qh.qh_timestamp));
-				printf("\tHost:    %s\n\n", qh.qh_updatehost);
+				printf("\tHost:                 %s\n", qh.qh_updatehost);
+				printf("\tKernel Sector Size:   %d\n", ssz);
+				if (qh.qh_version == VERSION_MAGIC_V2) {
+					printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+					if (qh.qh_blksz != ssz) {
+						printf("WARNING: Sector size mismatch: Header: %d  Kernel: %d\n",
+							(int)qh.qh_blksz, ssz);
+					}
+				} else
+					printf("\n");
 			}
 
 			if (devname && devlen) {
diff --git a/cman/qdisk/score.c b/cman/qdisk/score.c
index d8d7a8f..3f508a2 100644
--- a/cman/qdisk/score.c
+++ b/cman/qdisk/score.c
@@ -75,6 +75,24 @@ nullify(void)
 
 
 /**
+  Set all signal handlers to default for exec of a script.
+  ONLY do this after a fork().
+ */
+void
+restore_signals(void)
+{
+	sigset_t set;
+	int x;
+
+	for (x = 1; x < _NSIG; x++)
+		signal(x, SIG_DFL);
+
+	sigfillset(&set);
+	sigprocmask(SIG_UNBLOCK, &set, NULL);
+}
+
+
+/**
   Spin off a user-defined heuristic
  */
 static int
@@ -117,6 +135,7 @@ fork_heuristic(struct h_data *h)
 	 */
 	set_priority(SCHED_OTHER, -1);
 	munlockall();
+	restore_signals();
 
 	argv[0] = "/bin/sh";
 	argv[1] = "-c";


hooks/post-receive
--
Cluster Project


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-04-17 14:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-17 14:28 Cluster Project branch, STABLE, updated. rgmanager_1_9_66-13-g703bdf8 lhh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).