public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: RHEL5 - qdisk: Add reporting for I/O hangs to quorum disk
@ 2009-05-13 15:14 Lon Hohberger
0 siblings, 0 replies; only message in thread
From: Lon Hohberger @ 2009-05-13 15:14 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=83a61282601bff7dd26e8bcf4ebd4b1f38d6e25c
Commit: 83a61282601bff7dd26e8bcf4ebd4b1f38d6e25c
Parent: bb1e50295e8dcf36d7ce9ad22196fd7f89fba899
Author: Lon Hohberger <lhh@redhat.com>
AuthorDate: Fri May 8 13:23:04 2009 -0400
Committer: Lon Hohberger <lhh@redhat.com>
CommitterDate: Wed May 13 11:13:56 2009 -0400
qdisk: Add reporting for I/O hangs to quorum disk
Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
cman/qdisk/Makefile | 10 ++--
cman/qdisk/disk.c | 21 +++++++-
cman/qdisk/iostate.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++
cman/qdisk/iostate.h | 17 ++++++
cman/qdisk/main.c | 6 ++
5 files changed, 189 insertions(+), 7 deletions(-)
diff --git a/cman/qdisk/Makefile b/cman/qdisk/Makefile
index 23d0890..f58806b 100644
--- a/cman/qdisk/Makefile
+++ b/cman/qdisk/Makefile
@@ -28,12 +28,12 @@ install: all
install ${TARGET} ${sbindir}
qdiskd: disk.o crc32.o disk_util.o main.o score.o bitmap.o clulog.o \
- gettid.o proc.o daemon_init.o scandisk.o ../lib/libcman.a
- gcc -o $@ $^ -lpthread -L../lib -L${ccslibdir} -lccs
+ gettid.o proc.o daemon_init.o scandisk.o iostate.o ../lib/libcman.a
+ gcc -o $@ $^ -lpthread -L../lib -L${ccslibdir} -lccs -lrt
-mkqdisk: disk.o crc32.o disk_util.o \
- proc.o mkqdisk.o scandisk.o
- gcc -o $@ $^
+mkqdisk: disk.o crc32.o disk_util.o iostate.o \
+ proc.o mkqdisk.o scandisk.o clulog.o gettid.o
+ gcc -o $@ $^ -lrt
%.o: %.c
$(CC) -c -o $@ $^ $(INCLUDES) $(CFLAGS)
diff --git a/cman/qdisk/disk.c b/cman/qdisk/disk.c
index 8cf7b5a..6771e06 100644
--- a/cman/qdisk/disk.c
+++ b/cman/qdisk/disk.c
@@ -44,6 +44,7 @@
#include <unistd.h>
#include <time.h>
#include <linux/fs.h>
+#include "iostate.h"
static int diskRawRead(target_info_t *disk, char *buf, int len);
uint32_t clu_crc32(const char *data, size_t count);
@@ -236,7 +237,9 @@ qdisk_open(char *name, target_info_t *disk)
disk->d_pagesz = sysconf(_SC_PAGESIZE);
/* Check to verify that the partition is large enough.*/
+ io_state(STATE_LSEEK);
ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+ io_state(STATE_NONE);
if (ret < 0) {
perror("open_partition: seek");
close(disk->d_fd);
@@ -340,7 +343,9 @@ diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
char *data;
int datalen;
+ io_state(STATE_LSEEK);
ret = lseek(disk->d_fd, readOffset, SEEK_SET);
+ io_state(STATE_NONE);
if (ret != readOffset) {
#if 0
fprintf(stderr,
@@ -405,7 +410,10 @@ diskRawRead(target_info_t *disk, char *buf, int len)
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (read(disk->d_fd, buf, len));
+ io_state(STATE_READ);
+ readret = read(disk->d_fd, buf, len);
+ io_state(STATE_NONE);
+ return readret;
}
if (len > disk->d_blksz) {
@@ -434,7 +442,9 @@ diskRawRead(target_info_t *disk, char *buf, int len)
return -1;
}
+ io_state(STATE_READ);
readret = read(disk->d_fd, alignedBuf, readlen);
+ io_state(STATE_NONE);
if (readret > 0) {
if (readret > len) {
memcpy(alignedBuf, buf, len);
@@ -477,7 +487,10 @@ diskRawWrite(target_info_t *disk, char *buf, int len)
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (write(disk->d_fd, buf, len));
+ io_state(STATE_WRITE);
+ ret = write(disk->d_fd, buf, len);
+ io_state(STATE_NONE);
+ return ret;
}
if (len > disk->d_blksz) {
@@ -514,7 +527,9 @@ diskRawWrite(target_info_t *disk, char *buf, int len)
}
memcpy(buf, alignedBuf, len);
+ io_state(STATE_WRITE);
ret = write(disk->d_fd, alignedBuf, writelen);
+ io_state(STATE_NONE);
if (ret > len) {
ret = len;
}
@@ -542,7 +557,9 @@ diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int le
return (-1);
}
+ io_state(STATE_LSEEK);
retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
+ io_state(STATE_NONE);
if (retval_seek != writeOffset) {
fprintf(stderr,
"diskRawWriteShadow: can't seek to offset %d\n",
diff --git a/cman/qdisk/iostate.c b/cman/qdisk/iostate.c
new file mode 100644
index 0000000..f4f2329
--- /dev/null
+++ b/cman/qdisk/iostate.c
@@ -0,0 +1,142 @@
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#include <iostate.h>
+#include <clulog.h>
+#include "iostate.h"
+
+/*
+ * State shared between the thread performing disk I/O (which calls
+ * io_state()) and the nanny thread that watches it.
+ */
+
+/* Guards main_state / main_incarnation; state_cond is waited on with it. */
+static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Broadcast by io_state() for non-idle states and by io_nanny_stop(). */
+static pthread_cond_t state_cond = PTHREAD_COND_INITIALIZER;
+
+/* Most recent state passed to io_state(). */
+static iostate_t main_state = STATE_NONE;
+/* Incremented on every io_state() call. */
+static int main_incarnation = 0;
+
+/* Timeout handed to io_nanny_start(), in seconds. */
+static int qdisk_timeout = 0;
+/* Seconds the nanny waits per iteration; derived from the timeout. */
+static int sleeptime = 0;
+
+/* Non-zero while the nanny thread is supposed to keep running. */
+static int thread_active = 0;
+/* Thread id filled in by pthread_create() in io_nanny_start(). */
+static pthread_t io_nanny_tid = 0;
+
+/* One row of the state -> printable name lookup table. */
+struct state_table {
+	iostate_t state;	/* I/O state value */
+	const char *value;	/* name used in log messages; NULL ends the table */
+};
+
+/*
+ * Names for the I/O states that can be reported.  Lookups stop at the
+ * first entry whose name is NULL.
+ */
+static const struct state_table io_state_table[] = {
+	{ .state = STATE_NONE,  .value = "none"  },
+	{ .state = STATE_WRITE, .value = "write" },
+	{ .state = STATE_READ,  .value = "read"  },
+	{ .state = STATE_LSEEK, .value = "seek"  },
+	{ .state = -1,          .value = NULL    }
+};
+
+/*
+ * Translate an I/O state into the short name used in log messages.
+ *
+ * Returns the matching name from io_state_table, or "unknown" when the
+ * state has no entry.  The result is computed afresh on every call (no
+ * static storage), so an earlier successful lookup can never leak into a
+ * later lookup of an unlisted state.
+ */
+static const char *
+state_to_string(iostate_t state)
+{
+	int i;
+
+	for (i = 0; io_state_table[i].value != NULL; i++) {
+		if (io_state_table[i].state == state)
+			return io_state_table[i].value;
+	}
+
+	return "unknown";
+}
+
+
+/*
+ * Record the I/O operation the calling thread is about to perform (or
+ * STATE_NONE once it has finished) so the nanny thread can tell whether
+ * the caller is making progress.
+ *
+ * Every call bumps the incarnation counter, which lets the watcher tell
+ * two consecutive operations of the same kind apart.
+ */
+void
+io_state(iostate_t state)
+{
+	pthread_mutex_lock(&state_mutex);
+	main_state = state;
+	/*
+	 * The counter is allowed to wrap.  Do the increment in unsigned
+	 * arithmetic, where wrap-around is well defined; incrementing a
+	 * signed int past INT_MAX would be undefined behaviour.
+	 */
+	main_incarnation = (int)((unsigned int)main_incarnation + 1u);
+	pthread_mutex_unlock(&state_mutex);
+
+	/* Optimization: Don't signal on STATE_NONE */
+	if (state != STATE_NONE)
+		pthread_cond_broadcast(&state_cond);
+}
+
+
+/*
+ * Watchdog thread body: warn when the I/O state recorded by io_state()
+ * stays on the same non-idle operation for a whole wait period.
+ *
+ * Each iteration waits up to `sleeptime` seconds on state_cond, then
+ * samples the shared state and incarnation.  A hang is reported only
+ * when the wait actually timed out and neither value changed since the
+ * previous sample.  A wake-up caused by a broadcast (or a spurious
+ * wake-up) with unchanged state says nothing about elapsed time and is
+ * ignored.
+ *
+ * arg is unused.  Always returns NULL.
+ */
+static void *
+io_nanny_thread(void *arg)
+{
+	struct timespec wait_time;
+	iostate_t last_main_state, current_main_state;
+	int last_main_incarnation, current_main_incarnation;
+	int logged_incarnation = 0;
+	int still_active, wait_ret;
+
+	(void)arg;
+
+	/* Start with wherever we're at now */
+	pthread_mutex_lock(&state_mutex);
+	last_main_state = main_state;
+	last_main_incarnation = main_incarnation;
+	pthread_mutex_unlock(&state_mutex);
+
+	for (;;) {
+		pthread_mutex_lock(&state_mutex);
+
+		/* Check the run flag while holding the mutex we wait with. */
+		if (!thread_active) {
+			pthread_mutex_unlock(&state_mutex);
+			break;
+		}
+
+		clock_gettime(CLOCK_REALTIME, &wait_time);
+		wait_time.tv_sec += sleeptime;
+		wait_ret = pthread_cond_timedwait(&state_cond, &state_mutex,
+						  &wait_time);
+		current_main_state = main_state;
+		current_main_incarnation = main_incarnation;
+		still_active = thread_active;
+		pthread_mutex_unlock(&state_mutex);
+
+		if (!still_active)
+			break;
+
+		/* No I/O in flight: nothing can be hung. */
+		if (current_main_state == STATE_NONE)
+			continue;
+
+		/* if the state or incarnation changed, the main qdiskd
+		 * thread is healthy */
+		if (current_main_state != last_main_state ||
+		    current_main_incarnation != last_main_incarnation) {
+			last_main_state = current_main_state;
+			last_main_incarnation = current_main_incarnation;
+			continue;
+		}
+
+		/* Only a full timeout with no progress counts as a hang. */
+		if (wait_ret != ETIMEDOUT)
+			continue;
+
+		/* Don't log things twice */
+		if (logged_incarnation == current_main_incarnation)
+			continue;
+		logged_incarnation = current_main_incarnation;
+
+		clulog(LOG_WARNING, "qdiskd: %s "
+		       "(system call) has hung for %d seconds\n",
+		       state_to_string(current_main_state), sleeptime);
+		clulog(LOG_WARNING,
+		       "In %d more seconds, we will be evicted\n",
+		       (qdisk_timeout-sleeptime));
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Start the I/O nanny thread.
+ *
+ * timeout is the number of seconds after which the node would be
+ * evicted; the nanny checks for stuck I/O every timeout / 2 seconds
+ * (at least 1, so a tiny timeout cannot turn the watcher into a busy
+ * loop).
+ *
+ * Returns 0 on success or the error number from pthread_create().  On
+ * failure the nanny is marked inactive so io_nanny_stop() is a no-op.
+ */
+int
+io_nanny_start(int timeout)
+{
+	int ret;
+
+	pthread_mutex_lock(&state_mutex);
+
+	sleeptime = timeout / 2;
+	if (sleeptime < 1)
+		sleeptime = 1;
+	qdisk_timeout = timeout;
+	thread_active = 1;
+
+	ret = pthread_create(&io_nanny_tid, NULL, io_nanny_thread, NULL);
+	if (ret != 0) {
+		/* No thread exists: make sure nobody tries to join it. */
+		thread_active = 0;
+		io_nanny_tid = 0;
+	}
+	pthread_mutex_unlock(&state_mutex);
+
+	return ret;
+}
+
+
+/*
+ * Ask the I/O nanny thread to exit and wait for it to finish.
+ *
+ * Does nothing when the nanny is not marked active (never started,
+ * failed to start, or already stopped).  Always returns 0.
+ */
+int
+io_nanny_stop(void)
+{
+	int was_active;
+
+	/* Update the run flag under the same mutex that guards the
+	 * rest of the shared state. */
+	pthread_mutex_lock(&state_mutex);
+	was_active = thread_active;
+	thread_active = 0;
+	pthread_mutex_unlock(&state_mutex);
+
+	if (!was_active)
+		return 0;
+
+	/* Wake the nanny if it is waiting, then reap it. */
+	pthread_cond_broadcast(&state_cond);
+	pthread_join(io_nanny_tid, NULL);
+	io_nanny_tid = 0;
+
+	return 0;
+}
diff --git a/cman/qdisk/iostate.h b/cman/qdisk/iostate.h
new file mode 100644
index 0000000..7dd7bf6
--- /dev/null
+++ b/cman/qdisk/iostate.h
@@ -0,0 +1,17 @@
+#ifndef _IOSTATE_H
+#define _IOSTATE_H
+
+/*
+ * Kind of disk operation the I/O thread is currently inside.
+ * Values count up from zero, with STATE_NONE meaning "no I/O in flight".
+ */
+typedef enum {
+	STATE_NONE = 0,		/* idle */
+	STATE_READ,		/* 1: inside read() */
+	STATE_WRITE,		/* 2: inside write() */
+	STATE_LSEEK,		/* 3: inside lseek() */
+	STATE_UNKNOWN		/* 4 */
+} iostate_t;
+
+/*
+ * Record the operation the caller is about to perform; pass STATE_NONE
+ * once the operation has returned.
+ */
+extern void io_state(iostate_t state);
+
+/*
+ * Start the watcher thread.  timeout is in seconds; the watcher logs a
+ * warning when an operation has made no progress for roughly half of it.
+ * Returns the result of pthread_create() (0 on success).
+ */
+extern int io_nanny_start(int timeout);
+
+/* Stop the watcher thread and wait for it to exit.  Returns 0. */
+extern int io_nanny_stop(void);
+
+#endif
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index e235883..090c71e 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -43,6 +43,7 @@
#include <ccs.h>
#include "score.h"
#include "clulog.h"
+#include "iostate.h"
#if (!defined(LIBCMAN_VERSION) || \
(defined(LIBCMAN_VERSION) && LIBCMAN_VERSION < 2))
#include <cluster/cnxman-socket.h>
@@ -1592,9 +1593,14 @@ main(int argc, char **argv)
goto out;
}
*/
+
+ io_nanny_start(ctx.qc_tko * ctx.qc_interval);
+
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0)
cman_unregister_quorum_device(ctx.qc_ch);
+ io_nanny_stop();
+
quorum_logout(&ctx);
/* free cman handle to avoid leak in cman */
out:
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2009-05-13 15:14 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-13 15:14 cluster: RHEL5 - qdisk: Add reporting for I/O hangs to quourm disk Lon Hohberger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).