public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* RHEL5 - fence_tool: new option to delay before join
@ 2008-08-27 16:02 David Teigland
0 siblings, 0 replies; only message in thread
From: David Teigland @ 2008-08-27 16:02 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=5ea416d26ec2b6bf605c573a5173736d0f8cd27c
Commit: 5ea416d26ec2b6bf605c573a5173736d0f8cd27c
Parent: 34db56d4f5ea7428c02b072a742c40ca9c574f1c
Author: David Teigland <teigland@redhat.com>
AuthorDate: Tue Aug 26 15:50:49 2008 -0500
Committer: David Teigland <teigland@redhat.com>
CommitterDate: Wed Aug 27 10:51:04 2008 -0500
fence_tool: new option to delay before join
bz 460190
Certain network/switch settings cause nodes to form partitioned clusters
when they start up. Add code to better cope with these initial partitions.
The network partitions are a particular problem for two_node clusters where
a node has quorum when it starts up on its own.
This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>.
It causes fence_tool to delay the join by up to <seconds> to allow all
nodes in cluster.conf to become cluster members.
This allows openais on the nodes to all see each other before starting
the fence domain. So we join the domain *after* the nodes merge into a
single cluster. If we joined the domain *before* the cluster partition
merged, then nodes end up being fenced unnecessarily. (This is a similar
idea to post_join_delay; a delay that gives us time to determine that a
node in an unknown state is actually ok and doesn't require fencing.)
Signed-off-by: David Teigland <teigland@redhat.com>
---
fence/fence_tool/fence_tool.c | 93 ++++++++++++++++++++++++++++++++++++++++-
fence/man/fence_tool.8 | 7 ++-
2 files changed, 96 insertions(+), 4 deletions(-)
diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index a6b002a..0b7ea62 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -37,10 +37,12 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vht:wQ")
+#define OPTION_STRING ("Vht:m:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
+#define MAX_NODES 128
+
#define OP_JOIN 1
#define OP_LEAVE 2
#define OP_WAIT 3
@@ -63,9 +65,15 @@ char *prog_name;
int operation;
int child_wait = FALSE;
int quorum_wait = TRUE;
+int member_wait = 0;
int fenced_start_timeout = 300; /* five minutes */
int signalled = 0;
cman_handle_t ch;
+int all_nodeids[MAX_NODES];
+int all_nodeids_count;
+cman_node_t cman_nodes[MAX_NODES];
+int cman_nodes_count;
+
static int do_write(int fd, void *buf, size_t count)
{
@@ -245,6 +253,77 @@ static int do_wait(int joining)
return -1;
}
+/* Return 1 when every nodeid in all_nodeids[] is a current cman member, else 0. */
+static int all_nodeids_are_members(void)
+{
+	int want, have, err;
+
+	/* Start from a clean snapshot of cman's node list. */
+	memset(&cman_nodes, 0, sizeof(cman_nodes));
+	cman_nodes_count = 0;
+
+	err = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes);
+	if (err < 0) {
+		printf("cman_get_nodes error %d %d\n", err, errno);
+		return 0;
+	}
+
+	for (want = 0; want < all_nodeids_count; want++) {
+		/* Scan until a member entry with the wanted nodeid turns up. */
+		for (have = 0; have < cman_nodes_count; have++) {
+			if (cman_nodes[have].cn_member &&
+			    cman_nodes[have].cn_nodeid == all_nodeids[want])
+				break;
+		}
+
+		/* Ran off the end: this configured node is not a member yet. */
+		if (have >= cman_nodes_count)
+			return 0;
+	}
+	return 1;
+}
+
+/* Delay up to member_wait seconds for every cluster.conf node to join cman. */
+static void wait_for_members(void)
+{
+	char path[256];
+	char *nodeid_str;
+	int i = 0, cd, error;
+
+	while ((cd = ccs_connect()) < 0) {
+		sleep(1);
+		if (++i > 9 && !(i % 10))
+			printf("connect to ccs error %d %d\n", cd, errno);
+	}
+
+	memset(all_nodeids, 0, sizeof(all_nodeids));
+	all_nodeids_count = 0;
+
+	/* Stop at MAX_NODES so a large cluster.conf cannot overrun all_nodeids[]. */
+	for (i = 1; all_nodeids_count < MAX_NODES; i++) {
+		nodeid_str = NULL;
+		snprintf(path, sizeof(path),
+			 "/cluster/clusternodes/clusternode[%d]/@nodeid", i);
+
+		error = ccs_get(cd, path, &nodeid_str);
+		if (error || !nodeid_str)
+			break;
+
+		all_nodeids[all_nodeids_count++] = atoi(nodeid_str);
+		free(nodeid_str);
+	}
+	ccs_disconnect(cd);
+
+	for (i = 0; i < member_wait; i++) {
+		if (all_nodeids_are_members())
+			break;
+		if (i && !(i % 5))
+			printf("Waiting for all %d nodes to be members\n",
+			       all_nodeids_count);
+		sleep(1);
+	}
+}
+
static int do_join(int argc, char *argv[])
{
int i, fd, rv;
@@ -264,6 +343,10 @@ static int do_join(int argc, char *argv[])
cman_finish(ch);
return EXIT_FAILURE;
}
+
+ if (member_wait)
+ wait_for_members();
+
cman_finish(ch);
i = 0;
@@ -361,10 +444,12 @@ static void print_usage(void)
printf(" dump Dump debug buffer from fenced\n");
printf("\n");
printf("Options:\n");
+ printf(" -m <n> Delay join up to n seconds for all nodes in cluster.conf\n");
+ printf(" to be cluster members\n");
printf(" -w Wait for join to complete\n");
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
- printf(" -t Maximum time in seconds to wait\n");
+ printf(" -t <n> Maximum time in seconds to wait\n");
printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
}
@@ -399,6 +484,10 @@ static void decode_arguments(int argc, char *argv[])
child_wait = TRUE;
break;
+ case 'm':
+ member_wait = atoi(optarg);
+ break;
+
case ':':
case '?':
fprintf(stderr, "Please use '-h' for usage.\n");
diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8
index a7ad0c4..7477f6f 100644
--- a/fence/man/fence_tool.8
+++ b/fence/man/fence_tool.8
@@ -27,6 +27,9 @@ it to stdout.
.SH OPTIONS
.TP
+\fB-m\fP <n>
+Delay the join by up to n seconds to allow all nodes in cluster.conf to become cluster members.
+.TP
\fB-w\fP
Wait until the join or leave is completed.
.TP
@@ -36,8 +39,8 @@ Help. Print out the usage syntax.
\fB-V\fP
Print version information.
.TP
-\fB-t\fP
-Maximum time in seconds to wait (default: 300 seconds)
+\fB-t\fP <n>
+Maximum time in seconds to wait for quorum or -w (default: 300 seconds).
.TP
\fB-Q\fP
Fail command immediately if the cluster is not quorate, don't wait.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-08-27 16:02 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-27 16:02 RHEL5 - fence_tool: new option to delay before join David Teigland
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).