public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* master - fence_tool: new option to delay before join
@ 2008-08-27 19:27 David Teigland
0 siblings, 0 replies; only message in thread
From: David Teigland @ 2008-08-27 19:27 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=809e1e9fa79b4bf003fc137b2a8291e709d03b89
Commit: 809e1e9fa79b4bf003fc137b2a8291e709d03b89
Parent: 32849ba0f7e022ca5de30d043de5fe8c8c7ab982
Author: David Teigland <teigland@redhat.com>
AuthorDate: Wed Aug 27 14:08:07 2008 -0500
Committer: David Teigland <teigland@redhat.com>
CommitterDate: Wed Aug 27 14:08:07 2008 -0500
fence_tool: new option to delay before join
bz 460190
Certain network/switch settings cause nodes to form partitioned clusters
when they start up. Add code to better cope with these initial partitions.
The network partitions are a particular problem for two_node clusters where
a node has quorum when it starts up on its own.
This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>.
It causes fence_tool to delay the join by up to <seconds> to allow all
nodes in cluster.conf to become cluster members.
This allows the openais instances on all nodes to see each other before starting
the fence domain. So we join the domain *after* the nodes merge into a
single cluster. If we joined the domain *before* the cluster partition
merged, then nodes would end up being fenced unnecessarily. (This is a similar
idea to post_join_delay; a delay that gives us time to determine that a
node in an unknown state is actually ok and doesn't require fencing.)
Signed-off-by: David Teigland <teigland@redhat.com>
---
fence/fence_tool/fence_tool.c | 169 +++++++++++++++++++++++++++++++++-------
fence/man/fence_tool.8 | 7 +-
2 files changed, 144 insertions(+), 32 deletions(-)
diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index 95f4ba1..8e4040b 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -27,20 +27,28 @@
#define DEFAULT_WAIT_TIMEOUT 300 /* five minutes */
-#define die(fmt, args...) \
-do { \
- fprintf(stderr, "%s: ", prog_name); \
- fprintf(stderr, fmt "\n", ##args); \
- exit(EXIT_FAILURE); \
-} while (0)
+#define MAX_NODES 128
+int all_nodeids[MAX_NODES];
+int all_nodeids_count;
+cman_node_t cman_nodes[MAX_NODES];
+int cman_nodes_count;
+struct fenced_node nodes[MAX_NODES];
char *prog_name;
int operation;
int verbose = 0;
int inquorate_fail = 0;
int wait_join = 0; /* default: don't wait for join */
int wait_leave = 0; /* default: don't wait for leave */
-int wait_timeout = DEFAULT_WAIT_TIMEOUT; /* applies to all waits */
+int wait_members = 0; /* default: don't wait for members */
+int wait_timeout = DEFAULT_WAIT_TIMEOUT;
+
+#define die(fmt, args...) \
+do { \
+ fprintf(stderr, "%s: ", prog_name); \
+ fprintf(stderr, fmt "\n", ##args); \
+ exit(EXIT_FAILURE); \
+} while (0)
static int do_write(int fd, void *buf, size_t count)
{
@@ -116,7 +124,7 @@ static int we_are_in_fence_domain(void)
return 0;
}
-static void do_wait(int joining)
+static void wait_domain(int joining)
{
int in, tries = 0;
@@ -144,10 +152,65 @@ static void do_wait(int joining)
printf("Error %s the fence group.\n", joining ? "joining" : "leaving");
}
-static void wait_quorum(void)
+static void read_ccs_nodeids(int cd)
+{
+ char path[PATH_MAX];
+ char *nodeid_str;
+ int i, error;
+
+ memset(all_nodeids, 0, sizeof(all_nodeids));
+ all_nodeids_count = 0;
+
+ for (i = 1; ; i++) {
+ nodeid_str = NULL;
+ memset(path, 0, sizeof(path));
+ sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i);
+
+ error = ccs_get(cd, path, &nodeid_str);
+ if (error || !nodeid_str)
+ break;
+
+ all_nodeids[all_nodeids_count++] = atoi(nodeid_str);
+ free(nodeid_str);
+ }
+}
+
+static int all_nodeids_are_members(cman_handle_t ch)
+{
+ int i, j, rv, found;
+
+ memset(&cman_nodes, 0, sizeof(cman_nodes));
+ cman_nodes_count = 0;
+
+ rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes);
+ if (rv < 0) {
+ printf("cman_get_nodes error %d %d\n", rv, errno);
+ return 0;
+ }
+
+ for (i = 0; i < all_nodeids_count; i++) {
+ found = 0;
+
+ for (j = 0; j < cman_nodes_count; j++) {
+ if (cman_nodes[j].cn_nodeid == all_nodeids[i] &&
+ cman_nodes[j].cn_member) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return 0;
+ }
+ return 1;
+}
+
+static void wait_cman(void)
{
cman_handle_t ch;
- int rv, try_init = 0, try_active = 0, try_quorate = 0;
+ int try_init = 0, try_active = 0, try_quorate = 0;
+ int try_ccs = 0, try_members = 0;
+ int rv, cd;
while (1) {
ch = cman_init(NULL);
@@ -157,8 +220,11 @@ static void wait_quorum(void)
if (inquorate_fail)
goto fail;
- if (try_init++ >= wait_timeout)
- goto fail_err;
+ if (try_init++ >= wait_timeout) {
+ printf("%s: timed out waiting for cman init\n",
+ prog_name);
+ goto fail;
+ }
if (!(try_init % 10))
printf("%s: waiting for cman to start\n", prog_name);
@@ -174,12 +240,14 @@ static void wait_quorum(void)
if (inquorate_fail)
goto fail;
- if (try_active++ >= wait_timeout)
- goto fail_err;
+ if (try_active++ >= wait_timeout) {
+ printf("%s: timed out waiting for cman active\n",
+ prog_name);
+ goto fail;
+ }
if (!(try_active % 10))
- printf("%s: waiting for cman to be active\n",prog_name);
-
+ printf("%s: waiting for cman active\n", prog_name);
sleep(1);
}
@@ -191,22 +259,61 @@ static void wait_quorum(void)
if (inquorate_fail)
goto fail;
- if (try_quorate++ >= wait_timeout)
- goto fail_err;
+ if (try_quorate++ >= wait_timeout) {
+ printf("%s: timed out waiting for cman quorum\n",
+ prog_name);
+ goto fail;
+ }
if (!(try_quorate % 10))
- printf("%s: waiting for cluster quorum\n", prog_name);
+ printf("%s: waiting for cman quorum\n", prog_name);
+
+ sleep(1);
+ }
+
+ while (1) {
+ cd = ccs_connect();
+ if (cd > 0)
+ break;
+
+ if (try_ccs++ >= wait_timeout) {
+ printf("%s: timed out waiting for ccs connect\n",
+ prog_name);
+ goto fail;
+ }
+
+ if (!(try_ccs % 10))
+ printf("%s: waiting for ccs connect\n", prog_name);
sleep(1);
}
+ if (!wait_members)
+ goto out;
+ read_ccs_nodeids(cd);
+
+ while (1) {
+ rv = all_nodeids_are_members(ch);
+ if (rv)
+ break;
+
+ if (try_members++ >= wait_members)
+ break;
+
+ if (!(try_members % 10))
+ printf("%s: waiting for all %d nodes to be members\n",
+ prog_name, all_nodeids_count);
+ sleep(1);
+ }
+
+ out:
+ ccs_disconnect(cd);
cman_finish(ch);
return;
- fail_err:
- printf("%s: Timed out waiting for cluster quorum to form.\n",
- prog_name);
fail:
+ if (ch)
+ cman_finish(ch);
exit(EXIT_FAILURE);
}
@@ -214,14 +321,14 @@ static void do_join(int argc, char *argv[])
{
int rv;
- wait_quorum();
+ wait_cman();
rv = fenced_join();
if (rv < 0)
die("can't communicate with fenced");
if (wait_join)
- do_wait(1);
+ wait_domain(1);
exit(EXIT_SUCCESS);
}
@@ -237,7 +344,7 @@ static void do_leave(void)
die("can't communicate with fenced");
if (wait_leave)
- do_wait(0);
+ wait_domain(0);
exit(EXIT_SUCCESS);
}
@@ -264,10 +371,6 @@ static int node_compare(const void *va, const void *vb)
return a->nodeid - b->nodeid;
}
-#define MAX_NODES 128
-
-struct fenced_node nodes[MAX_NODES];
-
static int do_list(void)
{
struct fenced_domain d;
@@ -346,6 +449,8 @@ static void print_usage(void)
printf(" dump Dump debug buffer from fenced\n");
printf("\n");
printf("Options:\n");
+ printf(" -m <seconds> Delay join up to <seconds> for all nodes in cluster.conf\n");
+ printf(" to be cluster members\n");
printf(" -w Wait for join or leave to complete\n");
printf(" -t <seconds> Maximum time in seconds to wait (default %d)\n", DEFAULT_WAIT_TIMEOUT);
printf(" -Q Fail if cluster is not quorate, don't wait\n");
@@ -354,7 +459,7 @@ static void print_usage(void)
printf("\n");
}
-#define OPTION_STRING "vVht:wQ"
+#define OPTION_STRING "vVht:wQm:"
static void decode_arguments(int argc, char *argv[])
{
@@ -391,6 +496,10 @@ static void decode_arguments(int argc, char *argv[])
wait_leave = 1;
break;
+ case 'm':
+ wait_members = atoi(optarg);
+ break;
+
case 't':
wait_timeout = get_int_arg(optchar, optarg);
break;
diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8
index a83da94..625fbe0 100644
--- a/fence/man/fence_tool.8
+++ b/fence/man/fence_tool.8
@@ -20,6 +20,9 @@ it to stdout.
.SH OPTIONS
.TP
+\fB-m\fP <n>
+Delay join up to n seconds for all nodes in cluster.conf to be cluster members.
+.TP
\fB-w\fP
Wait until the join or leave is completed.
.TP
@@ -29,8 +32,8 @@ Help. Print out the usage syntax.
\fB-V\fP
Print version information.
.TP
-\fB-t\fP
-Maximum time in seconds to wait (default: 300 seconds)
+\fB-t\fP <n>
+Maximum time in seconds to wait for quorum or -w (default: 300 seconds)
.TP
\fB-Q\fP
Fail command immediately if the cluster is not quorate, don't wait.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-08-27 19:20 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-27 19:27 master - fence_tool: new option to delay before join David Teigland
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).