public inbox for cluster-cvs@sourceware.org help / color / mirror / Atom feed
From: David Teigland <teigland@fedoraproject.org> To: cluster-cvs-relay@redhat.com Subject: master - fence_tool: new option to delay before join Date: Wed, 27 Aug 2008 19:27:00 -0000 [thread overview] Message-ID: <20080827191924.82CDA12036B@lists.fedorahosted.org> (raw) Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=809e1e9fa79b4bf003fc137b2a8291e709d03b89 Commit: 809e1e9fa79b4bf003fc137b2a8291e709d03b89 Parent: 32849ba0f7e022ca5de30d043de5fe8c8c7ab982 Author: David Teigland <teigland@redhat.com> AuthorDate: Wed Aug 27 14:08:07 2008 -0500 Committer: David Teigland <teigland@redhat.com> CommitterDate: Wed Aug 27 14:08:07 2008 -0500 fence_tool: new option to delay before join bz 460190 Certain network/switch settings cause nodes to form partitioned clusters when they start up. Add code to better cope with these initial partitions. The network partitions are a particular problem for two_node clusters where a node has quorum when it starts up on its own. This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>. It causes fence_tool to delay the join by up to <seconds> to allow all nodes in cluster.conf to become cluster members. This allows openais on the nodes to all see each other before starting the fence domain. So we join the domain *after* the nodes merge into a single cluster. If we joined the domain *before* the cluster partition merged, then nodes end up being fenced unnecessarily. (This is a similar idea to post_join_delay; a delay that gives us time to determine that a node in an unknown state is actually ok and doesn't require fencing.) 
Signed-off-by: David Teigland <teigland@redhat.com> --- fence/fence_tool/fence_tool.c | 169 +++++++++++++++++++++++++++++++++------- fence/man/fence_tool.8 | 7 +- 2 files changed, 144 insertions(+), 32 deletions(-) diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c index 95f4ba1..8e4040b 100644 --- a/fence/fence_tool/fence_tool.c +++ b/fence/fence_tool/fence_tool.c @@ -27,20 +27,28 @@ #define DEFAULT_WAIT_TIMEOUT 300 /* five minutes */ -#define die(fmt, args...) \ -do { \ - fprintf(stderr, "%s: ", prog_name); \ - fprintf(stderr, fmt "\n", ##args); \ - exit(EXIT_FAILURE); \ -} while (0) +#define MAX_NODES 128 +int all_nodeids[MAX_NODES]; +int all_nodeids_count; +cman_node_t cman_nodes[MAX_NODES]; +int cman_nodes_count; +struct fenced_node nodes[MAX_NODES]; char *prog_name; int operation; int verbose = 0; int inquorate_fail = 0; int wait_join = 0; /* default: don't wait for join */ int wait_leave = 0; /* default: don't wait for leave */ -int wait_timeout = DEFAULT_WAIT_TIMEOUT; /* applies to all waits */ +int wait_members = 0; /* default: don't wait for members */ +int wait_timeout = DEFAULT_WAIT_TIMEOUT; + +#define die(fmt, args...) \ +do { \ + fprintf(stderr, "%s: ", prog_name); \ + fprintf(stderr, fmt "\n", ##args); \ + exit(EXIT_FAILURE); \ +} while (0) static int do_write(int fd, void *buf, size_t count) { @@ -116,7 +124,7 @@ static int we_are_in_fence_domain(void) return 0; } -static void do_wait(int joining) +static void wait_domain(int joining) { int in, tries = 0; @@ -144,10 +152,65 @@ static void do_wait(int joining) printf("Error %s the fence group.\n", joining ? 
"joining" : "leaving"); } -static void wait_quorum(void) +static void read_ccs_nodeids(int cd) +{ + char path[PATH_MAX]; + char *nodeid_str; + int i, error; + + memset(all_nodeids, 0, sizeof(all_nodeids)); + all_nodeids_count = 0; + + for (i = 1; ; i++) { + nodeid_str = NULL; + memset(path, 0, sizeof(path)); + sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i); + + error = ccs_get(cd, path, &nodeid_str); + if (error || !nodeid_str) + break; + + all_nodeids[all_nodeids_count++] = atoi(nodeid_str); + free(nodeid_str); + } +} + +static int all_nodeids_are_members(cman_handle_t ch) +{ + int i, j, rv, found; + + memset(&cman_nodes, 0, sizeof(cman_nodes)); + cman_nodes_count = 0; + + rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes); + if (rv < 0) { + printf("cman_get_nodes error %d %d\n", rv, errno); + return 0; + } + + for (i = 0; i < all_nodeids_count; i++) { + found = 0; + + for (j = 0; j < cman_nodes_count; j++) { + if (cman_nodes[j].cn_nodeid == all_nodeids[i] && + cman_nodes[j].cn_member) { + found = 1; + break; + } + } + + if (!found) + return 0; + } + return 1; +} + +static void wait_cman(void) { cman_handle_t ch; - int rv, try_init = 0, try_active = 0, try_quorate = 0; + int try_init = 0, try_active = 0, try_quorate = 0; + int try_ccs = 0, try_members = 0; + int rv, cd; while (1) { ch = cman_init(NULL); @@ -157,8 +220,11 @@ static void wait_quorum(void) if (inquorate_fail) goto fail; - if (try_init++ >= wait_timeout) - goto fail_err; + if (try_init++ >= wait_timeout) { + printf("%s: timed out waiting for cman init\n", + prog_name); + goto fail; + } if (!(try_init % 10)) printf("%s: waiting for cman to start\n", prog_name); @@ -174,12 +240,14 @@ static void wait_quorum(void) if (inquorate_fail) goto fail; - if (try_active++ >= wait_timeout) - goto fail_err; + if (try_active++ >= wait_timeout) { + printf("%s: timed out waiting for cman active\n", + prog_name); + goto fail; + } if (!(try_active % 10)) - printf("%s: waiting for cman 
to be active\n",prog_name); - + printf("%s: waiting for cman active\n", prog_name); sleep(1); } @@ -191,22 +259,61 @@ static void wait_quorum(void) if (inquorate_fail) goto fail; - if (try_quorate++ >= wait_timeout) - goto fail_err; + if (try_quorate++ >= wait_timeout) { + printf("%s: timed out waiting for cman quorum\n", + prog_name); + goto fail; + } if (!(try_quorate % 10)) - printf("%s: waiting for cluster quorum\n", prog_name); + printf("%s: waiting for cman quorum\n", prog_name); + + sleep(1); + } + + while (1) { + cd = ccs_connect(); + if (cd > 0) + break; + + if (try_ccs++ >= wait_timeout) { + printf("%s: timed out waiting for ccs connect\n", + prog_name); + goto fail; + } + + if (!(try_ccs % 10)) + printf("%s: waiting for ccs connect\n", prog_name); sleep(1); } + if (!wait_members) + goto out; + read_ccs_nodeids(cd); + + while (1) { + rv = all_nodeids_are_members(ch); + if (rv) + break; + + if (try_members++ >= wait_members) + break; + + if (!(try_members % 10)) + printf("%s: waiting for all %d nodes to be members\n", + prog_name, all_nodeids_count); + sleep(1); + } + + out: + ccs_disconnect(cd); cman_finish(ch); return; - fail_err: - printf("%s: Timed out waiting for cluster quorum to form.\n", - prog_name); fail: + if (ch) + cman_finish(ch); exit(EXIT_FAILURE); } @@ -214,14 +321,14 @@ static void do_join(int argc, char *argv[]) { int rv; - wait_quorum(); + wait_cman(); rv = fenced_join(); if (rv < 0) die("can't communicate with fenced"); if (wait_join) - do_wait(1); + wait_domain(1); exit(EXIT_SUCCESS); } @@ -237,7 +344,7 @@ static void do_leave(void) die("can't communicate with fenced"); if (wait_leave) - do_wait(0); + wait_domain(0); exit(EXIT_SUCCESS); } @@ -264,10 +371,6 @@ static int node_compare(const void *va, const void *vb) return a->nodeid - b->nodeid; } -#define MAX_NODES 128 - -struct fenced_node nodes[MAX_NODES]; - static int do_list(void) { struct fenced_domain d; @@ -346,6 +449,8 @@ static void print_usage(void) printf(" dump Dump debug 
buffer from fenced\n"); printf("\n"); printf("Options:\n"); + printf(" -m <seconds> Delay join up to <seconds> for all nodes in cluster.conf\n"); + printf(" to be cluster members\n"); printf(" -w Wait for join or leave to complete\n"); printf(" -t <seconds> Maximum time in seconds to wait (default %d)\n", DEFAULT_WAIT_TIMEOUT); printf(" -Q Fail if cluster is not quorate, don't wait\n"); @@ -354,7 +459,7 @@ static void print_usage(void) printf("\n"); } -#define OPTION_STRING "vVht:wQ" +#define OPTION_STRING "vVht:wQm:" static void decode_arguments(int argc, char *argv[]) { @@ -391,6 +496,10 @@ static void decode_arguments(int argc, char *argv[]) wait_leave = 1; break; + case 'm': + wait_members = atoi(optarg); + break; + case 't': wait_timeout = get_int_arg(optchar, optarg); break; diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8 index a83da94..625fbe0 100644 --- a/fence/man/fence_tool.8 +++ b/fence/man/fence_tool.8 @@ -20,6 +20,9 @@ it to stdout. .SH OPTIONS .TP +\fB-m\fP <n> +Delay join up to n seconds for all nodes in cluster.conf to be cluster members. +.TP \fB-w\fP Wait until the join or leave is completed. .TP @@ -29,8 +32,8 @@ Help. Print out the usage syntax. \fB-V\fP Print version information. .TP -\fB-t\fP -Maximum time in seconds to wait (default: 300 seconds) +\fB-t\fP <n> +Maximum time in seconds to wait for quorum or -w (default: 300 seconds) .TP \fB-Q\fP Fail command immediately if the cluster is not quorate, don't wait.
reply other threads:[~2008-08-27 19:20 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20080827191924.82CDA12036B@lists.fedorahosted.org \ --to=teigland@fedoraproject.org \ --cc=cluster-cvs-relay@redhat.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).