From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 31178 invoked by alias); 27 Aug 2008 16:02:06 -0000 Received: (qmail 31169 invoked by alias); 27 Aug 2008 16:02:05 -0000 X-Spam-Status: No, hits=-0.4 required=5.0 tests=AWL,BAYES_05,J_CHICKENPOX_64,J_CHICKENPOX_74,KAM_MX,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on bastion.fedora.phx.redhat.com X-Spam-Level: Subject: RHEL5 - fence_tool: new option to delay before join To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/RHEL5 X-Git-Reftype: branch X-Git-Oldrev: 34db56d4f5ea7428c02b072a742c40ca9c574f1c X-Git-Newrev: 5ea416d26ec2b6bf605c573a5173736d0f8cd27c From: David Teigland Message-Id: <20080827160100.B426112036B@lists.fedorahosted.org> Date: Wed, 27 Aug 2008 16:02:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2008-q3/txt/msg00331.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=5ea416d26ec2b6bf605c573a5173736d0f8cd27c Commit: 5ea416d26ec2b6bf605c573a5173736d0f8cd27c Parent: 34db56d4f5ea7428c02b072a742c40ca9c574f1c Author: David Teigland AuthorDate: Tue Aug 26 15:50:49 2008 -0500 Committer: David Teigland CommitterDate: Wed Aug 27 10:51:04 2008 -0500 fence_tool: new option to delay before join bz 460190 Certain network/switch settings cause nodes to form partitioned clusters when they start up. Add code to better cope with these initial partitions. The network partitions are a particular problem for two_node clusters where a node has quorum when it starts up on its own. This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>. It causes fence_tool to delay the join by up to <seconds> to allow all nodes in cluster.conf to become cluster members. 
This allows openais on the nodes to all see each other before starting the fence domain. So we join the domain *after* the nodes merge into a single cluster. If we joined the domain *before* the cluster partition merged, then nodes end up being fenced unnecessarily. (This is a similar idea to post_join_delay; a delay that gives us time to determine that a node in an unknown state is actually ok and doesn't require fencing.) Signed-off-by: David Teigland --- fence/fence_tool/fence_tool.c | 93 ++++++++++++++++++++++++++++++++++++++++- fence/man/fence_tool.8 | 7 ++- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c index a6b002a..0b7ea62 100644 --- a/fence/fence_tool/fence_tool.c +++ b/fence/fence_tool/fence_tool.c @@ -37,10 +37,12 @@ #define FALSE 0 #endif -#define OPTION_STRING ("Vht:wQ") +#define OPTION_STRING ("Vht:m:wQ") #define FENCED_SOCK_PATH "fenced_socket" #define MAXLINE 256 +#define MAX_NODES 128 + #define OP_JOIN 1 #define OP_LEAVE 2 #define OP_WAIT 3 @@ -63,9 +65,15 @@ char *prog_name; int operation; int child_wait = FALSE; int quorum_wait = TRUE; +int member_wait = 0; int fenced_start_timeout = 300; /* five minutes */ int signalled = 0; cman_handle_t ch; +int all_nodeids[MAX_NODES]; +int all_nodeids_count; +cman_node_t cman_nodes[MAX_NODES]; +int cman_nodes_count; + static int do_write(int fd, void *buf, size_t count) { @@ -245,6 +253,77 @@ static int do_wait(int joining) return -1; } +static int all_nodeids_are_members(void) +{ + int i, j, rv, found; + + cman_nodes_count = 0; + memset(&cman_nodes, 0, sizeof(cman_nodes)); + + rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes); + if (rv < 0) { + printf("cman_get_nodes error %d %d\n", rv, errno); + return 0; + } + + for (i = 0; i < all_nodeids_count; i++) { + found = 0; + + for (j = 0; j < cman_nodes_count; j++) { + if (cman_nodes[j].cn_nodeid == all_nodeids[i] && + cman_nodes[j].cn_member) { + found = 1; + break; + 
} + } + + if (!found) + return 0; + } + return 1; +} + +static void wait_for_members(void) +{ + char path[256]; + char *nodeid_str; + int i = 0, cd, error; + + while ((cd = ccs_connect()) < 0) { + sleep(1); + if (++i > 9 && !(i % 10)) + printf("connect to ccs error %d %d\n", cd, errno); + } + + memset(all_nodeids, 0, sizeof(all_nodeids)); + all_nodeids_count = 0; + + for (i = 1; ; i++) { + nodeid_str = NULL; + memset(path, 0, 256); + sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i); + + error = ccs_get(cd, path, &nodeid_str); + if (error || !nodeid_str) + break; + + all_nodeids[all_nodeids_count++] = atoi(nodeid_str); + free(nodeid_str); + } + + ccs_disconnect(cd); + + for (i = 0; i < member_wait; i++) { + if (all_nodeids_are_members()) + break; + if (i && !(i % 5)) + printf("Waiting for all %d nodes to be members\n", + all_nodeids_count); + sleep(1); + } + +} + static int do_join(int argc, char *argv[]) { int i, fd, rv; @@ -264,6 +343,10 @@ static int do_join(int argc, char *argv[]) cman_finish(ch); return EXIT_FAILURE; } + + if (member_wait) + wait_for_members(); + cman_finish(ch); i = 0; @@ -361,10 +444,12 @@ static void print_usage(void) printf(" dump Dump debug buffer from fenced\n"); printf("\n"); printf("Options:\n"); + printf(" -m Delay join up to n seconds for all nodes in cluster.conf\n"); + printf(" to be cluster members\n"); printf(" -w Wait for join to complete\n"); printf(" -V Print program version information, then exit\n"); printf(" -h Print this help, then exit\n"); - printf(" -t Maximum time in seconds to wait\n"); + printf(" -t Maximum time in seconds to wait\n"); printf(" -Q Fail if cluster is not quorate, don't wait\n"); printf("\n"); } @@ -399,6 +484,10 @@ static void decode_arguments(int argc, char *argv[]) child_wait = TRUE; break; + case 'm': + member_wait = atoi(optarg); + break; + case ':': case '?': fprintf(stderr, "Please use '-h' for usage.\n"); diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8 index 
a7ad0c4..7477f6f 100644 --- a/fence/man/fence_tool.8 +++ b/fence/man/fence_tool.8 @@ -27,6 +27,9 @@ it to stdout. .SH OPTIONS .TP +\fB-m\fP +Delay join up to n seconds for all nodes in cluster.conf to be cluster members. +.TP \fB-w\fP Wait until the join or leave is completed. .TP @@ -36,8 +39,8 @@ Help. Print out the usage syntax. \fB-V\fP Print version information. .TP -\fB-t\fP -Maximum time in seconds to wait (default: 300 seconds) +\fB-t\fP +Maximum time in seconds to wait for quorum or -w (default: 300 seconds) .TP \fB-Q\fP Fail command immediately if the cluster is not quorate, don't wait.