From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 15955 invoked by alias); 27 Aug 2008 16:40:35 -0000 Received: (qmail 15949 invoked by alias); 27 Aug 2008 16:40:35 -0000 X-Spam-Status: No, hits=-1.1 required=5.0 tests=AWL,BAYES_00,J_CHICKENPOX_64,J_CHICKENPOX_74,KAM_MX,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on bastion.fedora.phx.redhat.com X-Spam-Level: Subject: STABLE2 - fence_tool: new option to delay before join To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/STABLE2 X-Git-Reftype: branch X-Git-Oldrev: 672db2d7e03d61f4f64a51fdbde4e887054e7839 X-Git-Newrev: 41a69f04aeaf9aa3f38c899bf55495f04c19831c From: David Teigland Message-Id: <20080827163933.A116712036B@lists.fedorahosted.org> Date: Wed, 27 Aug 2008 18:52:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2008-q3/txt/msg00334.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=41a69f04aeaf9aa3f38c899bf55495f04c19831c Commit: 41a69f04aeaf9aa3f38c899bf55495f04c19831c Parent: 672db2d7e03d61f4f64a51fdbde4e887054e7839 Author: David Teigland AuthorDate: Tue Aug 26 15:50:49 2008 -0500 Committer: David Teigland CommitterDate: Wed Aug 27 10:55:36 2008 -0500 fence_tool: new option to delay before join bz 460190 Certain network/switch settings cause nodes to form partitioned clusters when they start up. Add code to better cope with these initial partitions. The network partitions are a particular problem for two_node clusters where a node has quorum when it starts up on its own. This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>. It causes fence_tool to delay the join by up to <seconds> to allow all nodes in cluster.conf to become cluster members. 
This allows openais on the nodes to all see each other before starting the fence domain. So we join the domain *after* the nodes merge into a single cluster. If we joined the domain *before* the cluster partition merged, then nodes end up being fenced unnecessarily. (This is a similar idea to post_join_delay; a delay that gives us time to determine that a node in an unknown state is actually ok and doesn't require fencing.) Signed-off-by: David Teigland --- fence/fence_tool/fence_tool.c | 93 ++++++++++++++++++++++++++++++++++++++++- fence/man/fence_tool.8 | 7 ++- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c index 7a4fe27..60d47eb 100644 --- a/fence/fence_tool/fence_tool.c +++ b/fence/fence_tool/fence_tool.c @@ -25,10 +25,12 @@ #define FALSE 0 #endif -#define OPTION_STRING ("Vht:wQ") +#define OPTION_STRING ("Vht:m:wQ") #define FENCED_SOCK_PATH "fenced_socket" #define MAXLINE 256 +#define MAX_NODES 128 + #define OP_JOIN 1 #define OP_LEAVE 2 #define OP_WAIT 3 @@ -51,9 +53,15 @@ char *prog_name; int operation; int child_wait = FALSE; int quorum_wait = TRUE; +int member_wait = 0; int fenced_start_timeout = 300; /* five minutes */ int signalled = 0; cman_handle_t ch; +int all_nodeids[MAX_NODES]; +int all_nodeids_count; +cman_node_t cman_nodes[MAX_NODES]; +int cman_nodes_count; + static int do_write(int fd, void *buf, size_t count) { @@ -233,6 +241,77 @@ static int do_wait(int joining) return -1; } +static int all_nodeids_are_members(void) +{ + int i, j, rv, found; + + cman_nodes_count = 0; + memset(&cman_nodes, 0, sizeof(cman_nodes)); + + rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes); + if (rv < 0) { + printf("cman_get_nodes error %d %d\n", rv, errno); + return 0; + } + + for (i = 0; i < all_nodeids_count; i++) { + found = 0; + + for (j = 0; j < cman_nodes_count; j++) { + if (cman_nodes[j].cn_nodeid == all_nodeids[i] && + cman_nodes[j].cn_member) { + found = 1; + break; + 
} + } + + if (!found) + return 0; + } + return 1; +} + +static void wait_for_members(void) +{ + char path[256]; + char *nodeid_str; + int i = 0, cd, error; + + while ((cd = ccs_connect()) < 0) { + sleep(1); + if (++i > 9 && !(i % 10)) + printf("connect to ccs error %d %d\n", cd, errno); + } + + memset(all_nodeids, 0, sizeof(all_nodeids)); + all_nodeids_count = 0; + + for (i = 1; ; i++) { + nodeid_str = NULL; + memset(path, 0, 256); + sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i); + + error = ccs_get(cd, path, &nodeid_str); + if (error || !nodeid_str) + break; + + all_nodeids[all_nodeids_count++] = atoi(nodeid_str); + free(nodeid_str); + } + + ccs_disconnect(cd); + + for (i = 0; i < member_wait; i++) { + if (all_nodeids_are_members()) + break; + if (i && !(i % 5)) + printf("Waiting for all %d nodes to be members\n", + all_nodeids_count); + sleep(1); + } + +} + static int do_join(int argc, char *argv[]) { int i, fd, rv; @@ -252,6 +331,10 @@ static int do_join(int argc, char *argv[]) cman_finish(ch); return EXIT_FAILURE; } + + if (member_wait) + wait_for_members(); + cman_finish(ch); i = 0; @@ -349,10 +432,12 @@ static void print_usage(void) printf(" dump Dump debug buffer from fenced\n"); printf("\n"); printf("Options:\n"); + printf(" -m Delay join up to n seconds for all nodes in cluster.conf\n"); + printf(" to be cluster members\n"); printf(" -w Wait for join to complete\n"); printf(" -V Print program version information, then exit\n"); printf(" -h Print this help, then exit\n"); - printf(" -t Maximum time in seconds to wait\n"); + printf(" -t Maximum time in seconds to wait\n"); printf(" -Q Fail if cluster is not quorate, don't wait\n"); printf("\n"); } @@ -387,6 +472,10 @@ static void decode_arguments(int argc, char *argv[]) child_wait = TRUE; break; + case 'm': + member_wait = atoi(optarg); + break; + case ':': case '?': fprintf(stderr, "Please use '-h' for usage.\n"); diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8 index 
73867cb..2a35240 100644 --- a/fence/man/fence_tool.8 +++ b/fence/man/fence_tool.8 @@ -20,6 +20,9 @@ it to stdout. .SH OPTIONS .TP +\fB-m\fP +Delay join up to n seconds for all nodes in cluster.conf to be cluster members. +.TP \fB-w\fP Wait until the join or leave is completed. .TP @@ -29,8 +32,8 @@ Help. Print out the usage syntax. \fB-V\fP Print version information. .TP -\fB-t\fP -Maximum time in seconds to wait (default: 300 seconds) +\fB-t\fP +Maximum time in seconds to wait for quorum or -w (default: 300 seconds) .TP \fB-Q\fP Fail command immediately if the cluster is not quorate, don't wait.