From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 20388 invoked by alias); 12 Aug 2009 13:20:33 -0000 Received: (qmail 20368 invoked by alias); 12 Aug 2009 13:20:32 -0000 X-SWARE-Spam-Status: No, hits=-1.9 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Status: No, hits=-1.9 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on bastion2.fedora.phx.redhat.com Subject: cluster: STABLE3 - cman: Make disallowed state optional. To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/STABLE3 X-Git-Reftype: branch X-Git-Oldrev: 922b9930e20a8894e29326ffb8f4d314207a3128 X-Git-Newrev: b8a5fd9d99b933f95d80bbe9577371fa5e94bf82 From: Christine Caulfield Message-Id: <20090812132003.D66BC1201E6@lists.fedorahosted.org> Date: Wed, 12 Aug 2009 13:20:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2009-q3/txt/msg00193.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=b8a5fd9d99b933f95d80bbe9577371fa5e94bf82 Commit: b8a5fd9d99b933f95d80bbe9577371fa5e94bf82 Parent: 922b9930e20a8894e29326ffb8f4d314207a3128 Author: Christine Caulfield AuthorDate: Wed Aug 12 14:18:14 2009 +0100 Committer: Christine Caulfield CommitterDate: Wed Aug 12 14:18:14 2009 +0100 cman: Make disallowed state optional. I've left this enabled by default at the moment. I might change that when the compatibility code arrives, and also when I have written, notarised statements stating that the daemons no longer need it. Signed-off-by: Christine Caulfield --- cman/cman_tool/main.c | 2 + cman/daemon/ais.c | 2 + cman/daemon/cman.h | 1 + cman/daemon/cnxman-socket.h | 1 + cman/daemon/commands.c | 79 +++++++++++++++++++++++++------------------ cman/lib/libcman.h | 1 + 6 files changed, 53 insertions(+), 33 deletions(-) diff --git a/cman/cman_tool/main.c b/cman/cman_tool/main.c index cbee41e..2d28bc2 100644 --- a/cman/cman_tool/main.c +++ b/cman/cman_tool/main.c @@ -237,6 +237,8 @@ static void show_status(void) printf(" Error"); if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED) printf(" DisallowedNodes"); + if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED_ENABLED) + printf(" DisallowedEnabled"); if (einfo->ei_flags & CMAN_EXTRA_FLAG_DIRTY) printf(" HaveState"); printf(" \n"); diff --git a/cman/daemon/ais.c b/cman/daemon/ais.c index dd6037e..45df5d3 100644 --- a/cman/daemon/ais.c +++ b/cman/daemon/ais.c @@ -42,6 +42,7 @@ extern char cluster_name[MAX_CLUSTER_NAME_LEN+1]; extern unsigned int quorumdev_poll; extern unsigned int ccsd_poll_interval; +extern unsigned int enable_disallowed; extern unsigned int shutdown_timeout; extern int init_config(struct corosync_api_v1 *api); @@ -184,6 +185,7 @@ static int cman_exec_init_fn(struct corosync_api_v1 *api) objdb_get_int(api, object_handle, "quorum_dev_poll", &quorumdev_poll, DEFAULT_QUORUMDEV_POLL); objdb_get_int(api, object_handle, "shutdown_timeout", &shutdown_timeout, DEFAULT_SHUTDOWN_TIMEOUT); objdb_get_int(api, object_handle, "ccsd_poll", &ccsd_poll_interval, DEFAULT_CCSD_POLL); + objdb_get_int(api, object_handle, "disallowed", &enable_disallowed, DEFAULT_DISALLOWED); } corosync->object_find_destroy(find_handle); diff --git a/cman/daemon/cman.h b/cman/daemon/cman.h index 2ab40e1..f6e87cc 100644 --- a/cman/daemon/cman.h +++ b/cman/daemon/cman.h @@ -14,3 +14,4 @@ extern int our_nodeid(void); #define DEFAULT_QUORUMDEV_POLL 10000 #define DEFAULT_SHUTDOWN_TIMEOUT 5000 #define DEFAULT_CCSD_POLL 1000 +#define DEFAULT_DISALLOWED 1 diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h index 49e3bc4..e8b7378 100644 --- a/cman/daemon/cnxman-socket.h +++ b/cman/daemon/cnxman-socket.h @@ -163,6 +163,7 @@ struct sock_confchg_message { #define CMAN_EXTRA_FLAG_SHUTDOWN 4 #define CMAN_EXTRA_FLAG_UNCOUNTED 8 #define CMAN_EXTRA_FLAG_DIRTY 16 +#define CMAN_EXTRA_FLAG_DISALLOWED_ENABLED 32 struct cl_extra_info { int node_state; diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c index f31f47d..c2169f0 100644 --- a/cman/daemon/commands.c +++ b/cman/daemon/commands.c @@ -60,6 +60,7 @@ extern int two_node; unsigned int quorumdev_poll=DEFAULT_QUORUMDEV_POLL; unsigned int shutdown_timeout=DEFAULT_SHUTDOWN_TIMEOUT; unsigned int ccsd_poll_interval=DEFAULT_CCSD_POLL; + unsigned int enable_disallowed=DEFAULT_DISALLOWED; static int cluster_is_quorate; char cluster_name[MAX_CLUSTER_NAME_LEN+1]; static char nodename[MAX_CLUSTER_MEMBER_NAME_LEN+1]; @@ -134,6 +135,9 @@ static int have_disallowed(void) { struct cluster_node *node; + if (!enable_disallowed) + return 0; + list_iterate_items(node, &cluster_members_list) { if (node->state == NODESTATE_AISONLY) return 1; @@ -566,6 +570,8 @@ static int do_cmd_get_extrainfo(char *cmdbuf, char **retbuf, int retsize, int *r einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED; if (us->flags & NODE_FLAGS_DIRTY) einfo->flags |= CMAN_EXTRA_FLAG_DIRTY; + if (enable_disallowed) + einfo->flags |= CMAN_EXTRA_FLAG_DISALLOWED_ENABLED; ptr = einfo->addresses; @@ -1889,7 +1895,8 @@ static void do_process_transition(int nodeid, char *data) /* If the remote node can see AISONLY nodes and we want to join, then we can't, as we don't know the full state */ - if (local_first_trans && msg->flags & NODE_FLAGS_SEESDISALLOWED && !have_disallowed()) { + if (enable_disallowed && + local_first_trans && msg->flags & NODE_FLAGS_SEESDISALLOWED && !have_disallowed()) { /* Must use syslog directly here or the message will never arrive */ syslog(LOG_CRIT, "CMAN: Joined a cluster with disallowed nodes. must die"); cman_finish(); @@ -1911,50 +1918,56 @@ static void do_process_transition(int nodeid, char *data) /* Newer nodes 6.1.0 onwards, set the DIRTY flag if they have state. If the new node has been down and has state then we mark it disallowed because we cannot merge stateful nodes */ - if ( (msg->flags & NODE_FLAGS_DIRTY && (node->flags & NODE_FLAGS_BEENDOWN)) || - (msg->flags & NODE_FLAGS_DIRTY && msg->first_trans && !node->us && (us->flags & NODE_FLAGS_DIRTY))) { - /* Don't duplicate messages */ - if (node->state != NODESTATE_AISONLY) { - if (cluster_is_quorate) { - log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state", node->name); - node->state = NODESTATE_AISONLY; - send_kill(nodeid, CLUSTER_KILL_REJOIN); - } - else { - log_printf(LOG_CRIT, "Node %s not joined to cman because it has existing state", node->name); - node->state = NODESTATE_AISONLY; + if (enable_disallowed) { + if ( (msg->flags & NODE_FLAGS_DIRTY && (node->flags & NODE_FLAGS_BEENDOWN)) || + (msg->flags & NODE_FLAGS_DIRTY && msg->first_trans && !node->us && (us->flags & NODE_FLAGS_DIRTY))) { + /* Don't duplicate messages */ + if (node->state != NODESTATE_AISONLY) { + if (cluster_is_quorate) { + log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state", node->name); + node->state = NODESTATE_AISONLY; + send_kill(nodeid, CLUSTER_KILL_REJOIN); + } + else { + log_printf(LOG_CRIT, "Node %s not joined to cman because it has existing state", node->name); + node->state = NODESTATE_AISONLY; + } } + return; } - return; - } - /* This is for older nodes. If the join_time of the node matches that already stored AND - the node has been down, then we kill it as this must be a rejoin */ - if (msg->minor_version == 0 && - msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) { - /* Don't duplicate messages */ - if (node->state != NODESTATE_AISONLY) { - if (cluster_is_quorate) { - log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name); - node->state = NODESTATE_AISONLY; - send_kill(nodeid, CLUSTER_KILL_REJOIN); - } - else { - log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name); - node->state = NODESTATE_AISONLY; + /* This is for older nodes. If the join_time of the node matches that already stored AND + the node has been down, then we kill it as this must be a rejoin */ + if (msg->minor_version == 0 && + msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) { + /* Don't duplicate messages */ + if (node->state != NODESTATE_AISONLY) { + if (cluster_is_quorate) { + log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name); + node->state = NODESTATE_AISONLY; + send_kill(nodeid, CLUSTER_KILL_REJOIN); + } + else { + log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name); + node->state = NODESTATE_AISONLY; + } } + return; + } + else { + node->cman_join_time = msg->join_time; + add_ais_node(nodeid, incarnation, num_ais_nodes); } - return; } else { - node->cman_join_time = msg->join_time; - add_ais_node(nodeid, incarnation, num_ais_nodes); + add_ais_node(nodeid, incarnation, num_ais_nodes); } /* If the new node is joining and the existing cluster already has some AISONLY nodes then we can't make sense of the membership. So the new node has to also be AISONLY until we are consistent again */ - if (msg->first_trans && !node->us && have_disallowed()) + if (enable_disallowed && + msg->first_trans && !node->us && have_disallowed()) node->state = NODESTATE_AISONLY; node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */ diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h index 49b374f..feb10a2 100644 --- a/cman/lib/libcman.h +++ b/cman/lib/libcman.h @@ -174,6 +174,7 @@ typedef struct cman_cluster #define CMAN_EXTRA_FLAG_SHUTDOWN 4 #define CMAN_EXTRA_FLAG_DISALLOWED 8 #define CMAN_EXTRA_FLAG_DIRTY 16 +#define CMAN_EXTRA_FLAG_DISALLOWED_ENABLED 32 typedef struct cman_extra_info { int ei_node_state;