From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 8723 invoked by alias); 8 Oct 2008 20:06:40 -0000 Received: (qmail 8717 invoked by alias); 8 Oct 2008 20:06:40 -0000 X-Spam-Status: No, hits=1.3 required=5.0 tests=AWL,BAYES_50,J_CHICKENPOX_46,J_CHICKENPOX_62,J_CHICKENPOX_64,J_CHICKENPOX_65,J_CHICKENPOX_66,KAM_MX,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on bastion.fedora.phx.redhat.com X-Spam-Level: Subject: master - daemons/tools: misc minor cleanups and improvements To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: aa24deb8511d8fca8e027a954747648a04ad7988 X-Git-Newrev: b6d73f5752306d635f306036438af1fcf26f4332 From: David Teigland Message-Id: <20081008200525.83DACC07B8@lists.fedorahosted.org> Date: Wed, 08 Oct 2008 20:06:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2008-q4/txt/msg00017.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=b6d73f5752306d635f306036438af1fcf26f4332 Commit: b6d73f5752306d635f306036438af1fcf26f4332 Parent: aa24deb8511d8fca8e027a954747648a04ad7988 Author: David Teigland AuthorDate: Wed Oct 8 14:46:46 2008 -0500 Committer: David Teigland CommitterDate: Wed Oct 8 14:46:46 2008 -0500 daemons/tools: misc minor cleanups and improvements fenced/fence_tool: fix and improve output of ls daemons: don't attempt cpg exit cleanup after cluster goes down daemons: fix lazy memset size args to avoid mistakes dlm_controld: clean up daemon cpg on exit Signed-off-by: David Teigland --- fence/fence_tool/fence_tool.c | 35 ++++++++++++++++++++++++++++--- fence/fenced/cpg.c | 43 ++++++++++++++++++++++++-------------- fence/fenced/fd.h | 2 + fence/fenced/main.c | 2 + fence/fenced/member_cman.c | 2 +- group/daemon/main.c | 4 ++- group/dlm_controld/cpg.c | 25 ++++++++++++++++++++++ group/dlm_controld/dlm_daemon.h | 2 + group/dlm_controld/group.c | 7 +++-- group/dlm_controld/main.c | 6 ++++- group/gfs_controld/cpg-new.c | 2 +- group/gfs_controld/cpg-old.c | 4 +- group/gfs_controld/gfs_daemon.h | 1 + group/gfs_controld/main.c | 2 + 14 files changed, 108 insertions(+), 29 deletions(-) diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c index b4ce113..e12c398 100644 --- a/fence/fence_tool/fence_tool.c +++ b/fence/fence_tool/fence_tool.c @@ -371,8 +371,10 @@ static int node_compare(const void *va, const void *vb) return a->nodeid - b->nodeid; } -#define CGST_WAIT_CONDITIONS 1 -#define CGST_WAIT_MESSAGES 2 +/* copied from fence/fenced/fd.h, should probably be in libfenced.h */ +#define CGST_WAIT_CONDITIONS 1 +#define CGST_WAIT_MESSAGES 2 +#define CGST_WAIT_FENCING 3 static char *wait_str(int state) { @@ -383,6 +385,31 @@ static char *wait_str(int state) return "quorum"; case CGST_WAIT_MESSAGES: return "messages"; + case CGST_WAIT_FENCING: + return "fencing"; + } + return "unknown"; +} + +/* copied from fence/fenced/fd.h, should probably be in libfenced.h */ +#define VIC_DONE_AGENT 1 +#define VIC_DONE_MEMBER 2 +#define VIC_DONE_OVERRIDE 3 +#define VIC_DONE_EXTERNAL 4 + +static char *how_str(int how) +{ + switch (how) { + case 0: + return "none"; + case VIC_DONE_AGENT: + return "agent"; + case VIC_DONE_MEMBER: + return "member"; + case VIC_DONE_OVERRIDE: + return "override"; + case VIC_DONE_EXTERNAL: + return "external"; } return "unknown"; } @@ -444,12 +471,12 @@ static int do_list(void) np = nodes; for (i = 0; i < node_count; i++) { - printf("nodeid %d member %d victim %d last fence master %d how %d\n", + printf("nodeid %d member %d victim %d last fence master %d how %s\n", np->nodeid, np->member, np->victim, np->last_fenced_master, - np->last_fenced_how); + how_str(np->last_fenced_how)); np++; } printf("\n"); diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c index 936f787..6d51a78 100644 --- a/fence/fenced/cpg.c +++ b/fence/fenced/cpg.c @@ -344,9 +344,15 @@ static void save_history(struct fd *fd, struct fd_info *fi, struct id_info *ids) id = ids; for (i = 0; i < fi->id_info_count; i++) { + /* create history entries for nodes that were domain members + prior to our joining the domain */ + node_history_init(fd, id->nodeid); + node = get_node_history(fd, id->nodeid); - if (!node) + if (!node) { + log_error("save_history no nodeid %d", id->nodeid); goto next; + } if (!node->fence_time && id->fence_time) { node->fence_master = id->fence_master; @@ -1039,6 +1045,8 @@ static void apply_changes(struct fd *fd) case CGST_WAIT_MESSAGES: if (wait_messages_done(fd)) { set_master(fd); + cg->state = CGST_WAIT_FENCING; /* for queries */ + if (fd->master == our_nodeid) { delay_fencing(fd, nodes_added(fd)); fence_victims(fd); @@ -1849,7 +1857,7 @@ void close_cpg(void) struct cpg_name name; int i = 0; - if (!cpg_handle_daemon) + if (!cpg_handle_daemon || cluster_down) return; memset(&name, 0, sizeof(name)); @@ -1872,25 +1880,28 @@ int set_node_info(struct fd *fd, int nodeid, struct fenced_node *nodeinfo) { struct node_history *node; struct member *memb; + struct change *cg; nodeinfo->nodeid = nodeid; nodeinfo->victim = is_victim(fd, nodeid); - if (!fd->started_change) - goto history; + if (list_empty(&fd->changes)) + cg = fd->started_change; + else + cg = list_first_entry(&fd->changes, struct change, list); - memb = find_memb(fd->started_change, nodeid); - if (memb) - nodeinfo->member = memb->disallowed ? 0 : 1; + if (cg) { + memb = find_memb(cg, nodeid); + if (memb) + nodeinfo->member = memb->disallowed ? -1 : 1; + } - history: node = get_node_history(fd, nodeid); - if (!node) - return 0; - - nodeinfo->last_fenced_master = node->fence_master; - nodeinfo->last_fenced_how = node->fence_how; - nodeinfo->last_fenced_time = node->fence_time; + if (node) { + nodeinfo->last_fenced_master = node->fence_master; + nodeinfo->last_fenced_how = node->fence_how; + nodeinfo->last_fenced_time = node->fence_time; + } return 0; } @@ -1932,7 +1943,7 @@ int set_domain_nodes(struct fd *fd, int option, int *node_count, nodes = malloc(count * sizeof(struct fenced_node)); if (!nodes) return -ENOMEM; - memset(nodes, 0, sizeof(*nodes)); + memset(nodes, 0, count * sizeof(struct fenced_node)); n = nodes; list_for_each_entry(memb, &cg->members, list) @@ -1946,7 +1957,7 @@ int set_domain_nodes(struct fd *fd, int option, int *node_count, nodes = malloc(count * sizeof(struct fenced_node)); if (!nodes) return -ENOMEM; - memset(nodes, 0, sizeof(*nodes)); + memset(nodes, 0, count * sizeof(struct fenced_node)); n = nodes; list_for_each_entry(nh, &fd->node_history, list) diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h index 0ebd721..1520838 100644 --- a/fence/fenced/fd.h +++ b/fence/fenced/fd.h @@ -61,6 +61,7 @@ extern int daemon_debug_opt; extern int daemon_quit; +extern int cluster_down; extern struct list_head domains; extern int cman_quorate; extern int our_nodeid; @@ -118,6 +119,7 @@ struct fd_header { #define CGST_WAIT_CONDITIONS 1 #define CGST_WAIT_MESSAGES 2 +#define CGST_WAIT_FENCING 3 /* for queries */ struct change { struct list_head list; diff --git a/fence/fenced/main.c b/fence/fenced/main.c index 909720b..0c1d960 100644 --- a/fence/fenced/main.c +++ b/fence/fenced/main.c @@ -722,6 +722,7 @@ void cluster_dead(int ci) { log_error("cluster is down, exiting"); daemon_quit = 1; + cluster_down = 1; } static void loop(void) @@ -1051,6 +1052,7 @@ void daemon_dump_save(void) int daemon_debug_opt; int daemon_quit; +int cluster_down; struct list_head domains; int cman_quorate; int our_nodeid; diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c index a35e746..1e199f2 100644 --- a/fence/fenced/member_cman.c +++ b/fence/fenced/member_cman.c @@ -255,7 +255,7 @@ struct node *get_new_node(struct fd *fd, int nodeid) node = malloc(sizeof(*node)); if (!node) return NULL; - memset(node, 0, sizeof(*node)); + memset(node, 0, sizeof(struct node)); node->nodeid = nodeid; diff --git a/group/daemon/main.c b/group/daemon/main.c index 9f4c88f..c598307 100644 --- a/group/daemon/main.c +++ b/group/daemon/main.c @@ -827,7 +827,9 @@ static void loop(void) close_ccs(); close_cman(); - if (!list_empty(&gd_groups)) + /* in LIBCPG mode, gd_groups is not empty because of the groups we + add to "block" old versions of groupd */ + if ((group_mode == GROUP_LIBGROUP) && !list_empty(&gd_groups)) log_print("groups abandoned"); } diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c index 3f8457e..1d3a369 100644 --- a/group/dlm_controld/cpg.c +++ b/group/dlm_controld/cpg.c @@ -2005,6 +2005,31 @@ int setup_cpg(void) return -1; } +void close_cpg(void) +{ + cpg_error_t error; + struct cpg_name name; + int i = 0; + + if (!daemon_cpg_handle || cluster_down) + return; + + memset(&name, 0, sizeof(name)); + sprintf(name.value, "dlm:controld"); + name.length = strlen(name.value) + 1; + + retry: + error = cpg_leave(daemon_cpg_handle, &name); + if (error == CPG_ERR_TRY_AGAIN) { + sleep(1); + if (!(++i % 10)) + log_error("daemon cpg_leave error retrying"); + goto retry; + } + if (error != CPG_OK) + log_error("daemon cpg_leave error %d", error); +} + /* fs_controld has seen nodedown for nodeid; it's now ok for dlm to do recovery for the failed node */ diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h index c704b99..a84f541 100644 --- a/group/dlm_controld/dlm_daemon.h +++ b/group/dlm_controld/dlm_daemon.h @@ -67,6 +67,7 @@ extern int daemon_debug_opt; extern int daemon_quit; +extern int cluster_down; extern int poll_fencing; extern int poll_quorum; extern int poll_fs; @@ -235,6 +236,7 @@ int get_weight(int nodeid, char *lockspace); /* cpg.c */ int setup_cpg(void); +void close_cpg(void); void process_cpg(int ci); int set_protocol(void); void process_lockspace_changes(void); diff --git a/group/dlm_controld/group.c b/group/dlm_controld/group.c index ac6c354..c992683 100644 --- a/group/dlm_controld/group.c +++ b/group/dlm_controld/group.c @@ -300,15 +300,16 @@ int set_lockspace_nodes_group(struct lockspace *ls, int option, int *node_count, struct dlmc_node **nodes_out) { struct dlmc_node *nodes = NULL, *nodep; - int i; + int i, len; if (!ls->cb_member_count) goto out; - nodes = malloc(ls->cb_member_count * sizeof(struct dlmc_node)); + len = ls->cb_member_count * sizeof(struct dlmc_node); + nodes = malloc(len); if (!nodes) return -ENOMEM; - memset(nodes, 0, sizeof(*nodes)); + memset(nodes, 0, len); nodep = nodes; for (i = 0; i < ls->cb_member_count; i++) { diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c index b0eba12..f3ab1f1 100644 --- a/group/dlm_controld/main.c +++ b/group/dlm_controld/main.c @@ -157,7 +157,7 @@ static struct lockspace *create_ls(char *name) ls = malloc(sizeof(*ls)); if (!ls) goto out; - memset(ls, 0, sizeof(*ls)); + memset(ls, 0, sizeof(struct lockspace)); strncpy(ls->name, name, DLM_LOCKSPACE_LEN); INIT_LIST_HEAD(&ls->changes); @@ -860,6 +860,7 @@ void cluster_dead(int ci) { log_error("cluster is down, exiting"); daemon_quit = 1; + cluster_down = 1; } static void loop(void) @@ -999,6 +1000,8 @@ static void loop(void) out: if (cfgd_groupd_compat) close_groupd(); + if (group_mode == GROUP_LIBCPG) + close_cpg(); clear_configfs(); close_logging(); close_ccs(); @@ -1285,6 +1288,7 @@ void daemon_dump_save(void) int daemon_debug_opt; int daemon_quit; +int cluster_down; int poll_fencing; int poll_quorum; int poll_fs; diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c index 1fc06f6..839ff4a 100644 --- a/group/gfs_controld/cpg-new.c +++ b/group/gfs_controld/cpg-new.c @@ -3242,7 +3242,7 @@ void close_cpg(void) struct cpg_name name; int i = 0; - if (!cpg_handle_daemon) + if (!cpg_handle_daemon || cluster_down) return; memset(&name, 0, sizeof(name)); diff --git a/group/gfs_controld/cpg-old.c b/group/gfs_controld/cpg-old.c index 5949054..192a403 100644 --- a/group/gfs_controld/cpg-old.c +++ b/group/gfs_controld/cpg-old.c @@ -1182,7 +1182,7 @@ static int add_member(struct mountgroup *mg, int nodeid) if (!memb) return -ENOMEM; - memset(memb, 0, sizeof(*memb)); + memset(memb, 0, sizeof(struct mg_member)); memb->nodeid = nodeid; memb->jid = JID_INIT; @@ -2414,7 +2414,7 @@ void close_cpg_old(void) cpg_error_t error; int i = 0; - if (!cpg_handle_daemon) + if (!cpg_handle_daemon || cluster_down) return; memset(&name, 0, sizeof(name)); diff --git a/group/gfs_controld/gfs_daemon.h b/group/gfs_controld/gfs_daemon.h index c3d4688..7ae26ff 100644 --- a/group/gfs_controld/gfs_daemon.h +++ b/group/gfs_controld/gfs_daemon.h @@ -64,6 +64,7 @@ extern int daemon_debug_opt; extern int daemon_quit; +extern int cluster_down; extern int poll_dlm; extern int poll_ignore_plock; extern int plock_fd; diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c index 94d370d..b465239 100644 --- a/group/gfs_controld/main.c +++ b/group/gfs_controld/main.c @@ -1067,6 +1067,7 @@ void cluster_dead(int ci) { log_error("cluster is down, exiting"); daemon_quit = 1; + cluster_down = 1; } static void dlmcontrol_dead(int ci) @@ -1497,6 +1498,7 @@ void daemon_dump_save(void) int daemon_debug_opt; int daemon_quit; +int cluster_down; int poll_ignore_plock; int poll_dlm; int plock_fd;