public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* master - fenced: joining daemon cpg to bypass fencing
@ 2008-09-04 21:27 David Teigland
0 siblings, 0 replies; only message in thread
From: David Teigland @ 2008-09-04 21:27 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=5043d28a1a37bc51122246f9631dc8c4441011a8
Commit: 5043d28a1a37bc51122246f9631dc8c4441011a8
Parent: f00e7639798652daf6cce0dc7f9d8d3be0dae194
Author: David Teigland <teigland@redhat.com>
AuthorDate: Wed Sep 3 12:56:24 2008 -0500
Committer: David Teigland <teigland@redhat.com>
CommitterDate: Wed Sep 3 14:07:53 2008 -0500
fenced: joining daemon cpg to bypass fencing
When the fenced daemon starts, it checks for uncontrolled instances
of gfs/dlm, and if none are found, it joins a special "daemon" cpg (not
the fence domain cpg). This join simply tells fenced on other nodes
that the new node is in a cleanly reset state and they can skip fencing
it if it's currently a victim.
Currently, fencing is skipped as soon as the victim joins the cluster, but
this is not sufficient, since a node can join the cluster while it still has
uncontrolled gfs/dlm instances (in which case it still needs to be fenced).
In cluster2, the groupd cpg filled the role of this new fenced cpg in
advertising the clean/reset state of a node.
Signed-off-by: David Teigland <teigland@redhat.com>
---
fence/fenced/cpg.c | 120 ++++++++++++++++++++++++++++++++++++++++++-
fence/fenced/fd.h | 5 ++
fence/fenced/main.c | 134 ++++++++++++++++++++++++++++++++++++++++++++----
fence/fenced/recover.c | 22 ++++----
4 files changed, 259 insertions(+), 22 deletions(-)
diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
index ccebbd9..5b6c826 100644
--- a/fence/fenced/cpg.c
+++ b/fence/fenced/cpg.c
@@ -2,6 +2,9 @@
#include "config.h"
static unsigned int protocol_active[3] = {1, 0, 0};
+static cpg_handle_t cpg_handle_daemon; /* handle used for the "fenced:daemon" cpg; assigned in setup_cpg() */
+static struct cpg_address daemon_member_list[MAX_NODES]; /* most recent member list copied in by confchg_cb_daemon() */
+static int daemon_member_list_entries; /* entry count recorded alongside daemon_member_list */
struct member {
struct list_head list;
@@ -1308,7 +1311,6 @@ int fd_join(struct fd *fd)
}
if (error != CPG_OK) {
log_error("cpg_join error %d", error);
- cpg_finalize(h);
goto fail;
}
@@ -1349,6 +1351,122 @@ int fd_leave(struct fd *fd)
return 0;
}
+/* process_cpg(), setup_cpg(), close_cpg() are for the "daemon" cpg which
+ tracks the presence of other daemons; it's not the fenced domain cpg.
+ Joining this cpg tells others that we don't have uncontrolled dlm/gfs
+ kernel state and they can skip fencing us if we're a victim. (We have
+ to check for that uncontrolled state before calling setup_cpg, obviously.) */
+
+static void deliver_cb_daemon(cpg_handle_t handle, struct cpg_name *group_name,
+ uint32_t nodeid, uint32_t pid, void *data, int len)
+{ /* Message-delivery callback registered for the daemon cpg; the body is empty, so delivered data is ignored. */
+}
+
+static void confchg_cb_daemon(cpg_handle_t handle, struct cpg_name *group_name,
+ struct cpg_address *member_list, int member_list_entries,
+ struct cpg_address *left_list, int left_list_entries,
+ struct cpg_address *joined_list, int joined_list_entries)
+{ /* Configuration-change callback: overwrites the cached daemon member list with member_list; left_list and joined_list are not used. */
+ memset(&daemon_member_list, 0, sizeof(daemon_member_list)); /* clear stale entries before copying the new list */
+ memcpy(&daemon_member_list, member_list, /* NOTE(review): nothing here checks member_list_entries against MAX_NODES, the array's capacity */
+ member_list_entries * sizeof(struct cpg_address));
+ daemon_member_list_entries = member_list_entries;
+}
+
+static cpg_callbacks_t cpg_callbacks_daemon = { /* callback table passed to cpg_initialize() in setup_cpg() */
+ .cpg_deliver_fn = deliver_cb_daemon,
+ .cpg_confchg_fn = confchg_cb_daemon,
+};
+
+void process_cpg(int ci) /* Runs cpg_dispatch(CPG_DISPATCH_ALL) on the daemon cpg handle; ci is not used in this function. */
+{
+ cpg_error_t error;
+
+ error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL);
+ if (error != CPG_OK)
+ log_error("daemon cpg_dispatch error %d", error); /* a failure is only logged; nothing is reported back to the caller */
+}
+
+int in_daemon_member_list(int nodeid) /* Returns 1 if nodeid matches an entry in the cached daemon cpg member list, otherwise 0. */
+{
+ int i;
+
+ cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL); /* presumably lets a pending confchg callback refresh the list before the lookup; the return value is ignored */
+
+ for (i = 0; i < daemon_member_list_entries; i++) {
+ if (daemon_member_list[i].nodeid == nodeid)
+ return 1;
+ }
+ return 0;
+}
+
+int setup_cpg(void) /* Initializes a cpg handle and joins the "fenced:daemon" group; returns the fd obtained from cpg_fd_get() on success, -1 on failure. */
+{
+ cpg_error_t error;
+ cpg_handle_t h;
+ struct cpg_name name;
+ int i = 0, f;
+
+ error = cpg_initialize(&h, &cpg_callbacks_daemon);
+ if (error != CPG_OK) {
+ log_error("daemon cpg_initialize error %d", error);
+ goto fail;
+ }
+
+ cpg_fd_get(h, &f); /* NOTE(review): return value is not checked and f has no initializer */
+
+ cpg_handle_daemon = h; /* stored before the join; NOTE(review): not cleared again on the fail path below */
+
+ memset(&name, 0, sizeof(name));
+ sprintf(name.value, "fenced:daemon");
+ name.length = strlen(name.value) + 1; /* length counts the terminating NUL */
+
+ retry:
+ error = cpg_join(h, &name);
+ if (error == CPG_ERR_TRY_AGAIN) { /* wait one second and try again, with no upper bound on attempts */
+ sleep(1);
+ if (!(++i % 10)) /* log only on every 10th retry */
+ log_error("daemon cpg_join error retrying");
+ goto retry;
+ }
+ if (error != CPG_OK) {
+ log_error("daemon cpg_join error %d", error);
+ goto fail;
+ }
+
+ log_debug("setup_cpg %d", f);
+ return f;
+
+ fail:
+ cpg_finalize(h); /* NOTE(review): also reached when cpg_initialize() failed, so h may not hold a valid handle here */
+ return -1;
+}
+
+void close_cpg(void) /* Leaves the "fenced:daemon" group; errors are only logged. NOTE(review): this function does not call cpg_finalize() on the handle. */
+{
+ cpg_error_t error;
+ struct cpg_name name;
+ int i = 0;
+
+ if (!cpg_handle_daemon) /* nothing to leave if the handle was never set */
+ return;
+
+ memset(&name, 0, sizeof(name));
+ sprintf(name.value, "fenced:daemon"); /* same group name that setup_cpg() joins */
+ name.length = strlen(name.value) + 1; /* length counts the terminating NUL */
+
+ retry:
+ error = cpg_leave(cpg_handle_daemon, &name);
+ if (error == CPG_ERR_TRY_AGAIN) { /* wait one second and try again, with no upper bound on attempts */
+ sleep(1);
+ if (!(++i % 10)) /* log only on every 10th retry */
+ log_error("daemon cpg_leave error retrying");
+ goto retry;
+ }
+ if (error != CPG_OK)
+ log_error("daemon cpg_leave error %d", error);
+}
+
int set_node_info(struct fd *fd, int nodeid, struct fenced_node *nodeinfo)
{
struct node_history *node;
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 21cac39..a9dacbd 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -13,6 +13,7 @@
#include <time.h>
#include <sched.h>
#include <limits.h>
+#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -203,6 +204,9 @@ int read_ccs(struct fd *fd);
/* cpg.c */
+void process_cpg(int ci);
+int setup_cpg(void);
+void close_cpg(void);
void free_cg(struct change *cg);
void node_history_fence(struct fd *fd, int victim, int master, int how,
uint64_t mastertime);
@@ -216,6 +220,7 @@ int set_node_info(struct fd *fd, int nodeid, struct fenced_node *node);
int set_domain_info(struct fd *fd, struct fenced_domain *domain);
int set_domain_nodes(struct fd *fd, int option, int *node_count,
struct fenced_node **nodes);
+int in_daemon_member_list(int nodeid);
/* group.c */
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index ae5f662..01937f1 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -12,6 +12,7 @@ static struct client *client = NULL;
static struct pollfd *pollfd = NULL;
static pthread_t query_thread;
static pthread_mutex_t query_mutex;
+static struct list_head controlled_entries;
struct client {
int fd;
@@ -602,6 +603,108 @@ static int setup_queries(void)
return 0;
}
+struct controlled_entry { /* one directory path to be examined by check_uncontrolled_entries(); linked on the controlled_entries list */
+ struct list_head list;
+ char path[PATH_MAX+1]; /* one byte larger than the PATH_MAX bytes that register_controlled_dir() copies in */
+};
+
+static void register_controlled_dir(char *path) /* Allocates an entry holding a copy of path and adds it to the controlled_entries list. */
+{
+ struct controlled_entry *ce;
+
+ ce = malloc(sizeof(struct controlled_entry));
+ if (!ce) /* NOTE(review): on allocation failure the path is dropped without any log or error return */
+ return;
+ memset(ce, 0, sizeof(struct controlled_entry));
+ strncpy(ce->path, path, PATH_MAX); /* at most PATH_MAX bytes into a zeroed PATH_MAX+1 buffer, so the copy stays NUL-terminated (longer paths are truncated) */
+ list_add(&ce->list, &controlled_entries);
+}
+
+static int ignore_nolock(char *sysfs_dir, char *table) /* Returns 1 if <sysfs_dir>/<table>/lock_module/proto_name cannot be opened for reading, 0 if it can. */
+{
+ char path[PATH_MAX];
+ int fd;
+
+ memset(path, 0, PATH_MAX);
+
+ snprintf(path, PATH_MAX, "%s/%s/lock_module/proto_name", /* output is bounded to PATH_MAX; truncation is not checked */
+ sysfs_dir, table);
+
+ /* lock_nolock doesn't create the "lock_module" dir at all,
+ so we'll fail to open this */
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) /* any open failure is treated as the lock_nolock case */
+ return 1;
+
+ close(fd); /* the file is only probed for existence; its contents are not read */
+ return 0;
+}
+
+static int check_controlled_dir(char *path) /* Counts and logs the entries found in directory path; returns that count, or 0 if the directory cannot be opened. */
+{
+ DIR *d;
+ struct dirent *de;
+ int count = 0;
+
+ d = opendir(path);
+ if (!d) /* a directory that cannot be opened contributes no entries */
+ return 0;
+
+ while ((de = readdir(d))) {
+ if (de->d_name[0] == '.') /* skips ".", ".." and any other name starting with a dot */
+ continue;
+
+ if (strstr(path, "fs/gfs") && ignore_nolock(path, de->d_name)) /* substring test matches paths containing fs/gfs, including fs/gfs2; entries for which ignore_nolock() returns 1 are not counted */
+ continue;
+
+ log_error("found uncontrolled entry %s/%s", path, de->d_name);
+ count++;
+ }
+ closedir(d);
+
+ return count;
+}
+
+/* Joining the "fenced:daemon" cpg (in setup_cpg()) tells fenced on other
+ nodes that we are in a "clean state", and don't need fencing. So, if
+ we're a pending fence victim on another node, they'll skip fencing us
+ once we start fenced and join the "daemon" cpg (it's not the fence domain
+ cpg which we join when fence_tool join is run). This "daemon" cpg is just
+ to notify others that we have no uncontrolled gfs/dlm objects.
+ (Conceptually, we could use the fence domain cpg for this purpose instead,
+ but that would require processing domain membership changes during
+ fence_victims(), which would be a major change in the way the daemon works.)
+
+ So, if we (the local node) are *not* in a clean state, we don't join the
+ daemon cpg and we exit; we still need to be fenced. If we are starting
+ up and find that instances of gfs/dlm in the kernel have been previously
+ abandoned, that's an unclean, unreset state, and we still need fencing. */
+
+static int check_uncontrolled_entries(void) /* Checks every registered directory; returns -1 if any of them reported an entry, 0 otherwise. */
+{
+ struct controlled_entry *ce;
+ int count = 0;
+
+ list_for_each_entry(ce, &controlled_entries, list) {
+ if (strncmp(ce->path, "-", 1)) /* nonzero when the path does not begin with '-': any such entry skips the defaults. NOTE(review): the -r usage text says "-" skips the defaults, but a list holding only "-" falls through and registers them; confirm intent */
+ goto skip_default;
+ }
+
+ /* the default dirs to check */
+ register_controlled_dir("/sys/kernel/dlm");
+ register_controlled_dir("/sys/fs/gfs2");
+ register_controlled_dir("/sys/fs/gfs");
+
+ skip_default:
+ list_for_each_entry(ce, &controlled_entries, list) /* every registered path is checked, including any that begin with '-' */
+ count += check_controlled_dir(ce->path);
+
+ if (count)
+ return -1;
+ return 0;
+}
+
void cluster_dead(int ci)
{
log_error("cluster is down, exiting");
@@ -634,6 +737,15 @@ static void loop(void)
setup_logging();
+ rv = check_uncontrolled_entries();
+ if (rv < 0)
+ goto out;
+
+ rv = setup_cpg();
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_cpg, cluster_dead);
+
group_mode = GROUP_LIBCPG;
if (cfgd_groupd_compat) {
@@ -648,15 +760,6 @@ static void loop(void)
}
log_debug("group_mode %d compat %d", group_mode, cfgd_groupd_compat);
- if (group_mode == GROUP_LIBCPG) {
- /*
- rv = setup_cpg();
- if (rv < 0)
- goto out;
- client_add(rv, process_cpg, cluster_dead);
- */
- }
-
for (;;) {
rv = poll(pollfd, client_maxi + 1, -1);
if (rv == -1 && errno == EINTR) {
@@ -692,6 +795,7 @@ static void loop(void)
out:
if (cfgd_groupd_compat)
close_groupd();
+ close_cpg();
close_logging();
close_ccs();
close_cman();
@@ -757,7 +861,10 @@ static void print_usage(void)
printf(" 1: use libgroup for compat with cluster2/rhel5\n");
printf(" 2: use groupd to detect old, or mode 1, nodes that\n"
" require compat, use libcpg if none found\n");
- printf(" -c All nodes are in a clean state to start\n");
+ printf(" -r <path> Register a directory that needs to be empty for\n");
+ printf(" the daemon to start. \"-\" to skip default directories\n");
+ printf(" /sys/fs/gfs, /sys/fs/gfs2, /sys/kernel/dlm\n");
+ printf(" -c All nodes are in a clean state to start; do no startup fencing\n");
printf(" -s Skip startup fencing of nodes with no defined fence methods\n");
printf(" -j <secs> Post-join fencing delay (default %d)\n", DEFAULT_POST_JOIN_DELAY);
printf(" -f <secs> Post-fail fencing delay (default %d)\n", DEFAULT_POST_FAIL_DELAY);
@@ -772,7 +879,7 @@ static void print_usage(void)
printf("\n");
}
-#define OPTION_STRING "L:g:cj:f:Dn:O:hVSs"
+#define OPTION_STRING "L:g:cj:f:Dn:O:hVSse:r:"
static void read_arguments(int argc, char **argv)
{
@@ -830,6 +937,10 @@ static void read_arguments(int argc, char **argv)
cfgd_override_path = strdup(optarg);
break;
+ case 'r':
+ register_controlled_dir(optarg);
+ break;
+
case 'h':
print_usage();
exit(EXIT_SUCCESS);
@@ -879,6 +990,7 @@ static void set_oom_adj(int val)
int main(int argc, char **argv)
{
INIT_LIST_HEAD(&domains);
+ INIT_LIST_HEAD(&controlled_entries);
init_logging();
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index d70a876..ecb13d5 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -62,7 +62,8 @@ static int reduce_victims(struct fd *fd)
num_victims = list_count(&fd->victims);
list_for_each_entry_safe(node, safe, &fd->victims, list) {
- if (is_cman_member(node->nodeid)) {
+ if (is_cman_member(node->nodeid) &&
+ in_daemon_member_list(node->nodeid)) {
log_debug("reduce victim %s", node->name);
victim_done(fd, node->nodeid, VIC_DONE_MEMBER);
list_del(&node->list);
@@ -235,23 +236,24 @@ void fence_victims(struct fd *fd)
struct node *node;
int error;
int override = -1;
- int member, fenced;
+ int cman_member, cpg_member, ext;
while (!list_empty(&fd->victims)) {
node = list_entry(fd->victims.next, struct node, list);
- member = is_cman_member(node->nodeid);
+ cman_member = is_cman_member(node->nodeid);
+ cpg_member = in_daemon_member_list(node->nodeid);
if (group_mode == GROUP_LIBCPG)
- fenced = is_fenced_external(fd, node->nodeid);
+ ext = is_fenced_external(fd, node->nodeid);
else
- fenced = 0;
+ ext = 0;
- if (member || fenced) {
+ if ((cman_member && cpg_member) || ext) {
log_debug("averting fence of node %s "
- "member %d external %d",
- node->name, member, fenced);
- victim_done(fd, node->nodeid, member ? VIC_DONE_MEMBER :
- VIC_DONE_EXTERNAL);
+ "cman member %d cpg member %d external %d",
+ node->name, cman_member, cpg_member, ext);
+ victim_done(fd, node->nodeid,
+ ext ? VIC_DONE_EXTERNAL : VIC_DONE_MEMBER);
list_del(&node->list);
free(node);
continue;
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-09-03 21:29 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-04 21:27 master - fenced: joining daemon cpg to bypass fencing David Teigland
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).