public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* master - rgmanager: Permit careful restart w/o disturbing services
@ 2008-09-24 17:58 Lon Hohberger
0 siblings, 0 replies; only message in thread
From: Lon Hohberger @ 2008-09-24 17:58 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=9f0d36f3fee3c7c00e2f5cf7cbd9ea878220a69d
Commit: 9f0d36f3fee3c7c00e2f5cf7cbd9ea878220a69d
Parent: 63f9fc14ecee65b107bc0fe2d8c745839a24fbd0
Author: Lon Hohberger <lhh@redhat.com>
AuthorDate: Mon Sep 8 11:59:36 2008 -0400
Committer: Lon Hohberger <lhh@redhat.com>
CommitterDate: Wed Sep 24 13:39:56 2008 -0400
rgmanager: Permit careful restart w/o disturbing services
... e.g. for in-place upgrades of rgmanager.
Note: Requires service-freeze patch
Example use:
* Manually freeze all services on a node.
* Stop rgmanager (service rgmanager stop)
* Upgrade rgmanager package
* Manually start rgmanager from the command line
'clurgmgrd -N'
---
rgmanager/include/resgroup.h | 2 +
rgmanager/man/clurgmgrd.8 | 13 +++++++++-
rgmanager/man/clusvcadm.8 | 28 +++++++++++++++++++++++
rgmanager/src/clulib/rg_strings.c | 23 ++++++++++++------
rgmanager/src/daemons/fo_domain.c | 2 +-
rgmanager/src/daemons/groups.c | 23 +++++++++++-------
rgmanager/src/daemons/main.c | 11 +++++---
rgmanager/src/daemons/rg_state.c | 2 +-
rgmanager/src/daemons/slang_event.c | 9 ++++++-
rgmanager/src/resources/default_event_script.sl | 3 +-
10 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index a4a55b0..c79924c 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -137,6 +137,8 @@ int handle_start_remote_req(char *svcName, int req);
#define RG_FLAG_FROZEN (1<<0) /** Resource frozen */
const char *rg_state_str(int val);
+const char *rg_flag_str(int val);
+const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
int rg_state_str_to_id(const char *val);
const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
const char *agent_op_str(int val);
diff --git a/rgmanager/man/clurgmgrd.8 b/rgmanager/man/clurgmgrd.8
index bbde0f2..7b43925 100644
--- a/rgmanager/man/clurgmgrd.8
+++ b/rgmanager/man/clurgmgrd.8
@@ -26,5 +26,16 @@ the member has been fenced whenever fencing is available.
When a cluster member determines that it is no longer in the cluster quorum,
the service manager stops all services and waits for a new quorum to form.
+.SH "COMMAND LINE OPTIONS"
+.IP \-f
+Run in the foreground (do not fork).
+.IP \-d
+Enable debug-level logging.
+.IP \-N
+Do not perform stop-before-start. Combined with the
+.I -Z
+flag to clusvcadm, this can be used to allow rgmanager to be upgraded
+without stopping a given user service or set of services.
+
.SH "SEE ALSO"
-clurmtabd(8), ccsd(8)
+clusvcadm(8), ccsd(8)
diff --git a/rgmanager/man/clusvcadm.8 b/rgmanager/man/clusvcadm.8
index 20ae823..155ac88 100644
--- a/rgmanager/man/clusvcadm.8
+++ b/rgmanager/man/clusvcadm.8
@@ -22,6 +22,12 @@ clusvcadm \- Cluster User Service Administration Utility
.B [\-R
.I <service>
.B ]
+.B [\-Z
+.I <service>
+.B ]
+.B [\-U
+.I <service>
+.B ]
.B [\-s
.I <service>
.B ]
@@ -73,6 +79,19 @@ Restarts the user service named
.I
service
on the cluster member on which it is currently running.
+.IP "\-Z <service>"
+Freezes the service named
+.I
+service
+on the cluster member on which it is currently running. This will
+prevent status checks of the service as well as failover in the
+event the node fails or rgmanager is stopped.
+.IP "\-U <service>"
+Unfreezes the user service named
+.I
+service
+on the cluster member on which it is currently running. This will
+re-enable status checks.
.IP "\-S"
Display whether each of the active service managers is locked or not. This
can be used to verify the correct operation of the \fB-l\fR and \fB-u\fR
@@ -89,5 +108,14 @@ again.
.IP \-v
Display version information and exit.
+.SH "NOTES"
+Executing
+.I -U
+(unfreeze) on a service which was frozen in the
+.B started
+state while the service owner is offline results in an undefined
+(and possibly dangerous) condition. Manually ensure all resources are
+clear before doing this.
+
.SH "SEE ALSO"
clustat(8)
diff --git a/rgmanager/src/clulib/rg_strings.c b/rgmanager/src/clulib/rg_strings.c
index 6641fc5..8c613bf 100644
--- a/rgmanager/src/clulib/rg_strings.c
+++ b/rgmanager/src/clulib/rg_strings.c
@@ -145,12 +145,12 @@ rg_search_table_by_str(const struct string_val *table, const char *val)
}
-
const char *
rg_strerror(int val)
{
return rg_search_table(rg_error_strings, val);
}
+
const char *
rg_state_str(int val)
@@ -165,6 +165,19 @@ rg_state_str_to_id(const char *val)
}
+const char *
+rg_req_str(int val)
+{
+ return rg_search_table(rg_req_strings, val);
+}
+
+
+const char *
+rg_flag_str(int val)
+{
+ return rg_search_table(rg_flags_strings, val);
+}
+
const char *
rg_flags_str(char *flags_string, size_t size, int val, char *separator)
@@ -172,7 +185,7 @@ rg_flags_str(char *flags_string, size_t size, int val, char *separator)
int i;
const char *string;
- for (i = 0; i < sizeof(uint32_t); i++) {
+ for (i = 0; i < (sizeof(val) * 8); i++) {
if ( val & (1 << i)) {
if (strlen(flags_string))
strncat(flags_string, separator, size - (strlen(flags_string) + strlen(separator) + 1));
@@ -183,12 +196,6 @@ rg_flags_str(char *flags_string, size_t size, int val, char *separator)
return flags_string;
}
-const char *
-rg_req_str(int val)
-{
- return rg_search_table(rg_req_strings, val);
-}
-
const char *
agent_op_str(int val)
diff --git a/rgmanager/src/daemons/fo_domain.c b/rgmanager/src/daemons/fo_domain.c
index 97f244c..6104c40 100644
--- a/rgmanager/src/daemons/fo_domain.c
+++ b/rgmanager/src/daemons/fo_domain.c
@@ -354,7 +354,7 @@ node_domain_set(fod_t **domains, char *name, int **ret, int *retlen, int *flags)
int ts_count;
fod_node_t *fodn;
fod_t *domain;
- int rv = -1, found = 0;
+ int found = 0;
list_for(domains, domain, x) {
if (!strcasecmp(domain->fd_name, name)) {
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 3927479..f656977 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -12,6 +12,7 @@
#include <reslist.h>
#include <assert.h>
#include <event.h>
+#include <sets.h>
/* Use address field in this because we never use it internally,
and there is no extra space in the cman_node_t type.
@@ -410,15 +411,15 @@ check_depend_safe(char *rg_name)
int
check_rdomain_crash(char *svcName)
{
- int *nodes = NULL, nodecount;
- int *fd_nodes = NULL, fd_nodecount, fl;
- int *isect = NULL, icount;
+ int *nodes = NULL, nodecount = 0;
+ int *fd_nodes = NULL, fd_nodecount = 0, fl = 0;
+ int *isect = NULL, icount = 0;
char fd_name[256];
if (_group_property(svcName, "domain", fd_name, sizeof(fd_name)) != 0)
goto out_free;
- if (node_domain_set(_domains, fd_name, &fd_nodes,
+ if (node_domain_set(&_domains, fd_name, &fd_nodes,
&fd_nodecount, &fl) != 0)
goto out_free;
@@ -1597,7 +1598,7 @@ dump_config_version(FILE *fp)
resource group modification.
*/
int
-init_resource_groups(int reconfigure)
+init_resource_groups(int reconfigure, int do_init)
{
int fd, x, y, cnt;
@@ -1724,10 +1725,14 @@ init_resource_groups(int reconfigure)
clulog(LOG_INFO, "Restarting changed resources.\n");
do_condstarts();
} else {
- /* Do initial stop-before-start */
- clulog(LOG_INFO, "Initializing Services\n");
- rg_doall(RG_INIT, 1, "Initializing %s\n");
- clulog(LOG_INFO, "Services Initialized\n");
+ if (do_init) {
+ /* Do initial stop-before-start */
+ clulog(LOG_INFO, "Initializing Services\n");
+ rg_doall(RG_INIT, 1, "Initializing %s\n");
+ clulog(LOG_INFO, "Services Initialized\n");
+ } else {
+ clulog(LOG_INFO, "Skipping stop-before-start: overridden by administrator\n");
+ }
rg_set_initialized();
}
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 9c4f842..89bdcd1 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -31,7 +31,7 @@ void set_transition_throttling(int);
void node_event(int, int, int, int);
void node_event_q(int, int, int, int);
int daemon_init(char *);
-int init_resource_groups(int);
+int init_resource_groups(int, int);
void kill_resource_groups(void);
void set_my_id(int);
void flag_shutdown(int sig);
@@ -924,7 +924,7 @@ void dump_thread_states(FILE *);
int
main(int argc, char **argv)
{
- int rv;
+ int rv, do_init = 1;
char foreground = 0, wd = 1;
cman_node_t me;
msgctx_t *cluster_ctx;
@@ -932,7 +932,7 @@ main(int argc, char **argv)
pthread_t th;
cman_handle_t clu = NULL;
- while ((rv = getopt(argc, argv, "wfd")) != EOF) {
+ while ((rv = getopt(argc, argv, "wfdN")) != EOF) {
switch (rv) {
case 'w':
wd = 0;
@@ -940,6 +940,9 @@ main(int argc, char **argv)
case 'd':
debug = 1;
break;
+ case 'N':
+ do_init = 0;
+ break;
case 'f':
foreground = 1;
break;
@@ -1005,7 +1008,7 @@ main(int argc, char **argv)
configure_rgmanager(-1, debug);
clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
- if (init_resource_groups(0) != 0) {
+ if (init_resource_groups(0, do_init) != 0) {
clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
return -1;
}
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index c57b148..14a1d5e 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1551,7 +1551,7 @@ _svc_freeze(char *svcName, int enabled)
default:
rg_unlock(&lockp);
- return RG_EFAIL;
+ return RG_EAGAIN;
break;
}
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index 737e01a..01fff05 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -248,7 +248,14 @@ sl_service_status(char *svcName)
return;
}
- state_str = strdup(rg_state_str(svcStatus.rs_state));
+ if (svcStatus.rs_flags & RG_FLAG_FROZEN) {
+ /* Special case: "frozen" is a flag, but user scripts should
+ treat it as a state. */
+ state_str = strdup(rg_flag_str(RG_FLAG_FROZEN));
+ } else {
+ state_str = strdup(rg_state_str(svcStatus.rs_state));
+ }
+
if (!state_str) {
SLang_verror(SL_RunTime_Error,
"%s: Failed to duplicate state of %s",
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index df9bce0..3f1379a 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -36,7 +36,8 @@ define move_or_start(service, node_list)
return ERR_DOMAIN;
}
- if (((event_type != EVENT_USER) and (state == "disabled")) or (state == "failed")) {
+ if (((event_type != EVENT_USER) and (state == "disabled")) or
+ ((state == "failed") or (state == "frozen"))) {
%
% Commenting out this block will -not- allow you to
% recover failed services from event scripts. Sorry.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-09-24 17:57 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-24 17:58 master - rgmanager: Permit careful restart w/o disturbing services Lon Hohberger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).