public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* RHEL5 - rgmanager: Permit careful restart w/o disturbing services
@ 2008-08-26 14:33 Lon Hohberger
  0 siblings, 0 replies; only message in thread
From: Lon Hohberger @ 2008-08-26 14:33 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=acf1eaef5132ef56bb29e333bc9ee35117b7da1c
Commit:        acf1eaef5132ef56bb29e333bc9ee35117b7da1c
Parent:        c84780c201b7efdb63dffe0e6182f6b1b2d054a7
Author:        Lon Hohberger <lhh@redhat.com>
AuthorDate:    Mon Aug 25 17:43:25 2008 -0400
Committer:     Lon Hohberger <lhh@redhat.com>
CommitterDate: Mon Aug 25 17:43:25 2008 -0400

rgmanager: Permit careful restart w/o disturbing services

... e.g. for in-place upgrades of rgmanager.

Note: Requires service-freeze patch

Example use:
 * Manually freeze all services on a node.
 * Stop rgmanager (service rgmanager stop)
 * Upgrade rgmanager package
 * Manually start rgmanager from the command line
   'clurgmgrd -N'
---
 rgmanager/include/resgroup.h                    |    1 +
 rgmanager/man/clurgmgrd.8                       |   13 +++++++++-
 rgmanager/man/clusvcadm.8                       |   28 +++++++++++++++++++++++
 rgmanager/src/clulib/rg_strings.c               |   27 ++++++++--------------
 rgmanager/src/daemons/groups.c                  |   14 +++++++----
 rgmanager/src/daemons/main.c                    |   11 +++++---
 rgmanager/src/daemons/rg_state.c                |    2 +-
 rgmanager/src/daemons/slang_event.c             |    9 ++++++-
 rgmanager/src/resources/default_event_script.sl |    3 +-
 9 files changed, 78 insertions(+), 30 deletions(-)

diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index 36b002f..86ffbb3 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -120,6 +120,7 @@ int handle_start_remote_req(char *svcName, int req);
 #define RG_FLAG_FROZEN			(1<<0)	/** Resource frozen */
 
 const char *rg_state_str(int val);
+const char *rg_flag_str(int val);
 const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
 int rg_state_str_to_id(const char *val);
 const char *agent_op_str(int val);
diff --git a/rgmanager/man/clurgmgrd.8 b/rgmanager/man/clurgmgrd.8
index bbde0f2..7b43925 100644
--- a/rgmanager/man/clurgmgrd.8
+++ b/rgmanager/man/clurgmgrd.8
@@ -26,5 +26,16 @@ the member has been fenced whenever fencing is available.
 When a cluster member determines that it is no longer in the cluster quorum,
 the service manager stops all services and waits for a new quorum to form.
 
+.SH "COMMAND LINE OPTIONS"
+.IP \-f
+Run in the foreground (do not fork).
+.IP \-d
+Enable debug-level logging.
+.IP \-N
+Do not perform stop-before-start.  Combined with the
+.I -Z
+flag to clusvcadm, this can be used to allow rgmanager to be upgraded
+without stopping a given user service or set of services.
+
 .SH "SEE ALSO"
-clurmtabd(8), ccsd(8)
+clusvcadm(8), ccsd(8)
diff --git a/rgmanager/man/clusvcadm.8 b/rgmanager/man/clusvcadm.8
index 20ae823..155ac88 100644
--- a/rgmanager/man/clusvcadm.8
+++ b/rgmanager/man/clusvcadm.8
@@ -22,6 +22,12 @@ clusvcadm \- Cluster User Service Administration Utility
 .B [\-R
 .I <service>
 .B ]
+.B [\-Z
+.I <service>
+.B ]
+.B [\-U
+.I <service>
+.B ]
 .B [\-s
 .I <service>
 .B ]
@@ -73,6 +79,19 @@ Restarts the user service named
 .I
 service
 on the cluster member on which it is currently running.
+.IP "\-Z <service>"
+Freezes the user service named
+.I
+service
+on the cluster member on which it is currently running.  This will
+prevent status checks of the service as well as failover in the
+event the node fails or rgmanager is stopped.
+.IP "\-U <service>"
+Unfreezes the user service named
+.I
+service
+on the cluster member on which it is currently running.  This will
+re-enable status checks.
 .IP "\-S"
 Display whether each of the active service managers is locked or not.  This
 can be used to verify the correct operation of the \fB-l\fR and \fB-u\fR 
@@ -89,5 +108,14 @@ again.
 .IP \-v
 Display version information and exit.
 
+.SH "NOTES"
+Executing
+.I -U
+(unfreeze) on a service which was frozen in the 
+.B started
+state while the service owner is offline results in an undefined
+(and possibly dangerous) condition.  Manually ensure all resources are
+clear before doing this.
+
 .SH "SEE ALSO"
 clustat(8)
diff --git a/rgmanager/src/clulib/rg_strings.c b/rgmanager/src/clulib/rg_strings.c
index a215eca..9047f0f 100644
--- a/rgmanager/src/clulib/rg_strings.c
+++ b/rgmanager/src/clulib/rg_strings.c
@@ -150,26 +150,12 @@ rg_search_table_by_str(const struct string_val *table, const char *val)
 }
 
 
-
-static inline const char *
-rg_flag_search_table(const struct string_val *table, int val)
-{
-	int x;
-
-	for (x = 0; table[x].str != NULL; x++) {
-		if (table[x].val == val) {
-			return table[x].str;
-		}
-	}
-
-	return "Unknown";
-}
-
 const char *
 rg_strerror(int val)
 {
 	return rg_search_table(rg_error_strings, val);
 }
+
 	
 const char *
 rg_state_str(int val)
@@ -185,20 +171,27 @@ rg_state_str_to_id(const char *val)
 }
 
 
-
 const char *
 rg_req_str(int val)
 {
 	return rg_search_table(rg_req_strings, val);
 }
 
+
+const char *
+rg_flag_str(int val)
+{
+	return rg_search_table(rg_flags_strings, val);
+}
+
+
 const char *
 rg_flags_str(char *flags_string, size_t size, int val, char *separator)
 {
 	int i;
 	const char *string;
 
-	for (i = 0; i < sizeof(uint32_t); i++) {
+	for (i = 0; i < (sizeof(val) * 8); i++) {
 		if ( val & (1 << i)) {
 			if (strlen(flags_string))
 				strncat(flags_string, separator, size - (strlen(flags_string) + strlen(separator) + 1));
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 2d8ae79..921cf99 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -1615,7 +1615,7 @@ dump_config_version(FILE *fp)
   resource group modification.
  */
 int
-init_resource_groups(int reconfigure)
+init_resource_groups(int reconfigure, int do_init)
 {
 	int fd, x, y, cnt;
 
@@ -1742,10 +1742,14 @@ init_resource_groups(int reconfigure)
 		clulog(LOG_INFO, "Restarting changed resources.\n");
 		do_condstarts();
 	} else {
-		/* Do initial stop-before-start */
-		clulog(LOG_INFO, "Initializing Services\n");
-		rg_doall(RG_INIT, 1, "Initializing %s\n");
-		clulog(LOG_INFO, "Services Initialized\n");
+		if (do_init) {
+			/* Do initial stop-before-start */
+			clulog(LOG_INFO, "Initializing Services\n");
+			rg_doall(RG_INIT, 1, "Initializing %s\n");
+			clulog(LOG_INFO, "Services Initialized\n");
+		} else {
+			clulog(LOG_INFO, "Skipping stop-before-start: overridden by administrator\n");
+		}
 		rg_set_initialized();
 	}
 
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index a3d3441..8401bca 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -49,7 +49,7 @@ int configure_rgmanager(int ccsfd, int debug);
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
 int daemon_init(char *);
-int init_resource_groups(int);
+int init_resource_groups(int, int);
 void kill_resource_groups(void);
 void set_my_id(int);
 void flag_shutdown(int sig);
@@ -941,7 +941,7 @@ void dump_thread_states(FILE *);
 int
 main(int argc, char **argv)
 {
-	int rv;
+	int rv, do_init = 1;
 	char foreground = 0, wd = 1;
 	cman_node_t me;
 	msgctx_t *cluster_ctx;
@@ -949,7 +949,7 @@ main(int argc, char **argv)
 	pthread_t th;
 	cman_handle_t clu = NULL;
 
-	while ((rv = getopt(argc, argv, "wfd")) != EOF) {
+	while ((rv = getopt(argc, argv, "wfdN")) != EOF) {
 		switch (rv) {
 		case 'w':
 			wd = 0;
@@ -957,6 +957,9 @@ main(int argc, char **argv)
 		case 'd':
 			debug = 1;
 			break;
+		case 'N':
+			do_init = 0;
+			break;
 		case 'f':
 			foreground = 1;
 			break;
@@ -1022,7 +1025,7 @@ main(int argc, char **argv)
 	configure_rgmanager(-1, debug);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
-	if (init_resource_groups(0) != 0) {
+	if (init_resource_groups(0, do_init) != 0) {
 		clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
 		return -1;
 	}
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index e99fc07..ad79038 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1577,7 +1577,7 @@ _svc_freeze(char *svcName, int enabled)
 
 	default:
 		rg_unlock(&lockp);
-		return RG_EFAIL;
+		return RG_EAGAIN;
 		break;
 	}
 
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index 88e9925..c3c337c 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -287,7 +287,14 @@ sl_service_status(char *svcName)
 		return;
 	}
 
-	state_str = strdup(rg_state_str(svcStatus.rs_state));
+	if (svcStatus.rs_flags & RG_FLAG_FROZEN) {
+		/* Special case: "frozen" is a flag, but user scripts should
+		   treat it as a state. */
+		state_str = strdup(rg_flag_str(RG_FLAG_FROZEN));
+	} else {
+		state_str = strdup(rg_state_str(svcStatus.rs_state));
+	}
+
 	if (!state_str) {
 		SLang_verror(SL_RunTime_Error,
 			     "%s: Failed to duplicate state of %s",
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index 327f8c9..196eb13 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -36,7 +36,8 @@ define move_or_start(service, node_list)
 		return ERR_DOMAIN;
 	}
 
-	if (((event_type != EVENT_USER) and (state == "disabled")) or (state == "failed")) {
+	if (((event_type != EVENT_USER) and (state == "disabled")) or
+            ((state == "failed") or (state == "frozen"))) {
 		%
 		% Commenting out this block will -not- allow you to
 		% recover failed services from event scripts.  Sorry.


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-08-26 14:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-26 14:33 RHEL5 - rgmanager: Permit careful restart w/o disturbing services Lon Hohberger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).