public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: STABLE3 - rgmanager: Allow restart counters to work with central_processing
@ 2009-03-05 14:08 Lon Hohberger
  0 siblings, 0 replies; only message in thread
From: Lon Hohberger @ 2009-03-05 14:08 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=2c724185bb57b9998c555edafc326badf5facd60
Commit:        2c724185bb57b9998c555edafc326badf5facd60
Parent:        7981232fc6862eba25c45cd7f5a36df7bfcf96a1
Author:        Lon Hohberger <lhh@redhat.com>
AuthorDate:    Thu Mar 5 09:03:23 2009 -0500
Committer:     Lon Hohberger <lhh@redhat.com>
CommitterDate: Thu Mar 5 09:03:23 2009 -0500

rgmanager: Allow restart counters to work with central_processing

rhbz #400211 / #431130
---
 rgmanager/ChangeLog                     |    5 +++++
 rgmanager/include/resgroup.h            |    1 +
 rgmanager/include/restart_counter.h     |    1 +
 rgmanager/src/daemons/groups.c          |   25 +++++++++++++++++++------
 rgmanager/src/daemons/restart_counter.c |   22 +++++++++++++++++++++-
 rgmanager/src/daemons/rg_state.c        |   24 +++++++++++++++++++-----
 rgmanager/src/daemons/slang_event.c     |   15 +++++++++++++++
 7 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/rgmanager/ChangeLog b/rgmanager/ChangeLog
index 2387815..917500f 100644
--- a/rgmanager/ChangeLog
+++ b/rgmanager/ChangeLog
@@ -8,6 +8,11 @@
 2008-02-26 Lon Hohberger <lhh at redhat.com>
 	* src/resources/ip.sh: Fix netmask handling in ip.sh
 	* src/utils/clustat.c: Don't show estranged nodes if they're down
+2008-02-01 Lon Hohberger <lhh at redhat.com>
+	* src/daemons/rg_state.c, slang_event.c, groups.c, restart_counter.c,
+	include/restart_counter.h, resgroup.h,
+	src/resources/default_event_script.sl: Allow restart counters to 
+	correctly work with central_processing. (#400211 / #431130)
 
 2008-01-25 Lon Hohberger <lhh at redhat.com>
 	* src/daemons/rg_thread.c: Fix case that broke 'clusvcadm -e <service>
diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index 67acb6d..84442fd 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -162,6 +162,7 @@ int svc_freeze(char *svcName);
 int svc_unfreeze(char *svcName);
 int svc_migrate(char *svcName, int target);
 int check_restart(char *svcName);
+int add_restart(char *svcName);
 
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *resp_ctx,
diff --git a/rgmanager/include/restart_counter.h b/rgmanager/include/restart_counter.h
index 399680c..04714e4 100644
--- a/rgmanager/include/restart_counter.h
+++ b/rgmanager/include/restart_counter.h
@@ -8,6 +8,7 @@ typedef void *restart_counter_t;
 int restart_add(restart_counter_t arg);
 int restart_clear(restart_counter_t arg);
 int restart_count(restart_counter_t arg);
+int restart_threshold_exceeded(restart_counter_t arg);
 restart_counter_t restart_init(time_t expire_timeout, int max_restarts);
 int restart_cleanup(restart_counter_t arg);
 
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 4047be1..d91d6e6 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -1797,7 +1797,7 @@ get_service_property(char *rg_name, char *prop, char *buf, size_t buflen)
 
 
 int
-check_restart(char *rg_name)
+add_restart(char *rg_name)
 {
 	resource_node_t *node;
 	int ret = 1;
@@ -1806,11 +1806,24 @@ check_restart(char *rg_name)
 	node = node_by_ref(&_tree, rg_name);
 	if (node) {
 		ret = restart_add(node->rn_restart_counter);
-		if (ret) {
-			/* Clear it out - caller is about 
-			   to relocate the service anyway */
-			restart_clear(node->rn_restart_counter);
-		}
+	}
+	pthread_rwlock_unlock(&resource_lock);
+
+	return ret;
+}
+
+
+int
+check_restart(char *rg_name)
+{
+	resource_node_t *node;
+	int ret = 0;
+
+	pthread_rwlock_rdlock(&resource_lock);
+	node = node_by_ref(&_tree, rg_name);
+	if (node) {
+		printf("%s %p\n", rg_name, node->rn_restart_counter);
+		ret = restart_threshold_exceeded(node->rn_restart_counter);
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
diff --git a/rgmanager/src/daemons/restart_counter.c b/rgmanager/src/daemons/restart_counter.c
index 9b2e3c6..8789987 100644
--- a/rgmanager/src/daemons/restart_counter.c
+++ b/rgmanager/src/daemons/restart_counter.c
@@ -29,6 +29,10 @@ typedef struct {
 
 #define VALIDATE(arg, ret) \
 do { \
+	if (!arg) {\
+		errno = EINVAL; \
+		return ret; \
+	} \
 	if (((restart_info_t *)arg)->magic != RESTART_INFO_MAGIC) {\
 		errno = EINVAL; \
 		return ret; \
@@ -80,6 +84,21 @@ restart_count(restart_counter_t arg)
 }
 
 
+int
+restart_threshold_exceeded(restart_counter_t arg)
+{
+	restart_info_t *restarts = (restart_info_t *)arg;
+	time_t now;
+
+	VALIDATE(arg, -1);
+	now = time(NULL);
+	restart_timer_purge(arg, now);
+	if (restarts->restart_count >= restarts->max_restarts)
+		return 1;
+	return 0;
+}
+
+
 /* Add a restart entry to the list.  Returns 1 if restart
    count is exceeded */
 int
@@ -110,7 +129,7 @@ restart_add(restart_counter_t arg)
 	/* Check and remove old entries */
 	restart_timer_purge(restarts, t);
 
-	if (restarts->restart_count > restarts->max_restarts)
+	if (restarts->restart_count >= restarts->max_restarts)
 		return 1;
 
 	return 0;
@@ -153,6 +172,7 @@ restart_init(time_t expire_timeout, int max_restarts)
 	info->expire_timeout = expire_timeout;
 	info->max_restarts = max_restarts;
 	info->restart_count = 0;
+	info->restart_nodes = NULL;
 
 	return (void *)info;
 }
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index ad6ba1f..7937c0d 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -684,7 +684,6 @@ svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
 			logt_print(LOG_NOTICE,
 			       "Recovering failed service %s\n",
 			       svcName);
-			svcStatus->rs_state = RG_STATE_STOPPED;
 			/* Start! */
 			ret = 1;
 			break;
@@ -798,13 +797,16 @@ svc_start(char *svcName, int req)
 	/* LOCK HELD if we get here */
 
 	svcStatus.rs_owner = my_id();
-	svcStatus.rs_state = RG_STATE_STARTING;
 	svcStatus.rs_transition = (uint64_t)time(NULL);
 
-	if (req == RG_START_RECOVER)
+	if (svcStatus.rs_state == RG_STATE_RECOVER) {
+		add_restart(svcName);
 		svcStatus.rs_restarts++;
-	else
+	} else {
 		svcStatus.rs_restarts = 0;
+	}
+
+	svcStatus.rs_state = RG_STATE_STARTING;
 
 	if (set_rg_state(svcName, &svcStatus) < 0) {
 		logt_print(LOG_ERR,
@@ -1273,7 +1275,7 @@ _svc_stop(char *svcName, int req, int recover, uint32_t newstate)
 {
 	struct dlm_lksb lockp;
 	rg_state_t svcStatus;
-	int ret;
+	int ret = 0;
 	int old_state;
 
 	if (!rg_quorate()) {
@@ -1329,6 +1331,18 @@ _svc_stop(char *svcName, int req, int recover, uint32_t newstate)
 
 	old_state = svcStatus.rs_state;
 
+	if (old_state == RG_STATE_RECOVER) {
+		logt_print(LOG_DEBUG, "%s is clean; skipping double-stop\n",
+		       svcName);
+		svcStatus.rs_state = newstate;
+
+		if (set_rg_state(svcName, &svcStatus) != 0) {
+			rg_unlock(&lockp);
+			logt_print(LOG_ERR, "#52: Failed changing RG status\n");
+			return RG_EFAIL;
+		}
+	} 
+
 	logt_print(LOG_NOTICE, "Stopping service %s\n", svcName);
 
 	if (recover) 
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index b215ef9..e019fbb 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -63,6 +63,7 @@ static int
    _node_clean = 0,
    _service_owner = 0,
    _service_last_owner = 0,
+   _service_restarts_exceeded = 0,
    _user_request = 0,
    _user_arg1 = 0,
    _user_arg2 = 0,
@@ -108,6 +109,8 @@ SLang_Intrin_Var_Type rgmanager_vars[] =
 	MAKE_VARIABLE("service_owner",	&_service_owner,SLANG_INT_TYPE, 1),
 	MAKE_VARIABLE("service_last_owner", &_service_last_owner,
 		      					SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("service_restarts_exceeded", &_service_restarts_exceeded,
+		      					SLANG_INT_TYPE, 1),
 
 	/* User event information */
 	MAKE_VARIABLE("user_request",	&_user_request,	SLANG_INT_TYPE,1),
@@ -204,6 +207,7 @@ void
 sl_service_status(char *svcName)
 {
 	rg_state_t svcStatus;
+	int restarts_exceeded = 0;
 	char *state_str;
 
 	if (get_service_state_internal(svcName, &svcStatus) < 0) {
@@ -214,6 +218,15 @@ sl_service_status(char *svcName)
 		return;
 	}
 
+	restarts_exceeded = check_restart(svcName);
+	if (SLang_push_integer(restarts_exceeded) < 0) {
+		SLang_verror(SL_RunTime_Error,
+			     "%s: Failed to push restarts_exceeded %s",
+			     __FUNCTION__,
+			     svcName);
+		return;
+	}
+
 	if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
 		SLang_verror(SL_RunTime_Error,
 			     "%s: Failed to push restarts for %s",
@@ -1077,6 +1090,7 @@ S_service_event(const char *file, const char *script, char *name,
 	_service_state = (char *)rg_state_str(state);
 	_service_owner = owner;
 	_service_last_owner = last_owner;
+	_service_restarts_exceeded = check_restart(name);
 
 	switch(state) {
 	case RG_STATE_DISABLED:
@@ -1094,6 +1108,7 @@ S_service_event(const char *file, const char *script, char *name,
 	_service_state = 0;
 	_service_owner = 0;
 	_service_last_owner = 0;
+	_service_restarts_exceeded = 0;
 
 	return ret;
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-03-05 14:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-05 14:08 cluster: STABLE3 - rgmanager: Allow restart counters to work with central_processing Lon Hohberger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).