public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: STABLE3 - rgmanager: Allow restart counters to work with central_processing
@ 2009-03-05 14:08 Lon Hohberger
0 siblings, 0 replies; only message in thread
From: Lon Hohberger @ 2009-03-05 14:08 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=2c724185bb57b9998c555edafc326badf5facd60
Commit: 2c724185bb57b9998c555edafc326badf5facd60
Parent: 7981232fc6862eba25c45cd7f5a36df7bfcf96a1
Author: Lon Hohberger <lhh@redhat.com>
AuthorDate: Thu Mar 5 09:03:23 2009 -0500
Committer: Lon Hohberger <lhh@redhat.com>
CommitterDate: Thu Mar 5 09:03:23 2009 -0500
rgmanager: Allow restart counters to work with central_processing
rhbz #400211 / #431130
---
rgmanager/ChangeLog | 5 +++++
rgmanager/include/resgroup.h | 1 +
rgmanager/include/restart_counter.h | 1 +
rgmanager/src/daemons/groups.c | 25 +++++++++++++++++++------
rgmanager/src/daemons/restart_counter.c | 22 +++++++++++++++++++++-
rgmanager/src/daemons/rg_state.c | 24 +++++++++++++++++++-----
rgmanager/src/daemons/slang_event.c | 15 +++++++++++++++
7 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/rgmanager/ChangeLog b/rgmanager/ChangeLog
index 2387815..917500f 100644
--- a/rgmanager/ChangeLog
+++ b/rgmanager/ChangeLog
@@ -8,6 +8,11 @@
2008-02-26 Lon Hohberger <lhh at redhat.com>
* src/resources/ip.sh: Fix netmask handling in ip.sh
* src/utils/clustat.c: Don't show estranged nodes if they're down
+2008-02-01 Lon Hohberger <lhh at redhat.com>
+ * src/daemons/rg_state.c, slang_event.c, groups.c, restart_counter.c,
+ include/restart_counter.h, resgroup.h,
+ src/resources/default_event_script.sl: Allow restart counters to
+ correctly work with central_processing. (#400211 / #431130)
2008-01-25 Lon Hohberger <lhh at redhat.com>
* src/daemons/rg_thread.c: Fix case that broke 'clusvcadm -e <service>
diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index 67acb6d..84442fd 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -162,6 +162,7 @@ int svc_freeze(char *svcName);
int svc_unfreeze(char *svcName);
int svc_migrate(char *svcName, int target);
int check_restart(char *svcName);
+int add_restart(char *svcName);
int rt_enqueue_request(const char *resgroupname, int request,
msgctx_t *resp_ctx,
diff --git a/rgmanager/include/restart_counter.h b/rgmanager/include/restart_counter.h
index 399680c..04714e4 100644
--- a/rgmanager/include/restart_counter.h
+++ b/rgmanager/include/restart_counter.h
@@ -8,6 +8,7 @@ typedef void *restart_counter_t;
int restart_add(restart_counter_t arg);
int restart_clear(restart_counter_t arg);
int restart_count(restart_counter_t arg);
+int restart_threshold_exceeded(restart_counter_t arg);
restart_counter_t restart_init(time_t expire_timeout, int max_restarts);
int restart_cleanup(restart_counter_t arg);
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 4047be1..d91d6e6 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -1797,7 +1797,7 @@ get_service_property(char *rg_name, char *prop, char *buf, size_t buflen)
int
-check_restart(char *rg_name)
+add_restart(char *rg_name)
{
resource_node_t *node;
int ret = 1;
@@ -1806,11 +1806,24 @@ check_restart(char *rg_name)
node = node_by_ref(&_tree, rg_name);
if (node) {
ret = restart_add(node->rn_restart_counter);
- if (ret) {
- /* Clear it out - caller is about
- to relocate the service anyway */
- restart_clear(node->rn_restart_counter);
- }
+ }
+ pthread_rwlock_unlock(&resource_lock);
+
+ return ret;
+}
+
+
+int
+check_restart(char *rg_name)
+{
+ resource_node_t *node;
+ int ret = 0;
+
+ pthread_rwlock_rdlock(&resource_lock);
+ node = node_by_ref(&_tree, rg_name);
+ if (node) {
+ printf("%s %p\n", rg_name, node->rn_restart_counter);
+ ret = restart_threshold_exceeded(node->rn_restart_counter);
}
pthread_rwlock_unlock(&resource_lock);
diff --git a/rgmanager/src/daemons/restart_counter.c b/rgmanager/src/daemons/restart_counter.c
index 9b2e3c6..8789987 100644
--- a/rgmanager/src/daemons/restart_counter.c
+++ b/rgmanager/src/daemons/restart_counter.c
@@ -29,6 +29,10 @@ typedef struct {
#define VALIDATE(arg, ret) \
do { \
+ if (!arg) {\
+ errno = EINVAL; \
+ return ret; \
+ } \
if (((restart_info_t *)arg)->magic != RESTART_INFO_MAGIC) {\
errno = EINVAL; \
return ret; \
@@ -80,6 +84,21 @@ restart_count(restart_counter_t arg)
}
+int
+restart_threshold_exceeded(restart_counter_t arg)
+{
+ restart_info_t *restarts = (restart_info_t *)arg;
+ time_t now;
+
+ VALIDATE(arg, -1);
+ now = time(NULL);
+ restart_timer_purge(arg, now);
+ if (restarts->restart_count >= restarts->max_restarts)
+ return 1;
+ return 0;
+}
+
+
/* Add a restart entry to the list. Returns 1 if restart
count is exceeded */
int
@@ -110,7 +129,7 @@ restart_add(restart_counter_t arg)
/* Check and remove old entries */
restart_timer_purge(restarts, t);
- if (restarts->restart_count > restarts->max_restarts)
+ if (restarts->restart_count >= restarts->max_restarts)
return 1;
return 0;
@@ -153,6 +172,7 @@ restart_init(time_t expire_timeout, int max_restarts)
info->expire_timeout = expire_timeout;
info->max_restarts = max_restarts;
info->restart_count = 0;
+ info->restart_nodes = NULL;
return (void *)info;
}
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index ad6ba1f..7937c0d 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -684,7 +684,6 @@ svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
logt_print(LOG_NOTICE,
"Recovering failed service %s\n",
svcName);
- svcStatus->rs_state = RG_STATE_STOPPED;
/* Start! */
ret = 1;
break;
@@ -798,13 +797,16 @@ svc_start(char *svcName, int req)
/* LOCK HELD if we get here */
svcStatus.rs_owner = my_id();
- svcStatus.rs_state = RG_STATE_STARTING;
svcStatus.rs_transition = (uint64_t)time(NULL);
- if (req == RG_START_RECOVER)
+ if (svcStatus.rs_state == RG_STATE_RECOVER) {
+ add_restart(svcName);
svcStatus.rs_restarts++;
- else
+ } else {
svcStatus.rs_restarts = 0;
+ }
+
+ svcStatus.rs_state = RG_STATE_STARTING;
if (set_rg_state(svcName, &svcStatus) < 0) {
logt_print(LOG_ERR,
@@ -1273,7 +1275,7 @@ _svc_stop(char *svcName, int req, int recover, uint32_t newstate)
{
struct dlm_lksb lockp;
rg_state_t svcStatus;
- int ret;
+ int ret = 0;
int old_state;
if (!rg_quorate()) {
@@ -1329,6 +1331,18 @@ _svc_stop(char *svcName, int req, int recover, uint32_t newstate)
old_state = svcStatus.rs_state;
+ if (old_state == RG_STATE_RECOVER) {
+ logt_print(LOG_DEBUG, "%s is clean; skipping double-stop\n",
+ svcName);
+ svcStatus.rs_state = newstate;
+
+ if (set_rg_state(svcName, &svcStatus) != 0) {
+ rg_unlock(&lockp);
+ logt_print(LOG_ERR, "#52: Failed changing RG status\n");
+ return RG_EFAIL;
+ }
+ }
+
logt_print(LOG_NOTICE, "Stopping service %s\n", svcName);
if (recover)
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index b215ef9..e019fbb 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -63,6 +63,7 @@ static int
_node_clean = 0,
_service_owner = 0,
_service_last_owner = 0,
+ _service_restarts_exceeded = 0,
_user_request = 0,
_user_arg1 = 0,
_user_arg2 = 0,
@@ -108,6 +109,8 @@ SLang_Intrin_Var_Type rgmanager_vars[] =
MAKE_VARIABLE("service_owner", &_service_owner,SLANG_INT_TYPE, 1),
MAKE_VARIABLE("service_last_owner", &_service_last_owner,
SLANG_INT_TYPE, 1),
+ MAKE_VARIABLE("service_restarts_exceeded", &_service_restarts_exceeded,
+ SLANG_INT_TYPE, 1),
/* User event information */
MAKE_VARIABLE("user_request", &_user_request, SLANG_INT_TYPE,1),
@@ -204,6 +207,7 @@ void
sl_service_status(char *svcName)
{
rg_state_t svcStatus;
+ int restarts_exceeded = 0;
char *state_str;
if (get_service_state_internal(svcName, &svcStatus) < 0) {
@@ -214,6 +218,15 @@ sl_service_status(char *svcName)
return;
}
+ restarts_exceeded = check_restart(svcName);
+ if (SLang_push_integer(restarts_exceeded) < 0) {
+ SLang_verror(SL_RunTime_Error,
+ "%s: Failed to push restarts_exceeded %s",
+ __FUNCTION__,
+ svcName);
+ return;
+ }
+
if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
SLang_verror(SL_RunTime_Error,
"%s: Failed to push restarts for %s",
@@ -1077,6 +1090,7 @@ S_service_event(const char *file, const char *script, char *name,
_service_state = (char *)rg_state_str(state);
_service_owner = owner;
_service_last_owner = last_owner;
+ _service_restarts_exceeded = check_restart(name);
switch(state) {
case RG_STATE_DISABLED:
@@ -1094,6 +1108,7 @@ S_service_event(const char *file, const char *script, char *name,
_service_state = 0;
_service_owner = 0;
_service_last_owner = 0;
+ _service_restarts_exceeded = 0;
return ret;
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2009-03-05 14:08 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-05 14:08 cluster: STABLE3 - rgmanager: Allow restart counters to work with central_processing Lon Hohberger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).