public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
From: Lon Hohberger <lon@fedoraproject.org>
To: cluster-cvs-relay@redhat.com
Subject: cluster: RHEL4 - rgmanager: Status check tuning/optimization
Date: Tue, 31 Mar 2009 19:00:00 -0000	[thread overview]
Message-ID: <20090331185944.2AC66120197@lists.fedorahosted.org> (raw)

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=95ae1b2b584e840d23c92df474ca82bf600c8378
Commit:        95ae1b2b584e840d23c92df474ca82bf600c8378
Parent:        b3f4f87d1a3caea844376719ebaccd1c6ed17666
Author:        Lon Hohberger <lhh@redhat.com>
AuthorDate:    Tue Mar 24 12:49:59 2009 -0400
Committer:     Lon Hohberger <lhh@redhat.com>
CommitterDate: Tue Mar 31 14:47:04 2009 -0400

rgmanager: Status check tuning/optimization

* Don't bother with status checks on 'service'
abstract resource.
* Spread status checks out based on completion
time instead of initiation time.
* Allow administrators to cap simultaneous
status checks to prevent load spikes.
* rhbz487596

Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
 rgmanager/include/rg_locks.h       |    4 +++
 rgmanager/src/daemons/main.c       |   33 ++++++++++++++++++++++++-
 rgmanager/src/daemons/restree.c    |   10 +++++--
 rgmanager/src/daemons/rg_locks.c   |   48 ++++++++++++++++++++++++++++++++++++
 rgmanager/src/resources/service.sh |    3 +-
 5 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/rgmanager/include/rg_locks.h b/rgmanager/include/rg_locks.h
index 78d6096..4cb808b 100644
--- a/rgmanager/include/rg_locks.h
+++ b/rgmanager/include/rg_locks.h
@@ -41,6 +41,10 @@ int rg_inc_status(void);
 int rg_dec_status(void);
 int rg_set_statusmax(int max);
 
+int rg_inc_children(void);
+int rg_dec_children(void);
+int rg_set_childmax(int max);
+
 int ccs_lock(void);
 int ccs_unlock(int fd);
 
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 05997aa..ea3f65a 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -59,6 +59,7 @@ int running = 1, need_reconfigure = 0;
 int shutdown_pending = 0;
 char debug = 0; /* XXX* */
 static int signalled = 0;
+static int status_poll_interval = 10;
 
 uint64_t next_node_id(cluster_member_list_t *membership, uint64_t me);
 
@@ -525,7 +526,7 @@ event_loop(int clusterfd)
 	struct timeval tv;
 	uint64_t nodeid;
 
-	tv.tv_sec = 10;
+	tv.tv_sec = status_poll_interval;
 	tv.tv_usec = 0;
 
 	if (signalled) {
@@ -649,6 +650,7 @@ configure_logging(int ccsfd)
 {
 	char *v;
 	char internal = 0;
+	int status_child_max = 0;
 
 	if (ccsfd < 0) {
 		internal = 1;
@@ -675,6 +677,35 @@ configure_logging(int ccsfd)
 		free(v);
 	}
 
+	if (ccs_get(ccsfd, "/cluster/rm/@status_poll_interval", &v) == 0) {
+		status_poll_interval = atoi(v);
+		if (status_poll_interval >= 1) {
+			clulog(LOG_NOTICE, "Status Polling Interval "
+			       "set to %d\n", status_poll_interval);
+		} else {
+			clulog(LOG_WARNING, "Ignoring illegal "
+			       "status_poll_interval of %s\n", v);
+			status_poll_interval = 10;
+		}
+		
+		free(v);
+	}
+
+	if (ccs_get(ccsfd, "/cluster/rm/@status_child_max", &v) == 0) {
+		status_child_max = atoi(v);
+		if (status_child_max >= 1) {
+			clulog(LOG_NOTICE,
+			       "Status Child Max set to %d\n",
+			       status_child_max);
+			rg_set_childmax(status_child_max);
+		} else {
+			clulog(LOG_WARNING, "Ignoring illegal "
+			       "status_child_max of %s\n", v);
+		}
+		
+		free(v);
+	}
+
 	if (internal)
 		ccs_disconnect(ccsfd);
 
diff --git a/rgmanager/src/daemons/restree.c b/rgmanager/src/daemons/restree.c
index 828b985..fab77f3 100644
--- a/rgmanager/src/daemons/restree.c
+++ b/rgmanager/src/daemons/restree.c
@@ -1177,15 +1177,19 @@ do_status(resource_node_t *node)
 	}
 
 	/* No check levels ready at the moment. */
-	if (idx == -1) {
+	/* Cap status check children if configured to do so */
+	if (idx == -1 || rg_inc_children() < 0) {
 		if (node->rn_checked)
 			return node->rn_last_status;
 		return 0;
 	}
 
-
-	node->rn_actions[idx].ra_last = now;
 	x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth);
+	rg_dec_children();
+
+	/* Record status check result *after* the status check has
+	 * completed. */
+	node->rn_actions[idx].ra_last = time(NULL);
 
 	node->rn_last_status = x;
 	node->rn_last_depth = node->rn_actions[idx].ra_depth;
diff --git a/rgmanager/src/daemons/rg_locks.c b/rgmanager/src/daemons/rg_locks.c
index 4a89762..759a2de 100644
--- a/rgmanager/src/daemons/rg_locks.c
+++ b/rgmanager/src/daemons/rg_locks.c
@@ -18,6 +18,7 @@
 */
 #include <pthread.h>
 #include <stdio.h>
+#include <assert.h>
 #ifdef NO_CCS
 #include <libxml/xmlmemory.h>
 #include <libxml/parser.h>
@@ -36,6 +37,9 @@ static int __rg_initialized = 0;
 static int _rg_statuscnt = 0;
 static int _rg_statusmax = 5; /* XXX */
 
+static int _rg_childcnt = 0;
+static int _rg_childmax = 0; /* XXX */
+
 static pthread_cond_t unlock_cond = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t zero_cond = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;
@@ -315,6 +319,50 @@ rg_dec_status(void)
 
 
 int
+rg_set_childmax(int max)
+{
+	int old;
+	
+	if (max <= 1)
+		max = 1;
+	
+	pthread_mutex_lock(&locks_mutex);
+	old = _rg_childmax;
+	_rg_childmax = max;
+	pthread_mutex_unlock(&locks_mutex);
+	return old;
+}
+
+
+int
+rg_inc_children(void)
+{
+	pthread_mutex_lock(&locks_mutex);
+	if (_rg_childmax && (_rg_childcnt >= _rg_childmax)) {
+		pthread_mutex_unlock(&locks_mutex);
+		return -1;
+	}
+	++_rg_childcnt;
+	pthread_mutex_unlock(&locks_mutex);
+	return 0;
+}
+
+
+int
+rg_dec_children(void)
+{
+	pthread_mutex_lock(&locks_mutex);
+	--_rg_childcnt;
+	if (_rg_childcnt < 0) {
+		assert(0);
+		_rg_childcnt = 0;
+	}
+	pthread_mutex_unlock(&locks_mutex);
+	return 0;
+}
+
+
+int
 rg_wait_threads(void)
 {
 	pthread_mutex_lock(&locks_mutex);
diff --git a/rgmanager/src/resources/service.sh b/rgmanager/src/resources/service.sh
index b7c9e08..a21a66b 100755
--- a/rgmanager/src/resources/service.sh
+++ b/rgmanager/src/resources/service.sh
@@ -222,9 +222,10 @@ meta_data()
         <action name="start" timeout="5"/>
         <action name="stop" timeout="5"/>
 	
-	<!-- No-ops.  Groups are abstract resource types.  -->
+	<!-- No-ops.  Groups are abstract resource types.
         <action name="status" timeout="5" interval="1h"/>
         <action name="monitor" timeout="5" interval="1h"/>
+       	 -->
 
         <action name="reconfig" timeout="5"/>
         <action name="recover" timeout="5"/>


                 reply	other threads:[~2009-03-31 19:00 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090331185944.2AC66120197@lists.fedorahosted.org \
    --to=lon@fedoraproject.org \
    --cc=cluster-cvs-relay@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).