cluster: RHEL5 - clogd: Short circuit resume requests

public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed

* cluster: RHEL5 - clogd: Short circuit resume requests
@ 2009-02-11 22:33 Jonathan Brassow
  0 siblings, 0 replies; only message in thread
From: Jonathan Brassow @ 2009-02-11 22:33 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=823bc90ff6b989a7bc43377d3e35528604ff4d6b
Commit:        823bc90ff6b989a7bc43377d3e35528604ff4d6b
Parent:        948e49365049e7540d5c5702c1885ff8f1887619
Author:        Jonathan Brassow <jbrassow@redhat.com>
AuthorDate:    Wed Feb 11 16:32:18 2009 -0600
Committer:     Jonathan Brassow <jbrassow@redhat.com>
CommitterDate: Wed Feb 11 16:32:18 2009 -0600

clogd: Short circuit resume requests

Resume requests are already handled by the node that sends
them.  They are sent to the cluster to ensure proper timing
with checkpoints.  However, there is no reason that the
response should go around the cluster when the node is simply
responding to itself.  So, we now send the response to the
request directly down to the kernel instead of out to the
cluster and then down to the kernel.

This change fixes situations where the node may try to resend
the resume request due to nodes with lower node IDs leaving, which
results in complaints about 'additional resumes', etc.  The
complaints didn't hurt anything, but if we can streamline the
process /and/ fix the annoying messages, I think that's ok.
---
 cmirror/src/cluster.c   |   15 ++++++---------
 cmirror/src/functions.c |    6 +++---
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/cmirror/src/cluster.c b/cmirror/src/cluster.c
index 7711b28..a23e39e 100644
--- a/cmirror/src/cluster.c
+++ b/cmirror/src/cluster.c
@@ -68,7 +68,7 @@ static SaCkptHandleT ckpt_handle = 0;
 static SaCkptCallbacksT callbacks = { 0, 0 };
 static SaVersionT version = { 'B', 1, 1 };
 
-#define DEBUGGING_HISTORY 50
+#define DEBUGGING_HISTORY 200
 static char debugging[DEBUGGING_HISTORY][128];
 static int idx = 0;
 
@@ -217,11 +217,9 @@ static int handle_cluster_request(struct clog_cpg *entry,
 		if (t->originator == my_cluster_id) {
 			r = do_request(t, server);
 
-			t->request_type |= DM_CLOG_RESPONSE;
-
-			r = cluster_send(t);
-			if (r < 0)
-				LOG_ERROR("cluster_send failed: %s", strerror(-r));
+			r = kernel_send(t);
+			if (r)
+				LOG_ERROR("Failed to send response to kernel");
 		}
 		return r;
 	}
@@ -775,8 +773,6 @@ static int resend_requests(struct clog_cpg *entry)
 		}
 
 		switch (tfr->request_type) {
-		case DM_CLOG_RESUME:
-			/* We are only concerned about this request locally */
 		case DM_CLOG_SET_REGION_SYNC:
 			/*
 			 * Some requests simply do not need to be resent.
@@ -913,6 +909,7 @@ static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname,
 
 	if ((nodeid == my_cluster_id) &&
 	    !(tfr->request_type & DM_CLOG_RESPONSE) &&
+	    (tfr->request_type != DM_CLOG_RESUME) &&
 	    (tfr->request_type != DM_CLOG_CLEAR_REGION) &&
 	    (tfr->request_type != DM_CLOG_CHECKPOINT_READY)) {
 		tmp_tfr = malloc(DM_CLOG_TFR_SIZE);
@@ -1041,7 +1038,7 @@ static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname,
 
 		if (log_get_state(tfr) != LOG_RESUMED) {
 			LOG_COND(log_checkpoint,
-				 "[%s] Withholding checkpoints until log is read",
+				 "[%s] Withholding checkpoints until log is valid",
 				 SHORT_UUID(tfr->uuid));
 			break;
 		}
diff --git a/cmirror/src/functions.c b/cmirror/src/functions.c
index 4db14ad..c106889 100644
--- a/cmirror/src/functions.c
+++ b/cmirror/src/functions.c
@@ -645,7 +645,6 @@ static int clog_presuspend(struct clog_tfr *tfr)
 	if (lc->touched)
 		LOG_DBG("WARNING: log still marked as 'touched' during suspend");
 
-	lc->state = LOG_SUSPENDED;
 	lc->recovery_halted = 1;
 
 	return 0;
@@ -666,6 +665,7 @@ static int clog_postsuspend(struct clog_tfr *tfr)
 	LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
 	destroy_cluster_cpg(tfr->uuid);
 
+	lc->state = LOG_SUSPENDED;
 	lc->recovering_region = (uint64_t)-1;
 	lc->recoverer = (uint32_t)-1;
 
@@ -806,8 +806,8 @@ out:
 
 	lc->sync_count = count_bits32(lc->sync_bits, lc->bitset_uint32_count);
 
-	LOG_DBG("[%s] Initial sync_count = %llu",
-		SHORT_UUID(lc->uuid), (unsigned long long)lc->sync_count);
+	LOG_SPRINT("[%s] Initial sync_count = %llu",
+		   SHORT_UUID(lc->uuid), (unsigned long long)lc->sync_count);
 	lc->sync_search = 0;
 	lc->state = LOG_RESUMED;
 	lc->recovery_halted = 0;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-02-11 22:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-11 22:33 cluster: RHEL5 - clogd: Short circuit resume requests Jonathan Brassow

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).