From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 24672 invoked by alias); 11 Feb 2009 22:33:22 -0000 Received: (qmail 24665 invoked by alias); 11 Feb 2009 22:33:22 -0000 X-SWARE-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on bastion.fedora.phx.redhat.com Subject: cluster: RHEL5 - clogd: Short circuit resume requests To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/RHEL5 X-Git-Reftype: branch X-Git-Oldrev: 948e49365049e7540d5c5702c1885ff8f1887619 X-Git-Newrev: 823bc90ff6b989a7bc43377d3e35528604ff4d6b From: Jonathan Brassow Message-Id: <20090211223257.9ABD612019A@lists.fedorahosted.org> Date: Wed, 11 Feb 2009 22:33:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2009-q1/txt/msg00439.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=823bc90ff6b989a7bc43377d3e35528604ff4d6b Commit: 823bc90ff6b989a7bc43377d3e35528604ff4d6b Parent: 948e49365049e7540d5c5702c1885ff8f1887619 Author: Jonathan Brassow AuthorDate: Wed Feb 11 16:32:18 2009 -0600 Committer: Jonathan Brassow CommitterDate: Wed Feb 11 16:32:18 2009 -0600 clogd: Short circuit resume requests Resume requests are already handled by the node that sends them. They are sent to the cluster to ensure proper timing with checkpoints. However, there is no reason that the response should go around the cluster when the node is simply responding to itself. So, we now send the response to the request directly down to the kernel instead of out to the cluster and then down to the kernel. 
This change fixes situations where the node may try to resend the resume request due to lower nodeid nodes leaving, resulting in complaints about 'additional resumes', etc. The complaints didn't hurt anything, but if we can streamline the process /and/ fix the annoying messages, I think that's ok. --- cmirror/src/cluster.c | 15 ++++++--------- cmirror/src/functions.c | 6 +++--- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/cmirror/src/cluster.c b/cmirror/src/cluster.c index 7711b28..a23e39e 100644 --- a/cmirror/src/cluster.c +++ b/cmirror/src/cluster.c @@ -68,7 +68,7 @@ static SaCkptHandleT ckpt_handle = 0; static SaCkptCallbacksT callbacks = { 0, 0 }; static SaVersionT version = { 'B', 1, 1 }; -#define DEBUGGING_HISTORY 50 +#define DEBUGGING_HISTORY 200 static char debugging[DEBUGGING_HISTORY][128]; static int idx = 0; @@ -217,11 +217,9 @@ static int handle_cluster_request(struct clog_cpg *entry, if (t->originator == my_cluster_id) { r = do_request(t, server); - t->request_type |= DM_CLOG_RESPONSE; - - r = cluster_send(t); - if (r < 0) - LOG_ERROR("cluster_send failed: %s", strerror(-r)); + r = kernel_send(t); + if (r) + LOG_ERROR("Failed to send response to kernel"); } return r; } @@ -775,8 +773,6 @@ static int resend_requests(struct clog_cpg *entry) } switch (tfr->request_type) { - case DM_CLOG_RESUME: - /* We are only concerned about this request locally */ case DM_CLOG_SET_REGION_SYNC: /* * Some requests simply do not need to be resent. 
@@ -913,6 +909,7 @@ static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname, if ((nodeid == my_cluster_id) && !(tfr->request_type & DM_CLOG_RESPONSE) && + (tfr->request_type != DM_CLOG_RESUME) && (tfr->request_type != DM_CLOG_CLEAR_REGION) && (tfr->request_type != DM_CLOG_CHECKPOINT_READY)) { tmp_tfr = malloc(DM_CLOG_TFR_SIZE); @@ -1041,7 +1038,7 @@ static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname, if (log_get_state(tfr) != LOG_RESUMED) { LOG_COND(log_checkpoint, - "[%s] Withholding checkpoints until log is read", + "[%s] Withholding checkpoints until log is valid", SHORT_UUID(tfr->uuid)); break; } diff --git a/cmirror/src/functions.c b/cmirror/src/functions.c index 4db14ad..c106889 100644 --- a/cmirror/src/functions.c +++ b/cmirror/src/functions.c @@ -645,7 +645,6 @@ static int clog_presuspend(struct clog_tfr *tfr) if (lc->touched) LOG_DBG("WARNING: log still marked as 'touched' during suspend"); - lc->state = LOG_SUSPENDED; lc->recovery_halted = 1; return 0; @@ -666,6 +665,7 @@ static int clog_postsuspend(struct clog_tfr *tfr) LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid)); destroy_cluster_cpg(tfr->uuid); + lc->state = LOG_SUSPENDED; lc->recovering_region = (uint64_t)-1; lc->recoverer = (uint32_t)-1; @@ -806,8 +806,8 @@ out: lc->sync_count = count_bits32(lc->sync_bits, lc->bitset_uint32_count); - LOG_DBG("[%s] Initial sync_count = %llu", - SHORT_UUID(lc->uuid), (unsigned long long)lc->sync_count); + LOG_SPRINT("[%s] Initial sync_count = %llu", + SHORT_UUID(lc->uuid), (unsigned long long)lc->sync_count); lc->sync_search = 0; lc->state = LOG_RESUMED; lc->recovery_halted = 0;