From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 29814 invoked by alias); 11 Feb 2009 15:43:58 -0000 Received: (qmail 29807 invoked by alias); 11 Feb 2009 15:43:57 -0000 X-SWARE-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL,BAYES_00,SPF_HELO_PASS X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on bastion.fedora.phx.redhat.com Subject: cluster: RHEL5 - clogd: Remove pending checkpoints for nodes that leave. To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: cluster.git X-Git-Refname: refs/heads/RHEL5 X-Git-Reftype: branch X-Git-Oldrev: 222f0319c1face786a24842c7323dc78e1e0ca46 X-Git-Newrev: 864eb5eb988f9036759f58d651754ec0831675d5 From: Jonathan Brassow Message-Id: <20090211154334.DEC39120199@lists.fedorahosted.org> Date: Wed, 11 Feb 2009 15:43:00 -0000 X-Scanned-By: MIMEDefang 2.58 on 172.16.52.254 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2009-q1/txt/msg00432.txt.bz2 Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=864eb5eb988f9036759f58d651754ec0831675d5 Commit: 864eb5eb988f9036759f58d651754ec0831675d5 Parent: 222f0319c1face786a24842c7323dc78e1e0ca46 Author: Jonathan Brassow AuthorDate: Wed Feb 11 09:42:00 2009 -0600 Committer: Jonathan Brassow CommitterDate: Wed Feb 11 09:43:20 2009 -0600 clogd: Remove pending checkpoints for nodes that leave. When a node leaves before we can send it its checkpoints, remove the pending checkpoints for that node. This prevents stale checkpoints from lingering until the leaving node comes back. 
--- cmirror/src/cluster.c | 57 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 53 insertions(+), 4 deletions(-) diff --git a/cmirror/src/cluster.c b/cmirror/src/cluster.c index e43edbe..483ed1d 100644 --- a/cmirror/src/cluster.c +++ b/cmirror/src/cluster.c @@ -213,9 +213,20 @@ static int handle_cluster_request(struct clog_cpg *entry, * a cluster action to co-ordinate reading * the disk and checkpointing */ - if ((t->request_type != DM_CLOG_RESUME) || - (t->originator == my_cluster_id)) - r = do_request(t, server); + if (t->request_type == DM_CLOG_RESUME) { + if (t->originator == my_cluster_id) { + r = do_request(t, server); + + t->request_type |= DM_CLOG_RESPONSE; + + r = cluster_send(t); + if (r < 0) + LOG_ERROR("cluster_send failed: %s", strerror(-r)); + } + return r; + } + + r = do_request(t, server); if (server && (t->request_type != DM_CLOG_CLEAR_REGION) && @@ -541,6 +552,7 @@ rr_create_retry: tfr->request_type = DM_CLOG_CHECKPOINT_READY; tfr->originator = cp->requester; /* FIXME: hack to overload meaning of originator */ strncpy(tfr->uuid, cp->uuid, CPG_MAX_NAME_LENGTH); + tfr->seq = my_cluster_id; /* Just for debugging */ r = cluster_send(tfr); if (r) @@ -1167,10 +1179,11 @@ static void cpg_leave_callback(struct clog_cpg *match, struct cpg_address *member_list, int member_list_entries) { - int i, fd; + int i, j, fd; struct list_head *p, *n; uint32_t lowest = match->lowest_id; struct clog_tfr *tfr; + struct checkpoint_data *p_cp, *c_cp; { idx++; @@ -1205,6 +1218,42 @@ static void cpg_leave_callback(struct clog_cpg *match, match->state = INVALID; } + /* Remove any pending checkpoints for the leaving node */ + for (p_cp = NULL, c_cp = match->checkpoint_list; + c_cp && (c_cp->requester != left->nodeid); + p_cp = c_cp, c_cp = c_cp->next); + if (c_cp) { + if (p_cp) + p_cp->next = c_cp->next; + else + match->checkpoint_list = c_cp->next; + + LOG_COND(log_checkpoint, + "[%s] Removing pending checkpoint (%u is leaving)", + 
SHORT_UUID(match->name.value), left->nodeid); + free_checkpoint(c_cp); + } + list_for_each_safe(p, n, &match->startup_list) { + tfr = (struct clog_tfr *)p; + if ((tfr->request_type == DM_CLOG_MEMBER_JOIN) && + (tfr->originator == left->nodeid)) { + LOG_COND(log_checkpoint, + "[%s] Removing pending ckpt from startup list (%u is leaving)", + SHORT_UUID(match->name.value), left->nodeid); + list_del_init(p); + free(tfr); + } + } + for (i = 0, j = 0; i < match->checkpoints_needed; i++, j++) { + match->checkpoint_requesters[j] = match->checkpoint_requesters[i]; + if (match->checkpoint_requesters[i] == left->nodeid) { + LOG_ERROR("[%s] Removing pending ckpt from needed list (%u is leaving)", + SHORT_UUID(match->name.value), left->nodeid); + j--; + } + } + match->checkpoints_needed = j; + if (left->nodeid < my_cluster_id) { match->delay = (match->delay > 0) ? match->delay - 1 : 0; if (!match->delay && list_empty(&match->working_list))