From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 12481 invoked by alias); 17 Sep 2009 20:07:57 -0000 Received: (qmail 12475 invoked by alias); 17 Sep 2009 20:07:57 -0000 X-SWARE-Spam-Status: No, hits=-1.2 required=5.0 tests=BAYES_00,J_CHICKENPOX_54,J_CHICKENPOX_66,SPF_HELO_PASS,SUBJECT_FUZZY_TION X-Spam-Status: No, hits=-1.2 required=5.0 tests=BAYES_00,J_CHICKENPOX_54,J_CHICKENPOX_66,SPF_HELO_PASS,SUBJECT_FUZZY_TION X-Spam-Check-By: sourceware.org X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on bastion2.fedora.phx.redhat.com Subject: dlm: master - dlm_controld: fix start matching for partition+merge changes To: cluster-cvs-relay@redhat.com X-Project: Cluster Project X-Git-Module: dlm.git X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: cc59c8efa02b4c8474f5c888f3f8fafcdc377d9c X-Git-Newrev: fff520b757bfd56307ed5a1a5b7192fea714f8a7 From: David Teigland Message-Id: <20090917200724.24DAB120232@lists.fedorahosted.org> Date: Thu, 17 Sep 2009 20:07:00 -0000 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.18 Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cluster-cvs-owner@sourceware.org X-SW-Source: 2009-q3/txt/msg00340.txt.bz2 Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=fff520b757bfd56307ed5a1a5b7192fea714f8a7 Commit: fff520b757bfd56307ed5a1a5b7192fea714f8a7 Parent: cc59c8efa02b4c8474f5c888f3f8fafcdc377d9c Author: David Teigland AuthorDate: Thu Sep 17 14:53:25 2009 -0500 Committer: David Teigland CommitterDate: Thu Sep 17 14:58:51 2009 -0500 dlm_controld: fix start matching for partition+merge changes When a node is removed, added, and then removed again due to a partition+merge, the start messages for the second removal are mistakenly matched to the first removal (since the change descriptions are identical). 
To prevent this, detect when there are identical outstanding changes and send a start+nack for the first before sending the regular start for the second. Signed-off-by: David Teigland --- group/dlm_controld/cpg.c | 57 ++++++++++++++++++++++++++++++++++---- group/dlm_controld/dlm_daemon.h | 2 + 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c index 4c5ed5c..cf249c2 100644 --- a/group/dlm_controld/cpg.c +++ b/group/dlm_controld/cpg.c @@ -904,6 +904,12 @@ static int match_change(struct lockspace *ls, struct change *cg, return 0; } + if (memb->start_flags & DLM_MFLG_NACK) { + log_group(ls, "match_change %d:%u skip %u is nacked", + hd->nodeid, seq, cg->seq); + return 0; + } + if (memb->start && hd->type == DLM_MSG_START) { log_group(ls, "match_change %d:%u skip %u already start", hd->nodeid, seq, cg->seq); @@ -1052,6 +1058,11 @@ static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len) return; } + if (memb->start_flags & DLM_MFLG_NACK) { + log_group(ls, "receive_start %d:%u is NACK", hd->nodeid, seq); + return; + } + node_history_start(ls, hd->nodeid); memb->start = 1; } @@ -1095,9 +1106,9 @@ static void receive_plocks_stored(struct lockspace *ls, struct dlm_header *hd, ls->save_plocks = 0; } -static void send_info(struct lockspace *ls, int type) +static void send_info(struct lockspace *ls, struct change *cg, int type, + uint32_t flags) { - struct change *cg; struct dlm_header *hd; struct ls_info *li; struct id_info *id; @@ -1105,8 +1116,6 @@ static void send_info(struct lockspace *ls, int type) char *buf; int len, id_count; - cg = list_first_entry(&ls->changes, struct change, list); - id_count = cg->member_count; len = sizeof(struct dlm_header) + sizeof(struct ls_info) + @@ -1127,6 +1136,8 @@ static void send_info(struct lockspace *ls, int type) hd->type = type; hd->msgdata = cg->seq; + hd->flags = flags; + if (ls->joining) hd->flags |= DLM_MFLG_JOINING; if (!ls->need_plocks) @@ 
-1162,12 +1173,45 @@ static void send_info(struct lockspace *ls, int type) static void send_start(struct lockspace *ls) { - send_info(ls, DLM_MSG_START); + struct change *cg = list_first_entry(&ls->changes, struct change, list); + + send_info(ls, cg, DLM_MSG_START, 0); } static void send_plocks_stored(struct lockspace *ls) { - send_info(ls, DLM_MSG_PLOCKS_STORED); + struct change *cg = list_first_entry(&ls->changes, struct change, list); + + send_info(ls, cg, DLM_MSG_PLOCKS_STORED, 0); +} + +static int same_members(struct change *cg1, struct change *cg2) +{ + struct member *memb; + + list_for_each_entry(memb, &cg1->members, list) { + if (!find_memb(cg2, memb->nodeid)) + return 0; + } + return 1; +} + +static void send_nacks(struct lockspace *ls, struct change *startcg) +{ + struct change *cg; + + list_for_each_entry(cg, &ls->changes, list) { + if (cg->seq < startcg->seq && + cg->member_count == startcg->member_count && + cg->joined_count == startcg->joined_count && + cg->remove_count == startcg->remove_count && + cg->failed_count == startcg->failed_count && + same_members(cg, startcg)) { + log_group(ls, "send nack old cg %u new cg %u", + cg->seq, startcg->seq); + send_info(ls, cg, DLM_MSG_START, DLM_MFLG_NACK); + } + } } static int nodes_added(struct lockspace *ls) @@ -1260,6 +1304,7 @@ static void apply_changes(struct lockspace *ls) case CGST_WAIT_CONDITIONS: if (wait_conditions_done(ls)) { + send_nacks(ls, cg); send_start(ls); cg->state = CGST_WAIT_MESSAGES; } diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h index d34d18e..acd1c52 100644 --- a/group/dlm_controld/dlm_daemon.h +++ b/group/dlm_controld/dlm_daemon.h @@ -138,6 +138,8 @@ enum { /* dlm_header flags */ #define DLM_MFLG_JOINING 1 /* accompanies start, we are joining */ #define DLM_MFLG_HAVEPLOCK 2 /* accompanies start, we have plock state */ +#define DLM_MFLG_NACK 4 /* accompanies start, prevent wrong match when + two outstanding changes are the same */ struct dlm_header { 
uint16_t version[3];