From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <cluster-cvs-return-8566-listarch-cluster-cvs=sources.redhat.com@sourceware.org>
Received: (qmail 24046 invoked by alias); 27 Jun 2008 14:31:48 -0000
Received: (qmail 24003 invoked by uid 9478); 27 Jun 2008 14:31:45 -0000
Date: Fri, 27 Jun 2008 14:31:00 -0000
Message-ID: <20080627143142.23954.qmail@sourceware.org>
From: jbrassow@sourceware.org
To: cluster-cvs@sources.redhat.com, cluster-devel@redhat.com
Subject: Cluster Project branch, RHEL47, updated. gfs-kernel_2_6_9_76-82-g0949bb8
X-Git-Refname: refs/heads/RHEL47
X-Git-Reftype: branch
X-Git-Oldrev: 634f4f53b00c2c1ee49e02773e44d3352043f1c3
X-Git-Newrev: 0949bb8d8f145a8fe4121cf530d500d3125ab426
Mailing-List: contact cluster-cvs-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <cluster-cvs.sourceware.org>
List-Subscribe: <mailto:cluster-cvs-subscribe@sourceware.org>
List-Post: <mailto:cluster-cvs@sourceware.org>
List-Help: <mailto:cluster-cvs-help@sourceware.org>, <http://sourceware.org/lists.html#faqs>
Sender: cluster-cvs-owner@sourceware.org
X-SW-Source: 2008-q2/txt/msg00562.txt.bz2

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=0949bb8d8f145a8fe4121cf530d500d3125ab426

The branch, RHEL47, has been updated
       via  0949bb8d8f145a8fe4121cf530d500d3125ab426 (commit)
      from  634f4f53b00c2c1ee49e02773e44d3352043f1c3 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0949bb8d8f145a8fe4121cf530d500d3125ab426
Author: Jonathan Brassow <jbrassow@redhat.com>
Date:   Thu Jun 26 16:32:51 2008 -0500

    dm-cmirror.ko:  Fix for bug 450939, and other minor cleanups
    
    - If a write-recovery conflict is detected, halt recovery
      rather than calling BUG() (the fix for bug 450939)
    - Minor code style cleanups

-----------------------------------------------------------------------

Summary of changes:
 cmirror-kernel/src/dm-cmirror-client.c |   81 ++++++++++++---------------
 cmirror-kernel/src/dm-cmirror-server.c |   95 ++++++++++++++++++--------------
 2 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/cmirror-kernel/src/dm-cmirror-client.c b/cmirror-kernel/src/dm-cmirror-client.c
index 79794c2..0ca1741 100644
--- a/cmirror-kernel/src/dm-cmirror-client.c
+++ b/cmirror-kernel/src/dm-cmirror-client.c
@@ -311,11 +311,10 @@ static int _consult_server(struct log_c *lc, region_t region,
 	request_count++;
 
 	lr = kmalloc(sizeof(struct log_request), GFP_NOFS);
-	if(!lr){
+	if (!lr) {
 		BUG();
 		error = -ENOMEM;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	memset(lr, 0, sizeof(struct log_request));
@@ -344,10 +343,9 @@ static int _consult_server(struct log_c *lc, region_t region,
   
 	saddr_in.sin_family = AF_INET;
 	saddr_in.sin_port = CLUSTER_LOG_PORT;
-	if(!(saddr_in.sin_addr.s_addr = nodeid_to_ipaddr(lc->server_id))){
+	if (!(saddr_in.sin_addr.s_addr = nodeid_to_ipaddr(lc->server_id))) {
 		error = -ENXIO;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 	msg.msg_name = &saddr_in;
 	msg.msg_namelen = sizeof(saddr_in);
@@ -362,7 +360,7 @@ static int _consult_server(struct log_c *lc, region_t region,
 
 	set_fs(fs);
 
-	if(len < sizeof(struct log_request)){
+	if (len < sizeof(struct log_request)) {
 		DMWARN("unable to send log request to server");
 		error = -EBADE;
 		goto fail;
@@ -379,14 +377,13 @@ rerecv:
 			 sizeof(struct log_request), 0, 15);
 	set_fs(fs);
 
-	if(len <= 0){
+	if (len <= 0) {
 		/* ATTENTION -- what do we do with this ? */
 		DMWARN("Error listening for server(%u) response for %s: %d",
 		       lc->server_id, lc->uuid + (strlen(lc->uuid) - 8), len);
 		error = len;
-		*retry = 1;
 		seq++;
-		goto fail;
+		goto retry;
 	}
     
 	if (seq != lr->lr_seq) {
@@ -400,9 +397,8 @@ rerecv:
 		}
 		DMERR(" Seq# mismatch: Must try to resend request, %s", RQ_STRING(type));
 		error = -EBADE;
-		*retry = 1;
 		seq++;
-		goto fail;
+		goto retry;
 	}
 	seq++;
 
@@ -410,8 +406,7 @@ rerecv:
 		DMERR("Got incorrect message type back: %s/%s",
 		      RQ_STRING(type), RQ_STRING(lr->lr_type));
 		error = -EBADE;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	if (memcmp(lc->uuid, lr->lr_uuid, MAX_NAME_LEN)) {
@@ -419,54 +414,52 @@ rerecv:
 		DMERR(" Expected UUID: %s", lc->uuid);
 		DMERR(" Recieved UUID: %s", lr->lr_uuid);
 		error = -EBADE;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
-	if(lr->u.lr_int_rtn == -EAGAIN){
-		DMWARN("Server (%u), request type %d, -EAGAIN."
-		       "  Mirror suspended?",
+	if (lr->u.lr_int_rtn == -EAGAIN) {
+		DMWARN("Server (%u), request type %d, -EAGAIN.",
 		       lc->server_id, lr->lr_type);
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	if (lr->u.lr_int_rtn == -ENXIO) {
 		DMDEBUG("Server (%u) says it no longer controls this log (%s)",
 			lc->server_id, lc->uuid + (strlen(lc->uuid) - 8));
 		lc->server_id = 0xDEAD;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
-	if(result)
+	if (result)
 		*result = lr->u.lr_region_rtn;
 
 	error = lr->u.lr_int_rtn;
 	kfree(lr);
 	return error;
- fail:
-	if(*retry){
-		request_retry_count++;
-		if(!(request_retry_count & 0x1F)){
-			DMINFO("Clustered mirror retried requests :: %u of %u (%u%%)",
-			       request_retry_count,
-			       request_count,
-			       dm_div_up(request_retry_count*100, request_count));
-			DMDEBUG("Last request:");
-			DMDEBUG(" - my_id   :: %u", my_id);
-			DMDEBUG(" - server  :: %u", lc->server_id);
-			DMDEBUG(" - log uuid:: %s (%s)",
-			       lc->uuid + (strlen(lc->uuid) - 8),
-			       atomic_read(&lc->suspended) ? "suspended" : "active");
-			DMDEBUG(" - request :: %s", RQ_STRING(type));
-			DMDEBUG(" - error   :: %d", error);
-			DMINFO("Too many retries, attempting to re-establish server connection.");
-			lc->server_id = 0xDEAD;
-		}
+
+ retry:
+	*retry = 1;
+	request_retry_count++;
+	if (!(request_retry_count & 0x1F)) {
+		DMINFO("Clustered mirror retried requests :: %u of %u (%u%%)",
+		       request_retry_count,
+		       request_count,
+		       dm_div_up(request_retry_count*100, request_count));
+		DMDEBUG("Last request:");
+		DMDEBUG(" - my_id   :: %u", my_id);
+		DMDEBUG(" - server  :: %u", lc->server_id);
+		DMDEBUG(" - log uuid:: %s (%s)",
+			lc->uuid + (strlen(lc->uuid) - 8),
+			atomic_read(&lc->suspended) ? "suspended" : "active");
+		DMDEBUG(" - request :: %s", RQ_STRING(type));
+		DMDEBUG(" - error   :: %d", error);
+		DMINFO("Too many retries, attempting to re-establish server connection.");
+		lc->server_id = 0xDEAD;
 	}
 
-	if(lr) kfree(lr);
+fail:
+	if (lr)
+		kfree(lr);
 	return error;
 }
 
diff --git a/cmirror-kernel/src/dm-cmirror-server.c b/cmirror-kernel/src/dm-cmirror-server.c
index a2857f4..25a82b5 100644
--- a/cmirror-kernel/src/dm-cmirror-server.c
+++ b/cmirror-kernel/src/dm-cmirror-server.c
@@ -339,14 +339,17 @@ static int disk_resume(struct log_c *lc)
 	}
 
 	/* set or clear any new bits -- device has grown */
-	if (lc->sync == NOSYNC)
+	if (lc->sync == NOSYNC) {
+		DMDEBUG("  NOSYNC            :: set");
 		for (i = lc->header.nr_regions; i < lc->region_count; i++)
 			/* FIXME: amazingly inefficient */
 			log_set_bit(lc, lc->clean_bits, i);
-	else
+	} else {
+		DMDEBUG("  NOSYNC            :: unset");
 		for (i = lc->header.nr_regions; i < lc->region_count; i++)
 			/* FIXME: amazingly inefficient */
 			log_clear_bit(lc, lc->clean_bits, i);
+	}
 
 	/* clear any old/unused bits -- device has shrunk */
 	for(i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
@@ -358,10 +361,16 @@ static int disk_resume(struct log_c *lc)
 	/* must go through the list twice.  The dead node could have been using **
 	** the same region as other nodes and we want any region that was in    **
 	** use by the dead node to be marked _not_ in-sync..................... */
+	lc->recovering_region = (uint64_t)-1;
 	list_for_each_entry(ru, &lc->region_users, ru_list){
-		if(live_nodes[ru->ru_nodeid/8] & 1 << (ru->ru_nodeid%8)){
+		if (live_nodes[ru->ru_nodeid/8] & 1 << (ru->ru_nodeid%8)) {
 			good_count++;
-			log_set_bit(lc, lc->sync_bits, ru->ru_region);
+			if (ru->ru_rw == RU_WRITE) {
+				log_set_bit(lc, lc->sync_bits, ru->ru_region);
+			} else if (ru->ru_rw == RU_RECOVER) {
+				log_clear_bit(lc, lc->sync_bits, ru->ru_region);
+				lc->recovering_region = ru->ru_region;
+			}
 		}
 	}
 
@@ -387,11 +396,12 @@ static int disk_resume(struct log_c *lc)
 	lc->sync_count = count_bits32(lc->sync_bits, lc->bitset_uint32_count);
 	lc->sync_search = 0;
 
+	DMDEBUG("  in_sync           :: %d", atomic_read(&lc->in_sync));
 	DMDEBUG("  Sync count        :: %Lu", lc->sync_count);
 	DMDEBUG("  Disk Region count :: %Lu", lc->header.nr_regions);
 	DMDEBUG("  Region count      :: %Lu", lc->region_count);
 
-	if(lc->header.nr_regions != lc->region_count){
+	if (lc->header.nr_regions != lc->region_count) {
 		DMDEBUG("  NOTE:  Mapping has changed.");
 	}
 /* Take this out for now.
@@ -566,9 +576,11 @@ static int server_mark_region(struct log_c *lc, struct log_request *lr, uint32_t
 		DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
 		DMERR("  lc->sync_pass = %d", lc->sync_pass);
 		DMERR("  lc->sync_search = %d", lc->sync_search);
-		DMERR("  lc->recovery_halted = %d", lc->recovery_halted);
+		DMERR("  lc->recovery_halted = %d -> 1", lc->recovery_halted);
 
-		BUG();
+		lc->recovery_halted = 1;
+		mempool_free(new, region_user_pool);
+		return -EAGAIN;
 	} else {
 		list_add(&new->ru_list, &ru->ru_list);
 	}
@@ -641,8 +653,9 @@ static int server_flush(struct log_c *lc, uint32_t who)
 
 static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
 {
-	struct region_user *new;
+	struct region_user *new, *test;
 	region_t *region = &(lr->u.lr_region_rtn);
+	region_t sync_search = lc->sync_search;
 
 	lr->u.lr_int_rtn = 0; /* Default to no work */
 
@@ -673,50 +686,48 @@ static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint
 
 	if ((lc->recovering_next != (uint64_t)-1) &&
 	    (!log_test_bit(lc->sync_bits, lc->recovering_next))) {
-		new = mempool_alloc(region_user_pool, GFP_NOFS);
-		if (!new)
-			return -ENOMEM;
-		*region = lc->recovering_region = lc->recovering_next;
+		*region = lc->recovering_next;
 		DMDEBUG("Preempting normal recovery work for preferred region...");
 	} else {
 		*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
 						  lc->region_count,
 						  lc->sync_search);
-		if ((new = find_ru_by_region(lc, *region))) {
-			/*
-			 * We disallow writes to regions that have not yet been
-			 * recovered via is_remote_recovering(), so this should
-			 * not happen.
-			 */
-			DMERR("Recovery blocked by outstanding write on region %Lu/%s",
-			      *region, lc->uuid + (strlen(lc->uuid) - 8));
-			DMERR("  region_user { %s, %u, %Lu }",
-			      (new->ru_rw == RU_WRITE) ? "RU_WRITE":
-			      (new->ru_rw == RU_RECOVER) ? "RU_RECOVER":
-			      (new->ru_rw == RU_READ) ? "RU_READ" : "UNKOWN",
-			      new->ru_nodeid, new->ru_region);
-			DMERR("  lc->recovering_region = %Lu", lc->recovering_region);
-			DMERR("  lc->sync_count = %Lu", lc->sync_count);
-			DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
-			DMERR("  lc->sync_pass = %d", lc->sync_pass);
-			DMERR("  lc->sync_search = %d", lc->sync_search);
-			DMERR("  lc->recovery_halted = %d", lc->recovery_halted);
-			BUG();
-			return 0;
-		}
+		sync_search = *region + 1;
+	}
 
-		if (*region >= lc->region_count)
-			return 0;
+	if ((test = find_ru_by_region(lc, *region))) {
+		/*
+		 * We disallow writes to regions that have not yet been
+		 * recovered via is_remote_recovering(), so this should
+		 * not happen.
+		 */
+		DMERR("Recovery blocked by outstanding write on region %Lu/%s",
+		      *region, lc->uuid + (strlen(lc->uuid) - 8));
+		DMERR("  region_user { %s, %u, %Lu }",
+		      (test->ru_rw == RU_WRITE) ? "RU_WRITE":
+		      (test->ru_rw == RU_RECOVER) ? "RU_RECOVER":
+		      (test->ru_rw == RU_READ) ? "RU_READ" : "UNKOWN",
+		      test->ru_nodeid, test->ru_region);
+		DMERR("  lc->recovering_region = %Lu", lc->recovering_region);
+		DMERR("  lc->sync_count = %Lu", lc->sync_count);
+		DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
+		DMERR("  lc->sync_pass = %d", lc->sync_pass);
+		DMERR("  lc->sync_search = %d", lc->sync_search);
+		DMERR("  lc->recovery_halted = %d -> 1", lc->recovery_halted);
 
-		new = mempool_alloc(region_user_pool, GFP_NOFS);
-		if (!new)
-			return -ENOMEM;
+		lc->recovery_halted = 1;
+		return 0;
+	}
 
-		lc->sync_search = *region + 1;
+	if (*region >= lc->region_count)
+		return 0;
 
-		lc->recovering_region = *region;
-	}
+	new = mempool_alloc(region_user_pool, GFP_NOFS);
+	if (!new)
+		return -ENOMEM;
 
+	lc->sync_search = sync_search;
+	lc->recovering_region = *region;
 	lc->recovering_next = (uint64_t)-1;
 	lr->u.lr_int_rtn = 1; /* Assigning work */
 	new->ru_nodeid = who;


hooks/post-receive
--
Cluster Project