public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: RHEL5 - gfs: improve gfs_fsck rindex repair code
@ 2008-12-22 21:31 Bob Peterson
  0 siblings, 0 replies; only message in thread
From: Bob Peterson @ 2008-12-22 21:31 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=70bc706890e89f6a51728e3c5b6f267fb0323acd
Commit:        70bc706890e89f6a51728e3c5b6f267fb0323acd
Parent:        0e0a8e2e583f4a48d1fb865e2fc46b8b11362e8a
Author:        Bob Peterson <rpeterso@redhat.com>
AuthorDate:    Fri Dec 19 16:27:50 2008 -0600
Committer:     Bob Peterson <rpeterso@redhat.com>
CommitterDate: Mon Dec 22 15:29:50 2008 -0600

gfs: improve gfs_fsck rindex repair code

bz 442271 - GFS: gfs_fsck bugs found in rindex repair code

This patch makes improvements and fixes some bugs in gfs_fsck's
rindex repair code.  Basically, if RGs were damaged, especially
in the third section (i.e. RGs added by gfs_grow), gfs_fsck did
not properly locate the RG boundaries.  Also, if the rindex was
completely zeroed out, gfs_fsck did not recover it in cases where
the file system had been extended.
---
 gfs/gfs_fsck/rgrp.c  |    5 ++
 gfs/gfs_fsck/super.c |  169 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 134 insertions(+), 40 deletions(-)

diff --git a/gfs/gfs_fsck/rgrp.c b/gfs/gfs_fsck/rgrp.c
index 6ef6f74..429f515 100644
--- a/gfs/gfs_fsck/rgrp.c
+++ b/gfs/gfs_fsck/rgrp.c
@@ -51,6 +51,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
 		return -1;
 	}
 	if(!memset(rgd->rd_bits, 0, length * sizeof(fs_bitmap_t))) {
+		free(rgd->rd_bits);
 		log_err("Unable to zero bitmap structure\n");
 		stack;
 		return -1;
@@ -92,6 +93,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
 	if(bytes_left){
 		log_err( "fs_compute_bitstructs:  Too many blocks in rgrp to "
 			"fit into available bitmap.\n");
+		free(rgd->rd_bits);
 		return -1;
 	}
 
@@ -107,6 +109,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
 			rgd->rd_bits[length - 1].bi_len,
 			GFS_NBBY,
 			rgd->rd_ri.ri_data);
+		free(rgd->rd_bits);
 		return -1;
 	}
 
@@ -114,11 +117,13 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
 	if(!(rgd->rd_bh = (osi_buf_t **)malloc(length * sizeof(osi_buf_t *)))) {
 		log_err("Unable to allocate osi_buf structure\n");
 		stack;
+		free(rgd->rd_bits);
 		return -1;
 	}
 	if(!memset(rgd->rd_bh, 0, length * sizeof(osi_buf_t *))) {
 		log_err("Unable to zero osi_buf structure\n");
 		stack;
+		free(rgd->rd_bits);
 		return -1;
 	}
 
diff --git a/gfs/gfs_fsck/super.c b/gfs/gfs_fsck/super.c
index 55ff997..840a6c5 100644
--- a/gfs/gfs_fsck/super.c
+++ b/gfs/gfs_fsck/super.c
@@ -356,7 +356,7 @@ uint32 rgrplength2bitblocks(struct fsck_sb *sdp, uint32 length)
  * Other RGs found after that will be considered "extra."
  */
 int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
-						int *num_rgs)
+			unsigned int *num_rgs)
 {
 	osi_buf_t *bh; /* buffer handle */
 	uint64 subdevice_size, fs_total_size;
@@ -380,6 +380,8 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 	uint64_t last_known_ri_addr = 0, prev_known_ri_addr = 0;
 	uint32_t last_known_ri_length = 0;
 	uint32_t last_known_ri_data = 0;
+	int section3_bump_size = 0;
+	uint64 start_block, end_block = 0;
 
 	osi_list_init(ret_list);
 	*num_rgs = 0;
@@ -515,8 +517,6 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 	/* ----------------------------------------------------------------- */
 	subdevice_size = sdp->jindex->ji_addr; /* addr of first journal */;
 	for (subd = 0; subd < 2; subd++) {
-		uint64 start_block;
-
 		if (!subd)
 			start_block = (GFS_SB_ADDR >> sdp->fsb2bb_shift) + 1;
 		else
@@ -620,7 +620,13 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 					blok = tmpndx.ri_addr - 1; /* go by the index */
 					log_debug("I(0x%" PRIx64 ")\n", blok);
 				}
-				else {
+				/* If this is the second section, we know for sure that */
+				/* the block length can't be smaller than section 1's   */
+				/* rg length. Might as well skip ahead.                 */
+				else if (subd == 1) {
+					blok += shortest_dist_btwn_rgs[0];
+					log_debug("2(0x%" PRIx64 ")\n", blok);
+				} else {
 					blok += tmp_rgrp.rg_useddi + tmp_rgrp.rg_free;
 					log_debug("R(0x%" PRIx64 ")\n", blok);
 				}
@@ -661,6 +667,37 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 		log_debug("Section %d: distance between RGs: 0x%" PRIx64 "\n",
 				 subd + 1, shortest_dist_btwn_rgs[subd]);
 		log_debug("Section size: 0x%" PRIx64 "\n", subdevice_size);
+		/* If our rindex was all bad, we may have an improper count of RGs per
+		   section.  We may also not know where the third section should start.
+		   We need those for later. */
+		if (subd == 0) {
+			if (shortest_dist_btwn_rgs[subd] != 0) {
+				unsigned long long blocks_b4_sb;
+				unsigned long long index_entries;
+
+				blocks_b4_sb = (16 * (4096 / sdp->sb.sb_bsize));
+				index_entries = (subdevice_size - blocks_b4_sb) /
+					shortest_dist_btwn_rgs[subd];
+				if (index_entries_per_subd != index_entries) {
+					log_debug("rindex entries per section "
+						  "changed from %lld to %lld\n",
+						  index_entries_per_subd,
+						  index_entries);
+					index_entries_per_subd = index_entries;
+				}
+			}
+			if (fs_size_from_rgindex == 0) {
+				fs_size_from_rgindex = (sdp->jindex->ji_addr +
+							total_journal_space) +
+					subdevice_size;
+				log_debug("Fixed zero fs_size_from_rgindex "
+					  "due to rindex corruption.\n");
+				log_debug("New fs_size_from_rgindex value: "
+					  "%lld (0x%" PRIx64 ")\n",
+					  fs_size_from_rgindex,
+					  fs_size_from_rgindex);
+			}
+		}
 	} /* for subd */
 	number_of_rgs = 0; /* reset this because it is reused below */
 	/* ----------------------------------------------------------------- */
@@ -674,14 +711,12 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 	/* rgindex and hope to God it's correct.  That's the only way we're  */
 	/* going to be able to recover RGs in the third section.             */
 	/* ----------------------------------------------------------------- */
-	prev_rgd = NULL;
 	block_bump = first_rg_dist[0];
 	corrupt_rgs = 0;
 	for (subd = 0; subd < 3; subd++) { /* third subdevice is for all RGs
-										  extended past the normal 2 with
-										  gfs_grow, etc. */
-		uint64 start_block, end_block;
-
+					      extended past the normal 2 with
+					      gfs_grow, etc. */
+		prev_rgd = NULL;
 		if (subd == 0) {
 			start_block = (GFS_SB_ADDR >> sdp->fsb2bb_shift) + 1;
 			end_block = subdevice_size - 1;
@@ -698,8 +733,18 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 				end_block = start_block + subdevice_size - 1; /* go to end */
 		}
 		else {
-			start_block = end_block + 1;
-			end_block = fs_total_size;
+			/* Section 3 should start the block after section 2.  However,     */
+			/* gfs_grow sometimes foolishly decides to put it as much as three */
+			/* blocks early. So we need to check for this special case.        */
+			for (start_block = end_block - 2;
+			     start_block < end_block + 1; start_block++) {
+				error = get_and_read_buf(sdp, start_block, &bh, 0);
+				rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
+				relse_buf(sdp, bh); /* release the read buffer */
+				if (rg_was_fnd)
+					break;
+			}
+			end_block = fs_total_size - 1;
 			if (start_block + GFS_NBBY >= end_block)
 				break;
 		}
@@ -717,6 +762,16 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 			}
 			rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
 			relse_buf(sdp, bh); /* release the read buffer */
+			if (!rg_was_fnd && subd == 2) {
+				if (section3_bump_size) {
+					log_warn("Lost track of Section 3 rg length.\n");
+					blok -= block_bump; /* back up in case we overshot it */
+					section3_bump_size = 0;
+					block_bump = 1;
+				}
+				if (block_bump == 1)
+					continue;
+			}
 			/* ------------------------------------------------------------- */
 			/* For the first and second subdevice, we know the RG size.      */
 			/* Since we're bumping by that amount, this better be an RG.     */
@@ -774,11 +829,7 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 				/*prev_rgd->rd_ri.ri_data = block_bump;*/
 			}
 			number_of_rgs++;
-			log_warn("%c RG %d at block 0x%" PRIX64 " %s",
-					 (rg_was_fnd ? ' ' : '*'), number_of_rgs, blok,
-					 (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
 			rgs_per_subd++;
-			prev_rgd = calc_rgd;
 			block_of_last_rg = blok;
 			if (subd == 2) { /* if beyond the normal RGs into gfs_grow RGs  */
 				/* -------------------------------------------------------- */
@@ -794,26 +845,49 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 				/* find an entry that has the smallest address greater than */
 				/* the block we're on (blok).                               */
 				/* -------------------------------------------------------- */
-				uint64_t rgndx_next_block;
+				if (section3_bump_size == 0) {
+					uint64_t rgndx_next_block, highest_riaddr;
 
-				rgndx_next_block = end_block;
-				for (rgi = 0; ; rgi++) {
-					error = readi(sdp->riinode, (char *)&buf,
-								  rgi * sizeof(struct gfs_rindex),
-								  sizeof(struct gfs_rindex));
-					if (!error)      /* if end of the rgindex */
-						break;        /* stop processing for more RGs */
-					gfs_rindex_in(&tmpndx, (char *)&buf);
-					/* if this index entry is the next RG physically */
-					if (tmpndx.ri_addr > blok &&
-						tmpndx.ri_addr < rgndx_next_block) {
-						rgndx_next_block = tmpndx.ri_addr; /* remember it */
+					rgndx_next_block = end_block;
+					highest_riaddr = 0;
+					for (rgi = 0; ; rgi++) {
+						error = readi(sdp->riinode, (char *)&buf,
+							      rgi * sizeof(struct gfs_rindex),
+							      sizeof(struct gfs_rindex));
+						if (!error)      /* if end of the rgindex */
+							break;        /* stop processing for more RGs */
+						gfs_rindex_in(&tmpndx, (char *)&buf);
+						/* if this index entry is the next RG physically */
+						if (tmpndx.ri_addr > blok &&
+						    tmpndx.ri_addr < rgndx_next_block) {
+							rgndx_next_block = tmpndx.ri_addr; /* remember it */
+						}
+						if (tmpndx.ri_addr > highest_riaddr)
+							highest_riaddr= tmpndx.ri_addr;
 					}
-				}
-				block_bump = rgndx_next_block - blok;
-				if (rgndx_next_block == end_block) { /* if no more RGs */
-					log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
-					break;                 /* stop processing */
+					/* A special exception must be made for the last RG because we */
+					/* won't have a "next highest" entry in the rindex.            */
+					if (blok == highest_riaddr)
+						block_bump = end_block - blok + 1;
+					else if (rgndx_next_block == end_block) {
+						if (block_bump != 1) {
+							log_warn("\nUnable to use rindex; "
+								 "doing block-by-block search.\n");
+							log_warn("This will be slow, so be patient.\n");
+							rgndx_next_block = blok + 1;
+							block_bump = 1;
+						} else {
+							if (prev_rgd &&
+							    block_bump != blok -
+							    prev_rgd->rd_ri.ri_addr) {
+								log_warn("I think I figured it out.\n");
+								block_bump = blok -
+									prev_rgd->rd_ri.ri_addr;
+								section3_bump_size = block_bump;
+							}
+						}
+					} else
+						block_bump = rgndx_next_block - blok;
 				}
 			}
 			else {
@@ -822,8 +896,20 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
 				else
 					block_bump = shortest_dist_btwn_rgs[subd];
 			}
-			if (block_bump != 1)
-				log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
+			if (block_bump == 1 && prev_rgd && subd == 2) {
+				uint64_t last_distance = blok - prev_rgd->rd_ri.ri_addr;
+
+				error = get_and_read_buf(sdp, blok + last_distance, &bh, 0);
+				rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
+				relse_buf(sdp, bh); /* release the read buffer */
+				if (rg_was_fnd)
+					block_bump = last_distance;
+			}
+			log_warn("%c RG %d at block 0x%" PRIX64 " %s",
+					 (rg_was_fnd ? ' ' : '*'), number_of_rgs, blok,
+					 (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
+			log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
+			prev_rgd = calc_rgd;
 		} /* for blocks in subdevice */
 	} /* for subdevices */
 	/* ------------------------------------------------------------------- */
@@ -886,7 +972,7 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
  *          what we think the rgindex should really look like.
  */
 int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
-						  int *num_rgs)
+			  unsigned int *num_rgs)
 {
 	osi_buf_t *bh; /* buffer handle */
 	uint64 subdevice_size, adjust_subdevice_size, fs_total_size;
@@ -896,7 +982,7 @@ int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
 	int error;
 	int rgi, rgs_per_subd;
 	uint64 subdevice_start;
-	uint64 addr, prev_addr, length, prev_length;
+	uint64 addr = 0, prev_addr, length = 0, prev_length;
 	uint64 blocks;
 	struct fsck_rgrp *calc_rgd;
 	char rgindex_buf_ondisk[sizeof(struct gfs_rindex)];
@@ -1019,8 +1105,9 @@ int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
 		gfs_rindex_out(&calc_rgd->rd_ri, rgindex_buf_ondisk);
 		/* Note: rgindex_buf_ondisk is ONLY used for debug to see what the
 		   entry would look like on disk. */
-		hexdump(rgi*sizeof(struct gfs_rindex), rgindex_buf_ondisk,
-				sizeof(struct gfs_rindex));
+		hexdump(rgi*sizeof(struct gfs_rindex),
+			(unsigned char *)rgindex_buf_ondisk,
+			sizeof(struct gfs_rindex));
 	} /* for */
 	relse_buf(sdp, bh); /* release the read buffer if we have one */
 	return 0;
@@ -1127,6 +1214,8 @@ int ri_update(struct fsck_sb *sdp)
 				log_err("Unable to read resource group index #%u.\n", rg);
 				goto fail;
 			}
+			if (trust_lvl != blind_faith && osi_list_empty(&expected_rglist))
+				break;
 			
 			rgd = (struct fsck_rgrp *)malloc(sizeof(struct fsck_rgrp));
 			memset(rgd, 0, sizeof(struct fsck_rgrp));
@@ -1172,7 +1261,7 @@ int ri_update(struct fsck_sb *sdp)
 				free(expected_rgd);
 			} /* if we can't trust the rg index */
 			else { /* blind faith -- just check for the gfs_grow problem */
-				if (rgd->rd_ri.ri_data == 4294967292) {
+				if (rgd->rd_ri.ri_data == (uint32_t)-4) {
 					if (!fix_grow_problems) {
 						log_err("A problem with the rindex file caused by gfs_grow was detected.\n");
 						if(query(sdp, "Fix the rindex problem? (y/n)"))


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-12-22 21:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-22 21:31 cluster: RHEL5 - gfs: improve gfs_fsck rindex repair code Bob Peterson

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).