public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: RHEL5 - gfs: improve gfs_fsck rindex repair code
@ 2008-12-22 21:31 Bob Peterson
0 siblings, 0 replies; only message in thread
From: Bob Peterson @ 2008-12-22 21:31 UTC (permalink / raw)
To: cluster-cvs-relay
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=70bc706890e89f6a51728e3c5b6f267fb0323acd
Commit: 70bc706890e89f6a51728e3c5b6f267fb0323acd
Parent: 0e0a8e2e583f4a48d1fb865e2fc46b8b11362e8a
Author: Bob Peterson <rpeterso@redhat.com>
AuthorDate: Fri Dec 19 16:27:50 2008 -0600
Committer: Bob Peterson <rpeterso@redhat.com>
CommitterDate: Mon Dec 22 15:29:50 2008 -0600
gfs: improve gfs_fsck rindex repair code
bz 442271 - GFS: gfs_fsck bugs found in rindex repair code
This patch makes improvements and fixes some bugs in gfs_fsck's
rindex repair code. Basically, if RGs were damaged, especially
in the third section (i.e. RGs added by gfs_grow), gfs_fsck did
not properly locate the RG boundaries. Also, if the rindex was
completely zeroed out, gfs_fsck did not recover it in cases where
the file system had been extended.
---
gfs/gfs_fsck/rgrp.c | 5 ++
gfs/gfs_fsck/super.c | 169 ++++++++++++++++++++++++++++++++++++++------------
2 files changed, 134 insertions(+), 40 deletions(-)
diff --git a/gfs/gfs_fsck/rgrp.c b/gfs/gfs_fsck/rgrp.c
index 6ef6f74..429f515 100644
--- a/gfs/gfs_fsck/rgrp.c
+++ b/gfs/gfs_fsck/rgrp.c
@@ -51,6 +51,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
return -1;
}
if(!memset(rgd->rd_bits, 0, length * sizeof(fs_bitmap_t))) {
+ free(rgd->rd_bits);
log_err("Unable to zero bitmap structure\n");
stack;
return -1;
@@ -92,6 +93,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
if(bytes_left){
log_err( "fs_compute_bitstructs: Too many blocks in rgrp to "
"fit into available bitmap.\n");
+ free(rgd->rd_bits);
return -1;
}
@@ -107,6 +109,7 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
rgd->rd_bits[length - 1].bi_len,
GFS_NBBY,
rgd->rd_ri.ri_data);
+ free(rgd->rd_bits);
return -1;
}
@@ -114,11 +117,13 @@ int fs_compute_bitstructs(struct fsck_rgrp *rgd)
if(!(rgd->rd_bh = (osi_buf_t **)malloc(length * sizeof(osi_buf_t *)))) {
log_err("Unable to allocate osi_buf structure\n");
stack;
+ free(rgd->rd_bits);
return -1;
}
if(!memset(rgd->rd_bh, 0, length * sizeof(osi_buf_t *))) {
log_err("Unable to zero osi_buf structure\n");
stack;
+ free(rgd->rd_bits);
return -1;
}
diff --git a/gfs/gfs_fsck/super.c b/gfs/gfs_fsck/super.c
index 55ff997..840a6c5 100644
--- a/gfs/gfs_fsck/super.c
+++ b/gfs/gfs_fsck/super.c
@@ -356,7 +356,7 @@ uint32 rgrplength2bitblocks(struct fsck_sb *sdp, uint32 length)
* Other RGs found after that will be considered "extra."
*/
int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
- int *num_rgs)
+ unsigned int *num_rgs)
{
osi_buf_t *bh; /* buffer handle */
uint64 subdevice_size, fs_total_size;
@@ -380,6 +380,8 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
uint64_t last_known_ri_addr = 0, prev_known_ri_addr = 0;
uint32_t last_known_ri_length = 0;
uint32_t last_known_ri_data = 0;
+ int section3_bump_size = 0;
+ uint64 start_block, end_block = 0;
osi_list_init(ret_list);
*num_rgs = 0;
@@ -515,8 +517,6 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
/* ----------------------------------------------------------------- */
subdevice_size = sdp->jindex->ji_addr; /* addr of first journal */;
for (subd = 0; subd < 2; subd++) {
- uint64 start_block;
-
if (!subd)
start_block = (GFS_SB_ADDR >> sdp->fsb2bb_shift) + 1;
else
@@ -620,7 +620,13 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
blok = tmpndx.ri_addr - 1; /* go by the index */
log_debug("I(0x%" PRIx64 ")\n", blok);
}
- else {
+ /* If this is the second section, we know for sure that */
+ /* the block length can't be smaller than section 1's */
+ /* rg length. Might as well skip ahead. */
+ else if (subd == 1) {
+ blok += shortest_dist_btwn_rgs[0];
+ log_debug("2(0x%" PRIx64 ")\n", blok);
+ } else {
blok += tmp_rgrp.rg_useddi + tmp_rgrp.rg_free;
log_debug("R(0x%" PRIx64 ")\n", blok);
}
@@ -661,6 +667,37 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
log_debug("Section %d: distance between RGs: 0x%" PRIx64 "\n",
subd + 1, shortest_dist_btwn_rgs[subd]);
log_debug("Section size: 0x%" PRIx64 "\n", subdevice_size);
+ /* If our rindex was all bad, we may have an improper count of RGs per
+ section. We may also not know where the third section should start.
+ We need those for later. */
+ if (subd == 0) {
+ if (shortest_dist_btwn_rgs[subd] != 0) {
+ unsigned long long blocks_b4_sb;
+ unsigned long long index_entries;
+
+ blocks_b4_sb = (16 * (4096 / sdp->sb.sb_bsize));
+ index_entries = (subdevice_size - blocks_b4_sb) /
+ shortest_dist_btwn_rgs[subd];
+ if (index_entries_per_subd != index_entries) {
+ log_debug("rindex entries per section "
+ "changed from %lld to %lld\n",
+ index_entries_per_subd,
+ index_entries);
+ index_entries_per_subd = index_entries;
+ }
+ }
+ if (fs_size_from_rgindex == 0) {
+ fs_size_from_rgindex = (sdp->jindex->ji_addr +
+ total_journal_space) +
+ subdevice_size;
+ log_debug("Fixed zero fs_size_from_rgindex "
+ "due to rindex corruption.\n");
+ log_debug("New fs_size_from_rgindex value: "
+ "%lld (0x%" PRIx64 ")\n",
+ fs_size_from_rgindex,
+ fs_size_from_rgindex);
+ }
+ }
} /* for subd */
number_of_rgs = 0; /* reset this because it is reused below */
/* ----------------------------------------------------------------- */
@@ -674,14 +711,12 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
/* rgindex and hope to God it's correct. That's the only way we're */
/* going to be able to recover RGs in the third section. */
/* ----------------------------------------------------------------- */
- prev_rgd = NULL;
block_bump = first_rg_dist[0];
corrupt_rgs = 0;
for (subd = 0; subd < 3; subd++) { /* third subdevice is for all RGs
- extended past the normal 2 with
- gfs_grow, etc. */
- uint64 start_block, end_block;
-
+ extended past the normal 2 with
+ gfs_grow, etc. */
+ prev_rgd = NULL;
if (subd == 0) {
start_block = (GFS_SB_ADDR >> sdp->fsb2bb_shift) + 1;
end_block = subdevice_size - 1;
@@ -698,8 +733,18 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
end_block = start_block + subdevice_size - 1; /* go to end */
}
else {
- start_block = end_block + 1;
- end_block = fs_total_size;
+ /* Section 3 should start the block after section 2. However, */
+ /* gfs_grow sometimes foolishly decides to put it as much as three */
+ /* blocks early. So we need to check for this special case. */
+ for (start_block = end_block - 2;
+ start_block < end_block + 1; start_block++) {
+ error = get_and_read_buf(sdp, start_block, &bh, 0);
+ rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
+ relse_buf(sdp, bh); /* release the read buffer */
+ if (rg_was_fnd)
+ break;
+ }
+ end_block = fs_total_size - 1;
if (start_block + GFS_NBBY >= end_block)
break;
}
@@ -717,6 +762,16 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
}
rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
relse_buf(sdp, bh); /* release the read buffer */
+ if (!rg_was_fnd && subd == 2) {
+ if (section3_bump_size) {
+ log_warn("Lost track of Section 3 rg length.\n");
+ blok -= block_bump; /* back up in case we overshot it */
+ section3_bump_size = 0;
+ block_bump = 1;
+ }
+ if (block_bump == 1)
+ continue;
+ }
/* ------------------------------------------------------------- */
/* For the first and second subdevice, we know the RG size. */
/* Since we're bumping by that amount, this better be an RG. */
@@ -774,11 +829,7 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
/*prev_rgd->rd_ri.ri_data = block_bump;*/
}
number_of_rgs++;
- log_warn("%c RG %d at block 0x%" PRIX64 " %s",
- (rg_was_fnd ? ' ' : '*'), number_of_rgs, blok,
- (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
rgs_per_subd++;
- prev_rgd = calc_rgd;
block_of_last_rg = blok;
if (subd == 2) { /* if beyond the normal RGs into gfs_grow RGs */
/* -------------------------------------------------------- */
@@ -794,26 +845,49 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
/* find an entry that has the smallest address greater than */
/* the block we're on (blok). */
/* -------------------------------------------------------- */
- uint64_t rgndx_next_block;
+ if (section3_bump_size == 0) {
+ uint64_t rgndx_next_block, highest_riaddr;
- rgndx_next_block = end_block;
- for (rgi = 0; ; rgi++) {
- error = readi(sdp->riinode, (char *)&buf,
- rgi * sizeof(struct gfs_rindex),
- sizeof(struct gfs_rindex));
- if (!error) /* if end of the rgindex */
- break; /* stop processing for more RGs */
- gfs_rindex_in(&tmpndx, (char *)&buf);
- /* if this index entry is the next RG physically */
- if (tmpndx.ri_addr > blok &&
- tmpndx.ri_addr < rgndx_next_block) {
- rgndx_next_block = tmpndx.ri_addr; /* remember it */
+ rgndx_next_block = end_block;
+ highest_riaddr = 0;
+ for (rgi = 0; ; rgi++) {
+ error = readi(sdp->riinode, (char *)&buf,
+ rgi * sizeof(struct gfs_rindex),
+ sizeof(struct gfs_rindex));
+ if (!error) /* if end of the rgindex */
+ break; /* stop processing for more RGs */
+ gfs_rindex_in(&tmpndx, (char *)&buf);
+ /* if this index entry is the next RG physically */
+ if (tmpndx.ri_addr > blok &&
+ tmpndx.ri_addr < rgndx_next_block) {
+ rgndx_next_block = tmpndx.ri_addr; /* remember it */
+ }
+ if (tmpndx.ri_addr > highest_riaddr)
+ highest_riaddr= tmpndx.ri_addr;
}
- }
- block_bump = rgndx_next_block - blok;
- if (rgndx_next_block == end_block) { /* if no more RGs */
- log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
- break; /* stop processing */
+ /* A special exception must be made for the last RG because we */
+ /* won't have a "next highest" entry in the rindex. */
+ if (blok == highest_riaddr)
+ block_bump = end_block - blok + 1;
+ else if (rgndx_next_block == end_block) {
+ if (block_bump != 1) {
+ log_warn("\nUnable to use rindex; "
+ "doing block-by-block search.\n");
+ log_warn("This will be slow, so be patient.\n");
+ rgndx_next_block = blok + 1;
+ block_bump = 1;
+ } else {
+ if (prev_rgd &&
+ block_bump != blok -
+ prev_rgd->rd_ri.ri_addr) {
+ log_warn("I think I figured it out.\n");
+ block_bump = blok -
+ prev_rgd->rd_ri.ri_addr;
+ section3_bump_size = block_bump;
+ }
+ }
+ } else
+ block_bump = rgndx_next_block - blok;
}
}
else {
@@ -822,8 +896,20 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
else
block_bump = shortest_dist_btwn_rgs[subd];
}
- if (block_bump != 1)
- log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
+ if (block_bump == 1 && prev_rgd && subd == 2) {
+ uint64_t last_distance = blok - prev_rgd->rd_ri.ri_addr;
+
+ error = get_and_read_buf(sdp, blok + last_distance, &bh, 0);
+ rg_was_fnd = (!check_type(bh, GFS_METATYPE_RG));
+ relse_buf(sdp, bh); /* release the read buffer */
+ if (rg_was_fnd)
+ block_bump = last_distance;
+ }
+ log_warn("%c RG %d at block 0x%" PRIX64 " %s",
+ (rg_was_fnd ? ' ' : '*'), number_of_rgs, blok,
+ (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
+ log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
+ prev_rgd = calc_rgd;
} /* for blocks in subdevice */
} /* for subdevices */
/* ------------------------------------------------------------------- */
@@ -886,7 +972,7 @@ int gfs_rgindex_rebuild(struct fsck_sb *sdp, osi_list_t *ret_list,
* what we think the rgindex should really look like.
*/
int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
- int *num_rgs)
+ unsigned int *num_rgs)
{
osi_buf_t *bh; /* buffer handle */
uint64 subdevice_size, adjust_subdevice_size, fs_total_size;
@@ -896,7 +982,7 @@ int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
int error;
int rgi, rgs_per_subd;
uint64 subdevice_start;
- uint64 addr, prev_addr, length, prev_length;
+ uint64 addr = 0, prev_addr, length = 0, prev_length;
uint64 blocks;
struct fsck_rgrp *calc_rgd;
char rgindex_buf_ondisk[sizeof(struct gfs_rindex)];
@@ -1019,8 +1105,9 @@ int gfs_rgindex_calculate(struct fsck_sb *sdp, osi_list_t *ret_list,
gfs_rindex_out(&calc_rgd->rd_ri, rgindex_buf_ondisk);
/* Note: rgindex_buf_ondisk is ONLY used for debug to see what the
entry would look like on disk. */
- hexdump(rgi*sizeof(struct gfs_rindex), rgindex_buf_ondisk,
- sizeof(struct gfs_rindex));
+ hexdump(rgi*sizeof(struct gfs_rindex),
+ (unsigned char *)rgindex_buf_ondisk,
+ sizeof(struct gfs_rindex));
} /* for */
relse_buf(sdp, bh); /* release the read buffer if we have one */
return 0;
@@ -1127,6 +1214,8 @@ int ri_update(struct fsck_sb *sdp)
log_err("Unable to read resource group index #%u.\n", rg);
goto fail;
}
+ if (trust_lvl != blind_faith && osi_list_empty(&expected_rglist))
+ break;
rgd = (struct fsck_rgrp *)malloc(sizeof(struct fsck_rgrp));
memset(rgd, 0, sizeof(struct fsck_rgrp));
@@ -1172,7 +1261,7 @@ int ri_update(struct fsck_sb *sdp)
free(expected_rgd);
} /* if we can't trust the rg index */
else { /* blind faith -- just check for the gfs_grow problem */
- if (rgd->rd_ri.ri_data == 4294967292) {
+ if (rgd->rd_ri.ri_data == (uint32_t)-4) {
if (!fix_grow_problems) {
log_err("A problem with the rindex file caused by gfs_grow was detected.\n");
if(query(sdp, "Fix the rindex problem? (y/n)"))
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-12-22 21:31 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-22 21:31 cluster: RHEL5 - gfs: improve gfs_fsck rindex repair code Bob Peterson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).