[Cluster-devel] cluster/gfs2 convert/gfs2_convert.c fsck/Makef ...

rpeterso at sourceware.org rpeterso at sourceware.org
Tue May 1 16:43:41 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	rpeterso at sourceware.org	2007-05-01 17:43:39

Modified files:
	gfs2/convert   : gfs2_convert.c 
	gfs2/fsck      : Makefile fsck.h initialize.c main.c pass1.c 
	                 pass2.c 
	gfs2/libgfs2   : device_geometry.c fs_geometry.c fs_ops.c 
	                 libgfs2.h rgrp.c super.c 
	gfs2/mkfs      : main_mkfs.c 
Added files:
	gfs2/fsck      : rgrepair.c 

Log message:
	Resolves: bz 223893: gfs2_fsck unable to fix damaged RGs and RG indexes.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/convert/gfs2_convert.c.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/rgrepair.c.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/Makefile.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fsck.h.diff?cvsroot=cluster&r1=1.4&r2=1.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/initialize.c.diff?cvsroot=cluster&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/main.c.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/pass1.c.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/pass2.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/device_geometry.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/fs_geometry.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/fs_ops.c.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/libgfs2.h.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/rgrp.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/super.c.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/mkfs/main_mkfs.c.diff?cvsroot=cluster&r1=1.11&r2=1.12

--- cluster/gfs2/convert/gfs2_convert.c	2006/11/08 21:14:01	1.8
+++ cluster/gfs2/convert/gfs2_convert.c	2007/05/01 16:43:38	1.9
@@ -3,7 +3,7 @@
 **
 **  gfs2_convert - convert a gfs1 filesystem into a gfs2 filesystem.
 **
-**  Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2006-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -801,7 +801,7 @@
 	/* look like a directory, temporarily.                               */
 	sbp->md.riinode->i_di.di_mode &= ~S_IFMT;
 	sbp->md.riinode->i_di.di_mode |= S_IFDIR; 
-	if (ri_update(sbp, &rgcount)){
+	if (ri_update(sbp, 0, &rgcount)){
 		log_crit("Unable to fill in resource group information.\n");
 		return -1;
 	}
@@ -1181,7 +1181,7 @@
 		/* Now delete the now-obsolete gfs1 files: */
 		remove_obsolete_gfs1(&sb2);
 		/* Now free all the in memory */
-		gfs2_rgrp_free(&sb2, updated);
+		gfs2_rgrp_free(&sb2.rglist, updated);
 		log_notice("Committing changes to disk.\n");
 		fflush(stdout);
 		/* Set filesystem type in superblock to gfs2.  We do this at the */
/cvs/cluster/cluster/gfs2/fsck/rgrepair.c,v  -->  standard output
revision 1.1
--- cluster/gfs2/fsck/rgrepair.c
+++ -	2007-05-01 17:43:39.609403000 +0100
@@ -0,0 +1,527 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "libgfs2.h"
+#include "osi_list.h"
+#include "fsck.h"
+
+int rindex_modified = FALSE;
+
+#define ri_equal(ondisk, expected, field) (ondisk.field == expected.field)
+
+#define ri_compare(rg, ondisk, expected, field, fmt)	\
+	if (ondisk.field != expected.field) { \
+		log_warn("rindex #%d " #field " discrepancy: index 0x%" fmt \
+			 " != expected: 0x%" fmt "\n",			\
+			 rg + 1, ondisk.field, expected.field);		\
+		ondisk.field = expected.field; \
+		rindex_modified = TRUE; \
+	}
+
+/*
+ * gfs2_rindex_rebuild - rebuild a corrupt Resource Group (RG) index manually
+ *                        where trust_lvl == distrust
+ * @sdp:      superblock data; sb_addr, device.length, rgrps, bsize and
+ *            md.riinode are read
+ * @ret_list: list head; re-initialized, then one rgrp_list is appended per RG
+ * @num_rgs:  on success, set to the number of RGs placed on ret_list
+ *
+ * Returns: 0 on success, -1 if memory runs out or too many RGs are damaged
+ *          (entries already on ret_list are left for the caller to free).
+ *
+ * If this routine is called, we can't trust the RG index to be sane, and the
+ * RGs don't agree with how mkfs would have built them by default, so we have
+ * no choice but to go through and count them by hand.  This is our last
+ * chance to remedy the situation.
+ *
+ * This routine tries to minimize performance impact by skipping through the
+ * filesystem at known increments when possible.
+ *
+ * Note: A GFS2 filesystem differs from a GFS1 file system in that there will
+ * only be ONE chunk (i.e. no artificial subdevices on either side of the
+ * journals).  With GFS1 the different chunks ("subdevices") could have
+ * different RG sizes; GFS2 always uses the same RG size determined by the
+ * original mkfs, so recovery is easier.
+ */
+int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
+			 int *num_rgs)
+{
+	struct gfs2_buffer_head *bh;
+	uint64_t shortest_dist_btwn_rgs;
+	uint64_t blk, block_of_last_rg;
+	uint64_t fwd_block, block_bump;
+	uint64_t first_rg_dist, initial_first_rg_dist;
+	struct rgrp_list *calc_rgd, *prev_rgd;
+	int number_of_rgs, rgi;
+	struct gfs2_rindex buf, tmpndx;
+	int rg_was_fnd = FALSE, corrupt_rgs = 0, bitmap_was_fnd;
+	osi_list_t *tmp;
+
+	osi_list_init(ret_list);
+	number_of_rgs = 0;
+	initial_first_rg_dist = first_rg_dist = sdp->sb_addr + 1;
+	block_of_last_rg = sdp->sb_addr + 1;
+	/* ------------------------------------------------------------- */
+	/* First, hunt and peck for the shortest distance between RGs.   */
+	/* Sample several of them because an RG that's been blasted may  */
+	/* look like twice the distance.  If we can find 6 of them, that */
+	/* should be enough to figure out the correct layout.            */
+	/* ------------------------------------------------------------- */
+	shortest_dist_btwn_rgs = sdp->device.length;
+	for (blk = sdp->sb_addr + 1;
+	     blk < sdp->device.length && number_of_rgs < 6;
+	     blk++) {
+		bh = bread(sdp, blk);
+		if ((blk == sdp->sb_addr + 1) ||
+		    (!gfs2_check_meta(bh, GFS2_METATYPE_RG))) {
+			log_debug("RG found at block 0x%" PRIx64 "\n", blk);
+			if (blk > sdp->sb_addr + 1) {
+				uint64_t rgdist;
+
+				rgdist = blk - block_of_last_rg;
+				log_debug("dist 0x%" PRIx64 " = 0x%" PRIx64
+					  " - 0x%" PRIx64, rgdist,
+					  blk, block_of_last_rg);
+				/* ----------------------------------------- */
+				/* We found an RG.  Check to see if we need  */
+				/* to set the first_rg_dist based on whether */
+				/* it's still at its initial value (i.e. the */
+				/* fs.)  The first rg distance is different  */
+				/* from the rest because of the superblock   */
+				/* and 64K dead space.                       */
+				/* ----------------------------------------- */
+				if (first_rg_dist == initial_first_rg_dist)
+					first_rg_dist = rgdist;
+				if (rgdist < shortest_dist_btwn_rgs) {
+					shortest_dist_btwn_rgs = rgdist;
+					log_debug("(shortest so far)\n");
+				}
+				else
+					log_debug("\n");
+			}
+			block_of_last_rg = blk;
+			number_of_rgs++;
+			blk += 250; /* skip ahead for performance */
+		}
+		brelse(bh, not_updated);
+	}
+	number_of_rgs = 0;
+	/* -------------------------------------------------------------- */
+	/* Sanity-check our first_rg_dist. If RG #2 got nuked, the        */
+	/* first_rg_dist would measure from #1 to #3, which would be bad. */
+	/* We need to take remedial measures to fix it (from the index).  */
+	/* -------------------------------------------------------------- */
+	log_debug("First RG distance: 0x%" PRIx64 "\n", first_rg_dist);
+	log_debug("Distance between RGs: 0x%" PRIx64 "\n",
+		  shortest_dist_btwn_rgs);
+	if (first_rg_dist >= shortest_dist_btwn_rgs +
+	    (shortest_dist_btwn_rgs / 4)) {
+		/* read in the second RG index entry for this subd. */
+		gfs2_readi(sdp->md.riinode, (char *)&buf,
+			   sizeof(struct gfs2_rindex),
+			   sizeof(struct gfs2_rindex));
+		gfs2_rindex_in(&tmpndx, (char *)&buf);
+		if (tmpndx.ri_addr > sdp->sb_addr + 1) { /* sanity check */
+			log_warn("RG 2 is damaged: getting dist from index: ");
+			first_rg_dist = tmpndx.ri_addr - (sdp->sb_addr + 1);
+			log_warn("0x%" PRIx64 "\n", first_rg_dist);
+		}
+		else {
+			log_warn("RG index 2 is damaged: extrapolating dist: ");
+			first_rg_dist = sdp->device.length -
+				(sdp->rgrps - 1) *
+				(sdp->device.length / sdp->rgrps);
+			log_warn("0x%" PRIx64 "\n", first_rg_dist);
+		}
+		log_debug("Adjusted first RG distance: 0x%" PRIx64 "\n",
+			  first_rg_dist);
+	} /* if first RG distance is within tolerance */
+	/* -------------------------------------------------------------- */
+	/* Now go through the RGs and verify their integrity, fixing as   */
+	/* needed.  Like the other scans here, stay below device.length.  */
+	/* -------------------------------------------------------------- */
+	prev_rgd = NULL;
+	block_bump = first_rg_dist;
+	for (blk = sdp->sb_addr + 1; blk < sdp->device.length;
+	     blk += block_bump) {
+		log_debug("Block 0x%" PRIx64 "\n", blk);
+		bh = bread(sdp, blk);
+		rg_was_fnd = (!gfs2_check_meta(bh, GFS2_METATYPE_RG));
+		brelse(bh, not_updated);
+		/* Allocate a new RG and index. */
+		calc_rgd = malloc(sizeof(struct rgrp_list));
+		if (!calc_rgd) {
+			log_crit("Can't allocate memory for rg repair.\n");
+			return -1;
+		}
+		memset(calc_rgd, 0, sizeof(struct rgrp_list));
+		osi_list_add_prev(&calc_rgd->list, ret_list);
+		calc_rgd->ri.ri_length = 1;
+		calc_rgd->ri.ri_addr = blk;
+		if (!rg_was_fnd) { /* if not an RG */
+			/* ------------------------------------------------- */
+			/* This SHOULD be an RG but isn't.                   */
+			/* ------------------------------------------------- */
+			corrupt_rgs++;
+			if (corrupt_rgs < 5)
+				log_debug("Missing or damaged RG at block %" 
+					  PRIu64 " (0x%" PRIx64 ")\n",
+					  blk, blk);
+			else {
+				log_crit("Error: too many bad RGs.\n");
+				return -1;
+			}
+		}
+		/* ------------------------------------------------ */
+		/* Now go through and count the bitmaps for this RG */
+		/* ------------------------------------------------ */
+		bitmap_was_fnd = FALSE;
+		for (fwd_block = blk + 1;
+		     fwd_block < sdp->device.length; 
+		     fwd_block++) {
+			bh = bread(sdp, fwd_block);
+			bitmap_was_fnd =
+				(!gfs2_check_meta(bh, GFS2_METATYPE_RB));
+			brelse(bh, not_updated);
+			if (bitmap_was_fnd) /* if a bitmap */
+				calc_rgd->ri.ri_length++;
+			else
+				break; /* end of bitmap, so call it quits. */
+		} /* for subsequent bitmaps */
+
+		gfs2_compute_bitstructs(sdp, calc_rgd);
+		log_debug("Memory allocated for rg at 0x%" PRIx64 ", bh: %p\n",
+			  calc_rgd->ri.ri_addr, (void *)calc_rgd->bh);
+		if (!calc_rgd->bh) {
+			log_crit("Can't allocate memory for bitmap repair.\n");
+			return -1;
+		}
+		calc_rgd->ri.ri_data0 = calc_rgd->ri.ri_addr +
+			calc_rgd->ri.ri_length;
+		if (prev_rgd) {
+			uint32_t rgblocks, bitblocks;
+
+			rgblocks = block_bump;
+			rgblocks2bitblocks(sdp->bsize, &rgblocks, &bitblocks);
+
+			prev_rgd->ri.ri_length = bitblocks;
+			prev_rgd->ri.ri_data = rgblocks;
+			prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data %
+				GFS2_NBBY;
+			prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data /
+				GFS2_NBBY;
+			log_debug("Prev ri_data set to: %" PRIx32 ".\n",
+				  prev_rgd->ri.ri_data);
+		}
+		number_of_rgs++;
+		log_warn("%c RG %d at block 0x%" PRIX64 " %s",
+			 (rg_was_fnd ? ' ' : '*'), number_of_rgs, blk,
+			 (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
+		prev_rgd = calc_rgd;
+		block_of_last_rg = blk;
+
+		if (blk == sdp->sb_addr + 1)
+			block_bump = first_rg_dist;
+		else
+			block_bump = shortest_dist_btwn_rgs;
+		if (block_bump != 1)
+			log_warn(" [length 0x%" PRIx64 "]\n", block_bump);
+	} /* for each rg block */
+	/* ----------------------------------------------------------------- */
+	/* If we got to the end of the fs, we still need to fix the          */
+	/* allocation information for the very last RG.                      */
+	/* ----------------------------------------------------------------- */
+	if (prev_rgd && !prev_rgd->ri.ri_data) {
+		uint32_t rgblocks, bitblocks;
+
+		rgblocks = block_bump;
+		rgblocks2bitblocks(sdp->bsize, &rgblocks, &bitblocks);
+
+		prev_rgd->ri.ri_length = bitblocks;
+		prev_rgd->ri.ri_data = rgblocks;
+		prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data % GFS2_NBBY;
+		prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data / GFS2_NBBY;
+		log_debug("Prev ri_data set to: %" PRIx32 ".\n",
+			  prev_rgd->ri.ri_data);
+		prev_rgd = NULL; /* make sure we don't use it later */
+	}
+	/* ------------------------------------------------------------ */
+	/* Dump the results (debug level), from the first entry after   */
+	/* the list head; starting at the head itself would print none. */
+	log_debug("RG index rebuilt as follows:\n");
+	for (tmp = ret_list->next, rgi = 0; tmp != ret_list;
+	     tmp = tmp->next, rgi++) {
+		calc_rgd = osi_list_entry(tmp, struct rgrp_list, list);
+		log_debug("%d: 0x%" PRIx64 " / %x / 0x%"
+			  PRIx64 " / 0x%x / 0x%x\n", rgi + 1,
+			  calc_rgd->ri.ri_addr, calc_rgd->ri.ri_length,
+			  calc_rgd->ri.ri_data0, calc_rgd->ri.ri_data,
+			  calc_rgd->ri.ri_bitbytes);
+	}
+	*num_rgs = number_of_rgs;
+	return 0;
+}
+
+/*
+ * gfs2_rindex_calculate - work out the rindex a default mkfs would produce
+ *                          (used when trust_lvl == open_minded)
+ *
+ * Re-runs the libgfs2 geometry and layout helpers that mkfs uses, without
+ * writing to disk, so the on-disk RG index entries can be sanity-checked.
+ *
+ * Returns: 0 (no failure path exists in this function as written)
+ * Sets:    *num_rgs to the number of whole gfs2_rindex records that fit in
+ *          the rindex file's di_size.
+ * NOTE(review): ret_list is only (re)initialized here; nothing visible in
+ *          this function appends to it -- confirm where it gets populated.
+ */
+int gfs2_rindex_calculate(struct gfs2_sbd *sdp, osi_list_t *ret_list,
+			   int *num_rgs)
+{
+	uint64_t rindex_bytes;
+
+	osi_list_init(ret_list);
+	/* Seed with the default RG size; compute_rgrp_layout adjusts it. */
+	sdp->rgsize = GFS2_DEFAULT_RGSIZE;
+	device_geometry(sdp);
+	fix_device_geometry(sdp);
+	/* Compute the default RG layout the way mkfs would have done. */
+	compute_rgrp_layout(sdp, FALSE);
+	build_rgrps(sdp, FALSE); /* FALSE: calculate only, no disk writes */
+	*num_rgs = 0;
+	log_debug("fs_total_size = 0x%" PRIX64 " blocks.\n",
+		  sdp->device.length);
+
+	/* Trust level is open-minded: take the RG count from the rindex */
+	/* file size, one RG per gfs2_rindex record.                     */
+	rindex_bytes = sdp->md.riinode->i_di.di_size;
+	*num_rgs = rindex_bytes / sizeof(struct gfs2_rindex);
+	log_warn("L2: number of rgs in the index = %d.\n", *num_rgs);
+	return 0;
+}
+
+/*
+ * rewrite_rg_block - offer to rewrite the header of a bad rg or bitmap block
+ * returns: 0 if the user agreed and the block was rewritten, otherwise 1
+ */
+int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
+		     uint64_t errblock)
+{
+	int idx = errblock - rg->ri.ri_addr; /* 0 == the rg header block */
+
+	log_err("Block #%"PRIu64" (0x%" PRIx64") (%d of %d) is neither"
+		" GFS2_METATYPE_RB nor GFS2_METATYPE_RG.\n",
+		rg->bh[idx]->b_blocknr, rg->bh[idx]->b_blocknr,
+		(int)idx+1, (int)rg->ri.ri_length);
+	if (!query(&opts, "Fix the RG? (y/n)"))
+		return 1;
+
+	log_err("Attempting to repair the RG.\n");
+	rg->bh[idx] = bread(sdp, rg->ri.ri_addr + idx);
+	if (idx == 0) {
+		/* First block: fresh rg header with rg_free = ri_data. */
+		memset(&rg->rg, 0, sizeof(struct gfs2_rgrp));
+		rg->rg.rg_header.mh_magic = GFS2_MAGIC;
+		rg->rg.rg_header.mh_type = GFS2_METATYPE_RG;
+		rg->rg.rg_header.mh_format = GFS2_FORMAT_RG;
+		rg->rg.rg_free = rg->ri.ri_data;
+		gfs2_rgrp_out(&rg->rg, rg->bh[idx]->b_data);
+	} else {
+		/* Any later block: stamp a bitmap (RB) meta header. */
+		struct gfs2_meta_header mh;
+		mh.mh_magic = GFS2_MAGIC;
+		mh.mh_type = GFS2_METATYPE_RB;
+		mh.mh_format = GFS2_FORMAT_RB;
+		gfs2_meta_header_out(&mh, rg->bh[idx]->b_data);
+	}
+	brelse(rg->bh[idx], updated);
+	return 0;
+}
+
+/*
+ * rg_repair - try to repair a damaged rg index (rindex) and its rgs
+ * trust_lvl - how much we trust the rindex file: blind_faith returns 0 at
+ *             once; open_minded compares it with gfs2_rindex_calculate();
+ *             distrust compares it with gfs2_rindex_rebuild().
+ * rg_count  - on success, set to the number of entries on sdp->rglist.
+ * returns: 0 on success (or blind_faith), -1 if this level cannot repair.
+ */
+int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
+{
+	int error, descrepencies;
+	osi_list_t expected_rglist;
+	int calc_rg_count, rgcount_from_index, rg;
+	osi_list_t *exp, *act; /* expected, actual */
+	struct gfs2_rindex buf;
+
+	if (trust_lvl == blind_faith)
+		return 0;
+	else if (trust_lvl == open_minded) { /* If we can't trust RG index */
+		/* Calculate our own RG index for comparison */
+		error = gfs2_rindex_calculate(sdp, &expected_rglist,
+					       &calc_rg_count);
+		if (error) { /* If calculated RGs don't match the fs */
+			gfs2_rgrp_free(&expected_rglist, not_updated);
+			return -1;
+		}
+	}
+	else if (trust_lvl == distrust) { /* If we can't trust RG index */
+		error = gfs2_rindex_rebuild(sdp, &expected_rglist,
+					     &calc_rg_count);
+		if (error) {
+			log_crit("Error rebuilding rg list.\n");
+			gfs2_rgrp_free(&expected_rglist, not_updated);
+			return -1;
+		}
+		sdp->rgrps = calc_rg_count;
+	}
+	/* Read in the rindex */
+	osi_list_init(&sdp->rglist); /* Just to be safe */
+	rindex_read(sdp, 0, &rgcount_from_index);
+	if (sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex)) {
+		log_warn("WARNING: rindex file is corrupt.\n");
+		gfs2_rgrp_free(&expected_rglist, not_updated);
+		gfs2_rgrp_free(&sdp->rglist, not_updated);
+		return -1;
+	}
+	log_warn("L%d: number of rgs expected     = %d.\n", trust_lvl + 1,
+		 sdp->rgrps);
+	if (calc_rg_count != sdp->rgrps) {
+		log_warn("L%d: They don't match; either (1) the fs was extended, (2) an odd\n", trust_lvl + 1);
+		log_warn("L%d: rg size was used, or (3) we have a corrupt rg index.\n", trust_lvl + 1);
+		gfs2_rgrp_free(&expected_rglist, not_updated);
+		gfs2_rgrp_free(&sdp->rglist, not_updated);
+		return -1;
+	}
+	/* ------------------------------------------------------------- */
+	/* Now compare the rindex to what we think it should be.         */
+	/* See how far off our expected values are.  If too much, abort. */
+	/* The theory is: if we calculated the index to have 32 RGs and  */
+	/* we have a large number that are completely wrong, we should   */
+	/* abandon this method of recovery and try a better one.         */
+	/* ------------------------------------------------------------- */
+	descrepencies = 0;
+	for (rg = 0, act = sdp->rglist.next, exp = expected_rglist.next;
+	     act != &sdp->rglist && exp != &expected_rglist;
+	     act = act->next, exp = exp->next, rg++) {
+		struct rgrp_list *expected, *actual;
+
+		expected = osi_list_entry(exp, struct rgrp_list, list);
+		actual = osi_list_entry(act, struct rgrp_list, list);
+		if (!ri_equal(actual->ri, expected->ri, ri_addr) ||
+		    !ri_equal(actual->ri, expected->ri, ri_length) ||
+		    !ri_equal(actual->ri, expected->ri, ri_data0) ||
+		    !ri_equal(actual->ri, expected->ri, ri_data) ||
+		    !ri_equal(actual->ri, expected->ri, ri_bitbytes)) {
+			descrepencies++;
+		}
+	}
+	if (trust_lvl < distrust && descrepencies > (trust_lvl * 8)) {
+		log_warn("Level %d didn't work.  Too many descepencies.\n",
+			 trust_lvl + 1);
+		log_warn("%d out of %d RGs did not match what was expected.\n",
+			 descrepencies, rg);
+		gfs2_rgrp_free(&expected_rglist, not_updated);
+		gfs2_rgrp_free(&sdp->rglist, not_updated);
+		return -1;
+	}
+	/* ------------------------------------------------------------- */
+	/* Second pass: report each mismatched field and copy in the     */
+	/* expected value.  Our rindex should be pretty predictable      */
+	/* unless we've grown, so fix the index before reading the rgs.  */
+	/* ------------------------------------------------------------- */
+	for (rg = 0, act = sdp->rglist.next, exp = expected_rglist.next;
+	     act != &sdp->rglist && exp != &expected_rglist;
+	     act = act->next, exp = exp->next, rg++) {
+		struct rgrp_list *expected, *actual;
+
+		expected = osi_list_entry(exp, struct rgrp_list, list);
+		actual = osi_list_entry(act, struct rgrp_list, list);
+		ri_compare(rg, actual->ri, expected->ri, ri_addr, PRIx64);
+		ri_compare(rg, actual->ri, expected->ri, ri_length, PRIx32);
+		ri_compare(rg, actual->ri, expected->ri, ri_data0, PRIx64);
+		ri_compare(rg, actual->ri, expected->ri, ri_data, PRIx32);
+		ri_compare(rg, actual->ri, expected->ri, ri_bitbytes,
+			   PRIx32);
+		/* ri_compare fixed the in-memory copy; offer to fix disk. */
+		if (rindex_modified) {
+			if (query(&opts, "Fix the index? (y/n)")) {
+				gfs2_rindex_out(&expected->ri, (char *)&buf);
+				gfs2_writei(sdp->md.riinode, (char *)&buf,
+					    rg * sizeof(struct gfs2_rindex),
+					    sizeof(struct gfs2_rindex));
+				actual->ri.ri_addr = expected->ri.ri_addr;
+				actual->ri.ri_length = expected->ri.ri_length;
+				actual->ri.ri_data0 = expected->ri.ri_data0;
+				actual->ri.ri_data = expected->ri.ri_data;
+				actual->ri.ri_bitbytes =
+					expected->ri.ri_bitbytes;
+				/* If our rindex was hosed, ri_length is bad */
+				/* Therefore, gfs2_compute_bitstructs might  */
+				/* have malloced the wrong length for bitmap */
+				/* buffers.  So we have to redo it.          */
+				if (actual->bh)
+					free(actual->bh);
+				if (actual->bits)
+					free(actual->bits);
+				gfs2_compute_bitstructs(sdp, actual);
+			}
+			else
+				log_err("RG index not fixed.\n");
+			rindex_modified = FALSE;
+			
+		}
+	}
+	/* ------------------------------------------------------------- */
+	/* Read the real RGs and check their integrity.                  */
+	/* Now we can somewhat trust the rindex and the RG addresses,    */
+	/* so let's read them in, check them and optionally fix them.    */
+	/* ------------------------------------------------------------- */
+	for (rg = 0, act = sdp->rglist.next; act != &sdp->rglist;
+	     act = act->next, rg++) {
+		struct rgrp_list *rgd;
+		uint64_t prev_err = 0, errblock;
+		int i;
+
+		/* Retry reading the rg, repairing one bad block per try;    */
+		/* give up if the same block fails twice in a row.           */
+		i = 0;
+		do {
+			rgd = osi_list_entry(act, struct rgrp_list, list);
+			errblock = gfs2_rgrp_read(sdp, rgd);
+			if (errblock) {
+				if (errblock == prev_err)
+					break;
+				prev_err = errblock;
+				rewrite_rg_block(sdp, rgd, errblock);
+			}
+			else {
+				gfs2_rgrp_relse(rgd, not_updated);
+				break;
+			}
+			i++;
+		} while (i < rgd->ri.ri_length);
+	}
+	*rg_count = rg;
+	gfs2_rgrp_free(&expected_rglist, not_updated);
+	gfs2_rgrp_free(&sdp->rglist, not_updated);
+	return 0;
+}
--- cluster/gfs2/fsck/Makefile	2007/04/30 11:22:16	1.8
+++ cluster/gfs2/fsck/Makefile	2007/05/01 16:43:38	1.9
@@ -1,7 +1,7 @@
 ###############################################################################
 ###############################################################################
 ##
-##  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+##  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 ##
 ##  This copyrighted material is made available to anyone wishing to use,
 ##  modify, copy, or redistribute it subject to the terms and conditions
@@ -18,7 +18,7 @@
 
 sources = main.c initialize.c pass1.c pass1b.c pass1c.c pass2.c pass3.c \
 	  pass4.c pass5.c util.c fs_recovery.c lost_n_found.c link.c \
-	  eattr.c hash.c inode_hash.c metawalk.c
+	  eattr.c hash.c inode_hash.c metawalk.c rgrepair.c
 
 CFLAGS += -D_FILE_OFFSET_BITS=64 -DHELPER_PROGRAM -DGFS_RELEASE_NAME=\"${RELEASE}\"
 CFLAGS += -MMD -Wall -O2
--- cluster/gfs2/fsck/fsck.h	2007/01/23 19:23:07	1.4
+++ cluster/gfs2/fsck/fsck.h	2007/05/01 16:43:38	1.5
@@ -1,7 +1,7 @@
 /*****************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -38,6 +38,24 @@
 
 };
 
+struct dir_status { /* scratch state for a check; see check_system_inode */
+	uint8_t dotdir:1;	/* presumably: "." entry seen -- TODO confirm */
+	uint8_t dotdotdir:1;	/* presumably: ".." entry seen -- TODO confirm */
+	struct gfs2_block_query q; /* filled in by gfs2_block_check() */
+	uint32_t entry_count;	/* presumably: entries counted -- TODO confirm */
+};
+
+enum rgindex_trust_level { /* how far to trust RG index; tried low->high */
+	blind_faith = 0, /* We'd like to trust the rgindex. We always used to
+			    before bz 179069. This should cover most cases. */
+	open_minded = 1, /* At least 1 RG is corrupt. Try to calculate what it
+			    should be, in a perfect world where our RGs are all
+			    on even boundaries. Blue sky. Chirping birds. */
+	distrust = 2   /* The world isn't perfect, our RGs are not on nice neat
+			  boundaries.  The fs must have been messed with by
+			  gfs2_grow or something.  Count the RGs by hand. */
+};
+
 int initialize(struct gfs2_sbd *sbp);
 void destroy(struct gfs2_sbd *sbp);
 int block_mounters(struct gfs2_sbd *sbp, int block_em);
@@ -48,6 +66,7 @@
 int pass3(struct gfs2_sbd *sbp);
 int pass4(struct gfs2_sbd *sbp);
 int pass5(struct gfs2_sbd *sbp);
+int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count);
 
 /* FIXME: Hack to get this going for pass2 - this should be pulled out
  * of pass1 and put somewhere else... */
--- cluster/gfs2/fsck/initialize.c	2007/01/23 19:23:07	1.7
+++ cluster/gfs2/fsck/initialize.c	2007/05/01 16:43:38	1.8
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -100,6 +100,8 @@
 		struct rgrp_list *rgd;
 
 		rgd = osi_list_entry(sdp->rglist.next, struct rgrp_list, list);
+		log_debug("Deleting rgd for 0x%p:  rgd=0x%p bits=0x%p\n",
+			  rgd->ri.ri_addr, rgd, rgd->bits);
 		osi_list_del(&rgd->list);
 		if(rgd->bits)
 			free(rgd->bits);
@@ -189,7 +191,6 @@
 	return -1;
 }
 
-
 /**
  * fill_super_block
  * @sdp:
@@ -204,6 +205,7 @@
 	struct gfs2_statfs_change sc;
 	int rgcount;
 	uint64_t addl_mem_needed;
+	enum rgindex_trust_level trust_lvl;
 
 	sync();
 
@@ -243,7 +245,8 @@
 	log_info("Initializing special inodes...\n");
 
 	/* Get master dinode */
-	sdp->master_dir = gfs2_load_inode(sdp, sdp->sd_sb.sb_master_dir.no_addr);
+	sdp->master_dir = gfs2_load_inode(sdp,
+					  sdp->sd_sb.sb_master_dir.no_addr);
 	/* Get root dinode */
 	sdp->md.rooti = gfs2_load_inode(sdp, sdp->sd_sb.sb_root_dir.no_addr);
 
@@ -255,8 +258,6 @@
 	sdp->md.next_inum = be64_to_cpu(inumbuf);
 
 	gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs);
-	/* Read inum entry into buffer */
-	/* FIXME finish this */
 	buf = malloc(sdp->md.statfs->i_di.di_size);
 	gfs2_readi(sdp->md.statfs, buf, 0, sdp->md.statfs->i_di.di_size);
 	/* call gfs2_inum_range_in() to retrieve range */
@@ -284,11 +285,22 @@
 		return -1;
 	}
 
-	if(ri_update(sdp, &rgcount)){
-		log_err("Unable to fill in resource group information.\n");
+	log_warn("Validating Resource Group index.\n");
+	for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) {
+		log_warn("Level %d RG check.\n", trust_lvl + 1);
+		if ((rg_repair(sdp, trust_lvl, &rgcount) == 0) &&
+		    (ri_update(sdp, 0, &rgcount) == 0)) {
+			log_err("(level %d passed)\n", trust_lvl + 1);
+			break;
+		}
+		else
+			log_err("(level %d failed)\n", trust_lvl + 1);
+	}
+	if (trust_lvl > distrust) {
+		log_err("RG recovery impossible; I can't fix this file system.\n");
 		goto fail;
 	}
-
+	log_info("%u resource groups found.\n", rgcount);
 	/*******************************************************************
 	 *******  Now, set boundary fields in the super block  *************
 	 *******************************************************************/
--- cluster/gfs2/fsck/main.c	2007/04/03 17:28:33	1.6
+++ cluster/gfs2/fsck/main.c	2007/05/01 16:43:38	1.7
@@ -1,7 +1,7 @@
 /*****************************************************************************
 ******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -192,6 +192,122 @@
 	}
 }
 
+/* Check system inode and verify it's marked "in use" in the bitmap:       */
+/* Should work for all system inodes: root, master, jindex, per_node, etc. */
+int check_system_inode(struct gfs2_inode *sysinode, const char *filename,
+		       void builder(struct gfs2_sbd *sbp),
+		       enum gfs2_mark_block mark)
+{
+	uint64_t iblock = 0;
+	struct dir_status ds = {0};
+
+	log_info("Checking system inode '%s'\n", filename);
+	if (sysinode) {
+		/* Note the system inode's block address so its state in
+		 * the block list can be checked below */
+		iblock = sysinode->i_di.di_num.no_addr;
+		log_info("System inode for '%s' is located at block %"
+			 PRIu64 " (0x%" PRIx64 ")\n", filename,
+			 iblock, iblock);
+		
+		/* FIXME: check this block's validity */
+
+		if(gfs2_block_check(bl, iblock, &ds.q)) {
+			log_crit("Can't get %s inode block %" PRIu64 " (0x%"
+				 PRIx64 ") from block list\n", filename,
+				 iblock, iblock);
+			return -1;
+		}
+		/* If the inode exists but the block is marked      */
+		/* free, we might be recovering from a corrupt      */
+		/* bitmap.  In that case, don't rebuild the inode.  */
+		/* Just reuse the inode and fix the bitmap.         */
+		if (ds.q.block_type == gfs2_block_free) {
+			log_info("The inode exists but the block is not marked 'in use'; fixing it.\n");
+			gfs2_block_set(bl, sysinode->i_di.di_num.no_addr,
+				       mark);
+			ds.q.block_type = mark;
+			if (mark == gfs2_inode_dir)
+				add_to_dir_list(sysinode->i_sbd,
+						sysinode->i_di.di_num.no_addr);
+		}
+	}
+	else
+		log_info("System inode for '%s' is missing.\n", filename);
+	/* If there are errors with the inode here, we need to
+	 * create a new inode and get it all setup - of course,
+	 * everything will be in lost+found then, but we *need* our
+	 * system inodes before we can do any of that. */
+	if(!sysinode || ds.q.block_type != mark) {
+		log_err("Invalid or missing %s system inode.\n", filename);
+		if (query(&opts, "Create new %s system inode? (y/n) ",
+			  filename)) {
+			builder(sysinode->i_sbd);
+			gfs2_block_set(bl, sysinode->i_di.di_num.no_addr,
+				       mark);
+			ds.q.block_type = mark;
+			if (mark == gfs2_inode_dir)
+				add_to_dir_list(sysinode->i_sbd,
+						sysinode->i_di.di_num.no_addr);
+		}
+		else {
+			log_err("Cannot continue without valid %s inode\n",
+				filename);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int check_system_inodes(struct gfs2_sbd *sdp)
+{
+	/*******************************************************************
+	 *******  Check the system inode integrity             *************
+	 *******************************************************************/
+	if (check_system_inode(sdp->master_dir, "master", build_master,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.rooti, "root", build_root,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.inum, "inum", build_inum,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.statfs, "statfs", build_statfs,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.jiinode, "jindex", build_jindex,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.riinode, "rindex", build_rindex,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.qinode, "quota", build_quota,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.pinode, "per_node", build_per_node,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	struct gfs2_sbd sb;
@@ -220,6 +336,9 @@
 	else
 		log_notice("Pass1 complete      \n");
 
+	/* Make sure the system inodes are okay & represented in the bitmap. */
+	check_system_inodes(sbp);
+
 	if (!fsck_abort) {
 		last_reported_block = 0;
 		pass = "pass 1b";
--- cluster/gfs2/fsck/pass1.c	2007/03/26 19:14:03	1.6
+++ cluster/gfs2/fsck/pass1.c	2007/05/01 16:43:38	1.7
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -471,7 +471,7 @@
 		return -1;
 	}
 
-	if(!(newdi = (struct dir_info *) malloc(sizeof(*newdi)))) {
+	if(!(newdi = (struct dir_info *) malloc(sizeof(struct dir_info)))) {
 		log_crit("Unable to allocate dir_info structure\n");
 		return -1;
 	}
--- cluster/gfs2/fsck/pass2.c	2007/01/23 19:23:07	1.5
+++ cluster/gfs2/fsck/pass2.c	2007/05/01 16:43:38	1.6
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,14 +27,6 @@
 
 #define MAX_FILENAME 256
 
-struct dir_status {
-	uint8_t dotdir:1;
-	uint8_t dotdotdir:1;
-	struct gfs2_block_query q;
-	uint32_t entry_count;
-};
-
-
 static int check_leaf(struct gfs2_inode *ip, uint64_t block,
 					  struct gfs2_buffer_head **lbh, void *private)
 {
@@ -336,10 +328,11 @@
 	   q.block_type != gfs2_inode_chr && q.block_type != gfs2_inode_fifo &&
 	   q.block_type != gfs2_inode_sock) {
 		log_err("Directory entry '%s' at block %" PRIu64 " (0x%" PRIx64
-				") in dir inode %" PRIu64 " (0x%" PRIx64
-				") has an invalid block type.\n", tmp_name,
-				de->de_inum.no_addr, de->de_inum.no_addr,
-				ip->i_di.di_num.no_addr, ip->i_di.di_num.no_addr);
+			") in dir inode %" PRIu64 " (0x%" PRIx64
+			") has an invalid block type: %d.\n", tmp_name,
+			de->de_inum.no_addr, de->de_inum.no_addr,
+			ip->i_di.di_num.no_addr, ip->i_di.di_num.no_addr,
+			q.block_type);
 
 		if(query(&opts, "Clear directory entry to non-inode block? (y/n) ")) {
 			/* FIXME: make sure all blocks referenced by
@@ -576,46 +569,12 @@
 	.check_eattr_entry = NULL,
 };
 
-int build_rooti(struct gfs2_sbd *sbp)
+/* Check system directory inode                                           */
+/* Should work for all system directories: root, master, jindex, per_node */
+int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
+		     void builder(struct gfs2_sbd *sbp))
 {
-	struct gfs2_inode *ip;
-
-	build_root(sbp);
-	ip = sbp->md.rooti;
-	/* Create a new inode ondisk */
-	gfs2_block_set(bl, ip->i_di.di_num.no_addr, gfs2_inode_dir);
-
-	/* FIXME need to remove old 'root' entry from the master dir,
-	 * and write a new one with this inode as the target */
-	dir_add(sbp->master_dir, "root", 4,	&(ip->i_di.di_num), DT_DIR);
-
-	sbp->md.rooti = ip;
-
-	dir_add(ip, ".", 1, &(ip->i_di.di_num), DT_DIR);
-	dir_add(ip, "..", 2, &ip->i_di.di_num, DT_DIR);
-
-	gfs2_block_set(bl, ip->i_di.di_num.no_addr, gfs2_inode_dir);
-	add_to_dir_list(sbp, ip->i_di.di_num.no_addr);
-
-	/* Attach lost+found to it */
-	lf_dip = createi(sbp->md.rooti, "lost+found", 00700, 0);
-
-	if(lf_dip){
-		inode_put(lf_dip, updated);
-		log_debug("Lost and Found directory inode is at block #%" PRIu64
-				  " (0x%" PRIx64 ").\n",
-				  lf_dip->i_di.di_num.no_addr, lf_dip->i_di.di_num.no_addr);
-	}
-	gfs2_block_set(bl, lf_dip->i_di.di_num.no_addr, gfs2_inode_dir);
-
-	add_to_dir_list(sbp, lf_dip->i_di.di_num.no_addr);
-	return 0;
-}
-
-/* Check root inode and verify it's in the bitmap */
-int check_root_dir(struct gfs2_sbd *sbp)
-{
-	uint64_t rootblock;
+	uint64_t iblock = 0;
 	struct dir_status ds = {0};
 	struct gfs2_buffer_head b, *bh = &b;
 	char *filename;
@@ -623,67 +582,37 @@
 	char tmp_name[256];
 	int update=0, error = 0;
 
-	if(sbp->md.rooti) {
-		/* Read in the root inode, look at its dentries, and start
-		 * reading through them */
-		rootblock = sbp->md.rooti->i_di.di_num.no_addr;
-		
-		/* FIXME: check this block's validity */
-
-		if(gfs2_block_check(bl, rootblock, &ds.q)) {
-			log_crit("Can't get root block %" PRIu64 " (0x%" PRIx64
-					 ") from block list\n", rootblock, rootblock);
-			/* FIXME: Need to check if the root block is out of
-			 * the fs range and if it is, rebuild it.  Still can
-			 * error out if the root block number is valid, but
-			 * gfs2_block_check fails */
-			return -1;
-		}
-
-		/* if there are errors with the root inode here, we need to
-		 * create a new root inode and get it all setup - of course,
-		 * everything will be in lost+found then, but we *need* a root inode
-		 * before we can do any of that.
-		 */
+	log_info("Checking system directory inode '%s'\n", dirname);
 
-	}
-	if(!sbp->md.rooti || ds.q.block_type != gfs2_inode_dir) {
-		log_err("Invalid or missing root inode in superblock.\n");
-		if(query(&opts, "Create new root inode? (y/n) ")) {
-			if(build_rooti(sbp)) {
-				stack;
-				return -1;
-			}
-		} else {
-			log_err("Cannot continue without valid root inode\n");
-			return -1;
+	if (sysinode) {
+		iblock = sysinode->i_di.di_num.no_addr;
+		if(gfs2_block_check(bl, iblock, &ds.q)) {
+			iblock = sysinode->i_di.di_num.no_addr;
 		}
 	}
-
-	rootblock = sbp->md.rooti->i_di.di_num.no_addr;
 	pass2_fxns.private = (void *) &ds;
 	if(ds.q.bad_block) {
 		/* First check that the directory's metatree is valid */
-		if(check_metatree(sbp->md.rooti, &pass2_fxns)) {
+		if(check_metatree(sysinode, &pass2_fxns)) {
 			stack;
 			return -1;
 		}
 	}
-	error = check_dir(sbp, rootblock, &pass2_fxns);
+	error = check_dir(sysinode->i_sbd, iblock, &pass2_fxns);
 	if(error < 0) {
 		stack;
 		return -1;
 	}
 	if (error > 0)
-		gfs2_block_set(bl, rootblock, gfs2_meta_inval);
+		gfs2_block_set(bl, iblock, gfs2_meta_inval);
 
-	bh = bhold(sbp->md.rooti->i_bh);
-	if(check_inode_eattr(sbp->md.rooti, &pass2_fxns)) {
+	bh = bhold(sysinode->i_bh);
+	if(check_inode_eattr(sysinode, &pass2_fxns)) {
 		stack;
 		return -1;
 	}
 	if(!ds.dotdir) {
-		log_err("No '.' entry found for root directory.\n");
+		log_err("No '.' entry found for %s directory.\n", dirname);
 		sprintf(tmp_name, ".");
 		filename_len = strlen(tmp_name);  /* no trailing NULL */
 		if(!(filename = malloc(sizeof(char) * filename_len))) {
@@ -698,31 +627,32 @@
 		}
 		memcpy(filename, tmp_name, filename_len);
 		log_warn("Adding '.' entry\n");
-		dir_add(sbp->md.rooti, filename, filename_len,
-				&(sbp->md.rooti->i_di.di_num), DT_DIR);
-		increment_link(sbp->md.rooti->i_sbd,
-					   sbp->md.rooti->i_di.di_num.no_addr);
+		dir_add(sysinode, filename, filename_len,
+				&(sysinode->i_di.di_num), DT_DIR);
+		increment_link(sysinode->i_sbd,
+					   sysinode->i_di.di_num.no_addr);
 		ds.entry_count++;
 		free(filename);
 		update = 1;
 	}
-	if(sbp->md.rooti->i_di.di_entries != ds.entry_count) {
-		log_err("Root inode %" PRIu64 " (0x%" PRIx64
-				"): Entries is %d - should be %d\n",
-				sbp->md.rooti->i_di.di_num.no_addr,
-				sbp->md.rooti->i_di.di_num.no_addr,
-				sbp->md.rooti->i_di.di_entries, ds.entry_count);
-		if(query(&opts, "Fix entries for root inode %" PRIu64 " (0x%" PRIx64
-				 ")? (y/n) ", sbp->md.rooti->i_di.di_num.no_addr,
-				 sbp->md.rooti->i_di.di_num.no_addr)) {
-			sbp->md.rooti->i_di.di_entries = ds.entry_count;
+	if(sysinode->i_di.di_entries != ds.entry_count) {
+		log_err("%s inode %" PRIu64 " (0x%" PRIx64
+			"): Entries is %d - should be %d\n", dirname,
+			sysinode->i_di.di_num.no_addr,
+			sysinode->i_di.di_num.no_addr,
+			sysinode->i_di.di_entries, ds.entry_count);
+		if(query(&opts, "Fix entries for %s inode %" PRIu64 " (0x%"
+			 PRIx64 ")? (y/n) ", dirname,
+			 sysinode->i_di.di_num.no_addr,
+			 sysinode->i_di.di_num.no_addr)) {
+			sysinode->i_di.di_entries = ds.entry_count;
 			log_warn("Entries updated\n");
 			update = 1;
 		} else {
 			log_err("Entries for inode %" PRIu64 " (0x%" PRIx64
 					") left out of sync\n",
-					sbp->md.rooti->i_di.di_num.no_addr,
-					sbp->md.rooti->i_di.di_num.no_addr);
+					sysinode->i_di.di_num.no_addr,
+					sysinode->i_di.di_num.no_addr);
 		}
 	}
 
@@ -730,6 +660,19 @@
 	return 0;
 }
 
+/**
+ * is_system_dir - determine if a given block is for a system directory.
+ */
+static inline int is_system_dir(struct gfs2_sbd *sbp, uint64_t block)
+{
+	if (block == sbp->md.rooti->i_di.di_num.no_addr ||
+	    block == sbp->md.jiinode->i_di.di_num.no_addr ||
+	    block == sbp->md.pinode->i_di.di_num.no_addr ||
+	    block == sbp->master_dir->i_di.di_num.no_addr)
+		return TRUE;
+	return FALSE;
+}
+
 /* What i need to do in this pass is check that the dentries aren't
  * pointing to invalid blocks...and verify the contents of each
  * directory. and start filling in the directory info structure*/
@@ -753,7 +696,20 @@
 	char tmp_name[256];
 	int error = 0;
 
-	if(check_root_dir(sbp)) {
+	/* Check all the system directory inodes. */
+	if (check_system_dir(sbp->md.jiinode, "jindex", build_jindex)) {
+		stack;
+		return -1;
+	}
+	if (check_system_dir(sbp->md.pinode, "per_node", build_per_node)) {
+		stack;
+		return -1;
+	}
+	if (check_system_dir(sbp->master_dir, "master", build_master)) {
+		stack;
+		return -1;
+	}
+	if (check_system_dir(sbp->md.rooti, "root", build_root)) {
 		stack;
 		return -1;
 	}
@@ -764,8 +720,8 @@
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return 0;
 
-		/* Skip the root inode - it's checked above */
-		if(i == sbp->md.rooti->i_di.di_num.no_addr)
+		/* Skip the system directory inodes - they're checked above */
+		if (is_system_dir(sbp, i))
 			continue;
 
 		if(gfs2_block_check(bl, i, &q)) {
--- cluster/gfs2/libgfs2/device_geometry.c	2006/04/27 19:25:46	1.1
+++ cluster/gfs2/libgfs2/device_geometry.c	2007/05/01 16:43:39	1.2
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -47,11 +47,8 @@
 		printf("\nPartition size = %"PRIu64"\n",
 		       bytes >> GFS2_BASIC_BLOCK_SHIFT);
 
-	device->nsubdev = 1;
-	zalloc(device->subdev, sizeof(struct subdevice));
-
-	device->subdev->start = 0;
-	device->subdev->length = bytes >> GFS2_BASIC_BLOCK_SHIFT;
+	device->start = 0;
+	device->length = bytes >> GFS2_BASIC_BLOCK_SHIFT;
 }
 
 /**
@@ -64,109 +61,43 @@
 fix_device_geometry(struct gfs2_sbd *sdp)
 {
 	struct device *device = &sdp->device;
-	unsigned int x;
 	unsigned int bbsize = sdp->bsize >> GFS2_BASIC_BLOCK_SHIFT;
 	uint64_t start, length;
 	unsigned int remainder;
 
 	if (sdp->debug) {
 		printf("\nDevice Geometry:  (in basic blocks)\n");
-		for (x = 0; x < device->nsubdev; x++)
-			printf("  SubDevice #%u: start = %"PRIu64", length = %"PRIu64", rgf_flags = 0x%.8X\n",
-			       x,
-			       device->subdev[x].start,
-			       device->subdev[x].length,
-			       device->subdev[x].rgf_flags);
+		printf("  start = %"PRIu64", length = %"PRIu64", rgf_flags = 0x%.8X\n",
+		       device->start,
+		       device->length,
+		       device->rgf_flags);
 	}
 
-	/* Make sure all the subdevices are aligned */
+	start = device->start;
+	length = device->length;
 
-	for (x = 0; x < device->nsubdev; x++) {
-		start = device->subdev[x].start;
-		length = device->subdev[x].length;
-
-		if (length < 1 << (20 - GFS2_BASIC_BLOCK_SHIFT))
-			die("subdevice %d is way too small (%"PRIu64" bytes)\n",
-			    x, length << GFS2_BASIC_BLOCK_SHIFT);
-
-		remainder = start % bbsize;
-		if (remainder) {
-			length -= bbsize - remainder;
-			start += bbsize - remainder;
-		}
-
-		start /= bbsize;
-		length /= bbsize;
-
-		device->subdev[x].start = start;
-		device->subdev[x].length = length;
-		sdp->device_size = start + length;
+	if (length < 1 << (20 - GFS2_BASIC_BLOCK_SHIFT))
+		die("device is way too small (%"PRIu64" bytes)\n",
+		    length << GFS2_BASIC_BLOCK_SHIFT);
+
+	remainder = start % bbsize;
+	if (remainder) {
+		length -= bbsize - remainder;
+		start += bbsize - remainder;
 	}
 
-	if (sdp->debug) {
-		printf("\nDevice Geometry:  (in FS blocks)\n");
-		for (x = 0; x < device->nsubdev; x++)
-			printf("  SubDevice #%u: start = %"PRIu64", length = %"PRIu64", rgf_flags = 0x%.8X\n",
-			       x,
-			       device->subdev[x].start,
-			       device->subdev[x].length,
-			       device->subdev[x].rgf_flags);
+	start /= bbsize;
+	length /= bbsize;
 
-		printf("\nDevice Size: %"PRIu64"\n", sdp->device_size);
-	}
-}
-
-void
-munge_device_geometry_for_grow(struct gfs2_sbd *sdp)
-{
-	struct device *device = &sdp->device;
-	struct device new_dev;
-	struct subdevice *new_sdev;
-	uint64_t start, length;
-	unsigned int x;
-
-	memset(&new_dev, 0, sizeof(struct device));
-
-	for (x = 0; x < device->nsubdev; x++) {
-		struct subdevice *sdev = device->subdev + x;
-
-		if (sdev->start + sdev->length < sdp->orig_fssize)
-			continue;
-		else if (sdev->start < sdp->orig_fssize) {
-			start = sdp->orig_fssize;
-			length = sdev->start + sdev->length - sdp->orig_fssize;
-			if (length < GFS2_MIN_GROW_SIZE << (20 - sdp->bsize_shift))
-				continue;
-		} else {
-			start = sdev->start;
-			length = sdev->length;
-		}
-
-		new_dev.subdev = realloc(new_dev.subdev, (new_dev.nsubdev + 1) * sizeof(struct subdevice));
-		if (!new_dev.subdev)
-			die("out of memory\n");
-		new_sdev = new_dev.subdev + new_dev.nsubdev;
-		new_sdev->start = start;
-		new_sdev->length = length;
-		new_sdev->rgf_flags = sdev->rgf_flags;
-		new_dev.nsubdev++;
-	}
-
-	free(device->subdev);
-	*device = new_dev;
-
-	if (!device->nsubdev)
-		die("The device didn't grow enough to warrant growing the FS.\n");
+	device->start = start;
+	device->length = length;
+	sdp->device_size = start + length;
 
 	if (sdp->debug) {
-		printf("\nMunged Device Geometry:  (in FS blocks)\n");
-		for (x = 0; x < device->nsubdev; x++)
-			printf("  SubDevice #%u: start = %"PRIu64", length = %"PRIu64", rgf_flags = 0x%.8X\n",
-			       x,
-			       device->subdev[x].start,
-			       device->subdev[x].length,
-			       device->subdev[x].rgf_flags);
+		printf("\nDevice Geometry:  (in FS blocks)\n");
+		printf("  start = %"PRIu64", length = %"
+		       PRIu64", rgf_flags = 0x%.8X\n",
+		       device->start, device->length, device->rgf_flags);
+		printf("\nDevice Size: %"PRIu64"\n", sdp->device_size);
 	}
 }
-
-
--- cluster/gfs2/libgfs2/fs_geometry.c	2006/11/30 15:25:49	1.3
+++ cluster/gfs2/libgfs2/fs_geometry.c	2007/05/01 16:43:39	1.4
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -28,19 +28,18 @@
 /**
  * how_many_rgrps - figure out how many RG to put in a subdevice
  * @w: the command line
- * @sdev: the subdevice
+ * @dev: the device
  *
  * Returns: the number of RGs
  */
 
-static uint64_t
-how_many_rgrps(struct gfs2_sbd *sdp, struct subdevice *sdev,
-			   int rgsize_specified)
+uint64_t
+how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified)
 {
 	uint64_t nrgrp;
 
 	while (TRUE) {
-		nrgrp = DIV_RU(sdev->length, (sdp->rgsize << 20) / sdp->bsize);
+		nrgrp = DIV_RU(dev->length, (sdp->rgsize << 20) / sdp->bsize);
 
 		if (rgsize_specified || /* If user specified an rg size or */
 			nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # of rgs or  */
@@ -67,61 +66,81 @@
 void
 compute_rgrp_layout(struct gfs2_sbd *sdp, int rgsize_specified)
 {
-	struct subdevice *sdev;
-	struct rgrp_list *rl, *rlast = NULL;
+	struct device *dev;
+	struct rgrp_list *rl, *rlast = NULL, *rlast2 = NULL;
 	osi_list_t *tmp, *head = &sdp->rglist;
-	uint64_t rgrp, nrgrp;
-	unsigned int x;
-	int new_fs;
-
-	new_fs = TRUE;
-	for (x = 0; x < sdp->device.nsubdev; x++) {
-		sdev = sdp->device.subdev + x;
-
-		/* If this is the first subdevice reserve space for the superblock */
-		if (new_fs) {
-			sdev->start += sdp->sb_addr + 1;
-			sdev->length -= sdp->sb_addr + 1;
-			new_fs = FALSE;
-		}
-
-		if (sdp->debug)
-			printf("\nData Subdevice %u\n", x);
+	unsigned int rgrp = 0, nrgrp;
+	uint64_t rglength;
 
-		nrgrp = how_many_rgrps(sdp, sdev, rgsize_specified);
+	sdp->new_rgrps = 0;
+	dev = &sdp->device;
 
-		for (rgrp = 0; rgrp < nrgrp; rgrp++) {
-			zalloc(rl, sizeof(struct rgrp_list));
-
-			rl->subdevice = x;
-
-			if (rgrp) {
-				rl->start = rlast->start + rlast->length;
-				rl->length = sdev->length / nrgrp;
-			} else {
-				rl->start = sdev->start;
-				rl->length = sdev->length -
-					(nrgrp - 1) * (sdev->length / nrgrp);
-			}
-			rl->rgf_flags = sdev->rgf_flags;
-
-			osi_list_add_prev(&rl->list, head);
+	/* Reserve space for the superblock */
+	dev->start += sdp->sb_addr + 1;
 
+	/* If this is a new file system, compute the length and number */
+	/* of rgs based on the size of the device.                     */
+	/* If we have existing RGs (i.e. gfs2_grow) find the last one. */
+	if (osi_list_empty(&sdp->rglist)) {
+		dev->length -= sdp->sb_addr + 1;
+		nrgrp = how_many_rgrps(sdp, dev, rgsize_specified);
+		rglength = dev->length / nrgrp;
+		sdp->new_rgrps = nrgrp;
+	} else {
+		uint64_t old_length, new_chunk;
+
+		log_info("Existing resource groups:\n");
+		rgsize_specified = TRUE; /* consistently use existing size */
+		for (rgrp = 0, tmp = head->next; tmp != head;
+		     tmp = tmp->next, rgrp++) {
+			rl = osi_list_entry(tmp, struct rgrp_list, list);
+			log_info("%d: start: %" PRIu64 " (0x%"
+				 PRIx64 "), length = %"PRIu64" (0x%"
+				 PRIx64 ")\n", rgrp + 1, rl->start, rl->start,
+				 rl->length, rl->length);
+			rlast2 = rlast;
 			rlast = rl;
 		}
+		rlast->start = rlast->ri.ri_addr;
+		rglength = rlast->ri.ri_addr - rlast2->ri.ri_addr;
+		rlast->length = rglength;
+		old_length = rlast->ri.ri_addr + rglength;
+		new_chunk = dev->length - old_length;
+		sdp->new_rgrps = new_chunk / rglength;
+		nrgrp = rgrp + sdp->new_rgrps;
+	}
+
+	log_info("\nNew resource groups:\n");
+	for (; rgrp < nrgrp; rgrp++) {
+		zalloc(rl, sizeof(struct rgrp_list));
+
+		if (rgrp) {
+			rl->start = rlast->start + rlast->length;
+			rl->length = rglength;
+		} else {
+			rl->start = dev->start;
+			rl->length = dev->length -
+				(nrgrp - 1) * (dev->length / nrgrp);
+		}
+		rl->rgf_flags = dev->rgf_flags;
 
-		sdp->rgrps += nrgrp;
-		sdp->new_rgrps += nrgrp;
+		log_info("%d: start: %" PRIu64 " (0x%"
+			 PRIx64 "), length = %"PRIu64" (0x%"
+			 PRIx64 ")\n", rgrp + 1, rl->start, rl->start,
+			 rl->length, rl->length);
+		osi_list_add_prev(&rl->list, head);
+		rlast = rl;
 	}
 
+	sdp->rgrps = nrgrp;
+
 	if (sdp->debug) {
-		printf("\n");
+		log_info("\n");
 
 		for (tmp = head->next; tmp != head; tmp = tmp->next) {
 			rl = osi_list_entry(tmp, struct rgrp_list, list);
-			printf("subdevice %u:  rg_o = %"PRIu64", rg_l = %"PRIu64"\n",
-			       rl->subdevice,
-			       rl->start, rl->length);
+			log_info("rg_o = %llu, rg_l = %llu\n",
+				 rl->start, rl->length);
 		}
 	}
 }
@@ -137,7 +156,7 @@
  *
  */
 
-static void
+void
 rgblocks2bitblocks(unsigned int bsize, uint32_t *rgblocks, uint32_t *bitblocks)
 {
 	unsigned int bitbytes_provided, last = 0;
@@ -163,7 +182,12 @@
 	*rgblocks = bitbytes_needed * GFS2_NBBY;
 }
 
-void build_rgrps(struct gfs2_sbd *sdp)
+/**
+ * build_rgrps - write a bunch of resource groups to disk.
+ * If write is nonzero, the block headers of each resource group are
+ * written out using the gfs2 buffering in buf.c.  Otherwise, nothing is written.
+ */
+void build_rgrps(struct gfs2_sbd *sdp, int write)
 {
 	osi_list_t *tmp, *head;
 	struct rgrp_list *rl;
@@ -200,15 +224,16 @@
 		rg->rg_flags = rl->rgf_flags;
 		rg->rg_free = rgblocks;
 
-		if (!sdp->test)
+		if (write) {
 			for (x = 0; x < bitblocks; x++) {
 				bh = bget(sdp, rl->start + x);
-				if (x) {
+				if (x)
 					gfs2_meta_header_out(&mh, bh->b_data);
-				} else
+				else
 					gfs2_rgrp_out(rg, bh->b_data);
 				brelse(bh, updated);
 			}
+		}
 
 		if (sdp->debug) {
 			printf("\n");
@@ -219,5 +244,3 @@
 		sdp->fssize = ri->ri_data0 + ri->ri_data;
 	}
 }
-
-
--- cluster/gfs2/libgfs2/fs_ops.c	2007/03/26 19:31:59	1.6
+++ cluster/gfs2/libgfs2/fs_ops.c	2007/05/01 16:43:39	1.7
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -729,7 +729,7 @@
 		      index * sizeof(uint64_t),
 		      sizeof(uint64_t));
 	if (count != sizeof(uint64_t))
-		die("gfs2_get_leaf_nr\n");
+		die("gfs2_get_leaf_nr:  Bad internal read.\n");
 
 	*leaf_out = be64_to_cpu(leaf_no);
 }
@@ -953,7 +953,11 @@
 
  restart:
 	hash = gfs2_disk_hash(filename, len);
-	index = hash >> (32 - dip->i_di.di_depth);
+	/* hash >> 32 is undefined in C for a 32-bit hash; handle depth 0 here. */
+	if (dip->i_di.di_depth)
+		index = hash >> (32 - dip->i_di.di_depth);
+	else
+		index = 0;
 
 	gfs2_get_leaf_nr(dip, index, &leaf_no);
 
--- cluster/gfs2/libgfs2/libgfs2.h	2007/02/12 18:55:29	1.10
+++ cluster/gfs2/libgfs2/libgfs2.h	2007/05/01 16:43:39	1.11
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include <inttypes.h>
 #include <sys/types.h>
 #include <linux/types.h>
+#include <linux/limits.h>
 
 #include "linux_endian.h"
 #include <linux/gfs2_ondisk.h>
@@ -63,17 +64,12 @@
 #define RESRANDOM do { srandom(RANDOM(1000000000)); } while (0)
 #define RANDOM(values) ((values) * (random() / (RAND_MAX + 1.0)))
 
-struct subdevice {
+struct device {
 	uint64_t start;
 	uint64_t length;
 	uint32_t rgf_flags;
 };
 
-struct device {
-	unsigned int nsubdev;
-	struct subdevice *subdev;
-};
-
 struct gfs2_bitmap
 {
 	uint32_t   bi_offset;  /* The offset in the buffer of the first byte */
@@ -84,9 +80,6 @@
 
 struct rgrp_list {
 	osi_list_t list;
-
-	uint32_t subdevice;	/* The subdevice who holds this resource group */
-
 	uint64_t start;	   /* The offset of the beginning of this resource group */
 	uint64_t length;	/* The length of this resource group */
 	uint32_t rgf_flags;
@@ -169,7 +162,6 @@
 
 	int debug;
 	int quiet;
-	int test;
 	int expert;
 	int override;
 
@@ -374,8 +366,12 @@
 int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state);
 
 /* fs_geometry.c */
+void rgblocks2bitblocks(unsigned int bsize, uint32_t *rgblocks,
+			uint32_t *bitblocks);
+uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev,
+			int rgsize_specified);
 void compute_rgrp_layout(struct gfs2_sbd *sdp, int rgsize_specified);
-void build_rgrps(struct gfs2_sbd *sdp);
+void build_rgrps(struct gfs2_sbd *sdp, int write);
 
 /* fs_ops.c */
 #define IS_LEAF     (1)
@@ -507,7 +503,7 @@
 struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
 uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
 void gfs2_rgrp_relse(struct rgrp_list *rgd, enum update_flags updated);
-void gfs2_rgrp_free(struct gfs2_sbd *sdp, enum update_flags updated);
+void gfs2_rgrp_free(osi_list_t *rglist, enum update_flags updated);
 
 /* structures.c */
 void build_master(struct gfs2_sbd *sdp);
@@ -530,7 +526,8 @@
 /* super.c */
 int read_sb(struct gfs2_sbd *sdp);
 int ji_update(struct gfs2_sbd *sdp);
-int ri_update(struct gfs2_sbd *sdp, int *rgcount);
+int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1);
+int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount);
 int write_sb(struct gfs2_sbd *sdp);
 
 /* ondisk.c */
--- cluster/gfs2/libgfs2/rgrp.c	2006/06/08 20:52:26	1.1
+++ cluster/gfs2/libgfs2/rgrp.c	2007/05/01 16:43:39	1.2
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -124,7 +124,8 @@
 	for (x = 0; x < length; x++){
 		rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
 		if(gfs2_check_meta(rgd->bh[x],
-						   (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG)) {
+				   (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
+		{
 			uint64_t error;
 
 			error = rgd->ri.ri_addr + x;
@@ -146,15 +147,15 @@
 		brelse(rgd->bh[x], updated);
 }
 
-void gfs2_rgrp_free(struct gfs2_sbd *sdp, enum update_flags updated)
+void gfs2_rgrp_free(osi_list_t *rglist, enum update_flags updated)
 {
 	struct rgrp_list *rgd;
 
-	while(!osi_list_empty(&sdp->rglist)){
-		rgd = osi_list_entry(sdp->rglist.next, struct rgrp_list, list);
-		if (rgd->bh && rgd->bh[0] && /* if a buffer exists and           */
-			rgd->bh[0]->b_count)     /* the first buffer is allocated    */
-			gfs2_rgrp_relse(rgd, updated); /* they must all be so free them. */
+	while(!osi_list_empty(rglist->next)){
+		rgd = osi_list_entry(rglist->next, struct rgrp_list, list);
+		if (rgd->bh && rgd->bh[0] && /* if a buffer exists and       */
+			rgd->bh[0]->b_count) /* the 1st buffer is allocated */
+			gfs2_rgrp_relse(rgd, updated); /* free them all. */
 		if(rgd->bits)
 			free(rgd->bits);
 		if(rgd->bh)
--- cluster/gfs2/libgfs2/super.c	2006/06/19 20:45:15	1.3
+++ cluster/gfs2/libgfs2/super.c	2007/05/01 16:43:39	1.4
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -165,31 +165,35 @@
 }
 
 /**
- * ri_update - attach rgrps to the super block
- * @sdp:
- *
- * Given the rgrp index inode, link in all rgrps into the super block
- * and be sure that they can be read.
+ * rindex_read - read in the rg index file
+ * @sdp: the incore superblock pointer
+ * @fd: optional file handle for rindex file (if meta_fs file system is mounted)
+ *      (if fd is <= zero, the rindex inode is read with gfs2_readi instead)
+ * @count1: return count of the rgs.
  *
- * Returns: 0 on success, -1 on failure.
+ * Returns: 0 on success, -1 on failure
  */
-int ri_update(struct gfs2_sbd *sdp, int *rgcount)
+int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
 {
-	struct rgrp_list *rgd;
-	osi_list_t *tmp;
-	struct gfs2_rindex buf;
 	unsigned int rg;
-	int error, count1 = 0, count2 = 0;
-	uint64_t errblock = 0;
+	int error;
+	struct gfs2_rindex buf;
+	struct rgrp_list *rgd, *prev_rgd;
+	uint64_t prev_length;
 
+	*count1 = 0;
+	prev_rgd = NULL;
 	for (rg = 0; ; rg++) {
-		error = gfs2_readi(sdp->md.riinode, (char *)&buf,
-						   rg * sizeof(struct gfs2_rindex),
-						   sizeof(struct gfs2_rindex));
+		if (fd > 0)
+			error = read(fd, &buf, sizeof(struct gfs2_rindex));
+		else
+			error = gfs2_readi(sdp->md.riinode, (char *)&buf,
+					   rg * sizeof(struct gfs2_rindex),
+					   sizeof(struct gfs2_rindex));
 		if (!error)
 			break;
 		if (error != sizeof(struct gfs2_rindex))
-			goto fail;
+			return -1;
 
 		rgd = (struct rgrp_list *)malloc(sizeof(struct rgrp_list));
 		memset(rgd, 0, sizeof(struct rgrp_list));
@@ -197,34 +201,53 @@
 
 		gfs2_rindex_in(&rgd->ri, (char *)&buf);
 
+		rgd->start = rgd->ri.ri_addr;
+		if (prev_rgd) {
+			prev_length = rgd->start - prev_rgd->start;
+			prev_rgd->length = prev_length;
+		}
+
 		if(gfs2_compute_bitstructs(sdp, rgd))
-			goto fail;
+			return -1;
 
-		count1++;
+		(*count1)++;
+		prev_rgd = rgd;
 	}
+	if (prev_rgd)
+		prev_rgd->length = prev_length;
+	return 0;
+}
+
+/**
+ * ri_update - attach rgrps to the super block
+ * @sdp: incore superblock data
+ * @fd: optional file handle for rindex (through the meta_fs)
+ * @rgcount: returned count of rgs
+ *
+ * Given the rgrp index inode, link in all rgrps into the super block
+ * and be sure that they can be read.
+ *
+ * Returns: 0 on success, -1 on failure.
+ */
+int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount)
+{
+	struct rgrp_list *rgd;
+	osi_list_t *tmp;
+	int count1 = 0, count2 = 0;
+	uint64_t errblock = 0;
 
+	if (rindex_read(sdp, fd, &count1))
+	    goto fail;
 	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
-		int i;
-		uint64_t prev_err = 0;
 		enum update_flags f;
 
 		f = not_updated;
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
-		/* If we have errors, we may need to repair and continue.           */
-		/* We have multiple bitmaps, and all of them might potentially need */
-		/* repair.  So we have to try to read and repair as many times as   */
-		/* there are bitmaps.                                               */
-		for (i = 0; i < rgd->ri.ri_length; i++) {
-			errblock = gfs2_rgrp_read(sdp, rgd);
-			if (errblock) {
-				if (errblock == prev_err) /* if same block is still bad */
-					goto fail;
-				prev_err = errblock;
-			}
-			else
-				break;
-		} /* for all bitmap structures */
-		gfs2_rgrp_relse(rgd, f);
+		errblock = gfs2_rgrp_read(sdp, rgd);
+		if (errblock)
+			return errblock;
+		else
+			gfs2_rgrp_relse(rgd, f);
 		count2++;
 	}
 
@@ -235,7 +258,7 @@
 	return 0;
 
  fail:
-	gfs2_rgrp_free(sdp, not_updated);
+	gfs2_rgrp_free(&sdp->rglist, not_updated);
 	return -1;
 }
 
--- cluster/gfs2/mkfs/main_mkfs.c	2006/12/19 17:49:53	1.11
+++ cluster/gfs2/mkfs/main_mkfs.c	2007/05/01 16:43:39	1.12
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -375,7 +375,7 @@
 
 	/* Build ondisk structures */
 
-	build_rgrps(sdp);
+	build_rgrps(sdp, TRUE);
 	build_root(sdp);
 	build_master(sdp);
 	build_sb(sdp);




More information about the Cluster-devel mailing list