[Cluster-devel] GFS2: Clean up recovery code
Steven Whitehouse
swhiteho at redhat.com
Tue Nov 10 13:41:20 UTC 2009
The following patch cleans up the recovery code and fixes a few
bugs along the way. The bugs are:
o An incorrect assumption about the size of the journal
o An issue where the superblock was being used to store variables
local to the recovery process which would cause a problem if
multiple journals were recovered at once.
o Can report incorrect counts of blocks read & recovered in some cases
(this is harmless, it's just a logging issue)
Features:
o Moves the recovery code from lops.c into recovery.c which allows
making a number of functions static and removing other bits of code.
o Removes the "before scan" functions as they are not needed (partly
merged into the "scan" functions)
o Removes the "after scan" functions. These have also been merged into
the "scan" functions
o We no longer call any functions which may in turn call withdraw from
the recovery code. If there is an issue with recovery, we report it
to the caller (and userspace).
o New uevent env variable is documented
o Superblock shrinks by 32 bytes on 64 bit arches.
o Code shrinks by about 100 lines (probably more since there are more
comments now)
TODO:
o Report where error has occurred in log, as well as what the error is
o Check code for finding journal headers (maybe remove gfs2_log_header_host?)
o Testing :-)
For the moment, this is just a heads up on what I'm working on. I hope it
won't be too long before I have a final version of this patch,
Steve.
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt
index fd966dc..c029596 100644
--- a/Documentation/filesystems/gfs2-uevents.txt
+++ b/Documentation/filesystems/gfs2-uevents.txt
@@ -44,6 +44,10 @@ for every journal recovered, whether it is during the initial mount
process or as the result of gfs_controld requesting a specific journal
recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file.
+If the recovery has failed, then on recent versions of GFS2 the
+ERROR= variable will also be included. This returns a kernel
+error code indicating what went wrong during recovery.
+
Because the CHANGE uevent was used (in early versions of gfs_controld)
without checking the environment variables to discover the state, we
cannot add any more functions to it without running the risk of
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4792200..e497aaf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -50,12 +50,6 @@ struct gfs2_log_operations {
void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
void (*lo_before_commit) (struct gfs2_sbd *sdp);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
- void (*lo_before_scan) (struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head, int pass);
- int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
- struct gfs2_log_descriptor *ld, __be64 *ptr,
- int pass);
- void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
const char *lo_name;
};
@@ -648,15 +642,6 @@ struct gfs2_sbd {
struct list_head sd_ail2_list;
u64 sd_ail_sync_gen;
- /* Replay stuff */
-
- struct list_head sd_revoke_list;
- unsigned int sd_replay_tail;
-
- unsigned int sd_found_blocks;
- unsigned int sd_found_revokes;
- unsigned int sd_replayed_blocks;
-
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index de97632..4d301af 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -136,6 +136,12 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_trans *tr;
lock_buffer(bd->bd_bh);
+ mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+ if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+ printk(KERN_ERR "GFS2: %s mh error: buf_lo_add block %llu\n",
+ sdp->sd_fsname, (unsigned long long)bd->bd_bh->b_blocknr);
+ BUG();
+ }
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr))
goto out;
@@ -147,9 +153,7 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
goto out;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
- gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
- mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
mh->__pad0 = cpu_to_be64(0);
mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
sdp->sd_log_num_buf++;
@@ -235,84 +239,6 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}
-static void buf_lo_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head, int pass)
-{
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-
- if (pass != 0)
- return;
-
- sdp->sd_found_blocks = 0;
- sdp->sd_replayed_blocks = 0;
-}
-
-static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
- struct gfs2_log_descriptor *ld, __be64 *ptr,
- int pass)
-{
- struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- struct gfs2_glock *gl = ip->i_gl;
- unsigned int blks = be32_to_cpu(ld->ld_data1);
- struct buffer_head *bh_log, *bh_ip;
- u64 blkno;
- int error = 0;
-
- if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
- return 0;
-
- gfs2_replay_incr_blk(sdp, &start);
-
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
- blkno = be64_to_cpu(*ptr++);
-
- sdp->sd_found_blocks++;
-
- if (gfs2_revoke_check(sdp, blkno, start))
- continue;
-
- error = gfs2_replay_read_block(jd, start, &bh_log);
- if (error)
- return error;
-
- bh_ip = gfs2_meta_new(gl, blkno);
- memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
-
- if (gfs2_meta_check(sdp, bh_ip))
- error = -EIO;
- else
- mark_buffer_dirty(bh_ip);
-
- brelse(bh_log);
- brelse(bh_ip);
-
- if (error)
- break;
-
- sdp->sd_replayed_blocks++;
- }
-
- return error;
-}
-
-static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
-{
- struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-
- if (error) {
- gfs2_meta_sync(ip->i_gl);
- return;
- }
- if (pass != 1)
- return;
-
- gfs2_meta_sync(ip->i_gl);
-
- fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
- jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
-}
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
@@ -370,85 +296,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
submit_bh(WRITE_SYNC_PLUG, bh);
}
-static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head, int pass)
-{
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-
- if (pass != 0)
- return;
-
- sdp->sd_found_revokes = 0;
- sdp->sd_replay_tail = head->lh_tail;
-}
-
-static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
- struct gfs2_log_descriptor *ld, __be64 *ptr,
- int pass)
-{
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- unsigned int blks = be32_to_cpu(ld->ld_length);
- unsigned int revokes = be32_to_cpu(ld->ld_data1);
- struct buffer_head *bh;
- unsigned int offset;
- u64 blkno;
- int first = 1;
- int error;
-
- if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
- return 0;
-
- offset = sizeof(struct gfs2_log_descriptor);
-
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
- error = gfs2_replay_read_block(jd, start, &bh);
- if (error)
- return error;
-
- if (!first)
- gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
-
- while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
- blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
-
- error = gfs2_revoke_add(sdp, blkno, start);
- if (error < 0) {
- brelse(bh);
- return error;
- }
- else if (error)
- sdp->sd_found_revokes++;
-
- if (!--revokes)
- break;
- offset += sizeof(u64);
- }
-
- brelse(bh);
- offset = sizeof(struct gfs2_meta_header);
- first = 0;
- }
-
- return 0;
-}
-
-static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
-{
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-
- if (error) {
- gfs2_revoke_clean(sdp);
- return;
- }
- if (pass != 1)
- return;
-
- fs_info(sdp, "jid=%u: Found %u revoke tags\n",
- jd->jd_jid, sdp->sd_found_revokes);
-
- gfs2_revoke_clean(sdp);
-}
-
static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
struct gfs2_rgrpd *rgd;
@@ -643,78 +490,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
gfs2_log_unlock(sdp);
}
-static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
- struct gfs2_log_descriptor *ld,
- __be64 *ptr, int pass)
-{
- struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- struct gfs2_glock *gl = ip->i_gl;
- unsigned int blks = be32_to_cpu(ld->ld_data1);
- struct buffer_head *bh_log, *bh_ip;
- u64 blkno;
- u64 esc;
- int error = 0;
-
- if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
- return 0;
-
- gfs2_replay_incr_blk(sdp, &start);
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
- blkno = be64_to_cpu(*ptr++);
- esc = be64_to_cpu(*ptr++);
-
- sdp->sd_found_blocks++;
-
- if (gfs2_revoke_check(sdp, blkno, start))
- continue;
-
- error = gfs2_replay_read_block(jd, start, &bh_log);
- if (error)
- return error;
-
- bh_ip = gfs2_meta_new(gl, blkno);
- memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
-
- /* Unescape */
- if (esc) {
- __be32 *eptr = (__be32 *)bh_ip->b_data;
- *eptr = cpu_to_be32(GFS2_MAGIC);
- }
- mark_buffer_dirty(bh_ip);
-
- brelse(bh_log);
- brelse(bh_ip);
- if (error)
- break;
-
- sdp->sd_replayed_blocks++;
- }
-
- return error;
-}
-
-/* FIXME: sort out accounting for log blocks etc. */
-
-static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
-{
- struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-
- if (error) {
- gfs2_meta_sync(ip->i_gl);
- return;
- }
- if (pass != 1)
- return;
-
- /* data sync? */
- gfs2_meta_sync(ip->i_gl);
-
- fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
- jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
-}
-
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct list_head *head = &sdp->sd_log_le_databuf;
@@ -734,18 +509,12 @@ const struct gfs2_log_operations gfs2_buf_lops = {
.lo_add = buf_lo_add,
.lo_before_commit = buf_lo_before_commit,
.lo_after_commit = buf_lo_after_commit,
- .lo_before_scan = buf_lo_before_scan,
- .lo_scan_elements = buf_lo_scan_elements,
- .lo_after_scan = buf_lo_after_scan,
.lo_name = "buf",
};
const struct gfs2_log_operations gfs2_revoke_lops = {
.lo_add = revoke_lo_add,
.lo_before_commit = revoke_lo_before_commit,
- .lo_before_scan = revoke_lo_before_scan,
- .lo_scan_elements = revoke_lo_scan_elements,
- .lo_after_scan = revoke_lo_after_scan,
.lo_name = "revoke",
};
@@ -759,8 +528,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
.lo_add = databuf_lo_add,
.lo_before_commit = databuf_lo_before_commit,
.lo_after_commit = databuf_lo_after_commit,
- .lo_scan_elements = databuf_lo_scan_elements,
- .lo_after_scan = databuf_lo_after_scan,
.lo_name = "databuf",
};
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 3c0b273..c2f8dc0 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -73,41 +73,5 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
gfs2_log_ops[x]->lo_after_commit(sdp, ai);
}
-static inline void lops_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head,
- unsigned int pass)
-{
- int x;
- for (x = 0; gfs2_log_ops[x]; x++)
- if (gfs2_log_ops[x]->lo_before_scan)
- gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
-}
-
-static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
- struct gfs2_log_descriptor *ld,
- __be64 *ptr,
- unsigned int pass)
-{
- int x, error;
- for (x = 0; gfs2_log_ops[x]; x++)
- if (gfs2_log_ops[x]->lo_scan_elements) {
- error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
- ld, ptr, pass);
- if (error)
- return error;
- }
-
- return 0;
-}
-
-static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
- unsigned int pass)
-{
- int x;
- for (x = 0; gfs2_log_ops[x]; x++)
- if (gfs2_log_ops[x]->lo_before_scan)
- gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
-}
-
#endif /* __LOPS_DOT_H__ */
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a9..e89f14d 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -136,24 +136,6 @@ void gfs2_aspace_put(struct inode *aspace)
}
/**
- * gfs2_meta_sync - Sync all buffers associated with a glock
- * @gl: The glock
- *
- */
-
-void gfs2_meta_sync(struct gfs2_glock *gl)
-{
- struct address_space *mapping = gl->gl_aspace->i_mapping;
- int error;
-
- filemap_fdatawrite(mapping);
- error = filemap_fdatawait(mapping);
-
- if (error)
- gfs2_io_error(gl->gl_sbd);
-}
-
-/**
* gfs2_getbuf - Get a buffer with a given address space
* @gl: the glock
* @blkno: the block number (filesystem scope)
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index de270c2..38cca55 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -40,8 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
void gfs2_aspace_put(struct inode *aspace);
-void gfs2_meta_sync(struct gfs2_glock *gl);
-
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
int flags, struct buffer_head **bhp);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24..bf7361a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -115,8 +115,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
atomic_set(&sdp->sd_log_in_flight, 0);
init_waitqueue_head(&sdp->sd_log_flush_wait);
- INIT_LIST_HEAD(&sdp->sd_revoke_list);
-
mutex_init(&sdp->sd_freeze_lock);
return sdp;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index e594d9e..64b4892 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -27,8 +27,15 @@
#include "util.h"
#include "dir.h"
-int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
- struct buffer_head **bh)
+struct gfs2_recovery_ops {
+ u32 type;
+ int (*fxn)(struct gfs2_jdesc *jd, struct list_head *revoke_list,
+ const struct gfs2_log_descriptor *ld,
+ u32 tail, u32 start, const __be64 *ptr);
+};
+
+static int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+ struct buffer_head **bh)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
@@ -40,19 +47,16 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
if (error)
return error;
- if (!dblock) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
+ if (!dblock)
+ return -ESRCH;
*bh = gfs2_meta_ra(gl, dblock, extlen);
return error;
}
-int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
+static int gfs2_revoke_add(struct list_head *head, u64 blkno, unsigned int where)
{
- struct list_head *head = &sdp->sd_revoke_list;
struct gfs2_revoke_replay *rr;
int found = 0;
@@ -79,13 +83,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
return 1;
}
-int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
+static int gfs2_revoke_check(struct list_head *head, u64 blkno, u32 tail, unsigned int where)
{
struct gfs2_revoke_replay *rr;
int wrap, a, b, revoke;
int found = 0;
- list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
+ list_for_each_entry(rr, head, rr_list) {
if (rr->rr_blkno == blkno) {
found = 1;
break;
@@ -95,17 +99,16 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
if (!found)
return 0;
- wrap = (rr->rr_where < sdp->sd_replay_tail);
- a = (sdp->sd_replay_tail < where);
+ wrap = (rr->rr_where < tail);
+ a = (tail < where);
b = (where < rr->rr_where);
revoke = (wrap) ? (a || b) : (a && b);
return revoke;
}
-void gfs2_revoke_clean(struct gfs2_sbd *sdp)
+static void gfs2_revoke_clean(struct list_head *head)
{
- struct list_head *head = &sdp->sd_revoke_list;
struct gfs2_revoke_replay *rr;
while (!list_empty(head)) {
@@ -115,19 +118,38 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
}
}
-static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+static void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
{
const struct gfs2_log_header *str = buf;
- if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
- str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
- return 1;
-
lh->lh_sequence = be64_to_cpu(str->lh_sequence);
lh->lh_flags = be32_to_cpu(str->lh_flags);
lh->lh_tail = be32_to_cpu(str->lh_tail);
lh->lh_blkno = be32_to_cpu(str->lh_blkno);
lh->lh_hash = be32_to_cpu(str->lh_hash);
+}
+
+static int gfs2_check_log_header(const void *ptr, u32 blkno)
+{
+ const struct gfs2_log_header *lh = ptr;
+ const struct gfs2_meta_header *mh = &lh->lh_header;
+ const u32 nothing = 0;
+ u32 hash;
+
+ if (mh->mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+ mh->mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+ return 1;
+
+ hash = crc32_le((u32)~0, (unsigned char const *)lh, sizeof(struct gfs2_log_header) -
+ sizeof(u32));
+ hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+ hash ^= (u32)~0;
+
+ if (be32_to_cpu(lh->lh_hash) != hash)
+ return -EINVAL;
+ if (be32_to_cpu(lh->lh_blkno) != blkno)
+ return -EINVAL;
+
return 0;
}
@@ -150,22 +172,17 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
{
struct buffer_head *bh;
struct gfs2_log_header_host uninitialized_var(lh);
- const u32 nothing = 0;
- u32 hash;
int error;
error = gfs2_replay_read_block(jd, blk, &bh);
if (error)
return error;
- hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
- sizeof(u32));
- hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
- hash ^= (u32)~0;
- error = gfs2_log_header_in(&lh, bh->b_data);
+ error = gfs2_check_log_header(bh->b_data, blk);
+ gfs2_log_header_in(&lh, bh->b_data);
brelse(bh);
- if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
+ if (error)
return 1;
*head = lh;
@@ -200,10 +217,8 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
if (++*blk == jd->jd_blocks)
*blk = 0;
- if (*blk == orig_blk) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
+ if (*blk == orig_blk)
+ return -EINVAL;
}
}
@@ -234,10 +249,8 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
if (error == 1)
continue;
- if (lh.lh_sequence == head->lh_sequence) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
+ if (lh.lh_sequence == head->lh_sequence)
+ return -EINVAL;
if (lh.lh_sequence < head->lh_sequence)
break;
@@ -296,6 +309,199 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
return error;
}
+static int gfs2_recover_metadata(struct gfs2_jdesc *jd,
+ struct list_head *revoke_list,
+ const struct gfs2_log_descriptor *ld,
+ u32 tail, u32 start, const __be64 *ptr)
+{
+ struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_glock *gl = ip->i_gl;
+ unsigned int blks = be32_to_cpu(ld->ld_data1);
+ struct buffer_head *bh_log, *bh_ip;
+ unsigned int found_blocks = 0;
+ unsigned int replayed_blocks = 0;
+ const struct gfs2_meta_header *mh;
+ u64 blkno;
+ int error = 0;
+
+ start++;
+ start %= jd->jd_blocks;
+
+ for (; blks; start++, blks--) {
+ start %= jd->jd_blocks;
+ blkno = be64_to_cpu(*ptr++);
+
+ found_blocks++;
+
+ if (gfs2_revoke_check(revoke_list, blkno, tail, start))
+ continue;
+
+ error = gfs2_replay_read_block(jd, start, &bh_log);
+ if (error)
+ return error;
+
+ bh_ip = gfs2_meta_new(gl, blkno);
+ memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+ mh = (const struct gfs2_meta_header *)bh_ip->b_data;
+ if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC)))
+ error = -EINVAL;
+ else
+ mark_buffer_dirty(bh_ip);
+
+ brelse(bh_log);
+ brelse(bh_ip);
+
+ if (error)
+ break;
+
+ replayed_blocks++;
+ }
+ if (error)
+ return error;
+ fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
+ jd->jd_jid, replayed_blocks, found_blocks);
+ return 0;
+}
+
+static int gfs2_recover_jdata(struct gfs2_jdesc *jd,
+ struct list_head *revoke_list,
+ const struct gfs2_log_descriptor *ld,
+ u32 tail, u32 start, const __be64 *ptr)
+{
+ struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_glock *gl = ip->i_gl;
+ unsigned int blks = be32_to_cpu(ld->ld_data1);
+ struct buffer_head *bh_log, *bh_ip;
+ unsigned int found_blocks = 0;
+ unsigned int replayed_blocks = 0;
+ u64 blkno;
+ u64 esc;
+ int error = 0;
+
+ start++;
+ for (; blks; start++, blks--) {
+ start %= jd->jd_blocks;
+ blkno = be64_to_cpu(*ptr++);
+ esc = be64_to_cpu(*ptr++);
+
+ found_blocks++;
+
+ if (gfs2_revoke_check(revoke_list, blkno, tail, start))
+ continue;
+
+ error = gfs2_replay_read_block(jd, start, &bh_log);
+ if (error)
+ return error;
+
+ bh_ip = gfs2_meta_new(gl, blkno);
+ memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+ /* Unescape */
+ if (esc) {
+ __be32 *eptr = (__be32 *)bh_ip->b_data;
+ *eptr = cpu_to_be32(GFS2_MAGIC);
+ }
+ mark_buffer_dirty(bh_ip);
+
+ brelse(bh_log);
+ brelse(bh_ip);
+ if (error)
+ break;
+
+ replayed_blocks++;
+ }
+ if (error)
+ return error;
+ fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
+ jd->jd_jid, replayed_blocks, found_blocks);
+ return 0;
+}
+
+static int gfs2_recover_revoke(struct gfs2_jdesc *jd,
+ struct list_head *revoke_list,
+ const struct gfs2_log_descriptor *ld,
+ u32 tail, u32 start, const __be64 *ptr)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ unsigned int blks = be32_to_cpu(ld->ld_length);
+ unsigned int revokes = be32_to_cpu(ld->ld_data1);
+ const struct gfs2_meta_header *mh;
+ struct buffer_head *bh;
+ unsigned int offset;
+ u64 blkno;
+ int first = 1;
+ int error;
+ unsigned int found_revokes = 0;
+
+ offset = sizeof(struct gfs2_log_descriptor);
+
+ for (; blks; start++, blks--) {
+ start %= jd->jd_blocks;
+ error = gfs2_replay_read_block(jd, start, &bh);
+ if (error)
+ return error;
+
+ if (!first) {
+ mh = (const struct gfs2_meta_header *)bh->b_data;
+ if ((mh->mh_magic != cpu_to_be32(GFS2_MAGIC)) ||
+ (mh->mh_type != cpu_to_be32(GFS2_METATYPE_LB)))
+ return -EINVAL;
+ }
+
+ while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
+ blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
+
+ error = gfs2_revoke_add(revoke_list, blkno, start);
+ if (error < 0) {
+ brelse(bh);
+ return error;
+ } else if (error)
+ found_revokes++;
+
+ if (!--revokes)
+ break;
+ offset += sizeof(u64);
+ }
+
+ brelse(bh);
+ offset = sizeof(struct gfs2_meta_header);
+ first = 0;
+ }
+
+ fs_info(sdp, "jid=%u: Found %u revoke tags\n",
+ jd->jd_jid, found_revokes);
+ return 0;
+}
+
+static const struct gfs2_recovery_ops recovery_pass0[] = {
+ { .type = GFS2_LOG_DESC_REVOKE, .fxn = gfs2_recover_revoke, },
+ { .type = GFS2_LOG_DESC_METADATA, },
+ { .type = GFS2_LOG_DESC_JDATA, },
+ { .type = 0, } /* End of list */
+};
+
+static const struct gfs2_recovery_ops recovery_pass1[] = {
+ { .type = GFS2_LOG_DESC_REVOKE, },
+ { .type = GFS2_LOG_DESC_METADATA, .fxn = gfs2_recover_metadata },
+ { .type = GFS2_LOG_DESC_JDATA, .fxn = gfs2_recover_jdata },
+ { .type = 0, } /* End of list */
+};
+
+static int find_recovery_op(const struct gfs2_log_descriptor *ld,
+ const struct gfs2_recovery_ops *ops)
+{
+ int i;
+
+ for (i = 0; ops[i].type; i++) {
+ if (ops[i].type == be32_to_cpu(ld->ld_type))
+ return i;
+ }
+
+ return -EINVAL;
+}
+
/**
* foreach_descriptor - go through the active part of the log
* @jd: the journal
@@ -308,16 +514,20 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
* Returns: errno
*/
-static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
- unsigned int end, int pass)
+static int foreach_descriptor(struct gfs2_jdesc *jd,
+ struct list_head *revoke_list, unsigned int start,
+ unsigned int end,
+ const struct gfs2_recovery_ops *ops)
{
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
- struct gfs2_log_descriptor *ld;
+ const struct gfs2_log_descriptor *ld;
+ const struct gfs2_meta_header *mh;
+ unsigned int tail = start;
int error = 0;
u32 length;
__be64 *ptr;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
+
offset += sizeof(__be64) - 1;
offset &= ~(sizeof(__be64) - 1);
@@ -325,42 +535,37 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
- if (gfs2_meta_check(sdp, bh)) {
- brelse(bh);
- return -EIO;
- }
- ld = (struct gfs2_log_descriptor *)bh->b_data;
- length = be32_to_cpu(ld->ld_length);
-
- if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
- struct gfs2_log_header_host lh;
- error = get_log_header(jd, start, &lh);
- if (!error) {
- gfs2_replay_incr_blk(sdp, &start);
- brelse(bh);
+ mh = (const struct gfs2_meta_header *)bh->b_data;
+ switch (be32_to_cpu(mh->mh_type)) {
+ case GFS2_METATYPE_LD:
+ break;
+ case GFS2_METATYPE_LH:
+ error = gfs2_check_log_header(bh->b_data, start);
+ if (error == 0) {
+ start++;
+ start %= jd->jd_blocks;
continue;
}
- if (error == 1) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- error = -EIO;
- }
- brelse(bh);
- return error;
- } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
- brelse(bh);
- return -EIO;
+ default: /* Fall through */
+ return -EINVAL;
}
+
+ ld = (struct gfs2_log_descriptor *)bh->b_data;
+ length = be32_to_cpu(ld->ld_length);
ptr = (__be64 *)(bh->b_data + offset);
- error = lops_scan_elements(jd, start, ld, ptr, pass);
- if (error) {
- brelse(bh);
+ error = find_recovery_op(ld, ops);
+ if (error < 0)
return error;
- }
-
- while (length--)
- gfs2_replay_incr_blk(sdp, &start);
-
+ if (ops[error].fxn)
+ error = ops[error].fxn(jd, revoke_list, ld, tail, start, ptr);
+ else
+ error = 0;
brelse(bh);
+ if (error)
+ return error;
+
+ start += length;
+ start %= jd->jd_blocks;
}
return 0;
@@ -388,15 +593,14 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
lblock = head->lh_blkno;
- gfs2_replay_incr_blk(sdp, &lblock);
+ lblock++;
+ lblock %= jd->jd_blocks;
bh_map.b_size = 1 << ip->i_inode.i_blkbits;
error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
if (error)
return error;
- if (!bh_map.b_blocknr) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
+ if (!bh_map.b_blocknr)
+ return -ESRCH;
bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
lock_buffer(bh);
@@ -426,20 +630,44 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
return error;
}
+/**
+ * gfs2_recovery_done - Notify the results of recovery to userspace
+ * @sdp: The superblock
+ * @jid: The journal id
+ * @errno: The error associated with the journal recovery result
+ *
+ * This sends a uevent and also prints log messages to notify userspace
+ * about the result of a journal recovery attempt. If @errno is zero then
+ * it is considered successful. There are a number of possible reasons
+ * for failure, including:
+ * -EROFS - The block device is read-only
+ * -EINVAL - Some invalid data was read from the journal
+ * -EIO - An I/O error occurred while reading the journal or writing back
+ * changed information
+ * -ESRCH - Block map on the journal inode failed
+ */
-static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
- unsigned int message)
+static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, int errno)
{
char env_jid[20];
char env_status[20];
- char *envp[] = { env_jid, env_status, NULL };
+ char env_error[20];
+ char *envp[] = { env_jid, env_status, NULL, NULL };
+ const char *msg = errno ? "Failed" : "Done";
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
ls->ls_recover_jid_done = jid;
- ls->ls_recover_jid_status = message;
+ ls->ls_recover_jid_status = errno ? LM_RD_GAVEUP : LM_RD_SUCCESS;
sprintf(env_jid, "JID=%d", jid);
- sprintf(env_status, "RECOVERY=%s",
- message == LM_RD_SUCCESS ? "Done" : "Failed");
+ sprintf(env_status, "RECOVERY=%s", msg);
+ if (errno) {
+ sprintf(env_error, "ERROR=%d\n", errno);
+ envp[2] = env_error;
+ }
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
+ if (errno == -EROFS)
+ fs_warn(sdp, "jid=%u: Can't replay: read-only block device\n", jid);
+ fs_info(sdp, "jid=%u: Recovery %s (%d)\n", jid, msg, errno);
}
static int gfs2_recover_get_ref(struct slow_work *work)
@@ -458,6 +686,22 @@ static void gfs2_recover_put_ref(struct slow_work *work)
wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
+/**
+ * gfs2_recover_work - The main journal recovery function
+ * @work: The context for the recovery
+ *
+ * There are two reasons why we recover journals. Firstly at mount
+ * time we recover the journal which we are about to use and if we
+ * are the first node to mount the filesystem, we also recover all
+ * the other journals before other nodes are allowed to mount. Once
+ * we are mounted, if a node fails, then this function is scheduled
+ * to recover its journal. We never recover our own journal except at
+ * mount time.
+ *
+ * The results of recovery are logged and also sent to userspace
+ * via a uevent message.
+ */
+
static void gfs2_recover_work(struct slow_work *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
@@ -466,9 +710,8 @@ static void gfs2_recover_work(struct slow_work *work)
struct gfs2_log_header_host head;
struct gfs2_holder j_gh, ji_gh, t_gh;
unsigned long t;
- int ro = 0;
- unsigned int pass;
int error;
+ LIST_HEAD(revoke_list);
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
@@ -510,76 +753,63 @@ static void gfs2_recover_work(struct slow_work *work)
if (error)
goto fail_gunlock_ji;
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
- jd->jd_jid);
-
- t = jiffies;
-
- /* Acquire a shared hold on the transaction lock */
-
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
- GL_NOCACHE, &t_gh);
- if (error)
- goto fail_gunlock_ji;
-
- if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
- if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
- ro = 1;
- } else {
- if (sdp->sd_vfs->s_flags & MS_RDONLY) {
- /* check if device itself is read-only */
- ro = bdev_read_only(sdp->sd_vfs->s_bdev);
- if (!ro) {
- fs_info(sdp, "recovery required on "
- "read-only filesystem.\n");
- fs_info(sdp, "write access will be "
- "enabled during recovery.\n");
- }
- }
- }
+ /* Clean unmount, skip recovery */
+ if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)
+ goto fail_gunlock_ji;
- if (ro) {
- fs_warn(sdp, "jid=%u: Can't replay: read-only block "
- "device\n", jd->jd_jid);
- error = -EROFS;
- goto fail_gunlock_tr;
- }
+ fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", jd->jd_jid);
- fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
+ t = jiffies;
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
+ LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
+ GL_NOCACHE, &t_gh);
+ if (error)
+ goto fail_gunlock_ji;
- for (pass = 0; pass < 2; pass++) {
- lops_before_scan(jd, &head, pass);
- error = foreach_descriptor(jd, head.lh_tail,
- head.lh_blkno, pass);
- lops_after_scan(jd, error, pass);
- if (error)
- goto fail_gunlock_tr;
- }
+ error = -EROFS;
+ if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ goto out_gunlock_tr;
+ } else if (sdp->sd_vfs->s_flags & MS_RDONLY) {
+ /* check if device itself is read-only */
+ if (bdev_read_only(sdp->sd_vfs->s_bdev))
+ goto out_gunlock_tr;
+ fs_info(sdp, "recovery required on read-only filesystem.\n");
+ fs_info(sdp, "write access will be enabled during recovery.\n");
+ }
- error = clean_journal(jd, &head);
- if (error)
- goto fail_gunlock_tr;
+ fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
- gfs2_glock_dq_uninit(&t_gh);
- t = DIV_ROUND_UP(jiffies - t, HZ);
- fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
- jd->jd_jid, t);
+ /* Pass 0: Build revoke list, check block types & lengths */
+ error = foreach_descriptor(jd, &revoke_list, head.lh_tail,
+ head.lh_blkno, recovery_pass0);
+ if (error == 0) {
+ /* Pass 1: Scan metadata & jdata */
+ error = foreach_descriptor(jd, &revoke_list, head.lh_tail,
+ head.lh_blkno, recovery_pass1);
}
- if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
- gfs2_glock_dq_uninit(&ji_gh);
+ gfs2_revoke_clean(&revoke_list);
+ if (error)
+ goto out_gunlock_tr;
- gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
+ /* Write back any changed blocks */
+ error = filemap_fdatawrite(ip->i_inode.i_mapping);
+ if (error)
+ goto out_gunlock_tr;
+ error = filemap_fdatawait(ip->i_inode.i_mapping);
+ if (error)
+ goto out_gunlock_tr;
- if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
- gfs2_glock_dq_uninit(&j_gh);
+ /* Write a clean, unmount journal header */
+ error = clean_journal(jd, &head);
+ if (error)
+ goto out_gunlock_tr;
- fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
- return;
+ t = DIV_ROUND_UP(jiffies - t, HZ);
+ fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t);
-fail_gunlock_tr:
+out_gunlock_tr:
gfs2_glock_dq_uninit(&t_gh);
fail_gunlock_ji:
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
@@ -587,11 +817,8 @@ fail_gunlock_ji:
fail_gunlock_j:
gfs2_glock_dq_uninit(&j_gh);
}
-
- fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
-
fail:
- gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
+ gfs2_recovery_done(sdp, jd->jd_jid, error);
}
struct slow_work_ops gfs2_recover_ops = {
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 1616ac2..fbc6300 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -12,19 +12,6 @@
#include "incore.h"
-static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
-{
- if (++*blk == sdp->sd_jdesc->jd_blocks)
- *blk = 0;
-}
-
-extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
- struct buffer_head **bh);
-
-extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
-
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 33e96b0..eca400d 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -77,23 +77,6 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *type, const char *function,
char *file, unsigned int line);
-static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
- struct buffer_head *bh,
- const char *function,
- char *file, unsigned int line)
-{
- struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
- u32 magic = be32_to_cpu(mh->mh_magic);
- if (unlikely(magic != GFS2_MAGIC))
- return gfs2_meta_check_ii(sdp, bh, "magic number", function,
- file, line);
- return 0;
-}
-
-#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
-
-
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
u16 type, u16 t,
const char *function,
More information about the Cluster-devel
mailing list