This is the mail archive of the
cluster-cvs@sourceware.org
mailing list for the cluster.
cluster: STABLE2 - GFS2: gfs2_fsck should fix journal sequence number problems
- From: Bob Peterson <rpeterso at fedoraproject dot org>
- To: cluster-cvs-relay at redhat dot com
- Date: Fri, 1 May 2009 21:37:48 +0000 (UTC)
- Subject: cluster: STABLE2 - GFS2: gfs2_fsck should fix journal sequence number problems
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=67dde3066222a0cc63673e942e3b8f81f95a9203
Commit: 67dde3066222a0cc63673e942e3b8f81f95a9203
Parent: d533275202980637a257dd90c459433f04cdc97f
Author: Bob Peterson <rpeterso@redhat.com>
AuthorDate: Fri May 1 16:36:48 2009 -0500
Committer: Bob Peterson <rpeterso@redhat.com>
CommitterDate: Fri May 1 16:36:48 2009 -0500
GFS2: gfs2_fsck should fix journal sequence number problems
bz 498646
Due to bug #457557 or if a customer accidentally mounts their
file system as "lock_nolock" from two nodes, the journals
can have sequence number problems. These sequence number
problems prevent gfs2 from ever mounting the file system again.
Therefore, this enables gfs2_fsck to detect and repair most
sequence number problems.
---
gfs2/fsck/fs_recovery.c | 76 +++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c
index c8bf973..5c942b9 100644
--- a/gfs2/fsck/fs_recovery.c
+++ b/gfs2/fsck/fs_recovery.c
@@ -321,6 +321,59 @@ int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
}
/**
+ * fix_journal_seq_no - Fix log header sequencing problems
+ * @ip: the journal incore inode
+ *
+ * Walks every block of the journal and looks at each block that
+ * get_log_header() reports as a log header.  Sequence numbers are expected
+ * to rise from one log header to the next.  A single header that is not
+ * higher than its predecessor is tolerated if its sequence number is the
+ * lowest seen so far; that is treated as the journal wrapping around.  Any
+ * other header that is not higher than its predecessor is renumbered to one
+ * more than the highest sequence number seen so far and written back to
+ * its journal block.
+ *
+ * Returns: 0 (always; no failure is reported to the caller)
+ */
+int fix_journal_seq_no(struct gfs2_inode *ip)
+{
+	int error = 0, wrapped = 0;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+	uint32_t blk;
+	struct gfs2_log_header lh;
+	uint64_t highest_seq = 0, lowest_seq = 0, prev_seq = 0;
+	int new = 0;
+	uint64_t dblock;
+	uint32_t extlen;
+	struct gfs2_buffer_head *bh;
+
+	for (blk = 0; blk < jd_blocks; blk++) {
+		/* Only the "not a log header" result (1) is acted upon here;
+		   any other return value is treated as a usable header. */
+		error = get_log_header(ip, blk, &lh);
+		if (error == 1) /* if not a log header */
+			continue; /* just journal data--ignore it */
+		/* Track the extremes before judging this header, so that
+		   lowest_seq may already equal this header's sequence. */
+		if (!lowest_seq || lh.lh_sequence < lowest_seq)
+			lowest_seq = lh.lh_sequence;
+		if (!highest_seq || lh.lh_sequence > highest_seq)
+			highest_seq = lh.lh_sequence;
+		if (lh.lh_sequence > prev_seq) {
+			prev_seq = lh.lh_sequence;
+			continue;
+		}
+		/* The sequence number is not higher than the previous one,
+		   so it's either wrap-around or a sequencing problem. */
+		if (!wrapped && lh.lh_sequence == lowest_seq) {
+			/* Only one wrap point is accepted per journal. */
+			wrapped = 1;
+			prev_seq = lh.lh_sequence;
+			continue;
+		}
+		/* lh_sequence is a 64-bit field; cast it explicitly so the
+		   argument always matches the %llx conversion. */
+		log_err("Journal block %u (0x%x): sequence no. 0x%llx out of "
+			"order.\n", blk, blk,
+			(unsigned long long)lh.lh_sequence);
+		log_info("Low: 0x%llx, High: 0x%llx, Prev: 0x%llx\n",
+			 (unsigned long long)lowest_seq,
+			 (unsigned long long)highest_seq,
+			 (unsigned long long)prev_seq);
+		/* Renumber the offending header past everything seen so far. */
+		highest_seq++;
+		lh.lh_sequence = highest_seq;
+		prev_seq = lh.lh_sequence;
+		log_warn("Renumbering it as 0x%llx\n",
+			 (unsigned long long)lh.lh_sequence);
+		/* Map the journal-relative block to its disk block, then
+		   rewrite the header in place and release it as updated. */
+		block_map(ip, blk, &new, &dblock, &extlen, FALSE, not_updated);
+		bh = bread(&ip->i_sbd->buf_list, dblock);
+		gfs2_log_header_out(&lh, bh->b_data);
+		brelse(bh, updated);
+	}
+	return 0;
+}
+
+/**
* gfs2_recover_journal - recovery a given journal
* @ip: the journal incore inode
*
@@ -341,9 +394,26 @@ int gfs2_recover_journal(struct gfs2_inode *ip, int j)
osi_list_init(&sd_revoke_list);
error = gfs2_find_jhead(ip, &head);
- if (error)
- goto out;
-
+ if (error) {
+ if (!query(&opts, "\nJournal #%d (\"journal%d\") is corrupt. "
+ "Okay to repair it? (y/n)", j+1, j)) {
+ log_err("jid=%u: The journal was not repaired.\n", j);
+ goto out;
+ }
+ log_info("jid=%u: Repairing journal...\n", j);
+ error = fix_journal_seq_no(ip);
+ if (error) {
+ log_err("jid=%u: Unable to repair the bad journal.\n",
+ j);
+ goto out;
+ }
+ error = gfs2_find_jhead(ip, &head);
+ if (error) {
+ log_err("jid=%u: Unable to fix the bad journal.\n", j);
+ goto out;
+ }
+ log_err("jid=%u: The journal was successfully fixed.\n", j);
+ }
if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
log_info("jid=%u: Journal is clean.\n", j);
return 0;