linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Duane Griffin" <duaneg@dghda.com>
To: linux-ext4@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Theodore Tso <tytso@mit.edu>,
	sct@redhat.com, akpm@linux-foundation.org, adilger@clusterfs.com,
	Duane Griffin <duaneg@dghda.com>
Subject: [RFC, PATCH 4/6] jbd: refactor nested journal log recovery loop into separate functions
Date: Thu,  6 Mar 2008 01:59:12 +0000	[thread overview]
Message-ID: <1204768754-29655-5-git-send-email-duaneg@dghda.com> (raw)
Message-ID: <8644b32ddec999bbc1da0ac55ad7b66d0b8176de.1204685366.git.duaneg@dghda.com> (raw)
In-Reply-To: <1204768754-29655-4-git-send-email-duaneg@dghda.com>
In-Reply-To: <a047dfbd7855d5f484ae8a3434f6a073302505be.1204685366.git.duaneg@dghda.com>

The do_one_pass function iterates through the jbd log operating on blocks
depending on the pass. During the replay pass descriptor blocks are processed
by an inner loop which replays them. The nesting makes the code difficult to
follow or to modify. Splitting the inner loop and replay code into separate
functions simplifies matters.

The refactored code no longer unnecessarily reads revoked data blocks, so
potential read errors from such blocks will cause differing behaviour. Aside
from that there should be no functional effect.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
---

Note the TODO before the block read in the outer loop below. We could possibly
attempt to continue after a failed read as we do when replaying descriptor
blocks. However if it was a descriptor block we'd likely bomb out on the next
iteration anyway, so I'm not sure it would be worth it. Thoughts?

 fs/jbd/recovery.c |  243 ++++++++++++++++++++++++++++-------------------------
 1 files changed, 127 insertions(+), 116 deletions(-)

diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2b8edf4..e2ac78f 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -307,6 +307,101 @@ int journal_skip_recovery(journal_t *journal)
 	return err;
 }
 
+static int replay_data_block(
+	journal_t *journal, struct buffer_head *obh, char *data,
+	int flags, unsigned long blocknr)
+{
+	struct buffer_head *nbh;
+
+	/* Find a buffer for the new data being restored */
+	nbh = __getblk(journal->j_fs_dev, blocknr, journal->j_blocksize);
+	if (nbh == NULL)
+		return -ENOMEM;
+
+	lock_buffer(nbh);
+	memcpy(nbh->b_data, obh->b_data, journal->j_blocksize);
+	if (flags & JFS_FLAG_ESCAPE)
+		*((__be32 *)data) = cpu_to_be32(JFS_MAGIC_NUMBER);
+
+	BUFFER_TRACE(nbh, "marking dirty");
+	set_buffer_uptodate(nbh);
+	mark_buffer_dirty(nbh);
+	BUFFER_TRACE(nbh, "marking uptodate");
+	/* ll_rw_block(WRITE, 1, &nbh); */
+	unlock_buffer(nbh);
+	brelse(nbh);
+
+	return 0;
+}
+
+/* Replay a descriptor block: write all of the data blocks. */
+static int replay_descriptor_block(
+	journal_t *journal, char *data,
+	unsigned int next_commit_ID,
+	unsigned long *next_log_block,
+	struct recovery_info *info)
+{
+	int err;
+	int success = 0;
+	char *tagp;
+	unsigned long io_block;
+	struct buffer_head *obh;
+	unsigned long next = *next_log_block;
+
+	tagp = &data[sizeof(journal_header_t)];
+	while ((tagp - data + sizeof(journal_block_tag_t)) <=
+	       journal->j_blocksize) {
+		unsigned long blocknr;
+		journal_block_tag_t *tag = (journal_block_tag_t *) tagp;
+		int flags = be32_to_cpu(tag->t_flags);
+
+		io_block = next++;
+		wrap(journal, next);
+		blocknr = be32_to_cpu(tag->t_blocknr);
+
+		/* If the block has been revoked, then we're all done here. */
+		if (journal_test_revoke(journal, blocknr, next_commit_ID)) {
+			++info->nr_revoke_hits;
+			goto skip_write;
+		}
+
+		err = jread(&obh, journal, io_block);
+		if (err) {
+
+			/* Recover what we can, but
+			 * report failure at the end. */
+			success = err;
+			printk(KERN_ERR "JBD: IO error %d recovering "
+			       "block %ld in log\n", err, io_block);
+			continue;
+		}
+		J_ASSERT(obh != NULL);
+
+		err = replay_data_block(journal, obh, data, flags, blocknr);
+		if (err) {
+			brelse(obh);
+			printk(KERN_ERR "JBD: Out of memory during recovery\n");
+			success = err;
+			goto failed;
+		}
+
+		++info->nr_replays;
+		brelse(obh);
+
+skip_write:
+		tagp += sizeof(journal_block_tag_t);
+		if (!(flags & JFS_FLAG_SAME_UUID))
+			tagp += 16;
+
+		if (flags & JFS_FLAG_LAST_TAG)
+			break;
+	}
+
+	*next_log_block = next;
+failed:
+	return success;
+}
+
 static int do_one_pass(journal_t *journal,
 			struct recovery_info *info, enum passtype pass)
 {
@@ -316,8 +411,6 @@ static int do_one_pass(journal_t *journal,
 	journal_superblock_t *	sb;
 	journal_header_t *	tmp;
 	struct buffer_head *	bh;
-	unsigned int		sequence;
-	int			blocktype;
 
 	/* Precompute the maximum metadata descriptors in a descriptor block */
 	int			MAX_BLOCKS_PER_DESC;
@@ -348,11 +441,8 @@ static int do_one_pass(journal_t *journal,
 	 */
 
 	while (1) {
-		int			flags;
-		char *			tagp;
-		journal_block_tag_t *	tag;
-		struct buffer_head *	obh;
-		struct buffer_head *	nbh;
+		int blocktype;
+		unsigned int sequence;
 
 		cond_resched();
 
@@ -371,10 +461,13 @@ static int do_one_pass(journal_t *journal,
 		 * either the next descriptor block or the final commit
 		 * record. */
 
+		/* TODO: Should we continue on error? */
 		jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
 		err = jread(&bh, journal, next_log_block);
-		if (err)
+		if (err) {
+			success = err;
 			goto failed;
+		}
 
 		next_log_block++;
 		wrap(journal, next_log_block);
@@ -406,137 +499,57 @@ static int do_one_pass(journal_t *journal,
 		 * all of the sequence number checks.  What are we going
 		 * to do with it?  That depends on the pass... */
 
-		switch(blocktype) {
+		switch (blocktype) {
 		case JFS_DESCRIPTOR_BLOCK:
 			/* If it is a valid descriptor block, replay it
 			 * in pass REPLAY; otherwise, just skip over the
 			 * blocks it describes. */
-			if (pass != PASS_REPLAY) {
+			if (pass == PASS_REPLAY) {
+				err = replay_descriptor_block(
+					journal,
+					bh->b_data,
+					next_commit_ID,
+					&next_log_block,
+					info);
+			} else {
 				next_log_block +=
 					count_tags(bh, journal->j_blocksize);
 				wrap(journal, next_log_block);
-				brelse(bh);
-				continue;
 			}
-
-			/* A descriptor block: we can now write all of
-			 * the data blocks.  Yay, useful work is finally
-			 * getting done here! */
-
-			tagp = &bh->b_data[sizeof(journal_header_t)];
-			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
-			       <= journal->j_blocksize) {
-				unsigned long io_block;
-
-				tag = (journal_block_tag_t *) tagp;
-				flags = be32_to_cpu(tag->t_flags);
-
-				io_block = next_log_block++;
-				wrap(journal, next_log_block);
-				err = jread(&obh, journal, io_block);
-				if (err) {
-					/* Recover what we can, but
-					 * report failure at the end. */
-					success = err;
-					printk (KERN_ERR
-						"JBD: IO error %d recovering "
-						"block %ld in log\n",
-						err, io_block);
-				} else {
-					unsigned long blocknr;
-
-					J_ASSERT(obh != NULL);
-					blocknr = be32_to_cpu(tag->t_blocknr);
-
-					/* If the block has been
-					 * revoked, then we're all done
-					 * here. */
-					if (journal_test_revoke
-					    (journal, blocknr,
-					     next_commit_ID)) {
-						brelse(obh);
-						++info->nr_revoke_hits;
-						goto skip_write;
-					}
-
-					/* Find a buffer for the new
-					 * data being restored */
-					nbh = __getblk(journal->j_fs_dev,
-							blocknr,
-							journal->j_blocksize);
-					if (nbh == NULL) {
-						printk(KERN_ERR
-						       "JBD: Out of memory "
-						       "during recovery.\n");
-						err = -ENOMEM;
-						brelse(bh);
-						brelse(obh);
-						goto failed;
-					}
-
-					lock_buffer(nbh);
-					memcpy(nbh->b_data, obh->b_data,
-							journal->j_blocksize);
-					if (flags & JFS_FLAG_ESCAPE) {
-						*((__be32 *)bh->b_data) =
-						cpu_to_be32(JFS_MAGIC_NUMBER);
-					}
-
-					BUFFER_TRACE(nbh, "marking dirty");
-					set_buffer_uptodate(nbh);
-					mark_buffer_dirty(nbh);
-					BUFFER_TRACE(nbh, "marking uptodate");
-					++info->nr_replays;
-					/* ll_rw_block(WRITE, 1, &nbh); */
-					unlock_buffer(nbh);
-					brelse(obh);
-					brelse(nbh);
-				}
-
-			skip_write:
-				tagp += sizeof(journal_block_tag_t);
-				if (!(flags & JFS_FLAG_SAME_UUID))
-					tagp += 16;
-
-				if (flags & JFS_FLAG_LAST_TAG)
-					break;
-			}
-
-			brelse(bh);
-			continue;
+			break;
 
 		case JFS_COMMIT_BLOCK:
 			/* Found an expected commit block: not much to
 			 * do other than move on to the next sequence
 			 * number. */
-			brelse(bh);
 			next_commit_ID++;
-			continue;
+			break;
 
 		case JFS_REVOKE_BLOCK:
 			/* If we aren't in the REVOKE pass, then we can
 			 * just skip over this block. */
-			if (pass != PASS_REVOKE) {
-				brelse(bh);
-				continue;
+			if (pass == PASS_REVOKE) {
+				err = scan_revoke_records(
+					journal, bh, next_commit_ID, info);
 			}
-
-			err = scan_revoke_records(journal, bh,
-						  next_commit_ID, info);
-			brelse(bh);
-			if (err)
-				goto failed;
-			continue;
+			break;
 
 		default:
 			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
 				  blocktype);
-			brelse(bh);
-			goto done;
+			break;
 		}
+
+		brelse(bh);
+
+		/* Immediately fail on OOM; on other errors try to recover
+		 * as much as we can, but report the failure at the end. */
+		if (err)
+			success = err;
+		if (err == -ENOMEM)
+			goto failed;
 	}
 
- done:
 	/*
 	 * We broke out of the log scan loop: either we came to the
 	 * known end of the log or we found an unexpected block in the
@@ -558,10 +571,8 @@ static int do_one_pass(journal_t *journal,
 		}
 	}
 
-	return success;
-
  failed:
-	return err;
+	return success;
 }
 
 
-- 
1.5.3.7


  parent reply	other threads:[~2008-03-06  2:08 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-06  1:59 [RFC, PATCH 0/6] ext3: do not modify data on-disk when mounting read-only filesystem Duane Griffin
     [not found] ` <a047dfbd7855d5f484ae8a3434f6a073302505be.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59   ` [RFC, PATCH 1/6] jbd: eliminate duplicated code in revocation table init/destroy functions Duane Griffin
     [not found]   ` <bd11bfe0ae59094e7a3fbd813dfd877f989f122f.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59     ` [RFC, PATCH 3/6] jbd: only create debugfs entries if cache initialisation is successful Duane Griffin
     [not found]   ` <f4577a0cc4262de0fbff02cae3858d2e2edfaaea.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59     ` [RFC, PATCH 6/6] ext3: do not write to the disk when mounting a dirty read-only filesystem Duane Griffin
2008-03-06  7:17     ` Andreas Dilger
2008-03-06 11:19       ` Duane Griffin
2008-03-11 15:11       ` Jan Kara
2008-03-12  2:42         ` Duane Griffin
2008-03-12 10:53           ` Jan Kara
     [not found]   ` <7fbb2f28dcb417e3173ffacc103932c36683f2f0.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59     ` [RFC, PATCH 2/6] jbd: replace potentially false assertion with if block Duane Griffin
2008-03-08 14:52     ` Christoph Hellwig
     [not found]   ` <8644b32ddec999bbc1da0ac55ad7b66d0b8176de.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59     ` Duane Griffin [this message]
2008-03-08 14:53     ` [RFC, PATCH 4/6] jbd: refactor nested journal log recovery loop into separate functions Christoph Hellwig
2008-03-08 18:40       ` Duane Griffin
2008-03-11 14:35     ` Jan Kara
2008-03-12  1:02       ` Duane Griffin
2008-03-12 10:50         ` Jan Kara
     [not found]   ` <7f095bf2403465433796f2f7aab20f1c9a2e0f73.1204685366.git.duaneg@dghda.com>
2008-03-06  1:59     ` [RFC, PATCH 5/6] jbd: add support for read-only log recovery Duane Griffin
2008-03-11 15:05     ` Jan Kara
2008-03-12  1:40       ` Duane Griffin
2008-03-12 10:51         ` Jan Kara
2008-03-06  3:42 ` [RFC, PATCH 0/6] ext3: do not modify data on-disk when mounting read-only filesystem Andrew Morton
2008-03-06 11:20   ` Duane Griffin
2008-03-13  3:22 ` Daniel Phillips
2008-03-13 12:35   ` Duane Griffin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1204768754-29655-5-git-send-email-duaneg@dghda.com \
    --to=duaneg@dghda.com \
    --cc=adilger@clusterfs.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sct@redhat.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).