linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
To: Andrew Morton <akpm@osdl.org>
Cc: "Cc: Ken Chen" <kenchen@google.com>,
	Mike Waychison <mikew@google.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH 4/6] check dirty inode list
Date: Sun, 12 Aug 2007 17:11:24 +0800	[thread overview]
Message-ID: <386910468.27672@ustc.edu.cn> (raw)
Message-ID: <20070812092052.983296733@mail.ustc.edu.cn> (raw)
In-Reply-To: 20070812091120.189651872@mail.ustc.edu.cn

[-- Attachment #1: check_dirty_inode_list.patch --]
[-- Type: text/plain, Size: 6004 bytes --]

 From: Andrew Morton <akpm@linux-foundation.org>

The per-superblock dirty-inode list super_block.s_dirty is supposed to be
sorted in reverse order of each inode's time-of-first-dirtying.  This is so
that the kupdate function can avoid having to walk all the dirty inodes on the
list: it terminates the search as soon as it finds an inode which was dirtied
less than 30 seconds ago (dirty_expire_centisecs).

We have a bunch of several-year-old bugs which cause that list to not be in
the correct reverse-time-order.  The result of this is that under certain
obscure circumstances, inodes get stuck and basically never get written back. 
It has been reported a couple of times, but nobody really cared much because
most people use ordered-mode journalling filesystems, which take care of the
writeback independently.  Plus we will _eventually_ get onto these inodes even
when the list is out of order, and a /bin/sync will still work OK.

However this is a pretty important data-integrity issue for filesystems such
as ext2.


As preparation for fixing these bugs, this patch adds a pile of fantastically
expensive debugging code which checks the sanity of the s_dirty list all over
the place, so we find out as soon as it goes bad.

The debugging code is controlled by /proc/sys/fs/inode_debug, which defaults
to off.  The debugging will disable itself whenever it detects a misordering,
to avoid log spew.

We can remove all this code later.

Cc: Mike Waychison <mikew@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/fs-writeback.c         |   77 ++++++++++++++++++++++++++++++++++++
 include/linux/writeback.h |    1 
 kernel/sysctl.c           |    8 +++
 3 files changed, 86 insertions(+)

--- linux-2.6.23-rc2-mm2.orig/fs/fs-writeback.c
+++ linux-2.6.23-rc2-mm2/fs/fs-writeback.c
@@ -24,6 +24,75 @@
 #include <linux/buffer_head.h>
 #include "internal.h"
 
+int sysctl_inode_debug __read_mostly;
+
+static int __check(struct list_head *head, int print_stuff)
+{
+	struct list_head *cursor = head;
+	unsigned long dirtied_when = 0;
+
+	while ((cursor = cursor->prev) != head) {
+		struct inode *inode = list_entry(cursor, struct inode, i_list);
+		if (print_stuff) {
+			printk("%p:%lu\n", inode, inode->dirtied_when);
+		} else {
+			if (dirtied_when &&
+			    time_before(inode->dirtied_when, dirtied_when))
+				return 1;
+			dirtied_when = inode->dirtied_when;
+		}
+	}
+	return 0;
+}
+
+static void __check_dirty_inode_list(struct super_block *sb,
+			struct inode *inode, const char *file, int line)
+{
+	if (!sysctl_inode_debug)
+		return;
+
+	if (__check(&sb->s_dirty, 0)) {
+		sysctl_inode_debug = 0;
+		if (inode)
+			printk("%s:%d: s_dirty got screwed up.  inode=%p:%lu\n",
+					file, line, inode, inode->dirtied_when);
+		else
+			printk("%s:%d: s_dirty got screwed up\n", file, line);
+		__check(&sb->s_dirty, 1);
+	}
+	if (__check(&sb->s_io, 0)) {
+		sysctl_inode_debug = 0;
+		if (inode)
+			printk("%s:%d: s_io got screwed up.  inode=%p:%lu\n",
+					file, line, inode, inode->dirtied_when);
+		else
+			printk("%s:%d: s_io got screwed up\n", file, line);
+		__check(&sb->s_io, 1);
+	}
+	if (__check(&sb->s_more_io, 0)) {
+		sysctl_inode_debug = 0;
+		if (inode)
+			printk("%s:%d: s_more_io got screwed up.  inode=%p:%lu\n",
+					file, line, inode, inode->dirtied_when);
+		else
+			printk("%s:%d: s_more_io got screwed up\n", file, line);
+		__check(&sb->s_more_io, 1);
+	}
+}
+
+#define check_dirty_inode_list(sb)					\
+	do {								\
+		if (unlikely(sysctl_inode_debug))			\
+		__check_dirty_inode_list(sb, NULL, __FILE__, __LINE__);	\
+	} while (0)
+
+#define check_dirty_inode(inode)					\
+	do {								\
+		if (unlikely(sysctl_inode_debug))			\
+			__check_dirty_inode_list(inode->i_sb, inode,	\
+						__FILE__, __LINE__);	\
+	} while (0)
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -122,8 +191,10 @@ void __mark_inode_dirty(struct inode *in
 		 * reposition it (that would break s_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			check_dirty_inode(inode);
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_list, &sb->s_dirty);
+			check_dirty_inode(inode);
 		}
 	}
 out:
@@ -152,6 +223,7 @@ static void redirty_tail(struct inode *i
 {
 	struct super_block *sb = inode->i_sb;
 
+	check_dirty_inode(inode);
 	if (!list_empty(&sb->s_dirty)) {
 		struct inode *tail_inode;
 
@@ -161,6 +233,7 @@ static void redirty_tail(struct inode *i
 			inode->dirtied_when = jiffies;
 	}
 	list_move(&inode->i_list, &sb->s_dirty);
+	check_dirty_inode(inode);
 }
 
 /*
@@ -168,7 +241,9 @@ static void redirty_tail(struct inode *i
  */
 static void requeue_io(struct inode *inode)
 {
+	check_dirty_inode(inode);
 	list_move(&inode->i_list, &inode->i_sb->s_more_io);
+	check_dirty_inode(inode);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -463,8 +538,10 @@ int generic_sync_sb_inodes(struct super_
 		if (!ret)
 			ret = err;
 		if (wbc->sync_mode == WB_SYNC_HOLD) {
+			check_dirty_inode(inode);
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_list, &sb->s_dirty);
+			check_dirty_inode(inode);
 		}
 		if (current_is_pdflush())
 			writeback_release(bdi);
--- linux-2.6.23-rc2-mm2.orig/include/linux/writeback.h
+++ linux-2.6.23-rc2-mm2/include/linux/writeback.h
@@ -140,5 +140,6 @@ void writeback_set_ratelimit(void);
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
 				   read-only. */
 
+extern int sysctl_inode_debug;
 
 #endif		/* WRITEBACK_H */
--- linux-2.6.23-rc2-mm2.orig/kernel/sysctl.c
+++ linux-2.6.23-rc2-mm2/kernel/sysctl.c
@@ -1238,6 +1238,14 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "inode_debug",
+		.data		= &sysctl_inode_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
 	{
 		.ctl_name	= CTL_UNNUMBERED,

-- 

  parent reply	other threads:[~2007-08-12  9:23 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20070812091120.189651872@mail.ustc.edu.cn>
2007-08-12  9:11 ` [PATCH 0/6] writeback time order/delay fixes take 3 Fengguang Wu
2007-08-22  0:23   ` Chris Mason
     [not found]     ` <20070822011841.GA8090@mail.ustc.edu.cn>
2007-08-22  1:18       ` Fengguang Wu
2007-08-22 12:42         ` Chris Mason
2007-08-23  2:47           ` David Chinner
2007-08-23 12:13             ` Chris Mason
     [not found]               ` <20070824125643.GB7933@mail.ustc.edu.cn>
2007-08-24 12:56                 ` Fengguang Wu
     [not found]           ` <20070824132458.GC7933@mail.ustc.edu.cn>
2007-08-24 13:24             ` Fengguang Wu
2007-08-24 14:36               ` Chris Mason
2007-08-23  2:33       ` David Chinner
     [not found]         ` <20070824135504.GA9029@mail.ustc.edu.cn>
2007-08-24 13:55           ` Fengguang Wu
2007-08-28 14:55           ` David Chinner
2007-08-28 15:08             ` Chris Mason
2007-08-28 16:33               ` David Chinner
2007-08-28 16:57                 ` Chris Mason
     [not found]                 ` <20070829075330.GA5960@mail.ustc.edu.cn>
2007-08-29  7:53                   ` Fengguang Wu
     [not found] ` <20070812092052.558804846@mail.ustc.edu.cn>
2007-08-12  9:11   ` [PATCH 1/6] writeback: fix time ordering of the per superblock inode lists 8 Fengguang Wu
     [not found] ` <20070812092052.704326603@mail.ustc.edu.cn>
2007-08-12  9:11   ` [PATCH 2/6] writeback: fix ntfs with sb_has_dirty_inodes() Fengguang Wu
     [not found] ` <20070812092052.983296733@mail.ustc.edu.cn>
2007-08-12  9:11   ` Fengguang Wu [this message]
     [not found] ` <20070812092053.113127445@mail.ustc.edu.cn>
2007-08-12  9:11   ` [PATCH 5/6] prevent time-ordering warnings Fengguang Wu
     [not found] ` <20070812092053.242474484@mail.ustc.edu.cn>
2007-08-12  9:11   ` [PATCH 6/6] track redirty_tail() calls Fengguang Wu
     [not found] ` <20070812092052.848213359@mail.ustc.edu.cn>
2007-08-12  9:11   ` [PATCH 3/6] writeback: remove pages_skipped accounting in __block_write_full_page() Fengguang Wu
2007-08-13  1:03   ` David Chinner
     [not found]     ` <20070813103000.GA8520@mail.ustc.edu.cn>
2007-08-13 10:30       ` Fengguang Wu
     [not found]       ` <20070817071317.GA8965@mail.ustc.edu.cn>
2007-08-17  7:13         ` Fengguang Wu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=386910468.27672@ustc.edu.cn \
    --to=wfg@mail.ustc.edu.cn \
    --cc=akpm@linux-foundation.org \
    --cc=akpm@osdl.org \
    --cc=kenchen@google.com \
    --cc=mikew@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).