* [PATCH 0/3] fs: peel back the inode_lock some more
From: Dave Chinner @ 2010-10-28 11:42 UTC
  To: viro; +Cc: linux-fsdevel, linux-kernel

Hi Al,

These are the first versions of the patches that remove the inode
lock from the writeback lists, superblock inode lists and the inode
hash table. I haven't polished them at all, but they appear to work
ok so I thought I'd send them out for comments before I went to bed...

With these patches the inode_lock pretty much disappears from my
lock_stat profiles, with the writeback list lock being the most
contended of the replacements on an 8-way machine.

Cheers,

Dave.


* [PATCH 1/3] fs: move i_sb_list out from under inode_lock
From: Dave Chinner @ 2010-10-28 11:42 UTC
  To: viro; +Cc: linux-fsdevel, linux-kernel

From: Dave Chinner <dchinner@redhat.com>

Protect the per-sb inode list with a new global lock,
inode_sb_list_lock, and use it for the list manipulations and
traversals. This lock replaces the inode_lock here: inodes on the
list can be validity checked while holding inode->i_lock, so the
inode_lock is no longer needed to protect the list.
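
The traversal pattern this enables looks roughly like the following
(a condensed sketch of what drop_pagecache_sb() becomes in the diff
below - the page cache checks are omitted here):

	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);	/* per-inode validity check */
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);			/* pin the inode */
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/* blocking per-inode work happens here */

		iput(toput_inode);		/* iput can block, so defer it */
		toput_inode = inode;
		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(toput_inode);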

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/drop_caches.c       |    9 +++++----
 fs/fs-writeback.c      |   21 +++++++++++----------
 fs/inode.c             |   43 +++++++++++++++++++++++--------------------
 fs/internal.h          |    2 ++
 fs/notify/inode_mark.c |   15 ++++++++-------
 fs/quota/dquot.c       |   28 ++++++++++++++++------------
 6 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 62dd8ee..86cd2f1 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
@@ -16,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -26,13 +27,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(toput_inode);
 }
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 50de4f1..7707a62 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1053,7 +1053,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -1073,14 +1073,15 @@ static void wait_sb_inodes(struct super_block *sb)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
+
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock.  We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
 		 */
 		iput(old_inode);
 		old_inode = inode;
@@ -1089,9 +1090,9 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 }
 
diff --git a/fs/inode.c b/fs/inode.c
index f64aeda..44f28dd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -32,10 +32,15 @@
  *   inode->i_state, __iget()
  * inode_lru_lock protects:
  *   inode_lru, inode->i_lru
+ * inode_sb_list_lock protects:
+ *   sb->s_inodes, inode->i_sb_list
  *
  * Lock ordering:
  * inode_lock
  *   inode->i_lock
+ *
+ * inode_sb_list_lock
+ *   inode->i_lock
  *     inode_lru_lock
  */
 
@@ -97,6 +102,8 @@ static struct hlist_head *inode_hashtable __read_mostly;
  */
 DEFINE_SPINLOCK(inode_lock);
 
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
+
 /*
  * iprune_sem provides exclusion between the kswapd or try_to_free_pages
  * icache shrinking path, and the umount path.  Without this exclusion,
@@ -365,26 +372,23 @@ static void inode_lru_list_del(struct inode *inode)
 	spin_unlock(&inode_lru_lock);
 }
 
-static inline void __inode_sb_list_add(struct inode *inode)
-{
-	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-}
-
 /**
  * inode_sb_list_add - add inode to the superblock list of inodes
  * @inode: inode to add
  */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	__inode_sb_list_add(inode);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
+	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_sb_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
-static inline void __inode_sb_list_del(struct inode *inode)
+static inline void inode_sb_list_del(struct inode *inode)
 {
+	spin_lock(&inode_sb_list_lock);
 	list_del_init(&inode->i_sb_list);
+	spin_unlock(&inode_sb_list_lock);
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -460,9 +464,10 @@ static void evict(struct inode *inode)
 
 	spin_lock(&inode_lock);
 	list_del_init(&inode->i_wb_list);
-	__inode_sb_list_del(inode);
 	spin_unlock(&inode_lock);
 
+	inode_sb_list_del(inode);
+
 	if (op->evict_inode) {
 		op->evict_inode(inode);
 	} else {
@@ -516,7 +521,7 @@ void evict_inodes(struct super_block *sb)
 
 	down_write(&iprune_sem);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
@@ -534,7 +539,7 @@ void evict_inodes(struct super_block *sb)
 
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 	up_write(&iprune_sem);
@@ -555,7 +560,7 @@ int invalidate_inodes(struct super_block *sb)
 
 	down_write(&iprune_sem);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -574,7 +579,7 @@ int invalidate_inodes(struct super_block *sb)
 
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 	up_write(&iprune_sem);
@@ -839,16 +844,14 @@ struct inode *new_inode(struct super_block *sb)
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
+	spin_lock_prefetch(&inode_sb_list_lock);
 
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_state = 0;
 		spin_unlock(&inode->i_lock);
-		__inode_sb_list_add(inode);
-		spin_unlock(&inode_lock);
+		inode_sb_list_add(inode);
 	}
 	return inode;
 }
@@ -918,7 +921,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 			inode->i_state = I_NEW;
 			spin_unlock(&inode->i_lock);
 			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			inode_sb_list_add(inode);
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -967,7 +970,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 			inode->i_state = I_NEW;
 			spin_unlock(&inode->i_lock);
 			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			inode_sb_list_add(inode);
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b9..493baa1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -108,3 +108,5 @@ extern void release_open_intent(struct nameidata *);
 extern int get_nr_dirty_inodes(void);
 extern int evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *);
+
+extern spinlock_t inode_sb_list_lock;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 9c0fb09..06c8216 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -29,6 +29,8 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#include "../internal.h"
+
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
@@ -232,15 +234,14 @@ out:
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
  */
 void fsnotify_unmount_inodes(struct list_head *list)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inode *need_iput_tmp;
 
@@ -293,7 +294,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		 * will be added since the umount has begun.  Finally,
 		 * iprune_mutex keeps shrink_icache_memory() away.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -305,7 +306,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
 		iput(inode);
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 }
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2d89f8..440f64c 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include "../internal.h" /* ugh */
 
 #include <asm/uaccess.h>
 
@@ -896,7 +896,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -911,19 +911,23 @@ static void add_dquot_ref(struct super_block *sb, int type)
 #endif
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		iput(old_inode);
 		__dquot_initialize(inode, type);
-		/* We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the inode_lock.
-		 * We cannot iput the inode now as we can be holding the last
-		 * reference and we cannot iput it under inode_lock. So we
-		 * keep the reference and iput it later. */
+
+		/*
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock. We cannot iput the inode now as we can be
+		 * holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
+		 */
 		old_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1004,7 +1008,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 *  We have to scan also I_NEW inodes because they can already
@@ -1018,7 +1022,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 			remove_inode_dquot_ref(inode, type, tofree_head);
 		}
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
-- 
1.7.1



* [PATCH 2/3] fs: move i_wb_list out from under inode_lock
From: Dave Chinner @ 2010-10-28 11:42 UTC
  To: viro; +Cc: linux-fsdevel, linux-kernel

From: Dave Chinner <dchinner@redhat.com>

Protect the inode writeback lists with a new global lock,
inode_wb_list_lock, and use it for the list manipulations and
traversals. This lock replaces the inode_lock here: inodes on the
lists can be validity checked while holding inode->i_lock, so the
inode_lock is no longer needed to protect them.
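
The effect is easiest to see at the tail of __mark_inode_dirty()
below: all the i_state checks are done under inode->i_lock, and the
global lock is now only held across the list manipulation itself.
In sketch form:

	/* i_state already checked and updated under inode->i_lock */
	spin_lock(&inode_wb_list_lock);
	inode->dirtied_when = jiffies;
	list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
	spin_unlock(&inode_wb_list_lock);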

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/block_dev.c            |    4 +-
 fs/fs-writeback.c         |  152 ++++++++++++++++++++++++---------------------
 fs/inode.c                |   12 +++-
 fs/internal.h             |    7 ++-
 include/linux/writeback.h |    1 +
 mm/backing-dev.c          |    8 +-
 6 files changed, 103 insertions(+), 81 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b62..a94cbf0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -56,11 +56,11 @@ EXPORT_SYMBOL(I_BDEV);
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7707a62..5b7cb95 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -162,6 +162,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 }
 
 /*
+ * Remove the inode from the writeback list it is on.
+ */
+void inode_wb_list_del(struct inode *inode)
+{
+	spin_lock(&inode_wb_list_lock);
+	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode_wb_list_lock);
+}
+
+
+/*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
  *
@@ -174,6 +185,7 @@ static void redirty_tail(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
@@ -191,14 +203,17 @@ static void requeue_io(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through
+	 * spin_unlock(&inode_wb_list_lock);
 	 */
+
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
 }
@@ -272,6 +287,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
+	assert_spin_locked(&inode_wb_list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
@@ -294,25 +310,23 @@ static void inode_wait_for_writeback(struct inode *inode)
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	 while (inode->i_state & I_SYNC) {
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		spin_lock(&inode->i_lock);
 	}
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages.  Called under inode_wb_list_lock.  Either
+ * the caller has ref on the inode (either via __iget or via syscall against an
+ * fd) or the inode has I_WILL_FREE set.
  *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
  * starvation of particular inodes when others are being redirtied, prevent
  * livelocks, etc.
- *
- * Called under inode_lock.
  */
 static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -354,7 +368,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY_PAGES;
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	ret = do_writepages(mapping, wbc);
 
@@ -374,12 +388,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -387,7 +399,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			ret = err;
 	}
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	spin_lock(&inode->i_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
@@ -529,10 +541,10 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 			 */
 			redirty_tail(inode);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
 			return 1;
@@ -551,7 +563,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 
 	if (!wbc->wb_start)
 		wbc->wb_start = jiffies; /* livelock avoidance */
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 
@@ -569,7 +581,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (ret)
 			break;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	/* Leave any unwritten inodes on b_io */
 }
 
@@ -578,11 +590,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 	writeback_sb_inodes(sb, wb, wbc, true);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 /*
@@ -692,7 +704,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
 			inode = wb_inode(wb->b_more_io.prev);
 			trace_wbc_writeback_wait(&wbc, wb->bdi);
@@ -700,7 +712,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 			inode_wait_for_writeback(inode);
 			spin_unlock(&inode->i_lock);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	return wrote;
@@ -940,6 +952,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	struct super_block *sb = inode->i_sb;
 	struct backing_dev_info *bdi = NULL;
 	bool wakeup_bdi = false;
+	int was_dirty;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -963,63 +976,62 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
-	if ((inode->i_state & flags) != flags) {
-		const int was_dirty = inode->i_state & I_DIRTY;
+	if ((inode->i_state & flags) == flags)
+		goto out_unlock_inode;
 
-		inode->i_state |= flags;
+	was_dirty = inode->i_state & I_DIRTY;
+	inode->i_state |= flags;
 
-		/*
-		 * If the inode is being synced, just update its dirty state.
-		 * The unlocker will place the inode on the appropriate
-		 * superblock list, based upon its state.
-		 */
-		if (inode->i_state & I_SYNC)
-			goto out_unlock_inode;
+	/*
+	 * If the inode is being synced, just update its dirty state.
+	 * The unlocker will place the inode on the appropriate
+	 * superblock list, based upon its state.
+	 */
+	if (inode->i_state & I_SYNC)
+		goto out_unlock_inode;
 
-		/*
-		 * Only add valid (hashed) inodes to the superblock's
-		 * dirty list.  Add blockdev inodes as well.
-		 */
-		if (!S_ISBLK(inode->i_mode)) {
-			if (inode_unhashed(inode))
-				goto out_unlock_inode;
-		}
-		if (inode->i_state & I_FREEING)
+	/*
+	 * Only add valid (hashed) inodes to the superblock's
+	 * dirty list.  Add blockdev inodes as well.
+	 */
+	if (!S_ISBLK(inode->i_mode)) {
+		if (inode_unhashed(inode))
 			goto out_unlock_inode;
+	}
+	if (inode->i_state & I_FREEING)
+		goto out_unlock_inode;
 
+	/*
+	 * If the inode was already on b_dirty/b_io/b_more_io, don't
+	 * reposition it (that would break b_dirty time-ordering).
+	 */
+	if (was_dirty) {
+out_unlock_inode:
 		spin_unlock(&inode->i_lock);
-		/*
-		 * If the inode was already on b_dirty/b_io/b_more_io, don't
-		 * reposition it (that would break b_dirty time-ordering).
-		 */
-		if (!was_dirty) {
-			bdi = inode_to_bdi(inode);
+		return;
+	}
 
-			if (bdi_cap_writeback_dirty(bdi)) {
-				WARN(!test_bit(BDI_registered, &bdi->state),
-				     "bdi-%s not registered\n", bdi->name);
+	spin_unlock(&inode->i_lock);
+	bdi = inode_to_bdi(inode);
 
-				/*
-				 * If this is the first dirty inode for this
-				 * bdi, we have to wake-up the corresponding
-				 * bdi thread to make sure background
-				 * write-back happens later.
-				 */
-				if (!wb_has_dirty_io(&bdi->wb))
-					wakeup_bdi = true;
-			}
+	if (bdi_cap_writeback_dirty(bdi)) {
+		WARN(!test_bit(BDI_registered, &bdi->state),
+		     "bdi-%s not registered\n", bdi->name);
 
-			inode->dirtied_when = jiffies;
-			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
-		}
-		goto out;
+		/*
+		 * If this is the first dirty inode for this bdi, we have to
+		 * wake-up the corresponding bdi thread to make sure background
+		 * write-back happens later.
+		 */
+		if (!wb_has_dirty_io(&bdi->wb))
+			wakeup_bdi = true;
 	}
-out_unlock_inode:
-	spin_unlock(&inode->i_lock);
-out:
-	spin_unlock(&inode_lock);
+
+	spin_lock(&inode_wb_list_lock);
+	inode->dirtied_when = jiffies;
+	list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+	spin_unlock(&inode_wb_list_lock);
 
 	if (wakeup_bdi)
 		bdi_wakeup_thread_delayed(bdi);
@@ -1195,9 +1207,9 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	ret = writeback_single_inode(inode, &wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1219,9 +1231,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	ret = writeback_single_inode(inode, wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
diff --git a/fs/inode.c b/fs/inode.c
index 44f28dd..130aa74 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/mount.h>
 #include <linux/async.h>
 #include <linux/posix_acl.h>
+#include "internal.h"
 
 /*
  * inode locking rules.
@@ -34,6 +35,8 @@
  *   inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
+ * inode_wb_list_lock protects:
+ *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
  *
  * Lock ordering:
  * inode_lock
@@ -42,6 +45,9 @@
  * inode_sb_list_lock
  *   inode->i_lock
  *     inode_lru_lock
+ *
+ * inode_wb_list_lock
+ *   inode->i_lock
  */
 
 /*
@@ -103,6 +109,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
 DEFINE_SPINLOCK(inode_lock);
 
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 
 /*
  * iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -462,10 +469,7 @@ static void evict(struct inode *inode)
 
 	BUG_ON(!(inode->i_state & I_FREEING));
 
-	spin_lock(&inode_lock);
-	list_del_init(&inode->i_wb_list);
-	spin_unlock(&inode_lock);
-
+	inode_wb_list_del(inode);
 	inode_sb_list_del(inode);
 
 	if (op->evict_inode) {
diff --git a/fs/internal.h b/fs/internal.h
index 493baa1..fd3f700 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,7 +106,12 @@ extern void release_open_intent(struct nameidata *);
  * inode.c
  */
 extern int get_nr_dirty_inodes(void);
-extern int evict_inodes(struct super_block *);
+extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *);
 
 extern spinlock_t inode_sb_list_lock;
+
+/*
+ * fs-writeback.c
+ */
+extern void inode_wb_list_del(struct inode *inode);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 242b6f8..e78a240 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,6 +10,7 @@
 struct backing_dev_info;
 
 extern spinlock_t inode_lock;
+extern spinlock_t inode_wb_list_lock;
 
 /*
  * fs/fs-writeback.c
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 15d5097..168ba5e 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -73,14 +73,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 	struct inode *inode;
 
 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;
 	list_for_each_entry(inode, &wb->b_io, i_wb_list)
 		nr_io++;
 	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
 		nr_more_io++;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -682,11 +682,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	if (bdi_has_dirty_io(bdi)) {
 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
 		list_splice(&bdi->wb.b_io, &dst->b_io);
 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	bdi_unregister(bdi);
-- 
1.7.1



* [PATCH 3/3] fs: move i_hash out from under inode_lock
From: Dave Chinner @ 2010-10-28 11:42 UTC
  To: viro; +Cc: linux-fsdevel, linux-kernel

From: Dave Chinner <dchinner@redhat.com>

Protect the inode hash lists with a new global lock,
inode_hash_lock, and use it for the list manipulations and
traversals. This lock replaces the inode_lock here: inodes on the
hash can be validity checked while holding inode->i_lock, so the
inode_lock is no longer needed to protect the lists.
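
On the lookup side the hash walk follows the same pattern as the
previous two patches: the hash lock serialises the walk, and each
candidate is validated under its own i_lock. A sketch of the shape
find_inode_fast() takes after this change (the helper was converted
by earlier patches and is not part of this diff):

	static struct inode *find_inode_fast(struct super_block *sb,
					     struct hlist_head *head,
					     unsigned long ino)
	{
		struct hlist_node *node;
		struct inode *inode;

	repeat:
		/* caller holds inode_hash_lock */
		hlist_for_each_entry(inode, node, head, i_hash) {
			if (inode->i_ino != ino || inode->i_sb != sb)
				continue;
			spin_lock(&inode->i_lock);
			if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
				/* drops and retakes inode_hash_lock */
				__wait_on_freeing_inode(inode);
				goto repeat;
			}
			__iget(inode);
			spin_unlock(&inode->i_lock);
			return inode;
		}
		return NULL;
	}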

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/inode.c |   69 ++++++++++++++++++++++++++++++++++-------------------------
 1 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 130aa74..738ba4e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -37,6 +37,8 @@
  *   sb->s_inodes, inode->i_sb_list
 * inode_wb_list_lock protects:
  *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
+ * inode_hash_lock protects:
+ *   inode_hashtable, inode->i_hash
  *
  * Lock ordering:
  * inode_lock
@@ -48,6 +50,12 @@
  *
  * inode_wb_list_lock
  *   inode->i_lock
+ *
+ * inode_hash_lock
+ *   inode->i_lock
+ *
+ * iunique_lock
+ *   inode_hash_lock
  */
 
 /*
@@ -83,6 +91,8 @@
 
 static unsigned int i_hash_mask __read_mostly;
 static unsigned int i_hash_shift __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
 /*
  * Each inode can be on two separate lists. One is
@@ -98,7 +108,6 @@ static unsigned int i_hash_shift __read_mostly;
 
 static LIST_HEAD(inode_lru);
 static DEFINE_SPINLOCK(inode_lru_lock);
-static struct hlist_head *inode_hashtable __read_mostly;
 
 /*
  * A simple spinlock to protect the list manipulations.
@@ -420,9 +429,9 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 {
 	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	hlist_add_head(&inode->i_hash, b);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(__insert_inode_hash);
 
@@ -434,9 +443,9 @@ EXPORT_SYMBOL(__insert_inode_hash);
  */
 void remove_inode_hash(struct inode *inode)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
@@ -914,7 +923,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, head, test, data);
 		if (!old) {
@@ -925,8 +934,8 @@ static struct inode *get_new_inode(struct super_block *sb,
 			inode->i_state = I_NEW;
 			spin_unlock(&inode->i_lock);
 			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_hash_lock);
 			inode_sb_list_add(inode);
-			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -939,7 +948,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -947,7 +956,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 	return inode;
 
 set_failed:
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	destroy_inode(inode);
 	return NULL;
 }
@@ -965,7 +974,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
@@ -974,8 +983,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 			inode->i_state = I_NEW;
 			spin_unlock(&inode->i_lock);
 			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_hash_lock);
 			inode_sb_list_add(inode);
-			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -988,7 +997,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -1009,10 +1018,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
 	struct hlist_node *node;
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
 	hlist_for_each_entry(inode, node, b, i_hash) {
-		if (inode->i_ino == ino && inode->i_sb == sb)
+		if (inode->i_ino == ino && inode->i_sb == sb) {
+			spin_unlock(&inode_hash_lock);
 			return 0;
+		}
 	}
+	spin_unlock(&inode_hash_lock);
 
 	return 1;
 }
@@ -1042,7 +1055,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 	static unsigned int counter;
 	ino_t res;
 
-	spin_lock(&inode_lock);
 	spin_lock(&iunique_lock);
 	do {
 		if (counter <= max_reserved)
@@ -1050,7 +1062,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 		res = counter++;
 	} while (!test_inode_iunique(sb, res));
 	spin_unlock(&iunique_lock);
-	spin_unlock(&inode_lock);
 
 	return res;
 }
@@ -1102,15 +1113,15 @@ static struct inode *ifind(struct super_block *sb,
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	inode = find_inode(sb, head, test, data);
 	if (inode) {
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		if (likely(wait))
 			wait_on_inode(inode);
 		return inode;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	return NULL;
 }
 
@@ -1134,14 +1145,14 @@ static struct inode *ifind_fast(struct super_block *sb,
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	inode = find_inode_fast(sb, head, ino);
 	if (inode) {
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(inode);
 		return inode;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	return NULL;
 }
 
@@ -1306,7 +1317,7 @@ int insert_inode_locked(struct inode *inode)
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
@@ -1321,12 +1332,12 @@ int insert_inode_locked(struct inode *inode)
 		}
 		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
@@ -1351,7 +1362,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		struct hlist_node *node;
 		struct inode *old = NULL;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
@@ -1366,12 +1377,12 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		}
 		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
@@ -1651,10 +1662,10 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 }
 
 static __initdata unsigned long ihash_entries;
-- 
1.7.1



* Re: [PATCH 0/3] fs: peel back the inode_lock some more
From: Christoph Hellwig @ 2010-10-28 12:00 UTC
  To: Dave Chinner; +Cc: viro, linux-fsdevel, linux-kernel

Shouldn't inode_lock be completely gone after these?

Also, I hope moving to global locks really is just a step in between.
Especially for writeback I doubt moving from inode_lock to another
global lock makes a whole lot of difference - never mind that it's
not too smart to lock per-object lists with a global lock.
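
I.e. presumably the end state for writeback is something like moving
the lock into the bdi_writeback itself (a hypothetical sketch of the
direction, not something in this series):

	struct bdi_writeback {
		spinlock_t	list_lock;	/* would protect the lists below */
		struct list_head b_dirty;
		struct list_head b_io;
		struct list_head b_more_io;
		/* ... remaining existing fields ... */
	};

	static void requeue_io(struct inode *inode)
	{
		struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;

		assert_spin_locked(&wb->list_lock);
		list_move(&inode->i_wb_list, &wb->b_more_io);
	}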



* Re: [PATCH 1/3] fs: move i_sb_list out from under inode_lock
From: Christoph Hellwig @ 2010-10-28 14:11 UTC
  To: Dave Chinner; +Cc: viro, linux-fsdevel, linux-kernel




* Re: [PATCH 2/3] fs: move i_wb_list out from under inode_lock
From: Christoph Hellwig @ 2010-10-28 14:19 UTC
  To: Dave Chinner; +Cc: viro, linux-fsdevel, linux-kernel

> + * Write out an inode's dirty pages.  Called under inode_wb_list_lock.  Either
> + * the caller has ref on the inode (either via __iget or via syscall against an
> + * fd) or the inode has I_WILL_FREE set.

Just drop the mention of how we got the reference, it's rather pointless.

>  writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
> @@ -354,7 +368,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
>  	inode->i_state |= I_SYNC;
>  	inode->i_state &= ~I_DIRTY_PAGES;
>  	spin_unlock(&inode->i_lock);
> -	spin_unlock(&inode_lock);
> +	spin_unlock(&inode_wb_list_lock);

We don't actually need inode_wb_list_lock here.  But I guess we can
fix this later and be conservative for now.

> @@ -963,63 +976,62 @@ void __mark_inode_dirty(struct inode *inode, int flags)

I think the __mark_inode_dirty cleanup should be a separate patch,
it's rather confusing in the current form.

> +	if (was_dirty) {
> +out_unlock_inode:
>  		spin_unlock(&inode->i_lock);
> +		return;
> +	}

Please just move the label to the end of the function and add another
goto here.
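
i.e. roughly (illustrative only):

	if (was_dirty)
		goto out_unlock_inode;
	spin_unlock(&inode->i_lock);

	/* ... bdi wakeup and list move as before ... */
	return;

out_unlock_inode:
	spin_unlock(&inode->i_lock);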



* Re: [PATCH 3/3] fs: move i_hash out from under inode_lock
From: Christoph Hellwig @ 2010-10-28 14:24 UTC
  To: Dave Chinner; +Cc: viro, linux-fsdevel, linux-kernel

> @@ -925,8 +934,8 @@ static struct inode *get_new_inode(struct super_block *sb,
>  			inode->i_state = I_NEW;
>  			spin_unlock(&inode->i_lock);
>  			hlist_add_head(&inode->i_hash, head);
> +			spin_unlock(&inode_hash_lock);
>  			inode_sb_list_add(inode);
> -			spin_unlock(&inode_lock);

Al said he wanted to have the sb lock nest inside the hash lock for now
I think.  Doubt it matters much, but it keeps the behaviour that we
can't look up an inode which is not added to the per-sb list yet.
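
With that ordering the insertion path would look like (sketch):

	spin_lock(&inode_hash_lock);
	hlist_add_head(&inode->i_hash, head);
	inode_sb_list_add(inode);	/* takes inode_sb_list_lock inside */
	spin_unlock(&inode_hash_lock);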

After that a better patch description might be:

	"rename inode_lock to inode_hash_lock"

as the inode_lock coverage after the previous patches should be 100%
identical to the new hash lock.


* Re: [PATCH 2/3] fs: move i_wb_list out from under inode_lock
From: Dave Chinner @ 2010-10-28 21:47 UTC
  To: Christoph Hellwig; +Cc: viro, linux-fsdevel, linux-kernel

On Thu, Oct 28, 2010 at 10:19:49AM -0400, Christoph Hellwig wrote:
> > + * Write out an inode's dirty pages.  Called under inode_wb_list_lock.  Either
> > + * the caller has ref on the inode (either via __iget or via syscall against an
> > + * fd) or the inode has I_WILL_FREE set.
> 
> Just drop mentioning of how we got the reference ,it's rather pointless.

OK.

> >  writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
> > @@ -354,7 +368,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
> >  	inode->i_state |= I_SYNC;
> >  	inode->i_state &= ~I_DIRTY_PAGES;
> >  	spin_unlock(&inode->i_lock);
> > -	spin_unlock(&inode_lock);
> > +	spin_unlock(&inode_wb_list_lock);
> 
> We don't actually need inode_wb_list_lock here.  But I guess we can
> fix this later and be conservative for now.

Hmmm - I think you are right. However, there are lots of
opportunities for cleaning up the locking in this area, so leaving
it for later is probably best.

> > @@ -963,63 +976,62 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> 
> I think the __mark_inode_dirty cleanup should be a separate patch,
> it's rather confusing in the current form.

Ok. I'll leave it out for now - there are various other cleanups
needed here now as well (e.g. the unlocked flags check is not needed
to avoid a global lock anymore) so I'll leave that for later, too.

> > +	if (was_dirty) {
> > +out_unlock_inode:
> >  		spin_unlock(&inode->i_lock);
> > +		return;
> > +	}
> 
> Please just move the label to the end of the function and add another
> goto here.

Will do.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com
