linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org,
	akpm@linux-foundation.org, jack@suse.cz,
	Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 13/13] writeback: ensure consistency for generic_sync_sb_inodes() with WB_SYNC_ALL
Date: Wed,  8 Apr 2009 14:00:16 +0200	[thread overview]
Message-ID: <1239192016-19857-14-git-send-email-jens.axboe@oracle.com> (raw)
In-Reply-To: <1239192016-19857-1-git-send-email-jens.axboe@oracle.com>

If WB_SYNC_ALL is given, we must block waiting for any bdi/wb to become
available and then flush our data. Blocking is not allowed inside an RCU
read-side critical section, so switch the bdi_list protection from RCU to
SRCU, whose readers are allowed to sleep.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fs-writeback.c           |   49 +++++++++++++++++++++++++++++--------------
 include/linux/backing-dev.h |   12 ++++++++-
 mm/backing-dev.c            |   23 ++++++++++++--------
 mm/page-writeback.c         |    4 +-
 4 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d25d3a..0492399 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -50,11 +50,18 @@ static void generic_sync_wb_inodes(struct bdi_writeback *wb,
  * unless they implement their own.  Which is somewhat inefficient, as this
  * may prevent concurrent writeback against multiple devices.
  */
-static int writeback_acquire(struct bdi_writeback *wb)
+static bool writeback_acquire(struct bdi_writeback *wb, int wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
 
-	return !test_and_set_bit(wb->nr, &bdi->wb_active);
+	if (!test_and_set_bit(wb->nr, &bdi->wb_active))
+		return 1;
+	if (!wait)
+		return 0;
+
+	wait_on_bit_lock(&bdi->wb_active, wb->nr, bdi_sched_wait,
+				TASK_UNINTERRUPTIBLE);
+	return 1;
 }
 
 /**
@@ -82,12 +89,15 @@ static void writeback_release(struct bdi_writeback *wb)
 }
 
 static void wb_start_writeback(struct bdi_writeback *wb, struct super_block *sb,
-			       long nr_pages)
+			       long nr_pages, int wait)
 {
 	if (!wb_has_dirty_io(wb))
 		return;
 
-	if (writeback_acquire(wb)) {
+	/*
+	 * If wait is set, block waiting for the device to become available
+	 */
+	if (writeback_acquire(wb, wait)) {
 		wb->nr_pages = nr_pages;
 		wb->sb = sb;
 
@@ -100,7 +110,7 @@ static void wb_start_writeback(struct bdi_writeback *wb, struct super_block *sb,
 }
 
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+			 long nr_pages, int wait)
 {
 	struct bdi_writeback *wb;
 
@@ -114,14 +124,14 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 	}
 
 	if (!bdi_wblist_needs_lock(bdi))
-		wb_start_writeback(&bdi->wb, sb, nr_pages);
+		wb_start_writeback(&bdi->wb, sb, nr_pages, wait);
 	else {
 		int idx;
 
 		idx = srcu_read_lock(&bdi->srcu);
 
 		list_for_each_entry_rcu(wb, &bdi->wb_list, list)
-			wb_start_writeback(wb, sb, nr_pages);
+			wb_start_writeback(wb, sb, nr_pages, wait);
 
 		srcu_read_unlock(&bdi->srcu, idx);
 	}
@@ -244,7 +254,7 @@ long wb_do_writeback(struct bdi_writeback *wb)
 	 *  pdflush style writeout.
 	 *
 	 */
-	if (writeback_acquire(wb))
+	if (writeback_acquire(wb, 0))
 		nr_pages = wb_kupdated(wb);
 	else
 		nr_pages = wb_writeback(wb);
@@ -295,21 +305,21 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 	return 0;
 }
 
-void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void bdi_writeback_all(struct super_block *sb, long nr_pages, int wait)
 {
 	struct backing_dev_info *bdi;
+	int idx;
 
-	rcu_read_lock();
-
+	idx = srcu_read_lock(&bdi_srcu);
 restart:
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		if (bdi_start_writeback(bdi, sb, nr_pages))
+		if (bdi_start_writeback(bdi, sb, nr_pages, wait))
 			goto restart;
 	}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&bdi_srcu, idx);
 }
 
 /*
@@ -828,12 +838,19 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 void generic_sync_sb_inodes(struct super_block *sb,
 				struct writeback_control *wbc)
 {
+	const int sync_all = wbc->sync_mode == WB_SYNC_ALL;
+
+	/*
+	 * Kick off the specified bdi, if given, or all of them. If sync_all
+	 * is true, then this is a blocking operation and we must make sure
+	 * to wait for any device that is currently doing a writeback operation.
+	 */
 	if (wbc->bdi)
-		bdi_start_writeback(wbc->bdi, sb, 0);
+		bdi_start_writeback(wbc->bdi, sb, 0, sync_all);
 	else
-		bdi_writeback_all(sb, 0);
+		bdi_writeback_all(sb, 0, sync_all);
 
-	if (wbc->sync_mode == WB_SYNC_ALL) {
+	if (sync_all) {
 		struct inode *inode, *old_inode = NULL;
 
 		spin_lock(&inode_lock);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c7c1ed6..8ab2429 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/srcu.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 
 struct page;
@@ -105,15 +106,22 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages);
+			 long nr_pages, int wait);
 int bdi_writeback_task(struct bdi_writeback *wb);
-void bdi_writeback_all(struct super_block *sb, long nr_pages);
+void bdi_writeback_all(struct super_block *sb, long nr_pages, int wait);
 void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
 void bdi_add_flusher_task(struct backing_dev_info *bdi);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
+extern struct srcu_struct bdi_srcu;
+
+static inline int bdi_sched_wait(void *word)
+{
+	schedule();
+	return 0;
+}
 
 static inline int wb_is_default_task(struct bdi_writeback *wb)
 {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9d6ac11..8ee7b55 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -29,6 +29,7 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 LIST_HEAD(bdi_pending_list);
+struct srcu_struct bdi_srcu;
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -220,10 +221,19 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	err = init_srcu_struct(&bdi_srcu);
+	if (err)
+		return err;
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
 
+	if (err) {
+		bdi_destroy(&default_backing_dev_info);
+		cleanup_srcu_struct(&bdi_srcu);
+	}
+
 	return err;
 }
 subsys_initcall(default_bdi_init);
@@ -473,12 +483,6 @@ static void bdi_add_to_pending(struct rcu_head *head)
 	wake_up(&default_backing_dev_info.wb.wait);
 }
 
-static int sched_wait(void *word)
-{
-	schedule();
-	return 0;
-}
-
 static void bdi_add_one_flusher_task(struct backing_dev_info *bdi,
 				     int(*func)(struct backing_dev_info *))
 {
@@ -513,7 +517,7 @@ static void bdi_add_one_flusher_task(struct backing_dev_info *bdi,
 
 static int flusher_add_helper_block(struct backing_dev_info *bdi)
 {
-	wait_on_bit_lock(&bdi->state, BDI_pending, sched_wait,
+	wait_on_bit_lock(&bdi->state, BDI_pending, bdi_sched_wait,
 				TASK_UNINTERRUPTIBLE);
 	return 0;
 }
@@ -620,7 +624,8 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	 * If setup is pending, wait for that to complete first
 	 * Make sure nobody finds us on the bdi_list anymore
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, sched_wait, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+			TASK_UNINTERRUPTIBLE);
 
 	/*
 	 * Make sure nobody finds us on the bdi_list anymore
@@ -633,7 +638,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	 * Now make sure that anybody who is currently looking at us from
 	 * the bdi_list iteration have exited.
 	 */
-	synchronize_rcu();
+	synchronize_srcu(&bdi_srcu);
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e71e3c2..bac4ad6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -581,7 +581,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 			(!laptop_mode && (global_page_state(NR_FILE_DIRTY)
 					  + global_page_state(NR_UNSTABLE_NFS)
 					  > background_thresh)))
-		bdi_start_writeback(bdi, NULL, 0);
+		bdi_start_writeback(bdi, NULL, 0, 0);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -675,7 +675,7 @@ int wakeup_flusher_threads(long nr_pages)
 	if (nr_pages == 0)
 		nr_pages = global_page_state(NR_FILE_DIRTY) +
 				global_page_state(NR_UNSTABLE_NFS);
-	bdi_writeback_all(NULL, nr_pages);
+	bdi_writeback_all(NULL, nr_pages, 0);
 	return 0;
 }
 
-- 
1.6.2.2.446.gfbdc0


  parent reply	other threads:[~2009-04-08 12:11 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-08 12:00 [PATCH 0/13] Per-bdi writeback flusher threads #3 Jens Axboe
2009-04-08 12:00 ` [PATCH 01/13] buffer: switch do_emergency_thaw() away from pdflush_operation() Jens Axboe
2009-04-08 13:03   ` Christoph Hellwig
2009-04-08 13:08     ` Jens Axboe
2009-04-08 12:00 ` [PATCH 02/13] writeback: move dirty inodes from super_block to backing_dev_info Jens Axboe
2009-04-08 12:00 ` [PATCH 03/13] writeback: switch to per-bdi threads for flushing data Jens Axboe
2009-04-08 12:00 ` [PATCH 04/13] writeback get rid of pdflush completely Jens Axboe
2009-04-08 12:00 ` [PATCH 05/13] writeback: separate the flushing state/task from the bdi Jens Axboe
2009-04-08 12:00 ` [PATCH 06/13] writeback: support > 1 flusher thread per bdi Jens Axboe
2009-04-08 12:00 ` [PATCH 07/13] writeback: include default_backing_dev_info in writeback Jens Axboe
2009-04-08 12:00 ` [PATCH 08/13] writeback: allow sleepy exit of default writeback task Jens Axboe
2009-04-08 12:00 ` [PATCH 09/13] writeback: btrfs must register its backing_devices Jens Axboe
2009-04-08 12:00 ` [PATCH 10/13] writeback: add some debug inode list counters to bdi stats Jens Axboe
2009-04-08 12:00 ` [PATCH 11/13] writeback: add name to backing_dev_info Jens Axboe
2009-04-08 12:00 ` [PATCH 12/13] writeback: check for registered bdi in flusher add and inode dirty Jens Axboe
2009-04-08 12:00 ` Jens Axboe [this message]
2009-04-10  3:46 ` [PATCH 0/13] Per-bdi writeback flusher threads #3 Zhang, Yanmin
2009-04-10  7:21   ` Jens Axboe
2009-04-13  3:18     ` Zhang, Yanmin
2009-04-17 13:07       ` Jens Axboe
2009-04-21 13:25         ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1239192016-19857-14-git-send-email-jens.axboe@oracle.com \
    --to=jens.axboe@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=chris.mason@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).