All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, hch@infradead.org, tytso@mit.edu,
	akpm@linux-foundation.org, jack@suse.cz,
	trond.myklebust@fys.uio.no, Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 4/7] writeback: use RCU to protect bdi_list
Date: Mon, 14 Sep 2009 11:36:31 +0200	[thread overview]
Message-ID: <1252920994-11141-5-git-send-email-jens.axboe@oracle.com> (raw)
In-Reply-To: <1252920994-11141-1-git-send-email-jens.axboe@oracle.com>

Now that bdi_writeback_all() no longer handles integrity writeback,
it doesn't have to block anymore. This means that we can switch
bdi_list reader side protection to RCU.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fs-writeback.c           |    4 +-
 include/linux/backing-dev.h |    1 +
 mm/backing-dev.c            |   76 +++++++++++++++++++++++++++++++------------
 mm/page-writeback.c         |    8 ++--
 4 files changed, 62 insertions(+), 27 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5d4bd1c..d7592c7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -849,7 +849,7 @@ static void bdi_writeback_all(struct writeback_control *wbc)
 
 	WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
 
-	spin_lock(&bdi_lock);
+	rcu_read_lock();
 
 	list_for_each_entry(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
@@ -858,7 +858,7 @@ static void bdi_writeback_all(struct writeback_control *wbc)
 		bdi_alloc_queue_work(bdi, wbc);
 	}
 
-	spin_unlock(&bdi_lock);
+	rcu_read_unlock();
 }
 
 /*
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f169bcb..859e797 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -59,6 +59,7 @@ struct bdi_writeback {
 
 struct backing_dev_info {
 	struct list_head bdi_list;
+	struct rcu_head rcu_head;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca0da..fd93566 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -26,6 +26,12 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+
+/*
+ * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
+ * reader side protection for bdi_pending_list. bdi_list has RCU reader side
+ * locking.
+ */
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 LIST_HEAD(bdi_pending_list);
@@ -284,9 +290,9 @@ static int bdi_start_fn(void *ptr)
 	/*
 	 * Add us to the active bdi_list
 	 */
-	spin_lock(&bdi_lock);
-	list_add(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi_task_init(bdi, wb);
 
@@ -389,7 +395,7 @@ static int bdi_forker_task(void *ptr)
 		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
 			wb_do_writeback(me, 0);
 
-		spin_lock(&bdi_lock);
+		spin_lock_bh(&bdi_lock);
 
 		/*
 		 * Check if any existing bdi's have dirty data without
@@ -410,7 +416,7 @@ static int bdi_forker_task(void *ptr)
 		if (list_empty(&bdi_pending_list)) {
 			unsigned long wait;
 
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
 			schedule_timeout(wait);
 			try_to_freeze();
@@ -426,7 +432,7 @@ static int bdi_forker_task(void *ptr)
 		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
 				 bdi_list);
 		list_del_init(&bdi->bdi_list);
-		spin_unlock(&bdi_lock);
+		spin_unlock_bh(&bdi_lock);
 
 		wb = &bdi->wb;
 		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
@@ -445,9 +451,9 @@ static int bdi_forker_task(void *ptr)
 			 * a chance to flush other bdi's to free
 			 * memory.
 			 */
-			spin_lock(&bdi_lock);
+			spin_lock_bh(&bdi_lock);
 			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 
 			bdi_flush_io(bdi);
 		}
@@ -456,6 +462,24 @@ static int bdi_forker_task(void *ptr)
 	return 0;
 }
 
+static void bdi_add_to_pending(struct rcu_head *head)
+{
+	struct backing_dev_info *bdi;
+
+	bdi = container_of(head, struct backing_dev_info, rcu_head);
+	INIT_LIST_HEAD(&bdi->bdi_list);
+
+	spin_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * We are now on the pending list, wake up bdi_forker_task()
+	 * to finish the job and add us back to the active bdi_list
+	 */
+	wake_up_process(default_backing_dev_info.wb.task);
+}
+
 /*
  * Add the default flusher task that gets created for any bdi
  * that has dirty data pending writeout
@@ -478,16 +502,29 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
 	 * waiting for previous additions to finish.
 	 */
 	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+		list_del_rcu(&bdi->bdi_list);
 
 		/*
-		 * We are now on the pending list, wake up bdi_forker_task()
-		 * to finish the job and add us back to the active bdi_list
+		 * We must wait for the current RCU period to end before
+		 * moving to the pending list. So schedule that operation
+		 * from an RCU callback.
 		 */
-		wake_up_process(default_backing_dev_info.wb.task);
+		call_rcu(&bdi->rcu_head, bdi_add_to_pending);
 	}
 }
 
+/*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible
+ */
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+	spin_lock_bh(&bdi_lock);
+	list_del_rcu(&bdi->bdi_list);
+	spin_unlock_bh(&bdi_lock);
+
+	synchronize_rcu();
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...)
 {
@@ -506,9 +543,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		goto exit;
 	}
 
-	spin_lock(&bdi_lock);
-	list_add_tail(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi->dev = dev;
 
@@ -526,9 +563,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 			wb->task = NULL;
 			ret = -ENOMEM;
 
-			spin_lock(&bdi_lock);
-			list_del(&bdi->bdi_list);
-			spin_unlock(&bdi_lock);
+			bdi_remove_from_list(bdi);
 			goto exit;
 		}
 	}
@@ -565,9 +600,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	/*
 	 * Make sure nobody finds us on the bdi_list anymore
 	 */
-	spin_lock(&bdi_lock);
-	list_del(&bdi->bdi_list);
-	spin_unlock(&bdi_lock);
+	bdi_remove_from_list(bdi);
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
@@ -599,6 +632,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
 	spin_lock_init(&bdi->wb_lock);
+	INIT_RCU_HEAD(&bdi->rcu_head);
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->wb_list);
 	INIT_LIST_HEAD(&bdi->work_list);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 25e7770..a5f0f76 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -315,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -327,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }
@@ -339,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }
-- 
1.6.4.1.207.g68ea


  parent reply	other threads:[~2009-09-14  9:37 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-14  9:36 [PATCH 0/7] Post merge per-bdi writeback patches v2 Jens Axboe
2009-09-14  9:36 ` [PATCH 1/7] writeback: merely wakeup flusher thread if work allocation fails for WB_SYNC_NONE Jens Axboe
2009-09-14 12:34   ` Jan Kara
2009-09-14 12:39     ` Jens Axboe
2009-09-14 13:19       ` Christoph Hellwig
2009-09-14  9:36 ` [PATCH 2/7] Assign bdi in super_block Jens Axboe
2009-09-14 13:02   ` Jan Kara
2009-09-14 18:25     ` Trond Myklebust
2009-09-14 18:36       ` Jens Axboe
2009-09-15 10:14         ` Jan Kara
2009-09-15 10:22           ` Jens Axboe
2009-09-15 13:16           ` Trond Myklebust
2009-09-14  9:36 ` [PATCH 3/7] writeback: only use bdi_writeback_all() for WB_SYNC_NONE writeout Jens Axboe
2009-09-14 13:12   ` Jan Kara
2009-09-14  9:36 ` Jens Axboe [this message]
2009-09-14 11:10   ` [PATCH 4/7] writeback: use RCU to protect bdi_list Minchan Kim
2009-09-14 11:11     ` Jens Axboe
2009-09-14  9:36 ` [PATCH 5/7] writeback: inline allocation failure handling in bdi_alloc_queue_work() Jens Axboe
2009-09-14 13:13   ` Jan Kara
2009-09-14  9:36 ` [PATCH 6/7] writeback: separate starting of sync vs opportunistic writeback Jens Axboe
2009-09-14 13:33   ` Jan Kara
2009-09-14 13:42     ` Christoph Hellwig
2009-09-14 19:28       ` Jens Axboe
2009-09-14 19:42         ` Jens Axboe
2009-09-15  9:08           ` Jan Kara
2009-09-15  9:14             ` Jens Axboe
2009-09-15 11:44               ` Jens Axboe
2009-09-15 12:58                 ` Jan Kara
2009-09-15 13:04                   ` Jens Axboe
2009-09-15 13:08                     ` Christoph Hellwig
2009-09-15 13:17                       ` Jens Axboe
2009-09-15 14:01                       ` Jan Kara
2009-09-15 14:09                         ` Chris Mason
2009-09-14  9:36 ` [PATCH 7/7] writeback: splice dirty inode entries to default bdi on bdi_destroy() Jens Axboe
2009-09-14 10:56   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1252920994-11141-5-git-send-email-jens.axboe@oracle.com \
    --to=jens.axboe@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=chris.mason@oracle.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=trond.myklebust@fys.uio.no \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.