All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org,
	akpm@linux-foundation.org, jack@suse.cz,
	yanmin_zhang@linux.intel.com, richard@rsk.demon.co.uk,
	Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 06/12] writeback: separate the flushing state/task from the bdi
Date: Tue, 26 May 2009 11:33:44 +0200	[thread overview]
Message-ID: <1243330430-9964-7-git-send-email-jens.axboe@oracle.com> (raw)
In-Reply-To: <1243330430-9964-1-git-send-email-jens.axboe@oracle.com>

Add a struct bdi_writeback for tracking and handling dirty IO. This
is in preparation for adding > 1 flusher task per bdi.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fs-writeback.c           |  145 ++++++++++++++++++++++++++----------------
 include/linux/backing-dev.h |   40 +++++++-----
 mm/backing-dev.c            |  128 ++++++++++++++++++++++++++++++--------
 3 files changed, 215 insertions(+), 98 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7a558a6..e72db8b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -46,9 +46,11 @@ int nr_pdflush_threads;
  * unless they implement their own.  Which is somewhat inefficient, as this
  * may prevent concurrent writeback against multiple devices.
  */
-static int writeback_acquire(struct backing_dev_info *bdi)
+static int writeback_acquire(struct bdi_writeback *wb)
 {
-	return !test_and_set_bit(BDI_pdflush, &bdi->state);
+	struct backing_dev_info *bdi = wb->bdi;
+
+	return !test_and_set_bit(wb->nr, &bdi->wb_active);
 }
 
 /**
@@ -59,19 +61,40 @@ static int writeback_acquire(struct backing_dev_info *bdi)
  */
 int writeback_in_progress(struct backing_dev_info *bdi)
 {
-	return test_bit(BDI_pdflush, &bdi->state);
+	return bdi->wb_active != 0;
 }
 
 /**
  * writeback_release - relinquish exclusive writeback access against a device.
  * @bdi: the device's backing_dev_info structure
  */
-static void writeback_release(struct backing_dev_info *bdi)
+static void writeback_release(struct bdi_writeback *wb)
 {
-	WARN_ON_ONCE(!writeback_in_progress(bdi));
-	bdi->wb_arg.nr_pages = 0;
-	bdi->wb_arg.sb = NULL;
-	clear_bit(BDI_pdflush, &bdi->state);
+	struct backing_dev_info *bdi = wb->bdi;
+
+	wb->nr_pages = 0;
+	wb->sb = NULL;
+	clear_bit(wb->nr, &bdi->wb_active);
+}
+
+static void wb_start_writeback(struct bdi_writeback *wb, struct super_block *sb,
+			       long nr_pages,
+			       enum writeback_sync_modes sync_mode)
+{
+	if (!wb_has_dirty_io(wb))
+		return;
+
+	if (writeback_acquire(wb)) {
+		wb->nr_pages = nr_pages;
+		wb->sb = sb;
+		wb->sync_mode = sync_mode;
+
+		/*
+		 * make above store seen before the task is woken
+		 */
+		smp_mb();
+		wake_up(&wb->wait);
+	}
 }
 
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
@@ -81,22 +104,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 	 * This only happens the first time someone kicks this bdi, so put
 	 * it out-of-line.
 	 */
-	if (unlikely(!bdi->task)) {
+	if (unlikely(!bdi->wb.task)) {
 		bdi_add_default_flusher_task(bdi);
 		return 1;
 	}
 
-	if (writeback_acquire(bdi)) {
-		bdi->wb_arg.nr_pages = nr_pages;
-		bdi->wb_arg.sb = sb;
-		bdi->wb_arg.sync_mode = sync_mode;
-		/*
-		 * make above store seen before the task is woken
-		 */
-		smp_mb();
-		wake_up(&bdi->wait);
-	}
-
+	wb_start_writeback(&bdi->wb, sb, nr_pages, sync_mode);
 	return 0;
 }
 
@@ -124,12 +137,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  * older_than_this takes precedence over nr_to_write.  So we'll only write back
  * all dirty pages if they are all attached to "old" mappings.
  */
-static void bdi_kupdated(struct backing_dev_info *bdi)
+static void wb_kupdated(struct bdi_writeback *wb)
 {
 	unsigned long oldest_jif;
 	long nr_to_write;
 	struct writeback_control wbc = {
-		.bdi			= bdi,
+		.bdi			= wb->bdi,
 		.sync_mode		= WB_SYNC_NONE,
 		.older_than_this	= &oldest_jif,
 		.nr_to_write		= 0,
@@ -164,15 +177,19 @@ static inline bool over_bground_thresh(void)
 		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
 }
 
-static void bdi_pdflush(struct backing_dev_info *bdi)
+static void generic_sync_wb_inodes(struct bdi_writeback *wb,
+				   struct super_block *sb,
+				   struct writeback_control *wbc);
+
+static void wb_writeback(struct bdi_writeback *wb)
 {
 	struct writeback_control wbc = {
-		.bdi			= bdi,
-		.sync_mode		= bdi->wb_arg.sync_mode,
+		.bdi			= wb->bdi,
+		.sync_mode		= wb->sync_mode,
 		.older_than_this	= NULL,
 		.range_cyclic		= 1,
 	};
-	long nr_pages = bdi->wb_arg.nr_pages;
+	long nr_pages = wb->nr_pages;
 
 	for (;;) {
 		if (wbc.sync_mode == WB_SYNC_NONE && nr_pages <= 0 &&
@@ -183,7 +200,7 @@ static void bdi_pdflush(struct backing_dev_info *bdi)
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		generic_sync_bdi_inodes(bdi->wb_arg.sb, &wbc);
+		generic_sync_wb_inodes(wb, wb->sb, &wbc);
 		nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		/*
 		 * If we ran out of stuff to write, bail unless more_io got set
@@ -200,13 +217,13 @@ static void bdi_pdflush(struct backing_dev_info *bdi)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct backing_dev_info *bdi)
+int bdi_writeback_task(struct bdi_writeback *wb)
 {
 	while (!kthread_should_stop()) {
 		unsigned long wait_jiffies;
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(&bdi->wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&wb->wait, &wait, TASK_INTERRUPTIBLE);
 		wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
 		schedule_timeout(wait_jiffies);
 		try_to_freeze();
@@ -225,13 +242,13 @@ int bdi_writeback_task(struct backing_dev_info *bdi)
 		 *  pdflush style writeout.
 		 *
 		 */
-		if (writeback_acquire(bdi))
-			bdi_kupdated(bdi);
+		if (writeback_acquire(wb))
+			wb_kupdated(wb);
 		else
-			bdi_pdflush(bdi);
+			wb_writeback(wb);
 
-		writeback_release(bdi);
-		finish_wait(&bdi->wait, &wait);
+		writeback_release(wb);
+		finish_wait(&wb->wait, &wait);
 	}
 
 	return 0;
@@ -253,6 +270,14 @@ void bdi_writeback_all(struct super_block *sb, long nr_pages,
 	mutex_unlock(&bdi_lock);
 }
 
+/*
+ * We have only a single wb per bdi, so just return that.
+ */
+static inline struct bdi_writeback *inode_get_wb(struct inode *inode)
+{
+	return &inode_to_bdi(inode)->wb;
+}
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -351,9 +376,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			struct bdi_writeback *wb = inode_get_wb(inode);
+
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list,
-					&inode_to_bdi(inode)->b_dirty);
+			list_move(&inode->i_list, &wb->b_dirty);
 		}
 	}
 out:
@@ -380,16 +406,16 @@ static int write_inode(struct inode *inode, int sync)
  */
 static void redirty_tail(struct inode *inode)
 {
-	struct backing_dev_info *bdi = inode_to_bdi(inode);
+	struct bdi_writeback *wb = inode_get_wb(inode);
 
-	if (!list_empty(&bdi->b_dirty)) {
+	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
-		tail = list_entry(bdi->b_dirty.next, struct inode, i_list);
+		tail = list_entry(wb->b_dirty.next, struct inode, i_list);
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	list_move(&inode->i_list, &bdi->b_dirty);
+	list_move(&inode->i_list, &wb->b_dirty);
 }
 
 /*
@@ -397,7 +423,9 @@ static void redirty_tail(struct inode *inode)
  */
 static void requeue_io(struct inode *inode)
 {
-	list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io);
+	struct bdi_writeback *wb = inode_get_wb(inode);
+
+	list_move(&inode->i_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -444,11 +472,10 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 /*
  * Queue all expired dirty inodes for io, eldest first.
  */
-static void queue_io(struct backing_dev_info *bdi,
-		     unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
-	list_splice_init(&bdi->b_more_io, bdi->b_io.prev);
-	move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this);
+	list_splice_init(&wb->b_more_io, wb->b_io.prev);
+	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
 /*
@@ -609,20 +636,20 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return __sync_single_inode(inode, wbc);
 }
 
-void generic_sync_bdi_inodes(struct super_block *sb,
-			     struct writeback_control *wbc)
+static void generic_sync_wb_inodes(struct bdi_writeback *wb,
+				   struct super_block *sb,
+				   struct writeback_control *wbc)
 {
 	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
-	struct backing_dev_info *bdi = wbc->bdi;
 	const unsigned long start = jiffies;	/* livelock avoidance */
 
 	spin_lock(&inode_lock);
 
-	if (!wbc->for_kupdate || list_empty(&bdi->b_io))
-		queue_io(bdi, wbc->older_than_this);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
 
-	while (!list_empty(&bdi->b_io)) {
-		struct inode *inode = list_entry(bdi->b_io.prev,
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
 						struct inode, i_list);
 		long pages_skipped;
 
@@ -634,7 +661,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			continue;
 		}
 
-		if (!bdi_cap_writeback_dirty(bdi)) {
+		if (!bdi_cap_writeback_dirty(wb->bdi)) {
 			redirty_tail(inode);
 			if (is_blkdev_sb) {
 				/*
@@ -656,7 +683,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			continue;
 		}
 
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
 			wbc->encountered_congestion = 1;
 			if (!is_blkdev_sb)
 				break;		/* Skip a congested fs */
@@ -690,7 +717,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			wbc->more_io = 1;
 			break;
 		}
-		if (!list_empty(&bdi->b_more_io))
+		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
 
@@ -698,6 +725,14 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 	/* Leave any unwritten inodes on b_io */
 }
 
+void generic_sync_bdi_inodes(struct super_block *sb,
+			     struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = wbc->bdi;
+
+	generic_sync_wb_inodes(&bdi->wb, sb, wbc);
+}
+
 /*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f164925..77dc62c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -24,8 +24,8 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pdflush,		/* A pdflush thread is working this device */
 	BDI_pending,		/* On its way to being activated */
+	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
 	BDI_unused,		/* Available bits start here */
@@ -41,15 +41,23 @@ enum bdi_stat_item {
 
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-struct bdi_writeback_arg {
-	unsigned long nr_pages;
-	struct super_block *sb;
+struct bdi_writeback {
+	struct backing_dev_info *bdi;		/* our parent bdi */
+	unsigned int nr;
+
+	struct task_struct	*task;		/* writeback task */
+	wait_queue_head_t	wait;
+	struct list_head	b_dirty;	/* dirty inodes */
+	struct list_head	b_io;		/* parked for writeback */
+	struct list_head	b_more_io;	/* parked for more writeback */
+
+	unsigned long		nr_pages;
+	struct super_block	*sb;
 	enum writeback_sync_modes sync_mode;
 };
 
 struct backing_dev_info {
 	struct list_head bdi_list;
-
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -66,14 +74,11 @@ struct backing_dev_info {
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
-	struct device *dev;
+	struct bdi_writeback wb;  /* default writeback info for this bdi */
+	unsigned long wb_active;  /* bitmap of active tasks */
+	unsigned long wb_mask;	  /* bitmap of registered tasks */
 
-	struct task_struct	*task;		/* writeback task */
-	wait_queue_head_t	wait;
-	struct bdi_writeback_arg wb_arg;	/* protected by BDI_pdflush */
-	struct list_head	b_dirty;	/* dirty inodes */
-	struct list_head	b_io;		/* parked for writeback */
-	struct list_head	b_more_io;	/* parked for more writeback */
+	struct device *dev;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
@@ -90,19 +95,20 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 			 long nr_pages, enum writeback_sync_modes sync_mode);
-int bdi_writeback_task(struct backing_dev_info *bdi);
+int bdi_writeback_task(struct bdi_writeback *wb);
 void bdi_writeback_all(struct super_block *sb, long nr_pages,
 			enum writeback_sync_modes sync_mode);
 void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
+int bdi_has_dirty_io(struct backing_dev_info *bdi);
 
 extern struct mutex bdi_lock;
 extern struct list_head bdi_list;
 
-static inline int bdi_has_dirty_io(struct backing_dev_info *bdi)
+static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
-	return !list_empty(&bdi->b_dirty) ||
-	       !list_empty(&bdi->b_io) ||
-	       !list_empty(&bdi->b_more_io);
+	return !list_empty(&wb->b_dirty) ||
+	       !list_empty(&wb->b_io) ||
+	       !list_empty(&wb->b_more_io);
 }
 
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bae3d4f..c8201f0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -199,10 +199,46 @@ static int __init default_bdi_init(void)
 }
 subsys_initcall(default_bdi_init);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	memset(wb, 0, sizeof(*wb));
+
+	wb->bdi = bdi;
+	init_waitqueue_head(&wb->wait);
+	INIT_LIST_HEAD(&wb->b_dirty);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_more_io);
+}
+
+static int wb_assign_nr(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+{
+	set_bit(0, &bdi->wb_mask);
+	wb->nr = 0;
+	return 0;
+}
+
+static void bdi_put_wb(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+{
+	clear_bit(wb->nr, &bdi->wb_mask);
+	clear_bit(BDI_wb_alloc, &bdi->state);
+}
+
+static struct bdi_writeback *bdi_new_wb(struct backing_dev_info *bdi)
+{
+	struct bdi_writeback *wb;
+
+	set_bit(BDI_wb_alloc, &bdi->state);
+	wb = &bdi->wb;
+	wb_assign_nr(bdi, wb);
+	return wb;
+}
+
 static int bdi_start_fn(void *ptr)
 {
-	struct backing_dev_info *bdi = ptr;
+	struct bdi_writeback *wb = ptr;
+	struct backing_dev_info *bdi = wb->bdi;
 	struct task_struct *tsk = current;
+	int ret;
 
 	/*
 	 * Add us to the active bdi_list
@@ -226,7 +262,15 @@ static int bdi_start_fn(void *ptr)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bdi->state, BDI_pending);
 
-	return bdi_writeback_task(bdi);
+	ret = bdi_writeback_task(wb);
+
+	bdi_put_wb(bdi, wb);
+	return ret;
+}
+
+int bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	return wb_has_dirty_io(&bdi->wb);
 }
 
 static void bdi_flush_io(struct backing_dev_info *bdi)
@@ -244,11 +288,12 @@ static void bdi_flush_io(struct backing_dev_info *bdi)
 
 static int bdi_forker_task(void *ptr)
 {
-	struct backing_dev_info *me = ptr;
+	struct bdi_writeback *me = ptr;
 	DEFINE_WAIT(wait);
 
 	for (;;) {
 		struct backing_dev_info *bdi, *tmp;
+		struct bdi_writeback *wb;
 
 		/*
 		 * Do this periodically, like kupdated() did before.
@@ -259,8 +304,8 @@ static int bdi_forker_task(void *ptr)
 		 * Temporary measure, we want to make sure we don't see
 		 * dirty data on the default backing_dev_info
 		 */
-		if (bdi_has_dirty_io(me))
-			bdi_flush_io(me);
+		if (wb_has_dirty_io(me))
+			bdi_flush_io(me->bdi);
 
 		prepare_to_wait(&me->wait, &wait, TASK_INTERRUPTIBLE);
 
@@ -271,7 +316,7 @@ static int bdi_forker_task(void *ptr)
 		 * a thread registered. If so, set that up.
 		 */
 		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
-			if (bdi->task || !bdi_has_dirty_io(bdi))
+			if (bdi->wb.task || !bdi_has_dirty_io(bdi))
 				continue;
 
 			bdi_add_default_flusher_task(bdi);
@@ -292,17 +337,22 @@ static int bdi_forker_task(void *ptr)
 		list_del_init(&bdi->bdi_list);
 		mutex_unlock(&bdi_lock);
 
-		BUG_ON(bdi->task);
+		wb = bdi_new_wb(bdi);
+		if (!wb)
+			goto readd_flush;
 
-		bdi->task = kthread_run(bdi_start_fn, bdi, "bdi-%s",
+		wb->task = kthread_run(bdi_start_fn, wb, "bdi-%s",
 					dev_name(bdi->dev));
+
 		/*
 		 * If task creation fails, then readd the bdi to
 		 * the pending list and force writeout of the bdi
 		 * from this forker thread. That will free some memory
 		 * and we can try again.
 		 */
-		if (!bdi->task) {
+		if (!wb->task) {
+			bdi_put_wb(bdi, wb);
+readd_flush:
 			/*
 			 * Add this 'bdi' to the back, so we get
 			 * a chance to flush other bdi's to free
@@ -320,8 +370,18 @@ static int bdi_forker_task(void *ptr)
 	return 0;
 }
 
+/*
+ * Add a new flusher task that gets created for any bdi
+ * that has dirty data pending writeout
+ */
 void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
 {
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * Someone already marked this pending for task creation
+	 */
 	if (test_and_set_bit(BDI_pending, &bdi->state))
 		return;
 
@@ -329,7 +389,7 @@ void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
 	list_move_tail(&bdi->bdi_list, &bdi_pending_list);
 	mutex_unlock(&bdi_lock);
 
-	wake_up(&default_backing_dev_info.wait);
+	wake_up(&default_backing_dev_info.wb.wait);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -362,13 +422,23 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 	 * on-demand when they need it.
 	 */
 	if (bdi_cap_flush_forker(bdi)) {
-		bdi->task = kthread_run(bdi_forker_task, bdi, "bdi-%s",
+		struct bdi_writeback *wb;
+
+		wb = bdi_new_wb(bdi);
+		if (!wb) {
+			ret = -ENOMEM;
+			goto remove_err;
+		}
+
+		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
 						dev_name(dev));
-		if (!bdi->task) {
+		if (!wb->task) {
+			bdi_put_wb(bdi, wb);
+			ret = -ENOMEM;
+remove_err:
 			mutex_lock(&bdi_lock);
 			list_del(&bdi->bdi_list);
 			mutex_unlock(&bdi_lock);
-			ret = -ENOMEM;
 			goto exit;
 		}
 	}
@@ -391,28 +461,37 @@ static int sched_wait(void *word)
 	return 0;
 }
 
+/*
+ * Remove bdi from global list and shut down any threads we have running
+ */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
 	/*
 	 * If setup is pending, wait for that to complete first
 	 */
 	wait_on_bit(&bdi->state, BDI_pending, sched_wait, TASK_UNINTERRUPTIBLE);
 
+	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
 	mutex_lock(&bdi_lock);
 	list_del(&bdi->bdi_list);
 	mutex_unlock(&bdi_lock);
+
+	/*
+	 * Finally, kill the kernel thread
+	 */
+	kthread_stop(bdi->wb.task);
 }
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
-		if (!bdi_cap_flush_forker(bdi)) {
+		if (!bdi_cap_flush_forker(bdi))
 			bdi_wb_shutdown(bdi);
-			if (bdi->task) {
-				kthread_stop(bdi->task);
-				bdi->task = NULL;
-			}
-		}
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -429,11 +508,10 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
-	init_waitqueue_head(&bdi->wait);
 	INIT_LIST_HEAD(&bdi->bdi_list);
-	INIT_LIST_HEAD(&bdi->b_io);
-	INIT_LIST_HEAD(&bdi->b_dirty);
-	INIT_LIST_HEAD(&bdi->b_more_io);
+	bdi->wb_mask = bdi->wb_active = 0;
+
+	bdi_wb_init(&bdi->wb, bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -458,9 +536,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
-	WARN_ON(!list_empty(&bdi->b_dirty));
-	WARN_ON(!list_empty(&bdi->b_io));
-	WARN_ON(!list_empty(&bdi->b_more_io));
+	WARN_ON(bdi_has_dirty_io(bdi));
 
 	bdi_unregister(bdi);
 
-- 
1.6.3.rc0.1.gf800


  parent reply	other threads:[~2009-05-26  9:35 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-26  9:33 [PATCH 0/12] Per-bdi writeback flusher threads v7 Jens Axboe
2009-05-26  9:33 ` [PATCH 01/12] ntfs: remove old debug check for dirty data in ntfs_put_super() Jens Axboe
2009-05-26  9:33 ` [PATCH 02/12] btrfs: properly register fs backing device Jens Axboe
2009-05-26  9:33 ` [PATCH 03/12] writeback: move dirty inodes from super_block to backing_dev_info Jens Axboe
2009-05-26  9:33 ` [PATCH 04/12] writeback: switch to per-bdi threads for flushing data Jens Axboe
2009-05-26  9:33 ` [PATCH 05/12] writeback: get rid of pdflush completely Jens Axboe
2009-05-26  9:33 ` Jens Axboe [this message]
2009-05-26  9:33 ` [PATCH 07/12] writeback: support > 1 flusher thread per bdi Jens Axboe
2009-06-04 17:44   ` Paul E. McKenney
2009-06-04 19:48     ` Jens Axboe
2009-05-26  9:33 ` [PATCH 08/12] writeback: include default_backing_dev_info in writeback Jens Axboe
2009-05-26  9:33 ` [PATCH 09/12] writeback: allow sleepy exit of default writeback task Jens Axboe
2009-05-26  9:33 ` [PATCH 10/12] writeback: add some debug inode list counters to bdi stats Jens Axboe
2009-05-26  9:33 ` [PATCH 11/12] writeback: add name to backing_dev_info Jens Axboe
2009-05-26  9:33 ` [PATCH 12/12] writeback: check for registered bdi in flusher add and inode dirty Jens Axboe
2009-05-26 15:25 ` [PATCH 0/12] Per-bdi writeback flusher threads v7 Damien Wyart
2009-05-26 16:41   ` Jens Axboe
2009-05-26 17:08     ` Damien Wyart
2009-05-26 17:10       ` Damien Wyart
2009-05-26 20:47       ` Jens Axboe
2009-05-26 21:11         ` Jens Axboe
2009-05-27  5:21           ` Damien Wyart
2009-05-27  5:49             ` Damien Wyart
2009-05-27  9:20               ` Jens Axboe
2009-05-27 13:15                 ` Damien Wyart
2009-05-27 15:05                   ` Jens Axboe
2009-05-27 21:06                     ` Andrew Morton
2009-05-28 10:20                       ` Jens Axboe
2009-05-27  6:17             ` Jens Axboe
2009-05-27  5:27 ` Zhang, Yanmin
2009-05-27  6:17   ` Jens Axboe
2009-06-02  2:07     ` Zhang, Yanmin
2009-06-02 11:53       ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2009-05-25  7:34 [PATCH 0/12] Per-bdi writeback flusher threads #5 Jens Axboe
2009-05-25  7:34 ` [PATCH 06/12] writeback: separate the flushing state/task from the bdi Jens Axboe
2009-05-25 10:13   ` Jan Kara
2009-05-25 10:36     ` Jens Axboe
2009-05-25  7:30 [PATCH 0/12] Per-bdi writeback flusher threads #5 Jens Axboe
2009-05-25  7:30 ` [PATCH 06/12] writeback: separate the flushing state/task from the bdi Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243330430-9964-7-git-send-email-jens.axboe@oracle.com \
    --to=jens.axboe@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=chris.mason@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=richard@rsk.demon.co.uk \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.