All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-08-12 22:47 ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 22:47 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton, Dave Chinner
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

This creates a new 'reason' field in a wb_writeback_work
structure, which unambiguously identifies who initiates
writeback activity.  A 'wb_stats' enumeration has been added
to writeback.h, to enumerate the possible reasons.

The 'writeback_work_class' tracepoint event class is updated
to include the symbolic 'reason' in all trace events.

The 'writeback_queue_io' tracepoint now takes a work object,
in order to print out the 'reason' for queue_io.

And the 'writeback_inodes_sbXXX' family of routines has had
a wb_stats parameter added to them, so callers can specify
why writeback is being started.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Acked-by: Jan Kara <jack@suse.cz>
---

Changelog since v1:

  - Added a "enum wb_stats" parameter to writeback_inodes_wb(),
    instead of assuming that the reason for this is always
    WB_STAT_BALANCE_DIRTY

  - Removed change around the call to writeback_inodes_wb() in
    balance_dirty_pages(), meant for patch 2/2 in this series.


 fs/btrfs/extent-tree.c           |    3 +-
 fs/buffer.c                      |    2 +-
 fs/ext4/inode.c                  |    2 +-
 fs/fs-writeback.c                |   49 ++++++++++++++++++++++++--------------
 fs/quota/quota.c                 |    2 +-
 fs/sync.c                        |    4 +-
 fs/ubifs/budget.c                |    2 +-
 include/linux/backing-dev.h      |    3 +-
 include/linux/writeback.h        |   33 +++++++++++++++++++++----
 include/trace/events/writeback.h |   37 +++++++++++++++++++++-------
 mm/backing-dev.c                 |    3 +-
 mm/page-writeback.c              |    6 +++-
 mm/vmscan.c                      |    4 +-
 13 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac22..5d085a9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3332,7 +3332,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+						WB_STAT_FS_FREE_SPACE);
 
 		spin_lock(&space_info->lock);
 		if (reserved > space_info->bytes_reserved)
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a80b04..cfca642 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -285,7 +285,7 @@ static void free_more_memory(void)
 	struct zone *zone;
 	int nid;
 
-	wakeup_flusher_threads(1024);
+	wakeup_flusher_threads(1024, WB_STAT_FREE_MORE_MEM);
 	yield();
 
 	for_each_online_node(nid) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fdc298..62aceb7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2200,7 +2200,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
 	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb);
+		writeback_inodes_sb_if_idle(sb, WB_STAT_FS_FREE_SPACE);
 
 	return 0;
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 04cf3b9..70aa19d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,6 +41,7 @@ struct wb_writeback_work {
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
+	enum wb_stats reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
@@ -115,7 +116,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		      bool range_cyclic)
+		      bool range_cyclic, enum wb_stats stat)
 {
 	struct wb_writeback_work *work;
 
@@ -135,6 +136,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
+	work->reason = stat;
 
 	bdi_queue_work(bdi, work);
 }
@@ -150,9 +152,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat)
 {
-	__bdi_start_writeback(bdi, nr_pages, true);
+	__bdi_start_writeback(bdi, nr_pages, true, stat);
 }
 
 /**
@@ -302,13 +305,14 @@ out:
  *                                           |
  *                                           +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
 	int moved;
 	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
-	trace_writeback_queue_io(wb, older_than_this, moved);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io,
+					work->older_than_this);
+	trace_writeback_queue_io(wb, work, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -641,17 +645,19 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
 	return wrote;
 }
 
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_stats stat)
 {
 	struct wb_writeback_work work = {
 		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
+		.reason		= stat,
 	};
 
 	spin_lock(&wb->list_lock);
 	if (list_empty(&wb->b_io))
-		queue_io(wb, NULL);
+		queue_io(wb, &work);
 	__writeback_inodes_wb(wb, &work);
 	spin_unlock(&wb->list_lock);
 
@@ -738,7 +744,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 
 		trace_writeback_start(wb->bdi, work);
 		if (list_empty(&wb->b_io))
-			queue_io(wb, work->older_than_this);
+			queue_io(wb, work);
 		if (work->sb)
 			progress = writeback_sb_inodes(work->sb, wb, work);
 		else
@@ -818,6 +824,7 @@ static long wb_check_background_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_background	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_BG_WRITEOUT,
 		};
 
 		return wb_writeback(wb, &work);
@@ -851,6 +858,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_KUPDATE,
 		};
 
 		return wb_writeback(wb, &work);
@@ -969,7 +977,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat)
 {
 	struct backing_dev_info *bdi;
 
@@ -982,7 +990,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false);
+		__bdi_start_writeback(bdi, nr_pages, false, stat);
 	}
 	rcu_read_unlock();
 }
@@ -1203,7 +1211,9 @@ static void wait_sb_inodes(struct super_block *sb)
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+			    unsigned long nr,
+			    enum wb_stats stat)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
@@ -1212,6 +1222,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 		.tagged_writepages	= 1,
 		.done			= &done,
 		.nr_pages		= nr,
+		.reason			= stat,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1228,9 +1239,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_stats stat)
 {
-	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), stat);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1241,11 +1252,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1262,11 +1273,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr)
+				   unsigned long nr,
+				   enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr);
+		writeback_inodes_sb_nr(sb, nr, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1290,6 +1302,7 @@ void sync_inodes_sb(struct super_block *sb)
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
 		.done		= &done,
+		.reason		= WB_STAT_SYNC,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb2..a3d158c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		/* caller already holds s_umount */
 		if (sb->s_flags & MS_RDONLY)
 			return -EROFS;
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/fs/sync.c b/fs/sync.c
index c98a747..b333bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -43,7 +43,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
@@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_flusher_threads(0);
+	wakeup_flusher_threads(0, WB_STAT_SYNC);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 315de66..9a392a0 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -63,7 +63,7 @@
 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	down_read(&c->vfs_sb->s_umount);
-	writeback_inodes_sb(c->vfs_sb);
+	writeback_inodes_sb(c->vfs_sb, WB_STAT_FS_FREE_SPACE);
 	up_read(&c->vfs_sb->s_umount);
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3b2f9cb..9da81ef 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -107,7 +107,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f1bfa12e..a70696d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -50,6 +50,24 @@ enum writeback_sync_modes {
 };
 
 /*
+ * why this writeback was initiated
+ */
+enum wb_stats {
+	/* The following are counts of pages written for a specific cause */
+	WB_STAT_BALANCE_DIRTY,
+	WB_STAT_BG_WRITEOUT,
+	WB_STAT_TRY_TO_FREE_PAGES,
+	WB_STAT_SYNC,
+	WB_STAT_KUPDATE,
+	WB_STAT_LAPTOP_TIMER,
+	WB_STAT_FREE_MORE_MEM,
+	WB_STAT_FS_FREE_SPACE,
+	WB_STAT_FORKER_THREAD,
+
+	WB_STAT_MAX,
+};
+
+/*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
  * in a manner such that unspecified fields are set to zero.
@@ -80,14 +98,17 @@ struct writeback_control {
  */	
 struct bdi_writeback;
 int inode_wait(void *);
-void writeback_inodes_sb(struct super_block *);
-void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
-int writeback_inodes_sb_if_idle(struct super_block *);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
+void writeback_inodes_sb(struct super_block *, enum wb_stats stat);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
+int writeback_inodes_sb_if_idle(struct super_block *, enum wb_stats stat);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
 void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_stats stat);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6bca4cc..dbb8330 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -21,6 +21,18 @@
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
+#define show_work_reason(reason)					\
+	__print_symbolic(reason,					\
+		{WB_STAT_BALANCE_DIRTY,		"balance_dirty"},	\
+		{WB_STAT_BG_WRITEOUT,		"background"},		\
+		{WB_STAT_TRY_TO_FREE_PAGES,	"try_to_free_pages"},	\
+		{WB_STAT_SYNC,			"sync"},		\
+		{WB_STAT_KUPDATE,		"periodic"},		\
+		{WB_STAT_LAPTOP_TIMER,		"laptop_timer"},	\
+		{WB_STAT_FREE_MORE_MEM,		"free_more_memory"},	\
+		{WB_STAT_FS_FREE_SPACE,		"FS_free_space"}	\
+	)
+
 struct wb_writeback_work;
 
 DECLARE_EVENT_CLASS(writeback_work_class,
@@ -34,6 +46,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(int, for_kupdate)
 		__field(int, range_cyclic)
 		__field(int, for_background)
+		__field(int, reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +56,18 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__entry->for_kupdate = work->for_kupdate;
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background	= work->for_background;
+		__entry->reason = work->reason;
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
 		  __entry->sync_mode,
 		  __entry->for_kupdate,
 		  __entry->range_cyclic,
-		  __entry->for_background
+		  __entry->for_background,
+		  show_work_reason(__entry->reason)
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -181,27 +196,31 @@ DEFINE_WBC_EVENT(wbc_writepage);
 
 TRACE_EVENT(writeback_queue_io,
 	TP_PROTO(struct bdi_writeback *wb,
-		 unsigned long *older_than_this,
+		 struct wb_writeback_work *work,
 		 int moved),
-	TP_ARGS(wb, older_than_this, moved),
+	TP_ARGS(wb, work, moved),
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
 		__field(unsigned long,	older)
 		__field(long,		age)
 		__field(int,		moved)
+		__field(int,		reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
-		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+		__entry->older	= work->older_than_this ?
+						*work->older_than_this : 0;
+		__entry->age	= work->older_than_this ?
+			  (jiffies - *work->older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
+		__entry->reason	= work->reason;
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
-		__entry->moved)
+		__entry->moved,
+		show_work_reason(__entry->reason))
 );
 
 TRACE_EVENT(global_dirty_state,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d6edf8d..63b3b29 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -456,7 +456,8 @@ static int bdi_forker_thread(void *ptr)
 				 * the bdi from the thread. Hopefully 1024 is
 				 * large enough for efficient IO.
 				 */
-				writeback_inodes_wb(&bdi->wb, 1024);
+				writeback_inodes_wb(&bdi->wb, 1024,
+							WB_STAT_FORKER_THREAD);
 			} else {
 				/*
 				 * The spinlock makes sure we do not lose
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d196074..5503461 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -738,7 +738,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 		trace_balance_dirty_start(bdi);
 		if (bdi_nr_reclaimable > task_bdi_thresh) {
 			pages_written += writeback_inodes_wb(&bdi->wb,
-							     write_chunk);
+						write_chunk,
+						WB_STAT_BALANCE_DIRTY);
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
@@ -909,7 +910,8 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * threshold
 	 */
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages,
+					WB_STAT_LAPTOP_TIMER);
 }
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ef6912..4a882ca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -495,7 +495,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
-
 		/*
 		 * Wait on writeback if requested to. This happens when
 		 * direct reclaiming a large contiguous area and the
@@ -2198,7 +2197,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 */
 		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
 		if (total_scanned > writeback_threshold) {
-			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
+						WB_STAT_TRY_TO_FREE_PAGES);
 			sc->may_writepage = 1;
 		}
 
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-08-12 22:47 ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 22:47 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

This creates a new 'reason' field in a wb_writeback_work
structure, which unambiguously identifies who initiates
writeback activity.  A 'wb_stats' enumeration has been added
to writeback.h, to enumerate the possible reasons.

The 'writeback_work_class' tracepoint event class is updated
to include the symbolic 'reason' in all trace events.

The 'writeback_queue_io' tracepoint now takes a work object,
in order to print out the 'reason' for queue_io.

And the 'writeback_inodes_sbXXX' family of routines has had
a wb_stats parameter added to them, so callers can specify
why writeback is being started.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Acked-by: Jan Kara <jack@suse.cz>
---

Changelog since v1:

  - Added a "enum wb_stats" parameter to writeback_inodes_wb(),
    instead of assuming that the reason for this is always
    WB_STAT_BALANCE_DIRTY

  - Removed change around the call to writeback_inodes_wb() in
    balance_dirty_pages(), meant for patch 2/2 in this series.


 fs/btrfs/extent-tree.c           |    3 +-
 fs/buffer.c                      |    2 +-
 fs/ext4/inode.c                  |    2 +-
 fs/fs-writeback.c                |   49 ++++++++++++++++++++++++--------------
 fs/quota/quota.c                 |    2 +-
 fs/sync.c                        |    4 +-
 fs/ubifs/budget.c                |    2 +-
 include/linux/backing-dev.h      |    3 +-
 include/linux/writeback.h        |   33 +++++++++++++++++++++----
 include/trace/events/writeback.h |   37 +++++++++++++++++++++-------
 mm/backing-dev.c                 |    3 +-
 mm/page-writeback.c              |    6 +++-
 mm/vmscan.c                      |    4 +-
 13 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac22..5d085a9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3332,7 +3332,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+						WB_STAT_FS_FREE_SPACE);
 
 		spin_lock(&space_info->lock);
 		if (reserved > space_info->bytes_reserved)
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a80b04..cfca642 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -285,7 +285,7 @@ static void free_more_memory(void)
 	struct zone *zone;
 	int nid;
 
-	wakeup_flusher_threads(1024);
+	wakeup_flusher_threads(1024, WB_STAT_FREE_MORE_MEM);
 	yield();
 
 	for_each_online_node(nid) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fdc298..62aceb7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2200,7 +2200,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
 	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb);
+		writeback_inodes_sb_if_idle(sb, WB_STAT_FS_FREE_SPACE);
 
 	return 0;
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 04cf3b9..70aa19d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,6 +41,7 @@ struct wb_writeback_work {
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
+	enum wb_stats reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
@@ -115,7 +116,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		      bool range_cyclic)
+		      bool range_cyclic, enum wb_stats stat)
 {
 	struct wb_writeback_work *work;
 
@@ -135,6 +136,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
+	work->reason = stat;
 
 	bdi_queue_work(bdi, work);
 }
@@ -150,9 +152,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat)
 {
-	__bdi_start_writeback(bdi, nr_pages, true);
+	__bdi_start_writeback(bdi, nr_pages, true, stat);
 }
 
 /**
@@ -302,13 +305,14 @@ out:
  *                                           |
  *                                           +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
 	int moved;
 	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
-	trace_writeback_queue_io(wb, older_than_this, moved);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io,
+					work->older_than_this);
+	trace_writeback_queue_io(wb, work, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -641,17 +645,19 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
 	return wrote;
 }
 
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_stats stat)
 {
 	struct wb_writeback_work work = {
 		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
+		.reason		= stat,
 	};
 
 	spin_lock(&wb->list_lock);
 	if (list_empty(&wb->b_io))
-		queue_io(wb, NULL);
+		queue_io(wb, &work);
 	__writeback_inodes_wb(wb, &work);
 	spin_unlock(&wb->list_lock);
 
@@ -738,7 +744,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 
 		trace_writeback_start(wb->bdi, work);
 		if (list_empty(&wb->b_io))
-			queue_io(wb, work->older_than_this);
+			queue_io(wb, work);
 		if (work->sb)
 			progress = writeback_sb_inodes(work->sb, wb, work);
 		else
@@ -818,6 +824,7 @@ static long wb_check_background_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_background	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_BG_WRITEOUT,
 		};
 
 		return wb_writeback(wb, &work);
@@ -851,6 +858,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_KUPDATE,
 		};
 
 		return wb_writeback(wb, &work);
@@ -969,7 +977,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat)
 {
 	struct backing_dev_info *bdi;
 
@@ -982,7 +990,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false);
+		__bdi_start_writeback(bdi, nr_pages, false, stat);
 	}
 	rcu_read_unlock();
 }
@@ -1203,7 +1211,9 @@ static void wait_sb_inodes(struct super_block *sb)
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+			    unsigned long nr,
+			    enum wb_stats stat)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
@@ -1212,6 +1222,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 		.tagged_writepages	= 1,
 		.done			= &done,
 		.nr_pages		= nr,
+		.reason			= stat,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1228,9 +1239,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_stats stat)
 {
-	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), stat);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1241,11 +1252,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1262,11 +1273,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr)
+				   unsigned long nr,
+				   enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr);
+		writeback_inodes_sb_nr(sb, nr, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1290,6 +1302,7 @@ void sync_inodes_sb(struct super_block *sb)
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
 		.done		= &done,
+		.reason		= WB_STAT_SYNC,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb2..a3d158c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		/* caller already holds s_umount */
 		if (sb->s_flags & MS_RDONLY)
 			return -EROFS;
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/fs/sync.c b/fs/sync.c
index c98a747..b333bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -43,7 +43,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
@@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_flusher_threads(0);
+	wakeup_flusher_threads(0, WB_STAT_SYNC);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 315de66..9a392a0 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -63,7 +63,7 @@
 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	down_read(&c->vfs_sb->s_umount);
-	writeback_inodes_sb(c->vfs_sb);
+	writeback_inodes_sb(c->vfs_sb, WB_STAT_FS_FREE_SPACE);
 	up_read(&c->vfs_sb->s_umount);
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3b2f9cb..9da81ef 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -107,7 +107,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f1bfa12e..a70696d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -50,6 +50,24 @@ enum writeback_sync_modes {
 };
 
 /*
+ * why this writeback was initiated
+ */
+enum wb_stats {
+	/* The following are counts of pages written for a specific cause */
+	WB_STAT_BALANCE_DIRTY,
+	WB_STAT_BG_WRITEOUT,
+	WB_STAT_TRY_TO_FREE_PAGES,
+	WB_STAT_SYNC,
+	WB_STAT_KUPDATE,
+	WB_STAT_LAPTOP_TIMER,
+	WB_STAT_FREE_MORE_MEM,
+	WB_STAT_FS_FREE_SPACE,
+	WB_STAT_FORKER_THREAD,
+
+	WB_STAT_MAX,
+};
+
+/*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
  * in a manner such that unspecified fields are set to zero.
@@ -80,14 +98,17 @@ struct writeback_control {
  */	
 struct bdi_writeback;
 int inode_wait(void *);
-void writeback_inodes_sb(struct super_block *);
-void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
-int writeback_inodes_sb_if_idle(struct super_block *);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
+void writeback_inodes_sb(struct super_block *, enum wb_stats stat);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
+int writeback_inodes_sb_if_idle(struct super_block *, enum wb_stats stat);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
 void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_stats stat);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6bca4cc..dbb8330 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -21,6 +21,18 @@
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
+#define show_work_reason(reason)					\
+	__print_symbolic(reason,					\
+		{WB_STAT_BALANCE_DIRTY,		"balance_dirty"},	\
+		{WB_STAT_BG_WRITEOUT,		"background"},		\
+		{WB_STAT_TRY_TO_FREE_PAGES,	"try_to_free_pages"},	\
+		{WB_STAT_SYNC,			"sync"},		\
+		{WB_STAT_KUPDATE,		"periodic"},		\
+		{WB_STAT_LAPTOP_TIMER,		"laptop_timer"},	\
+		{WB_STAT_FREE_MORE_MEM,		"free_more_memory"},	\
+		{WB_STAT_FS_FREE_SPACE,		"FS_free_space"}	\
+	)
+
 struct wb_writeback_work;
 
 DECLARE_EVENT_CLASS(writeback_work_class,
@@ -34,6 +46,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(int, for_kupdate)
 		__field(int, range_cyclic)
 		__field(int, for_background)
+		__field(int, reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +56,18 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__entry->for_kupdate = work->for_kupdate;
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background	= work->for_background;
+		__entry->reason = work->reason;
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
 		  __entry->sync_mode,
 		  __entry->for_kupdate,
 		  __entry->range_cyclic,
-		  __entry->for_background
+		  __entry->for_background,
+		  show_work_reason(__entry->reason)
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -181,27 +196,31 @@ DEFINE_WBC_EVENT(wbc_writepage);
 
 TRACE_EVENT(writeback_queue_io,
 	TP_PROTO(struct bdi_writeback *wb,
-		 unsigned long *older_than_this,
+		 struct wb_writeback_work *work,
 		 int moved),
-	TP_ARGS(wb, older_than_this, moved),
+	TP_ARGS(wb, work, moved),
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
 		__field(unsigned long,	older)
 		__field(long,		age)
 		__field(int,		moved)
+		__field(int,		reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
-		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+		__entry->older	= work->older_than_this ?
+						*work->older_than_this : 0;
+		__entry->age	= work->older_than_this ?
+			  (jiffies - *work->older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
+		__entry->reason	= work->reason;
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
-		__entry->moved)
+		__entry->moved,
+		show_work_reason(__entry->reason))
 );
 
 TRACE_EVENT(global_dirty_state,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d6edf8d..63b3b29 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -456,7 +456,8 @@ static int bdi_forker_thread(void *ptr)
 				 * the bdi from the thread. Hopefully 1024 is
 				 * large enough for efficient IO.
 				 */
-				writeback_inodes_wb(&bdi->wb, 1024);
+				writeback_inodes_wb(&bdi->wb, 1024,
+							WB_STAT_FORKER_THREAD);
 			} else {
 				/*
 				 * The spinlock makes sure we do not lose
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d196074..5503461 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -738,7 +738,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 		trace_balance_dirty_start(bdi);
 		if (bdi_nr_reclaimable > task_bdi_thresh) {
 			pages_written += writeback_inodes_wb(&bdi->wb,
-							     write_chunk);
+						write_chunk,
+						WB_STAT_BALANCE_DIRTY);
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
@@ -909,7 +910,8 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * threshold
 	 */
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages,
+					WB_STAT_LAPTOP_TIMER);
 }
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ef6912..4a882ca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -495,7 +495,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
-
 		/*
 		 * Wait on writeback if requested to. This happens when
 		 * direct reclaiming a large contiguous area and the
@@ -2198,7 +2197,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 */
 		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
 		if (total_scanned > writeback_threshold) {
-			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
+						WB_STAT_TRY_TO_FREE_PAGES);
 			sc->may_writepage = 1;
 		}
 
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-12 22:47 ` Curt Wohlgemuth
@ 2011-08-12 22:47   ` Curt Wohlgemuth
  -1 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 22:47 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton, Dave Chinner
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

Add a new file, /proc/writeback/stats, which displays
machine global data for how many pages were cleaned for
which reasons.  It also displays some additional counts for
various writeback events.

These data are also available for each BDI, in
/sys/block/<device>/bdi/writeback_stats .

Sample output:

   page: balance_dirty_pages           2561544
   page: background_writeout              5153
   page: try_to_free_pages                   0
   page: sync                                0
   page: kupdate                        102723
   page: fdatawrite                    1228779
   page: laptop_periodic                     0
   page: free_more_memory                    0
   page: fs_free_space                       0
   periodic writeback                      377
   single inode wait                         0
   writeback_wb wait                         1

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
---
 fs/fs-writeback.c           |   16 ++++-
 fs/proc/root.c              |    2 +
 include/linux/backing-dev.h |    6 ++
 include/linux/writeback.h   |   24 +++++++
 mm/backing-dev.c            |  152 +++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                |    4 +
 mm/page-writeback.c         |    7 ++-
 7 files changed, 208 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 70aa19d..adc3b73 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -388,6 +388,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		/*
 		 * It's a data-integrity sync.  We must wait.
 		 */
+		bdi_writeback_stat_inc(inode->i_mapping->backing_dev_info,
+					WB_STAT_SINGLE_INODE_WAIT);
 		inode_wait_for_writeback(inode, wb);
 	}
 
@@ -543,6 +545,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		long wrote_this_inode;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -581,8 +584,10 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 		writeback_single_inode(inode, wb, &wbc);
 
-		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+		wrote_this_inode = write_chunk - wbc.nr_to_write;
+
+		work->nr_pages -= wrote_this_inode;
+		wrote += wrote_this_inode;
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 		if (wbc.pages_skipped) {
@@ -592,6 +597,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 			 */
 			redirty_tail(inode, wb);
 		}
+		bdi_writeback_stat_add(wb->bdi,
+					work->reason,
+					wrote_this_inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&wb->list_lock);
 		iput(inode);
@@ -777,6 +785,9 @@ static long wb_writeback(struct bdi_writeback *wb,
 			trace_writeback_wait(wb->bdi, work);
 			inode = wb_inode(wb->b_more_io.prev);
 			spin_lock(&inode->i_lock);
+			bdi_writeback_stat_inc(
+					inode->i_mapping->backing_dev_info,
+					WB_STAT_WRITEBACK_WB_WAIT);
 			inode_wait_for_writeback(inode, wb);
 			spin_unlock(&inode->i_lock);
 		}
@@ -937,6 +948,7 @@ int bdi_writeback_thread(void *data)
 		 */
 		del_timer(&wb->wakeup_timer);
 
+		bdi_writeback_stat_inc(bdi, WB_STAT_PERIODIC);
 		pages_written = wb_do_writeback(wb, 0);
 
 		trace_writeback_pages_written(pages_written);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b7..c0e2412 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
+#include <linux/backing-dev.h>
 
 #include "internal.h"
 
@@ -125,6 +126,7 @@ void __init proc_root_init(void)
 #endif
 	proc_mkdir("bus", NULL);
 	proc_sys_init();
+	proc_writeback_init();
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9da81ef..b97dd92 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -72,6 +72,7 @@ struct backing_dev_info {
 	char *name;
 
 	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
+	struct writeback_stats *wb_stat;
 
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
@@ -190,6 +191,11 @@ static inline s64 bdi_stat_sum(struct backing_dev_info *bdi,
 	return sum;
 }
 
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat);
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value);
+void proc_writeback_init(void);
+
 extern void bdi_writeout_inc(struct backing_dev_info *bdi);
 
 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a70696d..d4e0113 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,14 +59,38 @@ enum wb_stats {
 	WB_STAT_TRY_TO_FREE_PAGES,
 	WB_STAT_SYNC,
 	WB_STAT_KUPDATE,
+	WB_STAT_FDATAWRITE,
 	WB_STAT_LAPTOP_TIMER,
 	WB_STAT_FREE_MORE_MEM,
 	WB_STAT_FS_FREE_SPACE,
 	WB_STAT_FORKER_THREAD,
 
+	/* These are event counts */
+	WB_STAT_PERIODIC,
+	WB_STAT_SINGLE_INODE_WAIT,
+	WB_STAT_WRITEBACK_WB_WAIT,
+
 	WB_STAT_MAX,
 };
 
+struct writeback_stats {
+	u64 stats[WB_STAT_MAX];
+};
+
+extern struct writeback_stats *writeback_sys_stats;
+
+static inline struct writeback_stats *writeback_stats_alloc(void)
+{
+	return alloc_percpu(struct writeback_stats);
+}
+
+static inline void writeback_stats_free(struct writeback_stats *stats)
+{
+	free_percpu(stats);
+}
+
+size_t writeback_stats_print(struct writeback_stats *, char *buf, size_t);
+
 /*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 63b3b29..b37dc66 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/proc_fs.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -157,6 +158,13 @@ static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
 }
 #endif
 
+static ssize_t writeback_stats_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	return writeback_stats_print(bdi->wb_stat, buf, PAGE_SIZE);
+}
+
 static ssize_t read_ahead_kb_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
@@ -229,6 +237,7 @@ static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RO(writeback_stats),
 	__ATTR_NULL,
 };
 
@@ -678,8 +687,13 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	err = prop_local_init_percpu(&bdi->completions);
 
+	bdi->wb_stat = writeback_stats_alloc();
+	if (bdi->wb_stat == NULL)
+		err = -ENOMEM;
+
 	if (err) {
 err:
+		writeback_stats_free(bdi->wb_stat);
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
 	}
@@ -712,6 +726,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
+	writeback_stats_free(bdi->wb_stat);
+
 	prop_local_destroy_percpu(&bdi->completions);
 }
 EXPORT_SYMBOL(bdi_destroy);
@@ -854,3 +870,139 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL(wait_iff_congested);
+
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value)
+{
+	if (bdi) {
+		struct writeback_stats *stats = bdi->wb_stat;
+
+		BUG_ON(stat >= WB_STAT_MAX);
+		preempt_disable();
+		stats = per_cpu_ptr(stats, smp_processor_id());
+		stats->stats[stat] += value;
+		if (likely(writeback_sys_stats)) {
+			stats = per_cpu_ptr(writeback_sys_stats,
+					    smp_processor_id());
+			stats->stats[stat] += value;
+		}
+		preempt_enable();
+	}
+}
+
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat)
+{
+	bdi_writeback_stat_add(bdi, stat, 1);
+}
+
+struct writeback_stats *writeback_sys_stats;
+
+enum writeback_op {
+	WB_STATS_OP,
+};
+
+static const char *wb_stats_labels[WB_STAT_MAX] = {
+	[WB_STAT_BALANCE_DIRTY] = "page: balance_dirty_pages",
+	[WB_STAT_BG_WRITEOUT] = "page: background_writeout",
+	[WB_STAT_TRY_TO_FREE_PAGES] = "page: try_to_free_pages",
+	[WB_STAT_SYNC] = "page: sync",
+	[WB_STAT_KUPDATE] = "page: kupdate",
+	[WB_STAT_FDATAWRITE] = "page: fdatawrite",
+	[WB_STAT_LAPTOP_TIMER] = "page: laptop_periodic",
+	[WB_STAT_FREE_MORE_MEM] = "page: free_more_memory",
+	[WB_STAT_FS_FREE_SPACE] = "page: fs_free_space",
+
+	[WB_STAT_PERIODIC] = "periodic writeback",
+	[WB_STAT_SINGLE_INODE_WAIT] = "single inode wait",
+	[WB_STAT_WRITEBACK_WB_WAIT] = "writeback_wb wait",
+};
+
+static void writeback_stats_collect(struct writeback_stats *src,
+			struct writeback_stats *target)
+{
+	int cpu;
+	for_each_online_cpu(cpu) {
+		int stat;
+		struct writeback_stats *stats = per_cpu_ptr(src, cpu);
+		for (stat = 0; stat < WB_STAT_MAX; stat++)
+			target->stats[stat] += stats->stats[stat];
+	}
+}
+
+static size_t writeback_stats_to_str(struct writeback_stats *stats,
+				    char *buf, size_t len)
+{
+	int bufsize = len - 1;
+	int i, printed = 0;
+	for (i = 0; i < WB_STAT_MAX; i++) {
+		const char *label = wb_stats_labels[i];
+		if (label == NULL)
+			continue;
+		printed += snprintf(buf + printed, bufsize - printed,
+				"%-32s %10llu\n", label, stats->stats[i]);
+		if (printed >= bufsize) {
+			buf[len - 1] = '\n';
+			return len;
+		}
+	}
+
+	buf[printed - 1] = '\n';
+	return printed;
+}
+
+size_t writeback_stats_print(struct writeback_stats *stats,
+			     char *buf, size_t len)
+{
+	struct writeback_stats total;
+	memset(&total, 0, sizeof(total));
+	writeback_stats_collect(stats, &total);
+	return writeback_stats_to_str(&total, buf, len);
+}
+
+
+static int writeback_seq_show(struct seq_file *m, void *data)
+{
+	char *buf;
+	size_t size;
+	switch ((enum writeback_op)m->private) {
+	case WB_STATS_OP:
+		size = seq_get_buf(m, &buf);
+		if (size == 0)
+			return 0;
+		size = writeback_stats_print(writeback_sys_stats, buf, size);
+		seq_commit(m, size);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int writeback_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, writeback_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations writeback_ops = {
+	.open           = writeback_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+
+void __init proc_writeback_init(void)
+{
+	struct proc_dir_entry *base_dir;
+	base_dir = proc_mkdir("writeback", NULL);
+	if (base_dir == NULL) {
+		printk(KERN_ERR "Creating /proc/writeback/ failed");
+		return;
+	}
+
+	writeback_sys_stats = alloc_percpu(struct writeback_stats);
+
+	proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
+			&writeback_ops, (void *)WB_STATS_OP);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 645a080..30cdf92 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -216,6 +216,10 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 		return 0;
 
 	ret = do_writepages(mapping, &wbc);
+
+	bdi_writeback_stat_add(mapping->backing_dev_info,
+				WB_STAT_FDATAWRITE,
+				LONG_MAX - wbc.nr_to_write);
 	return ret;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5503461..b209f4d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -737,9 +737,14 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		trace_balance_dirty_start(bdi);
 		if (bdi_nr_reclaimable > task_bdi_thresh) {
-			pages_written += writeback_inodes_wb(&bdi->wb,
+			long wrote;
+			wrote = writeback_inodes_wb(&bdi->wb,
 						write_chunk,
 						WB_STAT_BALANCE_DIRTY);
+			pages_written += wrote;
+			bdi_writeback_stat_add(bdi,
+						WB_STAT_BALANCE_DIRTY,
+						wrote);
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-12 22:47   ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 22:47 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

Add a new file, /proc/writeback/stats, which displays
machine global data for how many pages were cleaned for
which reasons.  It also displays some additional counts for
various writeback events.

These data are also available for each BDI, in
/sys/block/<device>/bdi/writeback_stats .

Sample output:

   page: balance_dirty_pages           2561544
   page: background_writeout              5153
   page: try_to_free_pages                   0
   page: sync                                0
   page: kupdate                        102723
   page: fdatawrite                    1228779
   page: laptop_periodic                     0
   page: free_more_memory                    0
   page: fs_free_space                       0
   periodic writeback                      377
   single inode wait                         0
   writeback_wb wait                         1

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
---
 fs/fs-writeback.c           |   16 ++++-
 fs/proc/root.c              |    2 +
 include/linux/backing-dev.h |    6 ++
 include/linux/writeback.h   |   24 +++++++
 mm/backing-dev.c            |  152 +++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                |    4 +
 mm/page-writeback.c         |    7 ++-
 7 files changed, 208 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 70aa19d..adc3b73 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -388,6 +388,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		/*
 		 * It's a data-integrity sync.  We must wait.
 		 */
+		bdi_writeback_stat_inc(inode->i_mapping->backing_dev_info,
+					WB_STAT_SINGLE_INODE_WAIT);
 		inode_wait_for_writeback(inode, wb);
 	}
 
@@ -543,6 +545,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		long wrote_this_inode;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -581,8 +584,10 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 		writeback_single_inode(inode, wb, &wbc);
 
-		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+		wrote_this_inode = write_chunk - wbc.nr_to_write;
+
+		work->nr_pages -= wrote_this_inode;
+		wrote += wrote_this_inode;
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 		if (wbc.pages_skipped) {
@@ -592,6 +597,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 			 */
 			redirty_tail(inode, wb);
 		}
+		bdi_writeback_stat_add(wb->bdi,
+					work->reason,
+					wrote_this_inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&wb->list_lock);
 		iput(inode);
@@ -777,6 +785,9 @@ static long wb_writeback(struct bdi_writeback *wb,
 			trace_writeback_wait(wb->bdi, work);
 			inode = wb_inode(wb->b_more_io.prev);
 			spin_lock(&inode->i_lock);
+			bdi_writeback_stat_inc(
+					inode->i_mapping->backing_dev_info,
+					WB_STAT_WRITEBACK_WB_WAIT);
 			inode_wait_for_writeback(inode, wb);
 			spin_unlock(&inode->i_lock);
 		}
@@ -937,6 +948,7 @@ int bdi_writeback_thread(void *data)
 		 */
 		del_timer(&wb->wakeup_timer);
 
+		bdi_writeback_stat_inc(bdi, WB_STAT_PERIODIC);
 		pages_written = wb_do_writeback(wb, 0);
 
 		trace_writeback_pages_written(pages_written);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b7..c0e2412 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
+#include <linux/backing-dev.h>
 
 #include "internal.h"
 
@@ -125,6 +126,7 @@ void __init proc_root_init(void)
 #endif
 	proc_mkdir("bus", NULL);
 	proc_sys_init();
+	proc_writeback_init();
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9da81ef..b97dd92 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -72,6 +72,7 @@ struct backing_dev_info {
 	char *name;
 
 	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
+	struct writeback_stats *wb_stat;
 
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
@@ -190,6 +191,11 @@ static inline s64 bdi_stat_sum(struct backing_dev_info *bdi,
 	return sum;
 }
 
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat);
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value);
+void proc_writeback_init(void);
+
 extern void bdi_writeout_inc(struct backing_dev_info *bdi);
 
 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a70696d..d4e0113 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,14 +59,38 @@ enum wb_stats {
 	WB_STAT_TRY_TO_FREE_PAGES,
 	WB_STAT_SYNC,
 	WB_STAT_KUPDATE,
+	WB_STAT_FDATAWRITE,
 	WB_STAT_LAPTOP_TIMER,
 	WB_STAT_FREE_MORE_MEM,
 	WB_STAT_FS_FREE_SPACE,
 	WB_STAT_FORKER_THREAD,
 
+	/* These are event counts */
+	WB_STAT_PERIODIC,
+	WB_STAT_SINGLE_INODE_WAIT,
+	WB_STAT_WRITEBACK_WB_WAIT,
+
 	WB_STAT_MAX,
 };
 
+struct writeback_stats {
+	u64 stats[WB_STAT_MAX];
+};
+
+extern struct writeback_stats *writeback_sys_stats;
+
+static inline struct writeback_stats *writeback_stats_alloc(void)
+{
+	return alloc_percpu(struct writeback_stats);
+}
+
+static inline void writeback_stats_free(struct writeback_stats *stats)
+{
+	free_percpu(stats);
+}
+
+size_t writeback_stats_print(struct writeback_stats *, char *buf, size_t);
+
 /*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 63b3b29..b37dc66 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/proc_fs.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -157,6 +158,13 @@ static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
 }
 #endif
 
+static ssize_t writeback_stats_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	return writeback_stats_print(bdi->wb_stat, buf, PAGE_SIZE);
+}
+
 static ssize_t read_ahead_kb_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
@@ -229,6 +237,7 @@ static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RO(writeback_stats),
 	__ATTR_NULL,
 };
 
@@ -678,8 +687,13 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	err = prop_local_init_percpu(&bdi->completions);
 
+	bdi->wb_stat = writeback_stats_alloc();
+	if (bdi->wb_stat == NULL)
+		err = -ENOMEM;
+
 	if (err) {
 err:
+		writeback_stats_free(bdi->wb_stat);
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
 	}
@@ -712,6 +726,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
+	writeback_stats_free(bdi->wb_stat);
+
 	prop_local_destroy_percpu(&bdi->completions);
 }
 EXPORT_SYMBOL(bdi_destroy);
@@ -854,3 +870,139 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL(wait_iff_congested);
+
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value)
+{
+	if (bdi) {
+		struct writeback_stats *stats = bdi->wb_stat;
+
+		BUG_ON(stat >= WB_STAT_MAX);
+		preempt_disable();
+		stats = per_cpu_ptr(stats, smp_processor_id());
+		stats->stats[stat] += value;
+		if (likely(writeback_sys_stats)) {
+			stats = per_cpu_ptr(writeback_sys_stats,
+					    smp_processor_id());
+			stats->stats[stat] += value;
+		}
+		preempt_enable();
+	}
+}
+
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat)
+{
+	bdi_writeback_stat_add(bdi, stat, 1);
+}
+
+struct writeback_stats *writeback_sys_stats;
+
+enum writeback_op {
+	WB_STATS_OP,
+};
+
+static const char *wb_stats_labels[WB_STAT_MAX] = {
+	[WB_STAT_BALANCE_DIRTY] = "page: balance_dirty_pages",
+	[WB_STAT_BG_WRITEOUT] = "page: background_writeout",
+	[WB_STAT_TRY_TO_FREE_PAGES] = "page: try_to_free_pages",
+	[WB_STAT_SYNC] = "page: sync",
+	[WB_STAT_KUPDATE] = "page: kupdate",
+	[WB_STAT_FDATAWRITE] = "page: fdatawrite",
+	[WB_STAT_LAPTOP_TIMER] = "page: laptop_periodic",
+	[WB_STAT_FREE_MORE_MEM] = "page: free_more_memory",
+	[WB_STAT_FS_FREE_SPACE] = "page: fs_free_space",
+
+	[WB_STAT_PERIODIC] = "periodic writeback",
+	[WB_STAT_SINGLE_INODE_WAIT] = "single inode wait",
+	[WB_STAT_WRITEBACK_WB_WAIT] = "writeback_wb wait",
+};
+
+static void writeback_stats_collect(struct writeback_stats *src,
+			struct writeback_stats *target)
+{
+	int cpu;
+	for_each_online_cpu(cpu) {
+		int stat;
+		struct writeback_stats *stats = per_cpu_ptr(src, cpu);
+		for (stat = 0; stat < WB_STAT_MAX; stat++)
+			target->stats[stat] += stats->stats[stat];
+	}
+}
+
+static size_t writeback_stats_to_str(struct writeback_stats *stats,
+				    char *buf, size_t len)
+{
+	int bufsize = len - 1;
+	int i, printed = 0;
+	for (i = 0; i < WB_STAT_MAX; i++) {
+		const char *label = wb_stats_labels[i];
+		if (label == NULL)
+			continue;
+		printed += snprintf(buf + printed, bufsize - printed,
+				"%-32s %10llu\n", label, stats->stats[i]);
+		if (printed >= bufsize) {
+			buf[len - 1] = '\n';
+			return len;
+		}
+	}
+
+	buf[printed - 1] = '\n';
+	return printed;
+}
+
+size_t writeback_stats_print(struct writeback_stats *stats,
+			     char *buf, size_t len)
+{
+	struct writeback_stats total;
+	memset(&total, 0, sizeof(total));
+	writeback_stats_collect(stats, &total);
+	return writeback_stats_to_str(&total, buf, len);
+}
+
+
+static int writeback_seq_show(struct seq_file *m, void *data)
+{
+	char *buf;
+	size_t size;
+	switch ((enum writeback_op)m->private) {
+	case WB_STATS_OP:
+		size = seq_get_buf(m, &buf);
+		if (size == 0)
+			return 0;
+		size = writeback_stats_print(writeback_sys_stats, buf, size);
+		seq_commit(m, size);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int writeback_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, writeback_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations writeback_ops = {
+	.open           = writeback_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+
+void __init proc_writeback_init(void)
+{
+	struct proc_dir_entry *base_dir;
+	base_dir = proc_mkdir("writeback", NULL);
+	if (base_dir == NULL) {
+		printk(KERN_ERR "Creating /proc/writeback/ failed");
+		return;
+	}
+
+	writeback_sys_stats = alloc_percpu(struct writeback_stats);
+
+	proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
+			&writeback_ops, (void *)WB_STATS_OP);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 645a080..30cdf92 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -216,6 +216,10 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 		return 0;
 
 	ret = do_writepages(mapping, &wbc);
+
+	bdi_writeback_stat_add(mapping->backing_dev_info,
+				WB_STAT_FDATAWRITE,
+				LONG_MAX - wbc.nr_to_write);
 	return ret;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5503461..b209f4d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -737,9 +737,14 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		trace_balance_dirty_start(bdi);
 		if (bdi_nr_reclaimable > task_bdi_thresh) {
-			pages_written += writeback_inodes_wb(&bdi->wb,
+			long wrote;
+			wrote = writeback_inodes_wb(&bdi->wb,
 						write_chunk,
 						WB_STAT_BALANCE_DIRTY);
+			pages_written += wrote;
+			bdi_writeback_stat_add(bdi,
+						WB_STAT_BALANCE_DIRTY,
+						wrote);
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
  2011-08-12 22:47 ` Curt Wohlgemuth
@ 2011-08-15 13:19   ` Wu Fengguang
  -1 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-15 13:19 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Curt,

This is a very useful patch, thanks!  Nitpicks followed :)

> +       enum wb_stats reason;           /* why was writeback initiated? */

Not about this patch, but some time later, some one may well find the
->for_background, ->for_kupdate fields duplicated with ->reason, and
try to eliminate the struct fields as well as the trace point fields :)

>  /*
> + * why this writeback was initiated
> + */
> +enum wb_stats {
> +       /* The following are counts of pages written for a specific cause */
> +       WB_STAT_BALANCE_DIRTY,
> +       WB_STAT_BG_WRITEOUT,
> +       WB_STAT_TRY_TO_FREE_PAGES,
> +       WB_STAT_SYNC,
> +       WB_STAT_KUPDATE,
> +       WB_STAT_LAPTOP_TIMER,
> +       WB_STAT_FREE_MORE_MEM,
> +       WB_STAT_FS_FREE_SPACE,
> +       WB_STAT_FORKER_THREAD,
> +
> +       WB_STAT_MAX,
> +};

I find it more comfortable to use "reason", "enum wb_reason" and
WB_REASON_* uniformly. Yeah, I read in the next patch that you'll add
other stat fields, however they are different in concept and looks
better be put to another enum. With some index shift, it should yield
the same efficient code, with better code readability.

> +#define show_work_reason(reason)                                       \
> +       __print_symbolic(reason,                                        \
> +               {WB_STAT_BALANCE_DIRTY,         "balance_dirty"},       \
> +               {WB_STAT_BG_WRITEOUT,           "background"},          \
> +               {WB_STAT_TRY_TO_FREE_PAGES,     "try_to_free_pages"},   \
> +               {WB_STAT_SYNC,                  "sync"},                \
> +               {WB_STAT_KUPDATE,               "periodic"},            \
> +               {WB_STAT_LAPTOP_TIMER,          "laptop_timer"},        \
> +               {WB_STAT_FREE_MORE_MEM,         "free_more_memory"},    \
> +               {WB_STAT_FS_FREE_SPACE,         "FS_free_space"}        \
> +       )

Some symbolic names disagree with the names used in the next patch..

> -                 "kupdate=%d range_cyclic=%d background=%d",
> +                 "kupdate=%d range_cyclic=%d background=%d reason=%s",

Here is the obvious duplicates. I'm not sure if there are serious
scripts relying on the kupdate/background fields (none from me), and
if we are going to carry this redundancy in future..

>  TRACE_EVENT(writeback_queue_io,
>         TP_PROTO(struct bdi_writeback *wb,
> -                unsigned long *older_than_this,
> +                struct wb_writeback_work *work,
>                  int moved),
> -       TP_ARGS(wb, older_than_this, moved),
> +       TP_ARGS(wb, work, moved),
>         TP_STRUCT__entry(
>                 __array(char,           name, 32)
>                 __field(unsigned long,  older)
>                 __field(long,           age)
>                 __field(int,            moved)
> +               __field(int,            reason)
>         ),
>         TP_fast_assign(
>                 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
> -               __entry->older  = older_than_this ?  *older_than_this : 0;
> -               __entry->age    = older_than_this ?
> -                                 (jiffies - *older_than_this) * 1000 / HZ : -1;
> +               __entry->older  = work->older_than_this ?
> +                                               *work->older_than_this : 0;
> +               __entry->age    = work->older_than_this ?
> +                         (jiffies - *work->older_than_this) * 1000 / HZ : -1;

The older_than_this change seems big enough for a standalone patch.

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-08-15 13:19   ` Wu Fengguang
  0 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-15 13:19 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Curt,

This is a very useful patch, thanks!  Nitpicks followed :)

> +       enum wb_stats reason;           /* why was writeback initiated? */

Not about this patch, but some time later, some one may well find the
->for_background, ->for_kupdate fields duplicated with ->reason, and
try to eliminate the struct fields as well as the trace point fields :)

>  /*
> + * why this writeback was initiated
> + */
> +enum wb_stats {
> +       /* The following are counts of pages written for a specific cause */
> +       WB_STAT_BALANCE_DIRTY,
> +       WB_STAT_BG_WRITEOUT,
> +       WB_STAT_TRY_TO_FREE_PAGES,
> +       WB_STAT_SYNC,
> +       WB_STAT_KUPDATE,
> +       WB_STAT_LAPTOP_TIMER,
> +       WB_STAT_FREE_MORE_MEM,
> +       WB_STAT_FS_FREE_SPACE,
> +       WB_STAT_FORKER_THREAD,
> +
> +       WB_STAT_MAX,
> +};

I find it more comfortable to use "reason", "enum wb_reason" and
WB_REASON_* uniformly. Yeah, I read in the next patch that you'll add
other stat fields, however they are different in concept and looks
better be put to another enum. With some index shift, it should yield
the same efficient code, with better code readability.

> +#define show_work_reason(reason)                                       \
> +       __print_symbolic(reason,                                        \
> +               {WB_STAT_BALANCE_DIRTY,         "balance_dirty"},       \
> +               {WB_STAT_BG_WRITEOUT,           "background"},          \
> +               {WB_STAT_TRY_TO_FREE_PAGES,     "try_to_free_pages"},   \
> +               {WB_STAT_SYNC,                  "sync"},                \
> +               {WB_STAT_KUPDATE,               "periodic"},            \
> +               {WB_STAT_LAPTOP_TIMER,          "laptop_timer"},        \
> +               {WB_STAT_FREE_MORE_MEM,         "free_more_memory"},    \
> +               {WB_STAT_FS_FREE_SPACE,         "FS_free_space"}        \
> +       )

Some symbolic names disagree with the names used in the next patch..

> -                 "kupdate=%d range_cyclic=%d background=%d",
> +                 "kupdate=%d range_cyclic=%d background=%d reason=%s",

Here is the obvious duplicates. I'm not sure if there are serious
scripts relying on the kupdate/background fields (none from me), and
if we are going to carry this redundancy in future..

>  TRACE_EVENT(writeback_queue_io,
>         TP_PROTO(struct bdi_writeback *wb,
> -                unsigned long *older_than_this,
> +                struct wb_writeback_work *work,
>                  int moved),
> -       TP_ARGS(wb, older_than_this, moved),
> +       TP_ARGS(wb, work, moved),
>         TP_STRUCT__entry(
>                 __array(char,           name, 32)
>                 __field(unsigned long,  older)
>                 __field(long,           age)
>                 __field(int,            moved)
> +               __field(int,            reason)
>         ),
>         TP_fast_assign(
>                 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
> -               __entry->older  = older_than_this ?  *older_than_this : 0;
> -               __entry->age    = older_than_this ?
> -                                 (jiffies - *older_than_this) * 1000 / HZ : -1;
> +               __entry->older  = work->older_than_this ?
> +                                               *work->older_than_this : 0;
> +               __entry->age    = work->older_than_this ?
> +                         (jiffies - *work->older_than_this) * 1000 / HZ : -1;

The older_than_this change seems big enough for a standalone patch.

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-12 22:47   ` Curt Wohlgemuth
@ 2011-08-15 13:48     ` Wu Fengguang
  -1 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-15 13:48 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Curt,

Some thoughts about the interface..before dipping into the code.

On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
> Add a new file, /proc/writeback/stats, which displays

That's creating a new top directory in /proc. Do you have plans for
adding more files under it?

> machine global data for how many pages were cleaned for
> which reasons.  It also displays some additional counts for
> various writeback events.
> 
> These data are also available for each BDI, in
> /sys/block/<device>/bdi/writeback_stats .

> Sample output:
> 
>    page: balance_dirty_pages           2561544
>    page: background_writeout              5153
>    page: try_to_free_pages                   0
>    page: sync                                0
>    page: kupdate                        102723
>    page: fdatawrite                    1228779
>    page: laptop_periodic                     0
>    page: free_more_memory                    0
>    page: fs_free_space                       0
>    periodic writeback                      377
>    single inode wait                         0
>    writeback_wb wait                         1

That's already useful data, and could be further extended (in
future patches) to answer questions like "what's the writeback
efficiency in terms of effective chunk size?"

So in future there could be lines like

    pages: balance_dirty_pages           2561544
    chunks: balance_dirty_pages          XXXXXXX
    works: balance_dirty_pages           XXXXXXX

or even derived lines like

    pages_per_chunk: balance_dirty_pages         XXXXXXX
    pages_per_work: balance_dirty_pages          XXXXXXX

Another question is, how can the display format be script friendly?
The current form looks not easily parse-able at least for "cut"..

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-15 13:48     ` Wu Fengguang
  0 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-15 13:48 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Curt,

Some thoughts about the interface..before dipping into the code.

On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
> Add a new file, /proc/writeback/stats, which displays

That's creating a new top directory in /proc. Do you have plans for
adding more files under it?

> machine global data for how many pages were cleaned for
> which reasons.  It also displays some additional counts for
> various writeback events.
> 
> These data are also available for each BDI, in
> /sys/block/<device>/bdi/writeback_stats .

> Sample output:
> 
>    page: balance_dirty_pages           2561544
>    page: background_writeout              5153
>    page: try_to_free_pages                   0
>    page: sync                                0
>    page: kupdate                        102723
>    page: fdatawrite                    1228779
>    page: laptop_periodic                     0
>    page: free_more_memory                    0
>    page: fs_free_space                       0
>    periodic writeback                      377
>    single inode wait                         0
>    writeback_wb wait                         1

That's already useful data, and could be further extended (in
future patches) to answer questions like "what's the writeback
efficiency in terms of effective chunk size?"

So in future there could be lines like

    pages: balance_dirty_pages           2561544
    chunks: balance_dirty_pages          XXXXXXX
    works: balance_dirty_pages           XXXXXXX

or even derived lines like

    pages_per_chunk: balance_dirty_pages         XXXXXXX
    pages_per_work: balance_dirty_pages          XXXXXXX

Another question is, how can the display format be script friendly?
The current form looks not easily parse-able at least for "cut"..

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-12 22:47   ` Curt Wohlgemuth
@ 2011-08-15 15:03     ` Jan Kara
  -1 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-15 15:03 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

On Fri 12-08-11 15:47:25, Curt Wohlgemuth wrote:
> Add a new file, /proc/writeback/stats, which displays
> machine global data for how many pages were cleaned for
> which reasons.  It also displays some additional counts for
> various writeback events.
> 
> These data are also available for each BDI, in
> /sys/block/<device>/bdi/writeback_stats .
  I think /sys/kernel/debug/bdi/<device>/writeback_stats might be a better
place since we don't really want to make a stable interface out of this,
do we?

> Sample output:
> 
>    page: balance_dirty_pages           2561544
>    page: background_writeout              5153
>    page: try_to_free_pages                   0
>    page: sync                                0
>    page: kupdate                        102723
>    page: fdatawrite                    1228779
>    page: laptop_periodic                     0
>    page: free_more_memory                    0
>    page: fs_free_space                       0
  The above stats are probably useful. I'm not so convinced about the stats
below - it looks like it should be simple enough to get them by enabling
some trace points and processing output (or if we are missing some
tracepoints, it would be worthwhile to add them).

>    periodic writeback                      377
>    single inode wait                         0
>    writeback_wb wait                         1
> 
> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
...
> +static size_t writeback_stats_to_str(struct writeback_stats *stats,
> +				    char *buf, size_t len)
> +{
> +	int bufsize = len - 1;
> +	int i, printed = 0;
> +	for (i = 0; i < WB_STAT_MAX; i++) {
> +		const char *label = wb_stats_labels[i];
> +		if (label == NULL)
> +			continue;
> +		printed += snprintf(buf + printed, bufsize - printed,
> +				"%-32s %10llu\n", label, stats->stats[i]);
  Cast stats->stats[i] to unsigned long long explicitely since it doesn't
have to be u64...

> +		if (printed >= bufsize) {
> +			buf[len - 1] = '\n';
> +			return len;
> +		}
> +	}
> +
> +	buf[printed - 1] = '\n';
> +	return printed;
> +}
> +
> +static int writeback_seq_show(struct seq_file *m, void *data)
> +{
> +	char *buf;
> +	size_t size;
> +	switch ((enum writeback_op)m->private) {
> +	case WB_STATS_OP:
  What's the point of WB_STATS_OP?

> +		size = seq_get_buf(m, &buf);
> +		if (size == 0)
> +			return 0;
> +		size = writeback_stats_print(writeback_sys_stats, buf, size);
> +		seq_commit(m, size);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +static int writeback_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, writeback_seq_show, PDE(inode)->data);
> +}
> +
> +static const struct file_operations writeback_ops = {
> +	.open           = writeback_open,
> +	.read           = seq_read,
> +	.llseek         = seq_lseek,
> +	.release        = single_release,
> +};
> +
> +
> +void __init proc_writeback_init(void)
> +{
> +	struct proc_dir_entry *base_dir;
> +	base_dir = proc_mkdir("writeback", NULL);
> +	if (base_dir == NULL) {
> +		printk(KERN_ERR "Creating /proc/writeback/ failed");
> +		return;
> +	}
> +
> +	writeback_sys_stats = alloc_percpu(struct writeback_stats);
> +
> +	proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
  Can user really write to the file?

> +			&writeback_ops, (void *)WB_STATS_OP);
> +}

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-15 15:03     ` Jan Kara
  0 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-15 15:03 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

On Fri 12-08-11 15:47:25, Curt Wohlgemuth wrote:
> Add a new file, /proc/writeback/stats, which displays
> machine global data for how many pages were cleaned for
> which reasons.  It also displays some additional counts for
> various writeback events.
> 
> These data are also available for each BDI, in
> /sys/block/<device>/bdi/writeback_stats .
  I think /sys/kernel/debug/bdi/<device>/writeback_stats might be a better
place since we don't really want to make a stable interface out of this,
do we?

> Sample output:
> 
>    page: balance_dirty_pages           2561544
>    page: background_writeout              5153
>    page: try_to_free_pages                   0
>    page: sync                                0
>    page: kupdate                        102723
>    page: fdatawrite                    1228779
>    page: laptop_periodic                     0
>    page: free_more_memory                    0
>    page: fs_free_space                       0
  The above stats are probably useful. I'm not so convinced about the stats
below - it looks like it should be simple enough to get them by enabling
some trace points and processing output (or if we are missing some
tracepoints, it would be worthwhile to add them).

>    periodic writeback                      377
>    single inode wait                         0
>    writeback_wb wait                         1
> 
> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
...
> +static size_t writeback_stats_to_str(struct writeback_stats *stats,
> +				    char *buf, size_t len)
> +{
> +	int bufsize = len - 1;
> +	int i, printed = 0;
> +	for (i = 0; i < WB_STAT_MAX; i++) {
> +		const char *label = wb_stats_labels[i];
> +		if (label == NULL)
> +			continue;
> +		printed += snprintf(buf + printed, bufsize - printed,
> +				"%-32s %10llu\n", label, stats->stats[i]);
  Cast stats->stats[i] to unsigned long long explicitely since it doesn't
have to be u64...

> +		if (printed >= bufsize) {
> +			buf[len - 1] = '\n';
> +			return len;
> +		}
> +	}
> +
> +	buf[printed - 1] = '\n';
> +	return printed;
> +}
> +
> +static int writeback_seq_show(struct seq_file *m, void *data)
> +{
> +	char *buf;
> +	size_t size;
> +	switch ((enum writeback_op)m->private) {
> +	case WB_STATS_OP:
  What's the point of WB_STATS_OP?

> +		size = seq_get_buf(m, &buf);
> +		if (size == 0)
> +			return 0;
> +		size = writeback_stats_print(writeback_sys_stats, buf, size);
> +		seq_commit(m, size);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +static int writeback_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, writeback_seq_show, PDE(inode)->data);
> +}
> +
> +static const struct file_operations writeback_ops = {
> +	.open           = writeback_open,
> +	.read           = seq_read,
> +	.llseek         = seq_lseek,
> +	.release        = single_release,
> +};
> +
> +
> +void __init proc_writeback_init(void)
> +{
> +	struct proc_dir_entry *base_dir;
> +	base_dir = proc_mkdir("writeback", NULL);
> +	if (base_dir == NULL) {
> +		printk(KERN_ERR "Creating /proc/writeback/ failed");
> +		return;
> +	}
> +
> +	writeback_sys_stats = alloc_percpu(struct writeback_stats);
> +
> +	proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
  Can user really write to the file?

> +			&writeback_ops, (void *)WB_STATS_OP);
> +}

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 13:48     ` Wu Fengguang
@ 2011-08-15 17:16       ` Curt Wohlgemuth
  -1 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-15 17:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Fengguang:

Thanks for looking at this.

On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> Curt,
>
> Some thoughts about the interface..before dipping into the code.
>
> On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
>> Add a new file, /proc/writeback/stats, which displays
>
> That's creating a new top directory in /proc. Do you have plans for
> adding more files under it?

Good question.  We have several files under /proc/writeback in our
kernels that we created at various times, some of which are probably
no longer useful, but others seem to be.  For example:
  - congestion: prints # of calls, # of jiffies slept in
congestion_wait() / io_schedule_timeout() from various call points
  - threshold_dirty : prints the current global FG threshold
  - threshold_bg : prints the current global BG threshold
  - pages_cleaned : prints the # pages sent to writeback -- same as
'nr_written' in /proc/vmstat (ours was earlier :-( )
  - pages_dirtied (same as nr_dirtied in /proc/vmstat)
  - prop_vm_XXX : print shift/events from vm_completions and vm_dirties

I'm not sure right now if global FG/BG thresholds appear anywhere in a
3.1 kernel; if so, the two threshold files above are superfluous.  So
are the pages_cleaned/dirtied.  The prop_vm files have not proven
useful to me.  I think the congestion file has a lot of value,
especially in an IO-less throttling world...

>
>> machine global data for how many pages were cleaned for
>> which reasons.  It also displays some additional counts for
>> various writeback events.
>>
>> These data are also available for each BDI, in
>> /sys/block/<device>/bdi/writeback_stats .
>
>> Sample output:
>>
>>    page: balance_dirty_pages           2561544
>>    page: background_writeout              5153
>>    page: try_to_free_pages                   0
>>    page: sync                                0
>>    page: kupdate                        102723
>>    page: fdatawrite                    1228779
>>    page: laptop_periodic                     0
>>    page: free_more_memory                    0
>>    page: fs_free_space                       0
>>    periodic writeback                      377
>>    single inode wait                         0
>>    writeback_wb wait                         1
>
> That's already useful data, and could be further extended (in
> future patches) to answer questions like "what's the writeback
> efficiency in terms of effective chunk size?"
>
> So in future there could be lines like
>
>    pages: balance_dirty_pages           2561544
>    chunks: balance_dirty_pages          XXXXXXX
>    works: balance_dirty_pages           XXXXXXX
>
> or even derived lines like
>
>    pages_per_chunk: balance_dirty_pages         XXXXXXX
>    pages_per_work: balance_dirty_pages          XXXXXXX
>
> Another question is, how can the display format be script friendly?
> The current form looks not easily parse-able at least for "cut"..

I suppose you mean because of the variable number of tokens.  Yeah,
this can be hard.  Of course, I always just use "awk '{print $NF}'"
and it works for me :-) .  But I'd be happy to change these to use a
consistent # of args.

Thanks,
Curt


> Thanks,
> Fengguang
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-15 17:16       ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-15 17:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Fengguang:

Thanks for looking at this.

On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> Curt,
>
> Some thoughts about the interface..before dipping into the code.
>
> On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
>> Add a new file, /proc/writeback/stats, which displays
>
> That's creating a new top directory in /proc. Do you have plans for
> adding more files under it?

Good question.  We have several files under /proc/writeback in our
kernels that we created at various times, some of which are probably
no longer useful, but others seem to be.  For example:
  - congestion: prints # of calls, # of jiffies slept in
congestion_wait() / io_schedule_timeout() from various call points
  - threshold_dirty : prints the current global FG threshold
  - threshold_bg : prints the current global BG threshold
  - pages_cleaned : prints the # pages sent to writeback -- same as
'nr_written' in /proc/vmstat (ours was earlier :-( )
  - pages_dirtied (same as nr_dirtied in /proc/vmstat)
  - prop_vm_XXX : print shift/events from vm_completions and vm_dirties

I'm not sure right now if global FG/BG thresholds appear anywhere in a
3.1 kernel; if so, the two threshold files above are superfluous.  So
are the pages_cleaned/dirtied.  The prop_vm files have not proven
useful to me.  I think the congestion file has a lot of value,
especially in an IO-less throttling world...

>
>> machine global data for how many pages were cleaned for
>> which reasons.  It also displays some additional counts for
>> various writeback events.
>>
>> These data are also available for each BDI, in
>> /sys/block/<device>/bdi/writeback_stats .
>
>> Sample output:
>>
>>    page: balance_dirty_pages           2561544
>>    page: background_writeout              5153
>>    page: try_to_free_pages                   0
>>    page: sync                                0
>>    page: kupdate                        102723
>>    page: fdatawrite                    1228779
>>    page: laptop_periodic                     0
>>    page: free_more_memory                    0
>>    page: fs_free_space                       0
>>    periodic writeback                      377
>>    single inode wait                         0
>>    writeback_wb wait                         1
>
> That's already useful data, and could be further extended (in
> future patches) to answer questions like "what's the writeback
> efficiency in terms of effective chunk size?"
>
> So in future there could be lines like
>
>    pages: balance_dirty_pages           2561544
>    chunks: balance_dirty_pages          XXXXXXX
>    works: balance_dirty_pages           XXXXXXX
>
> or even derived lines like
>
>    pages_per_chunk: balance_dirty_pages         XXXXXXX
>    pages_per_work: balance_dirty_pages          XXXXXXX
>
> Another question is, how can the display format be script friendly?
> The current form looks not easily parse-able at least for "cut"..

I suppose you mean because of the variable number of tokens.  Yeah,
this can be hard.  Of course, I always just use "awk '{print $NF}'"
and it works for me :-) .  But I'd be happy to change these to use a
consistent # of args.

Thanks,
Curt


> Thanks,
> Fengguang
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 15:03     ` Jan Kara
  (?)
@ 2011-08-15 17:24     ` Curt Wohlgemuth
  2011-08-16 12:26         ` Wu Fengguang
  -1 siblings, 1 reply; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-15 17:24 UTC (permalink / raw)
  To: Jan Kara
  Cc: Christoph Hellwig, Wu Fengguang, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Jan:

On Mon, Aug 15, 2011 at 8:03 AM, Jan Kara <jack@suse.cz> wrote:
> On Fri 12-08-11 15:47:25, Curt Wohlgemuth wrote:
>> Add a new file, /proc/writeback/stats, which displays
>> machine global data for how many pages were cleaned for
>> which reasons.  It also displays some additional counts for
>> various writeback events.
>>
>> These data are also available for each BDI, in
>> /sys/block/<device>/bdi/writeback_stats .
>  I think /sys/kernel/debug/bdi/<device>/writeback_stats might be a better
> place since we don't really want to make a stable interface out of this,
> do we?

Okay, I was waiting for someone to request this, I'll change it.

>> Sample output:
>>
>>    page: balance_dirty_pages           2561544
>>    page: background_writeout              5153
>>    page: try_to_free_pages                   0
>>    page: sync                                0
>>    page: kupdate                        102723
>>    page: fdatawrite                    1228779
>>    page: laptop_periodic                     0
>>    page: free_more_memory                    0
>>    page: fs_free_space                       0
>  The above stats are probably useful. I'm not so convinced about the stats
> below - it looks like it should be simple enough to get them by enabling
> some trace points and processing output (or if we are missing some
> tracepoints, it would be worthwhile to add them).

For these specifically, I'd agree with you.  In general, though, I
think that having generally available aggregated stats is really
useful, in a different way than tracepoints are.

>
>>    periodic writeback                      377
>>    single inode wait                         0
>>    writeback_wb wait                         1
>>
>> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
> ...
>> +static size_t writeback_stats_to_str(struct writeback_stats *stats,
>> +                                 char *buf, size_t len)
>> +{
>> +     int bufsize = len - 1;
>> +     int i, printed = 0;
>> +     for (i = 0; i < WB_STAT_MAX; i++) {
>> +             const char *label = wb_stats_labels[i];
>> +             if (label == NULL)
>> +                     continue;
>> +             printed += snprintf(buf + printed, bufsize - printed,
>> +                             "%-32s %10llu\n", label, stats->stats[i]);
>  Cast stats->stats[i] to unsigned long long explicitely since it doesn't
> have to be u64...

Thanks.

>> +             if (printed >= bufsize) {
>> +                     buf[len - 1] = '\n';
>> +                     return len;
>> +             }
>> +     }
>> +
>> +     buf[printed - 1] = '\n';
>> +     return printed;
>> +}
>> +
>> +static int writeback_seq_show(struct seq_file *m, void *data)
>> +{
>> +     char *buf;
>> +     size_t size;
>> +     switch ((enum writeback_op)m->private) {
>> +     case WB_STATS_OP:
>  What's the point of WB_STATS_OP?

It's a vestige of the many more files under /proc/writeback/ that we
have in our kernels (see my response to Fengguang's email) -- and so
processing each file is done via a different WB_xxx_OP.  I forgot to
simplify this in the patch I sent out; will fix this.

>
>> +             size = seq_get_buf(m, &buf);
>> +             if (size == 0)
>> +                     return 0;
>> +             size = writeback_stats_print(writeback_sys_stats, buf, size);
>> +             seq_commit(m, size);
>> +             break;
>> +     default:
>> +             break;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int writeback_open(struct inode *inode, struct file *file)
>> +{
>> +     return single_open(file, writeback_seq_show, PDE(inode)->data);
>> +}
>> +
>> +static const struct file_operations writeback_ops = {
>> +     .open           = writeback_open,
>> +     .read           = seq_read,
>> +     .llseek         = seq_lseek,
>> +     .release        = single_release,
>> +};
>> +
>> +
>> +void __init proc_writeback_init(void)
>> +{
>> +     struct proc_dir_entry *base_dir;
>> +     base_dir = proc_mkdir("writeback", NULL);
>> +     if (base_dir == NULL) {
>> +             printk(KERN_ERR "Creating /proc/writeback/ failed");
>> +             return;
>> +     }
>> +
>> +     writeback_sys_stats = alloc_percpu(struct writeback_stats);
>> +
>> +     proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
>  Can user really write to the file?

No to this file, I'll fix, thanks.  (Yes to some of our
/proc/writeback/ files, to clear them.)

Thanks,
Curt

>
>> +                     &writeback_ops, (void *)WB_STATS_OP);
>> +}
>
>                                                                Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 17:16       ` Curt Wohlgemuth
@ 2011-08-15 18:40         ` Jan Kara
  -1 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-15 18:40 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Wu Fengguang, Christoph Hellwig, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

On Mon 15-08-11 10:16:38, Curt Wohlgemuth wrote:
> On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> > Curt,
> >
> > Some thoughts about the interface..before dipping into the code.
> >
> > On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
> >> Add a new file, /proc/writeback/stats, which displays
> >
> > That's creating a new top directory in /proc. Do you have plans for
> > adding more files under it?
> 
> Good question.  We have several files under /proc/writeback in our
> kernels that we created at various times, some of which are probably
> no longer useful, but others seem to be.  For example:
>   - congestion: prints # of calls, # of jiffies slept in
> congestion_wait() / io_schedule_timeout() from various call points
>   - threshold_dirty : prints the current global FG threshold
>   - threshold_bg : prints the current global BG threshold
>   - pages_cleaned : prints the # pages sent to writeback -- same as
> 'nr_written' in /proc/vmstat (ours was earlier :-( )
>   - pages_dirtied (same as nr_dirtied in /proc/vmstat)
>   - prop_vm_XXX : print shift/events from vm_completions and vm_dirties
>
> I'm not sure right now if global FG/BG thresholds appear anywhere in a
> 3.1 kernel; if so, the two threshold files above are superfluous.  So
> are the pages_cleaned/dirtied.  The prop_vm files have not proven
> useful to me.  I think the congestion file has a lot of value,
> especially in an IO-less throttling world...
  /sys/kernel/debug/bdi/<dev>/stats has BdiDirtyThresh, DirtyThresh, and
BackgroundThresh. So we should already expose all you have in the threshold
files.

Regarding congestion_wait() statistics - do I get right that the numbers
gathered actually depend on the number of threads using the congested
device? They are something like
  \sum_{over threads} time_waited_for_bdi
How do you interpret the resulting numbers then?

								Honza

> >> machine global data for how many pages were cleaned for
> >> which reasons.  It also displays some additional counts for
> >> various writeback events.
> >>
> >> These data are also available for each BDI, in
> >> /sys/block/<device>/bdi/writeback_stats .
> >
> >> Sample output:
> >>
> >>    page: balance_dirty_pages           2561544
> >>    page: background_writeout              5153
> >>    page: try_to_free_pages                   0
> >>    page: sync                                0
> >>    page: kupdate                        102723
> >>    page: fdatawrite                    1228779
> >>    page: laptop_periodic                     0
> >>    page: free_more_memory                    0
> >>    page: fs_free_space                       0
> >>    periodic writeback                      377
> >>    single inode wait                         0
> >>    writeback_wb wait                         1
> >
> > That's already useful data, and could be further extended (in
> > future patches) to answer questions like "what's the writeback
> > efficiency in terms of effective chunk size?"
> >
> > So in future there could be lines like
> >
> >    pages: balance_dirty_pages           2561544
> >    chunks: balance_dirty_pages          XXXXXXX
> >    works: balance_dirty_pages           XXXXXXX
> >
> > or even derived lines like
> >
> >    pages_per_chunk: balance_dirty_pages         XXXXXXX
> >    pages_per_work: balance_dirty_pages          XXXXXXX
> >
> > Another question is, how can the display format be script friendly?
> > The current form looks not easily parse-able at least for "cut"..
> 
> I suppose you mean because of the variable number of tokens.  Yeah,
> this can be hard.  Of course, I always just use "awk '{print $NF}'"
> and it works for me :-) .  But I'd be happy to change these to use a
> consistent # of args.
> 
> Thanks,
> Curt
> 
> 
> > Thanks,
> > Fengguang
> >
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-15 18:40         ` Jan Kara
  0 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-15 18:40 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Wu Fengguang, Christoph Hellwig, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

On Mon 15-08-11 10:16:38, Curt Wohlgemuth wrote:
> On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> > Curt,
> >
> > Some thoughts about the interface..before dipping into the code.
> >
> > On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
> >> Add a new file, /proc/writeback/stats, which displays
> >
> > That's creating a new top directory in /proc. Do you have plans for
> > adding more files under it?
> 
> Good question.  We have several files under /proc/writeback in our
> kernels that we created at various times, some of which are probably
> no longer useful, but others seem to be.  For example:
>   - congestion: prints # of calls, # of jiffies slept in
> congestion_wait() / io_schedule_timeout() from various call points
>   - threshold_dirty : prints the current global FG threshold
>   - threshold_bg : prints the current global BG threshold
>   - pages_cleaned : prints the # pages sent to writeback -- same as
> 'nr_written' in /proc/vmstat (ours was earlier :-( )
>   - pages_dirtied (same as nr_dirtied in /proc/vmstat)
>   - prop_vm_XXX : print shift/events from vm_completions and vm_dirties
>
> I'm not sure right now if global FG/BG thresholds appear anywhere in a
> 3.1 kernel; if so, the two threshold files above are superfluous.  So
> are the pages_cleaned/dirtied.  The prop_vm files have not proven
> useful to me.  I think the congestion file has a lot of value,
> especially in an IO-less throttling world...
  /sys/kernel/debug/bdi/<dev>/stats has BdiDirtyThresh, DirtyThresh, and
BackgroundThresh. So we should already expose all you have in the threshold
files.

Regarding congestion_wait() statistics - do I get right that the numbers
gathered actually depend on the number of threads using the congested
device? They are something like
  \sum_{over threads} time_waited_for_bdi
How do you interpret the resulting numbers then?

								Honza

> >> machine global data for how many pages were cleaned for
> >> which reasons.  It also displays some additional counts for
> >> various writeback events.
> >>
> >> These data are also available for each BDI, in
> >> /sys/block/<device>/bdi/writeback_stats .
> >
> >> Sample output:
> >>
> >>    page: balance_dirty_pages           2561544
> >>    page: background_writeout              5153
> >>    page: try_to_free_pages                   0
> >>    page: sync                                0
> >>    page: kupdate                        102723
> >>    page: fdatawrite                    1228779
> >>    page: laptop_periodic                     0
> >>    page: free_more_memory                    0
> >>    page: fs_free_space                       0
> >>    periodic writeback                      377
> >>    single inode wait                         0
> >>    writeback_wb wait                         1
> >
> > That's already useful data, and could be further extended (in
> > future patches) to answer questions like "what's the writeback
> > efficiency in terms of effective chunk size?"
> >
> > So in future there could be lines like
> >
> >    pages: balance_dirty_pages           2561544
> >    chunks: balance_dirty_pages          XXXXXXX
> >    works: balance_dirty_pages           XXXXXXX
> >
> > or even derived lines like
> >
> >    pages_per_chunk: balance_dirty_pages         XXXXXXX
> >    pages_per_work: balance_dirty_pages          XXXXXXX
> >
> > Another question is, how can the display format be script friendly?
> > The current form looks not easily parse-able at least for "cut"..
> 
> I suppose you mean because of the variable number of tokens.  Yeah,
> this can be hard.  Of course, I always just use "awk '{print $NF}'"
> and it works for me :-) .  But I'd be happy to change these to use a
> consistent # of args.
> 
> Thanks,
> Curt
> 
> 
> > Thanks,
> > Fengguang
> >

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 18:40         ` Jan Kara
@ 2011-08-15 18:56           ` Curt Wohlgemuth
  -1 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-15 18:56 UTC (permalink / raw)
  To: Jan Kara
  Cc: Wu Fengguang, Christoph Hellwig, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Jan:

On Mon, Aug 15, 2011 at 11:40 AM, Jan Kara <jack@suse.cz> wrote:
> On Mon 15-08-11 10:16:38, Curt Wohlgemuth wrote:
>> On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
>> > Curt,
>> >
>> > Some thoughts about the interface..before dipping into the code.
>> >
>> > On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
>> >> Add a new file, /proc/writeback/stats, which displays
>> >
>> > That's creating a new top directory in /proc. Do you have plans for
>> > adding more files under it?
>>
>> Good question.  We have several files under /proc/writeback in our
>> kernels that we created at various times, some of which are probably
>> no longer useful, but others seem to be.  For example:
>>   - congestion: prints # of calls, # of jiffies slept in
>> congestion_wait() / io_schedule_timeout() from various call points
>>   - threshold_dirty : prints the current global FG threshold
>>   - threshold_bg : prints the current global BG threshold
>>   - pages_cleaned : prints the # pages sent to writeback -- same as
>> 'nr_written' in /proc/vmstat (ours was earlier :-( )
>>   - pages_dirtied (same as nr_dirtied in /proc/vmstat)
>>   - prop_vm_XXX : print shift/events from vm_completions and vm_dirties
>>
>> I'm not sure right now if global FG/BG thresholds appear anywhere in a
>> 3.1 kernel; if so, the two threshold files above are superfluous.  So
>> are the pages_cleaned/dirtied.  The prop_vm files have not proven
>> useful to me.  I think the congestion file has a lot of value,
>> especially in an IO-less throttling world...
>  /sys/kernel/debug/bdi/<dev>/stats has BdiDirtyThresh, DirtyThresh, and
> BackgroundThresh. So we should already expose all you have in the threshold
> files.

Ah, right, I knew that and overlooked it.  I get confused looking at
lots of kernel versions and patches at the same time :-) .

> Regarding congestion_wait() statistics - do I get right that the numbers
> gathered actually depend on the number of threads using the congested
> device? They are something like
>  \sum_{over threads} time_waited_for_bdi
> How do you interpret the resulting numbers then?

I don't have it by thread; just stupidly as totals, like this:

calls: ttfp           11290
time: ttfp        558191
calls: shrink_inactive_list isolated       xxx
time : shrink_inactive_list isolated            xxx
calls: shrink_inactive_list lumpy reclaim       xxx
time : shrink_inactive_list lumpy reclaim          xxx
calls: balance_pgdat                                xxx
time : balance_pgdat                                xxx
calls: alloc_pages_high_priority                    xxx
time : alloc_pages_high_priority                    xxx
calls: alloc_pages_slowpath                         xxx
time : alloc_pages_slowpath                         xxx
calls: throttle_vm_writeout                         xxx
time : throttle_vm_writeout                         xxx
calls: balance_dirty_pages                          xxx
time : balance_dirty_pages                         xxx

Note that the "call" points above are from a very old (2.6.34 +
backports) kernel, but you get the idea.  We just wrap
congestion_wait() with a routine that takes a 'type' parameter; does
the congestion_wait(); and increments the appropriate 'call' stat, and
adds to the appropriate 'time' stat the return value from
congestion_wait().

For a given workload, you can get an idea for where congestion is
adding to delays.  I really think that for IO-less
balance_dirty_pages(), we need some insight into how long writer
threads are being throttled.  And tracepoints are great, but not
sufficient, IMHO.

Thanks,
Curt

>
>                                                                Honza
>
>> >> machine global data for how many pages were cleaned for
>> >> which reasons.  It also displays some additional counts for
>> >> various writeback events.
>> >>
>> >> These data are also available for each BDI, in
>> >> /sys/block/<device>/bdi/writeback_stats .
>> >
>> >> Sample output:
>> >>
>> >>    page: balance_dirty_pages           2561544
>> >>    page: background_writeout              5153
>> >>    page: try_to_free_pages                   0
>> >>    page: sync                                0
>> >>    page: kupdate                        102723
>> >>    page: fdatawrite                    1228779
>> >>    page: laptop_periodic                     0
>> >>    page: free_more_memory                    0
>> >>    page: fs_free_space                       0
>> >>    periodic writeback                      377
>> >>    single inode wait                         0
>> >>    writeback_wb wait                         1
>> >
>> > That's already useful data, and could be further extended (in
>> > future patches) to answer questions like "what's the writeback
>> > efficiency in terms of effective chunk size?"
>> >
>> > So in future there could be lines like
>> >
>> >    pages: balance_dirty_pages           2561544
>> >    chunks: balance_dirty_pages          XXXXXXX
>> >    works: balance_dirty_pages           XXXXXXX
>> >
>> > or even derived lines like
>> >
>> >    pages_per_chunk: balance_dirty_pages         XXXXXXX
>> >    pages_per_work: balance_dirty_pages          XXXXXXX
>> >
>> > Another question is, how can the display format be script friendly?
>> > The current form looks not easily parse-able at least for "cut"..
>>
>> I suppose you mean because of the variable number of tokens.  Yeah,
>> this can be hard.  Of course, I always just use "awk '{print $NF}'"
>> and it works for me :-) .  But I'd be happy to change these to use a
>> consistent # of args.
>>
>> Thanks,
>> Curt
>>
>>
>> > Thanks,
>> > Fengguang
>> >
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-15 18:56           ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-08-15 18:56 UTC (permalink / raw)
  To: Jan Kara
  Cc: Wu Fengguang, Christoph Hellwig, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Jan:

On Mon, Aug 15, 2011 at 11:40 AM, Jan Kara <jack@suse.cz> wrote:
> On Mon 15-08-11 10:16:38, Curt Wohlgemuth wrote:
>> On Mon, Aug 15, 2011 at 6:48 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
>> > Curt,
>> >
>> > Some thoughts about the interface..before dipping into the code.
>> >
>> > On Sat, Aug 13, 2011 at 06:47:25AM +0800, Curt Wohlgemuth wrote:
>> >> Add a new file, /proc/writeback/stats, which displays
>> >
>> > That's creating a new top directory in /proc. Do you have plans for
>> > adding more files under it?
>>
>> Good question.  We have several files under /proc/writeback in our
>> kernels that we created at various times, some of which are probably
>> no longer useful, but others seem to be.  For example:
>>   - congestion: prints # of calls, # of jiffies slept in
>> congestion_wait() / io_schedule_timeout() from various call points
>>   - threshold_dirty : prints the current global FG threshold
>>   - threshold_bg : prints the current global BG threshold
>>   - pages_cleaned : prints the # pages sent to writeback -- same as
>> 'nr_written' in /proc/vmstat (ours was earlier :-( )
>>   - pages_dirtied (same as nr_dirtied in /proc/vmstat)
>>   - prop_vm_XXX : print shift/events from vm_completions and vm_dirties
>>
>> I'm not sure right now if global FG/BG thresholds appear anywhere in a
>> 3.1 kernel; if so, the two threshold files above are superfluous.  So
>> are the pages_cleaned/dirtied.  The prop_vm files have not proven
>> useful to me.  I think the congestion file has a lot of value,
>> especially in an IO-less throttling world...
>  /sys/kernel/debug/bdi/<dev>/stats has BdiDirtyThresh, DirtyThresh, and
> BackgroundThresh. So we should already expose all you have in the threshold
> files.

Ah, right, I knew that and overlooked it.  I get confused looking at
lots of kernel versions and patches at the same time :-) .

> Regarding congestion_wait() statistics - do I get right that the numbers
> gathered actually depend on the number of threads using the congested
> device? They are something like
>  \sum_{over threads} time_waited_for_bdi
> How do you interpret the resulting numbers then?

I don't have it by thread; just stupidly as totals, like this:

calls: ttfp           11290
time: ttfp        558191
calls: shrink_inactive_list isolated       xxx
time : shrink_inactive_list isolated            xxx
calls: shrink_inactive_list lumpy reclaim       xxx
time : shrink_inactive_list lumpy reclaim          xxx
calls: balance_pgdat                                xxx
time : balance_pgdat                                xxx
calls: alloc_pages_high_priority                    xxx
time : alloc_pages_high_priority                    xxx
calls: alloc_pages_slowpath                         xxx
time : alloc_pages_slowpath                         xxx
calls: throttle_vm_writeout                         xxx
time : throttle_vm_writeout                         xxx
calls: balance_dirty_pages                          xxx
time : balance_dirty_pages                         xxx

Note that the "call" points above are from a very old (2.6.34 +
backports) kernel, but you get the idea.  We just wrap
congestion_wait() with a routine that takes a 'type' parameter; does
the congestion_wait(); and increments the appropriate 'call' stat, and
adds to the appropriate 'time' stat the return value from
congestion_wait().

For a given workload, you can get an idea for where congestion is
adding to delays.  I really think that for IO-less
balance_dirty_pages(), we need some insight into how long writer
threads are being throttled.  And tracepoints are great, but not
sufficient, IMHO.

Thanks,
Curt

>
>                                                                Honza
>
>> >> machine global data for how many pages were cleaned for
>> >> which reasons.  It also displays some additional counts for
>> >> various writeback events.
>> >>
>> >> These data are also available for each BDI, in
>> >> /sys/block/<device>/bdi/writeback_stats .
>> >
>> >> Sample output:
>> >>
>> >>    page: balance_dirty_pages           2561544
>> >>    page: background_writeout              5153
>> >>    page: try_to_free_pages                   0
>> >>    page: sync                                0
>> >>    page: kupdate                        102723
>> >>    page: fdatawrite                    1228779
>> >>    page: laptop_periodic                     0
>> >>    page: free_more_memory                    0
>> >>    page: fs_free_space                       0
>> >>    periodic writeback                      377
>> >>    single inode wait                         0
>> >>    writeback_wb wait                         1
>> >
>> > That's already useful data, and could be further extended (in
>> > future patches) to answer questions like "what's the writeback
>> > efficiency in terms of effective chunk size?"
>> >
>> > So in future there could be lines like
>> >
>> >    pages: balance_dirty_pages           2561544
>> >    chunks: balance_dirty_pages          XXXXXXX
>> >    works: balance_dirty_pages           XXXXXXX
>> >
>> > or even derived lines like
>> >
>> >    pages_per_chunk: balance_dirty_pages         XXXXXXX
>> >    pages_per_work: balance_dirty_pages          XXXXXXX
>> >
>> > Another question is, how can the display format be script friendly?
>> > The current form looks not easily parse-able at least for "cut"..
>>
>> I suppose you mean because of the variable number of tokens.  Yeah,
>> this can be hard.  Of course, I always just use "awk '{print $NF}'"
>> and it works for me :-) .  But I'd be happy to change these to use a
>> consistent # of args.
>>
>> Thanks,
>> Curt
>>
>>
>> > Thanks,
>> > Fengguang
>> >
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 17:16       ` Curt Wohlgemuth
  (?)
  (?)
@ 2011-08-16 12:10       ` Wu Fengguang
  -1 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-16 12:10 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Curt,

> > Another question is, how can the display format be script friendly?
> > The current form looks not easily parse-able at least for "cut"..
> 
> I suppose you mean because of the variable number of tokens.  Yeah,
> this can be hard.  Of course, I always just use "awk '{print $NF}'"
> and it works for me :-) .  But I'd be happy to change these to use a
> consistent # of args.

Yes, thank you.  One possible format come to my mind later is to
present the numbers in a 2d table, like this:

                        pages           chunks          works          chunk_kb    work_kbps
balance_dirty_pages     XXXXX             XXXX            XXX              XXXX        XXXXX
background              XXXXX             XXXX            XXX              XXXX        XXXXX
sync                    XXXXX             XXXX            XXX              XXXX        XXXXX
...

The format is not only human friendly and trivial for scripting, but
also permits to add new lines or columns (append only though) without
breaking compatibility.

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 17:24     ` Curt Wohlgemuth
@ 2011-08-16 12:26         ` Wu Fengguang
  0 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-16 12:26 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Jan Kara, Christoph Hellwig, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Curt,

> >  The above stats are probably useful. I'm not so convinced about the stats
> > below - it looks like it should be simple enough to get them by enabling
> > some trace points and processing output (or if we are missing some
> > tracepoints, it would be worthwhile to add them).
> 
> For these specifically, I'd agree with you.  In general, though, I
> think that having generally available aggregated stats is really
> useful, in a different way than tracepoints are.

If there comes such useful aggregated stats in future, they may go to
vmstat and/or /debug/bdi/<dev>/stats.

Then we make the writeback stats a simple uniform interface rather
than a hybrid one.

> >
> >>    periodic writeback                      377

The above one can go to the "work" column, "periodic" row of the
writeback stats table :)

I'm in particular interested in the "work" column of the
"try_to_free_pages" row. I also suspect there could be many
short-lived background work, hence there lots of them.

> >>    single inode wait                         0
> >>    writeback_wb wait                         1

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-16 12:26         ` Wu Fengguang
  0 siblings, 0 replies; 27+ messages in thread
From: Wu Fengguang @ 2011-08-16 12:26 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Jan Kara, Christoph Hellwig, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Curt,

> > A The above stats are probably useful. I'm not so convinced about the stats
> > below - it looks like it should be simple enough to get them by enabling
> > some trace points and processing output (or if we are missing some
> > tracepoints, it would be worthwhile to add them).
> 
> For these specifically, I'd agree with you.  In general, though, I
> think that having generally available aggregated stats is really
> useful, in a different way than tracepoints are.

If there comes such useful aggregated stats in future, they may go to
vmstat and/or /debug/bdi/<dev>/stats.

Then we make the writeback stats a simple uniform interface rather
than a hybrid one.

> >
> >> A  A periodic writeback A  A  A  A  A  A  A  A  A  A  A 377

The above one can go to the "work" column, "periodic" row of the
writeback stats table :)

I'm in particular interested in the "work" column of the
"try_to_free_pages" row. I also suspect there could be many
short-lived background work, hence there lots of them.

> >> A  A single inode wait A  A  A  A  A  A  A  A  A  A  A  A  0
> >> A  A writeback_wb wait A  A  A  A  A  A  A  A  A  A  A  A  1

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
  2011-08-15 18:56           ` Curt Wohlgemuth
@ 2011-08-16 13:10             ` Jan Kara
  -1 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-16 13:10 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Jan Kara, Wu Fengguang, Christoph Hellwig, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

  Hi Curt,

On Mon 15-08-11 11:56:08, Curt Wohlgemuth wrote:
> On Mon, Aug 15, 2011 at 11:40 AM, Jan Kara <jack@suse.cz> wrote:
> > Regarding congestion_wait() statistics - do I get right that the numbers
> > gathered actually depend on the number of threads using the congested
> > device? They are something like
> >  \sum_{over threads} time_waited_for_bdi
> > How do you interpret the resulting numbers then?
> 
> I don't have it by thread; just stupidly as totals, like this:
> 
> calls: ttfp           11290
> time: ttfp        558191
> calls: shrink_inactive_list isolated       xxx
> time : shrink_inactive_list isolated            xxx
> calls: shrink_inactive_list lumpy reclaim       xxx
> time : shrink_inactive_list lumpy reclaim          xxx
> calls: balance_pgdat                                xxx
> time : balance_pgdat                                xxx
> calls: alloc_pages_high_priority                    xxx
> time : alloc_pages_high_priority                    xxx
> calls: alloc_pages_slowpath                         xxx
> time : alloc_pages_slowpath                         xxx
> calls: throttle_vm_writeout                         xxx
> time : throttle_vm_writeout                         xxx
> calls: balance_dirty_pages                          xxx
> time : balance_dirty_pages                         xxx
  Yes, that's what I was expecting.

> Note that the "call" points above are from a very old (2.6.34 +
> backports) kernel, but you get the idea.  We just wrap
> congestion_wait() with a routine that takes a 'type' parameter; does
> the congestion_wait(); and increments the appropriate 'call' stat, and
> adds to the appropriate 'time' stat the return value from
> congestion_wait().
  OK I see. I imagine that could be useful when you are monitoring your
systems or doing some long term observations.

> For a given workload, you can get an idea for where congestion is
> adding to delays.  I really think that for IO-less
> balance_dirty_pages(), we need some insight into how long writer
> threads are being throttled.  And tracepoints are great, but not
> sufficient, IMHO.
  Well, we are going to report computed delays via tracepoints which are
going to be prime interface for debugging but I agree that some statistics
could be useful as well and more lightweight (no need to pass lots of trace
data to userspace). OTOH I wonder if we shouldn't write a userspace tool
processing trace information from balance_dirty_pages() and generating
exactly those statistics you want in the kernel - something like writeback
tracer...

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 v2] writeback: Add writeback stats for pages written
@ 2011-08-16 13:10             ` Jan Kara
  0 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2011-08-16 13:10 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Jan Kara, Wu Fengguang, Christoph Hellwig, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

  Hi Curt,

On Mon 15-08-11 11:56:08, Curt Wohlgemuth wrote:
> On Mon, Aug 15, 2011 at 11:40 AM, Jan Kara <jack@suse.cz> wrote:
> > Regarding congestion_wait() statistics - do I get right that the numbers
> > gathered actually depend on the number of threads using the congested
> > device? They are something like
> >  \sum_{over threads} time_waited_for_bdi
> > How do you interpret the resulting numbers then?
> 
> I don't have it by thread; just stupidly as totals, like this:
> 
> calls: ttfp           11290
> time: ttfp        558191
> calls: shrink_inactive_list isolated       xxx
> time : shrink_inactive_list isolated            xxx
> calls: shrink_inactive_list lumpy reclaim       xxx
> time : shrink_inactive_list lumpy reclaim          xxx
> calls: balance_pgdat                                xxx
> time : balance_pgdat                                xxx
> calls: alloc_pages_high_priority                    xxx
> time : alloc_pages_high_priority                    xxx
> calls: alloc_pages_slowpath                         xxx
> time : alloc_pages_slowpath                         xxx
> calls: throttle_vm_writeout                         xxx
> time : throttle_vm_writeout                         xxx
> calls: balance_dirty_pages                          xxx
> time : balance_dirty_pages                         xxx
  Yes, that's what I was expecting.

> Note that the "call" points above are from a very old (2.6.34 +
> backports) kernel, but you get the idea.  We just wrap
> congestion_wait() with a routine that takes a 'type' parameter; does
> the congestion_wait(); and increments the appropriate 'call' stat, and
> adds to the appropriate 'time' stat the return value from
> congestion_wait().
  OK I see. I imagine that could be useful when you are monitoring your
systems or doing some long term observations.

> For a given workload, you can get an idea for where congestion is
> adding to delays.  I really think that for IO-less
> balance_dirty_pages(), we need some insight into how long writer
> threads are being throttled.  And tracepoints are great, but not
> sufficient, IMHO.
  Well, we are going to report computed delays via tracepoints which are
going to be prime interface for debugging but I agree that some statistics
could be useful as well and more lightweight (no need to pass lots of trace
data to userspace). OTOH I wonder if we shouldn't write a userspace tool
processing trace information from balance_dirty_pages() and generating
exactly those statistics you want in the kernel - something like writeback
tracer...

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
  2011-08-12 22:47 ` Curt Wohlgemuth
@ 2011-09-28 15:02   ` Christoph Hellwig
  -1 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2011-09-28 15:02 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

Did we get to any conclusion on this series?  I think having these
additional reasons in the tracepoints and the additional statistics
would be extremely useful for us who have to deal with writeback
issues frequently.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-09-28 15:02   ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2011-09-28 15:02 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

Did we get to any conclusion on this series?  I think having these
additional reasons in the tracepoints and the additional statistics
would be extremely useful for us who have to deal with writeback
issues frequently.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
  2011-09-28 15:02   ` Christoph Hellwig
  (?)
@ 2011-10-07 15:28   ` Wu Fengguang
  2011-10-07 18:07       ` Curt Wohlgemuth
  -1 siblings, 1 reply; 27+ messages in thread
From: Wu Fengguang @ 2011-10-07 15:28 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Curt Wohlgemuth, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

On Wed, Sep 28, 2011 at 11:02:42PM +0800, Christoph Hellwig wrote:
> Did we get to any conclusion on this series?  I think having these
> additional reasons in the tracepoints and the additional statistics
> would be extremely useful for us who have to deal with writeback
> issues frequently.

I think we've reached reasonable agreements on the first patch, which
has been split into two ones by Curt:

1/2 writeback: send work item to queue_io, move_expired_inodes
2/2 writeback: Add a 'reason' to wb_writeback_work

I further incorporated some suggested changes by Jan and get the below
updated patch 2/2, with changes

- replace the tracing symbol array with wb_reason_name[]
- remove the balance_dirty_pages reason (no longer exists)
- rename @stat to @reason in the function declarations
- rename "FS_free_space" to "fs_free_space"

If no objections, I can push the two patches to linux-next tomorrow.

Thanks,
Fengguang
---
Subject: writeback: Add a 'reason' to wb_writeback_work
Date: Fri Oct 07 21:54:10 CST 2011

From: Curt Wohlgemuth <curtw@google.com>

This creates a new 'reason' field in a wb_writeback_work
structure, which unambiguously identifies who initiates
writeback activity.  A 'wb_reason' enumeration has been
added to writeback.h, to enumerate the possible reasons.

The 'writeback_work_class' and tracepoint event class and
'writeback_queue_io' tracepoints are updated to include the
symbolic 'reason' in all trace events.

And the 'writeback_inodes_sbXXX' family of routines has had
a wb_stats parameter added to them, so callers can specify
why writeback is being started.

Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---

Changes since v2:

   - enum wb_stats renamed enum wb_reason
   - All WB_STAT_xxx enum constants now named WB_REASON_xxx
   - All 'reason' strings in tracepoints match the WB_REASON name
   - The change to send 'work' to queue_io, move_expired_inodes, and
     trace_writeback_queue_io are in a separate commit

 fs/btrfs/extent-tree.c           |    3 +
 fs/buffer.c                      |    2 -
 fs/ext4/inode.c                  |    2 -
 fs/fs-writeback.c                |   49 +++++++++++++++++++++--------
 fs/quota/quota.c                 |    2 -
 fs/sync.c                        |    4 +-
 fs/ubifs/budget.c                |    2 -
 include/linux/backing-dev.h      |    3 +
 include/linux/writeback.h        |   32 +++++++++++++++---
 include/trace/events/writeback.h |   14 +++++---
 mm/backing-dev.c                 |    3 +
 mm/page-writeback.c              |    3 +
 mm/vmscan.c                      |    3 +
 13 files changed, 88 insertions(+), 34 deletions(-)

--- linux-next.orig/fs/btrfs/extent-tree.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/btrfs/extent-tree.c	2011-10-07 22:24:47.000000000 +0800
@@ -3340,7 +3340,8 @@ static int shrink_delalloc(struct btrfs_
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+						WB_REASON_FS_FREE_SPACE);
 
 		spin_lock(&space_info->lock);
 		if (reserved > space_info->bytes_reserved)
--- linux-next.orig/fs/buffer.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/buffer.c	2011-10-07 22:24:47.000000000 +0800
@@ -285,7 +285,7 @@ static void free_more_memory(void)
 	struct zone *zone;
 	int nid;
 
-	wakeup_flusher_threads(1024);
+	wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
 	yield();
 
 	for_each_online_node(nid) {
--- linux-next.orig/fs/ext4/inode.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/ext4/inode.c	2011-10-07 22:24:47.000000000 +0800
@@ -2241,7 +2241,7 @@ static int ext4_nonda_switch(struct supe
 	 * start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
 	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb);
+		writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
 
 	return 0;
 }
--- linux-next.orig/fs/fs-writeback.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/fs-writeback.c	2011-10-07 22:24:47.000000000 +0800
@@ -41,11 +41,23 @@ struct wb_writeback_work {
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
+	enum wb_reason reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
 };
 
+const char *wb_reason_name[] = {
+	[WB_REASON_BACKGROUND]		= "background",
+	[WB_REASON_TRY_TO_FREE_PAGES]	= "try_to_free_pages",
+	[WB_REASON_SYNC]		= "sync",
+	[WB_REASON_PERIODIC]		= "periodic",
+	[WB_REASON_LAPTOP_TIMER]	= "laptop_timer",
+	[WB_REASON_FREE_MORE_MEM]	= "free_more_memory",
+	[WB_REASON_FS_FREE_SPACE]	= "fs_free_space",
+	[WB_REASON_FORKER_THREAD]	= "forker_thread"
+};
+
 /*
  * Include the creation of the trace points after defining the
  * wb_writeback_work structure so that the definition remains local to this
@@ -115,7 +127,7 @@ static void bdi_queue_work(struct backin
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		      bool range_cyclic)
+		      bool range_cyclic, enum wb_reason reason)
 {
 	struct wb_writeback_work *work;
 
@@ -135,6 +147,7 @@ __bdi_start_writeback(struct backing_dev
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
+	work->reason	= reason;
 
 	bdi_queue_work(bdi, work);
 }
@@ -150,9 +163,10 @@ __bdi_start_writeback(struct backing_dev
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_reason reason)
 {
-	__bdi_start_writeback(bdi, nr_pages, true);
+	__bdi_start_writeback(bdi, nr_pages, true, reason);
 }
 
 /**
@@ -641,12 +655,14 @@ static long __writeback_inodes_wb(struct
 	return wrote;
 }
 
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_reason reason)
 {
 	struct wb_writeback_work work = {
 		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
+		.reason		= reason,
 	};
 
 	spin_lock(&wb->list_lock);
@@ -825,6 +841,7 @@ static long wb_check_background_flush(st
 			.sync_mode	= WB_SYNC_NONE,
 			.for_background	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_REASON_BACKGROUND,
 		};
 
 		return wb_writeback(wb, &work);
@@ -858,6 +875,7 @@ static long wb_check_old_data_flush(stru
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_REASON_PERIODIC,
 		};
 
 		return wb_writeback(wb, &work);
@@ -976,7 +994,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
 	struct backing_dev_info *bdi;
 
@@ -989,7 +1007,7 @@ void wakeup_flusher_threads(long nr_page
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false);
+		__bdi_start_writeback(bdi, nr_pages, false, reason);
 	}
 	rcu_read_unlock();
 }
@@ -1210,7 +1228,9 @@ static void wait_sb_inodes(struct super_
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+			    unsigned long nr,
+			    enum wb_reason reason)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
@@ -1219,6 +1239,7 @@ void writeback_inodes_sb_nr(struct super
 		.tagged_writepages	= 1,
 		.done			= &done,
 		.nr_pages		= nr,
+		.reason			= reason,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1235,9 +1256,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 {
-	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1248,11 +1269,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, reason);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1269,11 +1290,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idl
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr)
+				   unsigned long nr,
+				   enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr);
+		writeback_inodes_sb_nr(sb, nr, reason);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1297,6 +1319,7 @@ void sync_inodes_sb(struct super_block *
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
 		.done		= &done,
+		.reason		= WB_REASON_SYNC,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
--- linux-next.orig/fs/quota/quota.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/quota/quota.c	2011-10-07 22:24:47.000000000 +0800
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_bloc
 		/* caller already holds s_umount */
 		if (sb->s_flags & MS_RDONLY)
 			return -EROFS;
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
 		return 0;
 	default:
 		return -EINVAL;
--- linux-next.orig/fs/sync.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/sync.c	2011-10-07 22:24:47.000000000 +0800
@@ -43,7 +43,7 @@ static int __sync_filesystem(struct supe
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
@@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_flusher_threads(0);
+	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
--- linux-next.orig/fs/ubifs/budget.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/fs/ubifs/budget.c	2011-10-07 22:24:47.000000000 +0800
@@ -63,7 +63,7 @@
 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	down_read(&c->vfs_sb->s_umount);
-	writeback_inodes_sb(c->vfs_sb);
+	writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
 	up_read(&c->vfs_sb->s_umount);
 }
 
--- linux-next.orig/include/linux/backing-dev.h	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/include/linux/backing-dev.h	2011-10-07 22:24:47.000000000 +0800
@@ -118,7 +118,8 @@ int bdi_register(struct backing_dev_info
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
--- linux-next.orig/include/linux/writeback.h	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/include/linux/writeback.h	2011-10-07 22:24:47.000000000 +0800
@@ -39,6 +39,23 @@ enum writeback_sync_modes {
 };
 
 /*
+ * why some writeback work was initiated
+ */
+enum wb_reason {
+	WB_REASON_BACKGROUND,
+	WB_REASON_TRY_TO_FREE_PAGES,
+	WB_REASON_SYNC,
+	WB_REASON_PERIODIC,
+	WB_REASON_LAPTOP_TIMER,
+	WB_REASON_FREE_MORE_MEM,
+	WB_REASON_FS_FREE_SPACE,
+	WB_REASON_FORKER_THREAD,
+
+	WB_REASON_MAX,
+};
+extern const char *wb_reason_name[];
+
+/*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
  * in a manner such that unspecified fields are set to zero.
@@ -69,14 +86,17 @@ struct writeback_control {
  */	
 struct bdi_writeback;
 int inode_wait(void *);
-void writeback_inodes_sb(struct super_block *);
-void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
-int writeback_inodes_sb_if_idle(struct super_block *);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
+void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+							enum wb_reason reason);
+int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
+							enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_reason reason);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
--- linux-next.orig/include/trace/events/writeback.h	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/include/trace/events/writeback.h	2011-10-07 22:24:47.000000000 +0800
@@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
 		__field(int, for_kupdate)
 		__field(int, range_cyclic)
 		__field(int, for_background)
+		__field(int, reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class
 		__entry->for_kupdate = work->for_kupdate;
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background	= work->for_background;
+		__entry->reason = work->reason;
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
 		  __entry->sync_mode,
 		  __entry->for_kupdate,
 		  __entry->range_cyclic,
-		  __entry->for_background
+		  __entry->for_background,
+		  wb_reason_name[__entry->reason]
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -165,6 +168,7 @@ TRACE_EVENT(writeback_queue_io,
 		__field(unsigned long,	older)
 		__field(long,		age)
 		__field(int,		moved)
+		__field(int,		reason)
 	),
 	TP_fast_assign(
 		unsigned long *older_than_this = work->older_than_this;
@@ -173,12 +177,14 @@ TRACE_EVENT(writeback_queue_io,
 		__entry->age	= older_than_this ?
 				  (jiffies - *older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
+		__entry->reason	= work->reason;
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
-		__entry->moved)
+		__entry->moved,
+		wb_reason_name[__entry->reason])
 );
 
 TRACE_EVENT(global_dirty_state,
--- linux-next.orig/mm/backing-dev.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/mm/backing-dev.c	2011-10-07 22:24:47.000000000 +0800
@@ -476,7 +476,8 @@ static int bdi_forker_thread(void *ptr)
 				 * the bdi from the thread. Hopefully 1024 is
 				 * large enough for efficient IO.
 				 */
-				writeback_inodes_wb(&bdi->wb, 1024);
+				writeback_inodes_wb(&bdi->wb, 1024,
+						    WB_REASON_FORKER_THREAD);
 			} else {
 				/*
 				 * The spinlock makes sure we do not lose
--- linux-next.orig/mm/page-writeback.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/mm/page-writeback.c	2011-10-07 22:24:47.000000000 +0800
@@ -1304,7 +1304,8 @@ void laptop_mode_timer_fn(unsigned long 
 	 * threshold
 	 */
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages,
+					WB_REASON_LAPTOP_TIMER);
 }
 
 /*
--- linux-next.orig/mm/vmscan.c	2011-10-07 22:23:35.000000000 +0800
+++ linux-next/mm/vmscan.c	2011-10-07 22:24:47.000000000 +0800
@@ -2181,7 +2181,8 @@ static unsigned long do_try_to_free_page
 		 */
 		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
 		if (total_scanned > writeback_threshold) {
-			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
+						WB_REASON_TRY_TO_FREE_PAGES);
 			sc->may_writepage = 1;
 		}
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
  2011-10-07 15:28   ` Wu Fengguang
@ 2011-10-07 18:07       ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-10-07 18:07 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Fengguang:

On Fri, Oct 7, 2011 at 8:28 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
>
> On Wed, Sep 28, 2011 at 11:02:42PM +0800, Christoph Hellwig wrote:
> > Did we get to any conclusion on this series?  I think having these
> > additional reasons in the tracepoints and the additional statistics
> > would be extremely useful for us who have to deal with writeback
> > issues frequently.
>
> I think we've reached reasonable agreements on the first patch, which
> has been split into two ones by Curt:
>
> 1/2 writeback: send work item to queue_io, move_expired_inodes
> 2/2 writeback: Add a 'reason' to wb_writeback_work
>
> I further incorporated some suggested changes by Jan and get the below
> updated patch 2/2, with changes
>
> - replace the tracing symbol array with wb_reason_name[]
> - remove the balance_dirty_pages reason (no longer exists)
> - rename @stat to @reason in the function declarations
> - rename "FS_free_space" to "fs_free_space"
>
> If no objections, I can push the two patches to linux-next tomorrow.

No objections on my part.  Thanks for wrapping this up!
Curt

>
> Thanks,
> Fengguang
> ---
> Subject: writeback: Add a 'reason' to wb_writeback_work
> Date: Fri Oct 07 21:54:10 CST 2011
>
> From: Curt Wohlgemuth <curtw@google.com>
>
> This creates a new 'reason' field in a wb_writeback_work
> structure, which unambiguously identifies who initiates
> writeback activity.  A 'wb_reason' enumeration has been
> added to writeback.h, to enumerate the possible reasons.
>
> The 'writeback_work_class' and tracepoint event class and
> 'writeback_queue_io' tracepoints are updated to include the
> symbolic 'reason' in all trace events.
>
> And the 'writeback_inodes_sbXXX' family of routines has had
> a wb_stats parameter added to them, so callers can specify
> why writeback is being started.
>
> Acked-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> ---
>
> Changes since v2:
>
>   - enum wb_stats renamed enum wb_reason
>   - All WB_STAT_xxx enum constants now named WB_REASON_xxx
>   - All 'reason' strings in tracepoints match the WB_REASON name
>   - The change to send 'work' to queue_io, move_expired_inodes, and
>     trace_writeback_queue_io are in a separate commit
>
>  fs/btrfs/extent-tree.c           |    3 +
>  fs/buffer.c                      |    2 -
>  fs/ext4/inode.c                  |    2 -
>  fs/fs-writeback.c                |   49 +++++++++++++++++++++--------
>  fs/quota/quota.c                 |    2 -
>  fs/sync.c                        |    4 +-
>  fs/ubifs/budget.c                |    2 -
>  include/linux/backing-dev.h      |    3 +
>  include/linux/writeback.h        |   32 +++++++++++++++---
>  include/trace/events/writeback.h |   14 +++++---
>  mm/backing-dev.c                 |    3 +
>  mm/page-writeback.c              |    3 +
>  mm/vmscan.c                      |    3 +
>  13 files changed, 88 insertions(+), 34 deletions(-)
>
> --- linux-next.orig/fs/btrfs/extent-tree.c      2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/btrfs/extent-tree.c   2011-10-07 22:24:47.000000000 +0800
> @@ -3340,7 +3340,8 @@ static int shrink_delalloc(struct btrfs_
>                smp_mb();
>                nr_pages = min_t(unsigned long, nr_pages,
>                       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
> -               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
> +               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
> +                                               WB_REASON_FS_FREE_SPACE);
>
>                spin_lock(&space_info->lock);
>                if (reserved > space_info->bytes_reserved)
> --- linux-next.orig/fs/buffer.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/buffer.c      2011-10-07 22:24:47.000000000 +0800
> @@ -285,7 +285,7 @@ static void free_more_memory(void)
>        struct zone *zone;
>        int nid;
>
> -       wakeup_flusher_threads(1024);
> +       wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
>        yield();
>
>        for_each_online_node(nid) {
> --- linux-next.orig/fs/ext4/inode.c     2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/ext4/inode.c  2011-10-07 22:24:47.000000000 +0800
> @@ -2241,7 +2241,7 @@ static int ext4_nonda_switch(struct supe
>         * start pushing delalloc when 1/2 of free blocks are dirty.
>         */
>        if (free_blocks < 2 * dirty_blocks)
> -               writeback_inodes_sb_if_idle(sb);
> +               writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
>
>        return 0;
>  }
> --- linux-next.orig/fs/fs-writeback.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/fs-writeback.c        2011-10-07 22:24:47.000000000 +0800
> @@ -41,11 +41,23 @@ struct wb_writeback_work {
>        unsigned int for_kupdate:1;
>        unsigned int range_cyclic:1;
>        unsigned int for_background:1;
> +       enum wb_reason reason;          /* why was writeback initiated? */
>
>        struct list_head list;          /* pending work list */
>        struct completion *done;        /* set if the caller waits */
>  };
>
> +const char *wb_reason_name[] = {
> +       [WB_REASON_BACKGROUND]          = "background",
> +       [WB_REASON_TRY_TO_FREE_PAGES]   = "try_to_free_pages",
> +       [WB_REASON_SYNC]                = "sync",
> +       [WB_REASON_PERIODIC]            = "periodic",
> +       [WB_REASON_LAPTOP_TIMER]        = "laptop_timer",
> +       [WB_REASON_FREE_MORE_MEM]       = "free_more_memory",
> +       [WB_REASON_FS_FREE_SPACE]       = "fs_free_space",
> +       [WB_REASON_FORKER_THREAD]       = "forker_thread"
> +};
> +
>  /*
>  * Include the creation of the trace points after defining the
>  * wb_writeback_work structure so that the definition remains local to this
> @@ -115,7 +127,7 @@ static void bdi_queue_work(struct backin
>
>  static void
>  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> -                     bool range_cyclic)
> +                     bool range_cyclic, enum wb_reason reason)
>  {
>        struct wb_writeback_work *work;
>
> @@ -135,6 +147,7 @@ __bdi_start_writeback(struct backing_dev
>        work->sync_mode = WB_SYNC_NONE;
>        work->nr_pages  = nr_pages;
>        work->range_cyclic = range_cyclic;
> +       work->reason    = reason;
>
>        bdi_queue_work(bdi, work);
>  }
> @@ -150,9 +163,10 @@ __bdi_start_writeback(struct backing_dev
>  *   completion. Caller need not hold sb s_umount semaphore.
>  *
>  */
> -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
> +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> +                       enum wb_reason reason)
>  {
> -       __bdi_start_writeback(bdi, nr_pages, true);
> +       __bdi_start_writeback(bdi, nr_pages, true, reason);
>  }
>
>  /**
> @@ -641,12 +655,14 @@ static long __writeback_inodes_wb(struct
>        return wrote;
>  }
>
> -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
> +long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
> +                               enum wb_reason reason)
>  {
>        struct wb_writeback_work work = {
>                .nr_pages       = nr_pages,
>                .sync_mode      = WB_SYNC_NONE,
>                .range_cyclic   = 1,
> +               .reason         = reason,
>        };
>
>        spin_lock(&wb->list_lock);
> @@ -825,6 +841,7 @@ static long wb_check_background_flush(st
>                        .sync_mode      = WB_SYNC_NONE,
>                        .for_background = 1,
>                        .range_cyclic   = 1,
> +                       .reason         = WB_REASON_BACKGROUND,
>                };
>
>                return wb_writeback(wb, &work);
> @@ -858,6 +875,7 @@ static long wb_check_old_data_flush(stru
>                        .sync_mode      = WB_SYNC_NONE,
>                        .for_kupdate    = 1,
>                        .range_cyclic   = 1,
> +                       .reason         = WB_REASON_PERIODIC,
>                };
>
>                return wb_writeback(wb, &work);
> @@ -976,7 +994,7 @@ int bdi_writeback_thread(void *data)
>  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
>  * the whole world.
>  */
> -void wakeup_flusher_threads(long nr_pages)
> +void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
>  {
>        struct backing_dev_info *bdi;
>
> @@ -989,7 +1007,7 @@ void wakeup_flusher_threads(long nr_page
>        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
>                if (!bdi_has_dirty_io(bdi))
>                        continue;
> -               __bdi_start_writeback(bdi, nr_pages, false);
> +               __bdi_start_writeback(bdi, nr_pages, false, reason);
>        }
>        rcu_read_unlock();
>  }
> @@ -1210,7 +1228,9 @@ static void wait_sb_inodes(struct super_
>  * on how many (if any) will be written, and this function does not wait
>  * for IO completion of submitted IO.
>  */
> -void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
> +void writeback_inodes_sb_nr(struct super_block *sb,
> +                           unsigned long nr,
> +                           enum wb_reason reason)
>  {
>        DECLARE_COMPLETION_ONSTACK(done);
>        struct wb_writeback_work work = {
> @@ -1219,6 +1239,7 @@ void writeback_inodes_sb_nr(struct super
>                .tagged_writepages      = 1,
>                .done                   = &done,
>                .nr_pages               = nr,
> +               .reason                 = reason,
>        };
>
>        WARN_ON(!rwsem_is_locked(&sb->s_umount));
> @@ -1235,9 +1256,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
>  * on how many (if any) will be written, and this function does not wait
>  * for IO completion of submitted IO.
>  */
> -void writeback_inodes_sb(struct super_block *sb)
> +void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
>  {
> -       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
> +       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
>  }
>  EXPORT_SYMBOL(writeback_inodes_sb);
>
> @@ -1248,11 +1269,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
>  * Invoke writeback_inodes_sb if no writeback is currently underway.
>  * Returns 1 if writeback was started, 0 if not.
>  */
> -int writeback_inodes_sb_if_idle(struct super_block *sb)
> +int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
>  {
>        if (!writeback_in_progress(sb->s_bdi)) {
>                down_read(&sb->s_umount);
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, reason);
>                up_read(&sb->s_umount);
>                return 1;
>        } else
> @@ -1269,11 +1290,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idl
>  * Returns 1 if writeback was started, 0 if not.
>  */
>  int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
> -                                  unsigned long nr)
> +                                  unsigned long nr,
> +                                  enum wb_reason reason)
>  {
>        if (!writeback_in_progress(sb->s_bdi)) {
>                down_read(&sb->s_umount);
> -               writeback_inodes_sb_nr(sb, nr);
> +               writeback_inodes_sb_nr(sb, nr, reason);
>                up_read(&sb->s_umount);
>                return 1;
>        } else
> @@ -1297,6 +1319,7 @@ void sync_inodes_sb(struct super_block *
>                .nr_pages       = LONG_MAX,
>                .range_cyclic   = 0,
>                .done           = &done,
> +               .reason         = WB_REASON_SYNC,
>        };
>
>        WARN_ON(!rwsem_is_locked(&sb->s_umount));
> --- linux-next.orig/fs/quota/quota.c    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/quota/quota.c 2011-10-07 22:24:47.000000000 +0800
> @@ -286,7 +286,7 @@ static int do_quotactl(struct super_bloc
>                /* caller already holds s_umount */
>                if (sb->s_flags & MS_RDONLY)
>                        return -EROFS;
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, WB_REASON_SYNC);
>                return 0;
>        default:
>                return -EINVAL;
> --- linux-next.orig/fs/sync.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/sync.c        2011-10-07 22:24:47.000000000 +0800
> @@ -43,7 +43,7 @@ static int __sync_filesystem(struct supe
>        if (wait)
>                sync_inodes_sb(sb);
>        else
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, WB_REASON_SYNC);
>
>        if (sb->s_op->sync_fs)
>                sb->s_op->sync_fs(sb, wait);
> @@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
>  */
>  SYSCALL_DEFINE0(sync)
>  {
> -       wakeup_flusher_threads(0);
> +       wakeup_flusher_threads(0, WB_REASON_SYNC);
>        sync_filesystems(0);
>        sync_filesystems(1);
>        if (unlikely(laptop_mode))
> --- linux-next.orig/fs/ubifs/budget.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/ubifs/budget.c        2011-10-07 22:24:47.000000000 +0800
> @@ -63,7 +63,7 @@
>  static void shrink_liability(struct ubifs_info *c, int nr_to_write)
>  {
>        down_read(&c->vfs_sb->s_umount);
> -       writeback_inodes_sb(c->vfs_sb);
> +       writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
>        up_read(&c->vfs_sb->s_umount);
>  }
>
> --- linux-next.orig/include/linux/backing-dev.h 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/linux/backing-dev.h      2011-10-07 22:24:47.000000000 +0800
> @@ -118,7 +118,8 @@ int bdi_register(struct backing_dev_info
>  int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
>  void bdi_unregister(struct backing_dev_info *bdi);
>  int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
> -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
> +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> +                       enum wb_reason reason);
>  void bdi_start_background_writeback(struct backing_dev_info *bdi);
>  int bdi_writeback_thread(void *data);
>  int bdi_has_dirty_io(struct backing_dev_info *bdi);
> --- linux-next.orig/include/linux/writeback.h   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/linux/writeback.h        2011-10-07 22:24:47.000000000 +0800
> @@ -39,6 +39,23 @@ enum writeback_sync_modes {
>  };
>
>  /*
> + * why some writeback work was initiated
> + */
> +enum wb_reason {
> +       WB_REASON_BACKGROUND,
> +       WB_REASON_TRY_TO_FREE_PAGES,
> +       WB_REASON_SYNC,
> +       WB_REASON_PERIODIC,
> +       WB_REASON_LAPTOP_TIMER,
> +       WB_REASON_FREE_MORE_MEM,
> +       WB_REASON_FS_FREE_SPACE,
> +       WB_REASON_FORKER_THREAD,
> +
> +       WB_REASON_MAX,
> +};
> +extern const char *wb_reason_name[];
> +
> +/*
>  * A control structure which tells the writeback code what to do.  These are
>  * always on the stack, and hence need no locking.  They are always initialised
>  * in a manner such that unspecified fields are set to zero.
> @@ -69,14 +86,17 @@ struct writeback_control {
>  */
>  struct bdi_writeback;
>  int inode_wait(void *);
> -void writeback_inodes_sb(struct super_block *);
> -void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
> -int writeback_inodes_sb_if_idle(struct super_block *);
> -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
> +void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
> +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
> +                                                       enum wb_reason reason);
> +int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
> +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
> +                                                       enum wb_reason reason);
>  void sync_inodes_sb(struct super_block *);
> -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
> +long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
> +                               enum wb_reason reason);
>  long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
> -void wakeup_flusher_threads(long nr_pages);
> +void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
>
>  /* writeback.h requires fs.h; it, too, is not included from here. */
>  static inline void wait_on_inode(struct inode *inode)
> --- linux-next.orig/include/trace/events/writeback.h    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/trace/events/writeback.h 2011-10-07 22:24:47.000000000 +0800
> @@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
>                __field(int, for_kupdate)
>                __field(int, range_cyclic)
>                __field(int, for_background)
> +               __field(int, reason)
>        ),
>        TP_fast_assign(
>                strncpy(__entry->name, dev_name(bdi->dev), 32);
> @@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class
>                __entry->for_kupdate = work->for_kupdate;
>                __entry->range_cyclic = work->range_cyclic;
>                __entry->for_background = work->for_background;
> +               __entry->reason = work->reason;
>        ),
>        TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
> -                 "kupdate=%d range_cyclic=%d background=%d",
> +                 "kupdate=%d range_cyclic=%d background=%d reason=%s",
>                  __entry->name,
>                  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
>                  __entry->nr_pages,
>                  __entry->sync_mode,
>                  __entry->for_kupdate,
>                  __entry->range_cyclic,
> -                 __entry->for_background
> +                 __entry->for_background,
> +                 wb_reason_name[__entry->reason]
>        )
>  );
>  #define DEFINE_WRITEBACK_WORK_EVENT(name) \
> @@ -165,6 +168,7 @@ TRACE_EVENT(writeback_queue_io,
>                __field(unsigned long,  older)
>                __field(long,           age)
>                __field(int,            moved)
> +               __field(int,            reason)
>        ),
>        TP_fast_assign(
>                unsigned long *older_than_this = work->older_than_this;
> @@ -173,12 +177,14 @@ TRACE_EVENT(writeback_queue_io,
>                __entry->age    = older_than_this ?
>                                  (jiffies - *older_than_this) * 1000 / HZ : -1;
>                __entry->moved  = moved;
> +               __entry->reason = work->reason;
>        ),
> -       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
> +       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
>                __entry->name,
>                __entry->older, /* older_than_this in jiffies */
>                __entry->age,   /* older_than_this in relative milliseconds */
> -               __entry->moved)
> +               __entry->moved,
> +               wb_reason_name[__entry->reason])
>  );
>
>  TRACE_EVENT(global_dirty_state,
> --- linux-next.orig/mm/backing-dev.c    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/backing-dev.c 2011-10-07 22:24:47.000000000 +0800
> @@ -476,7 +476,8 @@ static int bdi_forker_thread(void *ptr)
>                                 * the bdi from the thread. Hopefully 1024 is
>                                 * large enough for efficient IO.
>                                 */
> -                               writeback_inodes_wb(&bdi->wb, 1024);
> +                               writeback_inodes_wb(&bdi->wb, 1024,
> +                                                   WB_REASON_FORKER_THREAD);
>                        } else {
>                                /*
>                                 * The spinlock makes sure we do not lose
> --- linux-next.orig/mm/page-writeback.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/page-writeback.c      2011-10-07 22:24:47.000000000 +0800
> @@ -1304,7 +1304,8 @@ void laptop_mode_timer_fn(unsigned long
>         * threshold
>         */
>        if (bdi_has_dirty_io(&q->backing_dev_info))
> -               bdi_start_writeback(&q->backing_dev_info, nr_pages);
> +               bdi_start_writeback(&q->backing_dev_info, nr_pages,
> +                                       WB_REASON_LAPTOP_TIMER);
>  }
>
>  /*
> --- linux-next.orig/mm/vmscan.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/vmscan.c      2011-10-07 22:24:47.000000000 +0800
> @@ -2181,7 +2181,8 @@ static unsigned long do_try_to_free_page
>                 */
>                writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
>                if (total_scanned > writeback_threshold) {
> -                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
> +                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
> +                                               WB_REASON_TRY_TO_FREE_PAGES);
>                        sc->may_writepage = 1;
>                }
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-10-07 18:07       ` Curt Wohlgemuth
  0 siblings, 0 replies; 27+ messages in thread
From: Curt Wohlgemuth @ 2011-10-07 18:07 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Christoph Hellwig, Jan Kara, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Fengguang:

On Fri, Oct 7, 2011 at 8:28 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
>
> On Wed, Sep 28, 2011 at 11:02:42PM +0800, Christoph Hellwig wrote:
> > Did we get to any conclusion on this series?  I think having these
> > additional reasons in the tracepoints and the additional statistics
> > would be extremely useful for us who have to deal with writeback
> > issues frequently.
>
> I think we've reached reasonable agreements on the first patch, which
> has been split into two ones by Curt:
>
> 1/2 writeback: send work item to queue_io, move_expired_inodes
> 2/2 writeback: Add a 'reason' to wb_writeback_work
>
> I further incorporated some suggested changes by Jan and get the below
> updated patch 2/2, with changes
>
> - replace the tracing symbol array with wb_reason_name[]
> - remove the balance_dirty_pages reason (no longer exists)
> - rename @stat to @reason in the function declarations
> - rename "FS_free_space" to "fs_free_space"
>
> If no objections, I can push the two patches to linux-next tomorrow.

No objections on my part.  Thanks for wrapping this up!
Curt

>
> Thanks,
> Fengguang
> ---
> Subject: writeback: Add a 'reason' to wb_writeback_work
> Date: Fri Oct 07 21:54:10 CST 2011
>
> From: Curt Wohlgemuth <curtw@google.com>
>
> This creates a new 'reason' field in a wb_writeback_work
> structure, which unambiguously identifies who initiates
> writeback activity.  A 'wb_reason' enumeration has been
> added to writeback.h, to enumerate the possible reasons.
>
> The 'writeback_work_class' and tracepoint event class and
> 'writeback_queue_io' tracepoints are updated to include the
> symbolic 'reason' in all trace events.
>
> And the 'writeback_inodes_sbXXX' family of routines has had
> a wb_stats parameter added to them, so callers can specify
> why writeback is being started.
>
> Acked-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> ---
>
> Changes since v2:
>
>   - enum wb_stats renamed enum wb_reason
>   - All WB_STAT_xxx enum constants now named WB_REASON_xxx
>   - All 'reason' strings in tracepoints match the WB_REASON name
>   - The change to send 'work' to queue_io, move_expired_inodes, and
>     trace_writeback_queue_io are in a separate commit
>
>  fs/btrfs/extent-tree.c           |    3 +
>  fs/buffer.c                      |    2 -
>  fs/ext4/inode.c                  |    2 -
>  fs/fs-writeback.c                |   49 +++++++++++++++++++++--------
>  fs/quota/quota.c                 |    2 -
>  fs/sync.c                        |    4 +-
>  fs/ubifs/budget.c                |    2 -
>  include/linux/backing-dev.h      |    3 +
>  include/linux/writeback.h        |   32 +++++++++++++++---
>  include/trace/events/writeback.h |   14 +++++---
>  mm/backing-dev.c                 |    3 +
>  mm/page-writeback.c              |    3 +
>  mm/vmscan.c                      |    3 +
>  13 files changed, 88 insertions(+), 34 deletions(-)
>
> --- linux-next.orig/fs/btrfs/extent-tree.c      2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/btrfs/extent-tree.c   2011-10-07 22:24:47.000000000 +0800
> @@ -3340,7 +3340,8 @@ static int shrink_delalloc(struct btrfs_
>                smp_mb();
>                nr_pages = min_t(unsigned long, nr_pages,
>                       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
> -               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
> +               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
> +                                               WB_REASON_FS_FREE_SPACE);
>
>                spin_lock(&space_info->lock);
>                if (reserved > space_info->bytes_reserved)
> --- linux-next.orig/fs/buffer.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/buffer.c      2011-10-07 22:24:47.000000000 +0800
> @@ -285,7 +285,7 @@ static void free_more_memory(void)
>        struct zone *zone;
>        int nid;
>
> -       wakeup_flusher_threads(1024);
> +       wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
>        yield();
>
>        for_each_online_node(nid) {
> --- linux-next.orig/fs/ext4/inode.c     2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/ext4/inode.c  2011-10-07 22:24:47.000000000 +0800
> @@ -2241,7 +2241,7 @@ static int ext4_nonda_switch(struct supe
>         * start pushing delalloc when 1/2 of free blocks are dirty.
>         */
>        if (free_blocks < 2 * dirty_blocks)
> -               writeback_inodes_sb_if_idle(sb);
> +               writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
>
>        return 0;
>  }
> --- linux-next.orig/fs/fs-writeback.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/fs-writeback.c        2011-10-07 22:24:47.000000000 +0800
> @@ -41,11 +41,23 @@ struct wb_writeback_work {
>        unsigned int for_kupdate:1;
>        unsigned int range_cyclic:1;
>        unsigned int for_background:1;
> +       enum wb_reason reason;          /* why was writeback initiated? */
>
>        struct list_head list;          /* pending work list */
>        struct completion *done;        /* set if the caller waits */
>  };
>
> +const char *wb_reason_name[] = {
> +       [WB_REASON_BACKGROUND]          = "background",
> +       [WB_REASON_TRY_TO_FREE_PAGES]   = "try_to_free_pages",
> +       [WB_REASON_SYNC]                = "sync",
> +       [WB_REASON_PERIODIC]            = "periodic",
> +       [WB_REASON_LAPTOP_TIMER]        = "laptop_timer",
> +       [WB_REASON_FREE_MORE_MEM]       = "free_more_memory",
> +       [WB_REASON_FS_FREE_SPACE]       = "fs_free_space",
> +       [WB_REASON_FORKER_THREAD]       = "forker_thread"
> +};
> +
>  /*
>  * Include the creation of the trace points after defining the
>  * wb_writeback_work structure so that the definition remains local to this
> @@ -115,7 +127,7 @@ static void bdi_queue_work(struct backin
>
>  static void
>  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> -                     bool range_cyclic)
> +                     bool range_cyclic, enum wb_reason reason)
>  {
>        struct wb_writeback_work *work;
>
> @@ -135,6 +147,7 @@ __bdi_start_writeback(struct backing_dev
>        work->sync_mode = WB_SYNC_NONE;
>        work->nr_pages  = nr_pages;
>        work->range_cyclic = range_cyclic;
> +       work->reason    = reason;
>
>        bdi_queue_work(bdi, work);
>  }
> @@ -150,9 +163,10 @@ __bdi_start_writeback(struct backing_dev
>  *   completion. Caller need not hold sb s_umount semaphore.
>  *
>  */
> -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
> +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> +                       enum wb_reason reason)
>  {
> -       __bdi_start_writeback(bdi, nr_pages, true);
> +       __bdi_start_writeback(bdi, nr_pages, true, reason);
>  }
>
>  /**
> @@ -641,12 +655,14 @@ static long __writeback_inodes_wb(struct
>        return wrote;
>  }
>
> -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
> +long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
> +                               enum wb_reason reason)
>  {
>        struct wb_writeback_work work = {
>                .nr_pages       = nr_pages,
>                .sync_mode      = WB_SYNC_NONE,
>                .range_cyclic   = 1,
> +               .reason         = reason,
>        };
>
>        spin_lock(&wb->list_lock);
> @@ -825,6 +841,7 @@ static long wb_check_background_flush(st
>                        .sync_mode      = WB_SYNC_NONE,
>                        .for_background = 1,
>                        .range_cyclic   = 1,
> +                       .reason         = WB_REASON_BACKGROUND,
>                };
>
>                return wb_writeback(wb, &work);
> @@ -858,6 +875,7 @@ static long wb_check_old_data_flush(stru
>                        .sync_mode      = WB_SYNC_NONE,
>                        .for_kupdate    = 1,
>                        .range_cyclic   = 1,
> +                       .reason         = WB_REASON_PERIODIC,
>                };
>
>                return wb_writeback(wb, &work);
> @@ -976,7 +994,7 @@ int bdi_writeback_thread(void *data)
>  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
>  * the whole world.
>  */
> -void wakeup_flusher_threads(long nr_pages)
> +void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
>  {
>        struct backing_dev_info *bdi;
>
> @@ -989,7 +1007,7 @@ void wakeup_flusher_threads(long nr_page
>        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
>                if (!bdi_has_dirty_io(bdi))
>                        continue;
> -               __bdi_start_writeback(bdi, nr_pages, false);
> +               __bdi_start_writeback(bdi, nr_pages, false, reason);
>        }
>        rcu_read_unlock();
>  }
> @@ -1210,7 +1228,9 @@ static void wait_sb_inodes(struct super_
>  * on how many (if any) will be written, and this function does not wait
>  * for IO completion of submitted IO.
>  */
> -void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
> +void writeback_inodes_sb_nr(struct super_block *sb,
> +                           unsigned long nr,
> +                           enum wb_reason reason)
>  {
>        DECLARE_COMPLETION_ONSTACK(done);
>        struct wb_writeback_work work = {
> @@ -1219,6 +1239,7 @@ void writeback_inodes_sb_nr(struct super
>                .tagged_writepages      = 1,
>                .done                   = &done,
>                .nr_pages               = nr,
> +               .reason                 = reason,
>        };
>
>        WARN_ON(!rwsem_is_locked(&sb->s_umount));
> @@ -1235,9 +1256,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
>  * on how many (if any) will be written, and this function does not wait
>  * for IO completion of submitted IO.
>  */
> -void writeback_inodes_sb(struct super_block *sb)
> +void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
>  {
> -       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
> +       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
>  }
>  EXPORT_SYMBOL(writeback_inodes_sb);
>
> @@ -1248,11 +1269,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
>  * Invoke writeback_inodes_sb if no writeback is currently underway.
>  * Returns 1 if writeback was started, 0 if not.
>  */
> -int writeback_inodes_sb_if_idle(struct super_block *sb)
> +int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
>  {
>        if (!writeback_in_progress(sb->s_bdi)) {
>                down_read(&sb->s_umount);
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, reason);
>                up_read(&sb->s_umount);
>                return 1;
>        } else
> @@ -1269,11 +1290,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idl
>  * Returns 1 if writeback was started, 0 if not.
>  */
>  int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
> -                                  unsigned long nr)
> +                                  unsigned long nr,
> +                                  enum wb_reason reason)
>  {
>        if (!writeback_in_progress(sb->s_bdi)) {
>                down_read(&sb->s_umount);
> -               writeback_inodes_sb_nr(sb, nr);
> +               writeback_inodes_sb_nr(sb, nr, reason);
>                up_read(&sb->s_umount);
>                return 1;
>        } else
> @@ -1297,6 +1319,7 @@ void sync_inodes_sb(struct super_block *
>                .nr_pages       = LONG_MAX,
>                .range_cyclic   = 0,
>                .done           = &done,
> +               .reason         = WB_REASON_SYNC,
>        };
>
>        WARN_ON(!rwsem_is_locked(&sb->s_umount));
> --- linux-next.orig/fs/quota/quota.c    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/quota/quota.c 2011-10-07 22:24:47.000000000 +0800
> @@ -286,7 +286,7 @@ static int do_quotactl(struct super_bloc
>                /* caller already holds s_umount */
>                if (sb->s_flags & MS_RDONLY)
>                        return -EROFS;
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, WB_REASON_SYNC);
>                return 0;
>        default:
>                return -EINVAL;
> --- linux-next.orig/fs/sync.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/sync.c        2011-10-07 22:24:47.000000000 +0800
> @@ -43,7 +43,7 @@ static int __sync_filesystem(struct supe
>        if (wait)
>                sync_inodes_sb(sb);
>        else
> -               writeback_inodes_sb(sb);
> +               writeback_inodes_sb(sb, WB_REASON_SYNC);
>
>        if (sb->s_op->sync_fs)
>                sb->s_op->sync_fs(sb, wait);
> @@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
>  */
>  SYSCALL_DEFINE0(sync)
>  {
> -       wakeup_flusher_threads(0);
> +       wakeup_flusher_threads(0, WB_REASON_SYNC);
>        sync_filesystems(0);
>        sync_filesystems(1);
>        if (unlikely(laptop_mode))
> --- linux-next.orig/fs/ubifs/budget.c   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/fs/ubifs/budget.c        2011-10-07 22:24:47.000000000 +0800
> @@ -63,7 +63,7 @@
>  static void shrink_liability(struct ubifs_info *c, int nr_to_write)
>  {
>        down_read(&c->vfs_sb->s_umount);
> -       writeback_inodes_sb(c->vfs_sb);
> +       writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
>        up_read(&c->vfs_sb->s_umount);
>  }
>
> --- linux-next.orig/include/linux/backing-dev.h 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/linux/backing-dev.h      2011-10-07 22:24:47.000000000 +0800
> @@ -118,7 +118,8 @@ int bdi_register(struct backing_dev_info
>  int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
>  void bdi_unregister(struct backing_dev_info *bdi);
>  int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
> -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
> +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> +                       enum wb_reason reason);
>  void bdi_start_background_writeback(struct backing_dev_info *bdi);
>  int bdi_writeback_thread(void *data);
>  int bdi_has_dirty_io(struct backing_dev_info *bdi);
> --- linux-next.orig/include/linux/writeback.h   2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/linux/writeback.h        2011-10-07 22:24:47.000000000 +0800
> @@ -39,6 +39,23 @@ enum writeback_sync_modes {
>  };
>
>  /*
> + * why some writeback work was initiated
> + */
> +enum wb_reason {
> +       WB_REASON_BACKGROUND,
> +       WB_REASON_TRY_TO_FREE_PAGES,
> +       WB_REASON_SYNC,
> +       WB_REASON_PERIODIC,
> +       WB_REASON_LAPTOP_TIMER,
> +       WB_REASON_FREE_MORE_MEM,
> +       WB_REASON_FS_FREE_SPACE,
> +       WB_REASON_FORKER_THREAD,
> +
> +       WB_REASON_MAX,
> +};
> +extern const char *wb_reason_name[];
> +
> +/*
>  * A control structure which tells the writeback code what to do.  These are
>  * always on the stack, and hence need no locking.  They are always initialised
>  * in a manner such that unspecified fields are set to zero.
> @@ -69,14 +86,17 @@ struct writeback_control {
>  */
>  struct bdi_writeback;
>  int inode_wait(void *);
> -void writeback_inodes_sb(struct super_block *);
> -void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
> -int writeback_inodes_sb_if_idle(struct super_block *);
> -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
> +void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
> +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
> +                                                       enum wb_reason reason);
> +int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
> +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
> +                                                       enum wb_reason reason);
>  void sync_inodes_sb(struct super_block *);
> -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
> +long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
> +                               enum wb_reason reason);
>  long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
> -void wakeup_flusher_threads(long nr_pages);
> +void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
>
>  /* writeback.h requires fs.h; it, too, is not included from here. */
>  static inline void wait_on_inode(struct inode *inode)
> --- linux-next.orig/include/trace/events/writeback.h    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/include/trace/events/writeback.h 2011-10-07 22:24:47.000000000 +0800
> @@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
>                __field(int, for_kupdate)
>                __field(int, range_cyclic)
>                __field(int, for_background)
> +               __field(int, reason)
>        ),
>        TP_fast_assign(
>                strncpy(__entry->name, dev_name(bdi->dev), 32);
> @@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class
>                __entry->for_kupdate = work->for_kupdate;
>                __entry->range_cyclic = work->range_cyclic;
>                __entry->for_background = work->for_background;
> +               __entry->reason = work->reason;
>        ),
>        TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
> -                 "kupdate=%d range_cyclic=%d background=%d",
> +                 "kupdate=%d range_cyclic=%d background=%d reason=%s",
>                  __entry->name,
>                  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
>                  __entry->nr_pages,
>                  __entry->sync_mode,
>                  __entry->for_kupdate,
>                  __entry->range_cyclic,
> -                 __entry->for_background
> +                 __entry->for_background,
> +                 wb_reason_name[__entry->reason]
>        )
>  );
>  #define DEFINE_WRITEBACK_WORK_EVENT(name) \
> @@ -165,6 +168,7 @@ TRACE_EVENT(writeback_queue_io,
>                __field(unsigned long,  older)
>                __field(long,           age)
>                __field(int,            moved)
> +               __field(int,            reason)
>        ),
>        TP_fast_assign(
>                unsigned long *older_than_this = work->older_than_this;
> @@ -173,12 +177,14 @@ TRACE_EVENT(writeback_queue_io,
>                __entry->age    = older_than_this ?
>                                  (jiffies - *older_than_this) * 1000 / HZ : -1;
>                __entry->moved  = moved;
> +               __entry->reason = work->reason;
>        ),
> -       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
> +       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
>                __entry->name,
>                __entry->older, /* older_than_this in jiffies */
>                __entry->age,   /* older_than_this in relative milliseconds */
> -               __entry->moved)
> +               __entry->moved,
> +               wb_reason_name[__entry->reason])
>  );
>
>  TRACE_EVENT(global_dirty_state,
> --- linux-next.orig/mm/backing-dev.c    2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/backing-dev.c 2011-10-07 22:24:47.000000000 +0800
> @@ -476,7 +476,8 @@ static int bdi_forker_thread(void *ptr)
>                                 * the bdi from the thread. Hopefully 1024 is
>                                 * large enough for efficient IO.
>                                 */
> -                               writeback_inodes_wb(&bdi->wb, 1024);
> +                               writeback_inodes_wb(&bdi->wb, 1024,
> +                                                   WB_REASON_FORKER_THREAD);
>                        } else {
>                                /*
>                                 * The spinlock makes sure we do not lose
> --- linux-next.orig/mm/page-writeback.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/page-writeback.c      2011-10-07 22:24:47.000000000 +0800
> @@ -1304,7 +1304,8 @@ void laptop_mode_timer_fn(unsigned long
>         * threshold
>         */
>        if (bdi_has_dirty_io(&q->backing_dev_info))
> -               bdi_start_writeback(&q->backing_dev_info, nr_pages);
> +               bdi_start_writeback(&q->backing_dev_info, nr_pages,
> +                                       WB_REASON_LAPTOP_TIMER);
>  }
>
>  /*
> --- linux-next.orig/mm/vmscan.c 2011-10-07 22:23:35.000000000 +0800
> +++ linux-next/mm/vmscan.c      2011-10-07 22:24:47.000000000 +0800
> @@ -2181,7 +2181,8 @@ static unsigned long do_try_to_free_page
>                 */
>                writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
>                if (total_scanned > writeback_threshold) {
> -                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
> +                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
> +                                               WB_REASON_TRY_TO_FREE_PAGES);
>                        sc->may_writepage = 1;
>                }
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2011-10-07 18:07 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-12 22:47 [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work Curt Wohlgemuth
2011-08-12 22:47 ` Curt Wohlgemuth
2011-08-12 22:47 ` [PATCH 2/2 v2] writeback: Add writeback stats for pages written Curt Wohlgemuth
2011-08-12 22:47   ` Curt Wohlgemuth
2011-08-15 13:48   ` Wu Fengguang
2011-08-15 13:48     ` Wu Fengguang
2011-08-15 17:16     ` Curt Wohlgemuth
2011-08-15 17:16       ` Curt Wohlgemuth
2011-08-15 18:40       ` Jan Kara
2011-08-15 18:40         ` Jan Kara
2011-08-15 18:56         ` Curt Wohlgemuth
2011-08-15 18:56           ` Curt Wohlgemuth
2011-08-16 13:10           ` Jan Kara
2011-08-16 13:10             ` Jan Kara
2011-08-16 12:10       ` Wu Fengguang
2011-08-15 15:03   ` Jan Kara
2011-08-15 15:03     ` Jan Kara
2011-08-15 17:24     ` Curt Wohlgemuth
2011-08-16 12:26       ` Wu Fengguang
2011-08-16 12:26         ` Wu Fengguang
2011-08-15 13:19 ` [PATCH 1/2 v2] writeback: Add a 'reason' to wb_writeback_work Wu Fengguang
2011-08-15 13:19   ` Wu Fengguang
2011-09-28 15:02 ` Christoph Hellwig
2011-09-28 15:02   ` Christoph Hellwig
2011-10-07 15:28   ` Wu Fengguang
2011-10-07 18:07     ` Curt Wohlgemuth
2011-10-07 18:07       ` Curt Wohlgemuth

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.