All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] writeback: tracing support
@ 2010-06-21  1:25 Dave Chinner
  2010-06-21  1:25 ` [PATCH 1/3] writeback: initial " Dave Chinner
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Dave Chinner @ 2010-06-21  1:25 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

Hi Jens,

These are the writeback tracing patches that have not made it to
mainline yet.  Can you pull these into your writeback tree for
2.6.36?

Cheers,

Dave.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] writeback: initial tracing support
  2010-06-21  1:25 [PATCH 0/3] writeback: tracing support Dave Chinner
@ 2010-06-21  1:25 ` Dave Chinner
  2010-06-21  8:09   ` Christoph Hellwig
  2010-06-21  1:25 ` [PATCH 2/3] writeback: Add tracing to balance_dirty_pages Dave Chinner
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Dave Chinner @ 2010-06-21  1:25 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

From: From: Jens Axboe <jens.axboe@oracle.com>

Trace queue/sched/exec parts of the writeback loop. This provides
insight into when and why flusher threads are scheduled to run. e.g
a sync invocation leaves a trace like:

sync-2798  [006] 611323.335713: writeback_queue: 253:16: pages=87879, sb=0, kupdate=0, range_cyclic=0 for_background=0
sync-2798  [006] 611323.335718: writeback_sched: work=37c0, task=task
sync-2798  [006] 611323.335817: writeback_queue: 8:0: pages=92680, sb=1, kupdate=0, range_cyclic=-1 for_background=0
sync-2798  [006] 611323.335819: writeback_sched: work=35c0, task=task
sync-2798  [006] 611323.335855: writeback_queue: 253:16: pages=92680, sb=1, kupdate=0, range_cyclic=-1 for_background=0
sync-2798  [006] 611323.335857: writeback_sched: work=36c0, task=task
sync-2798  [006] 611323.335890: writeback_queue: 8:0: pages=9223372036854775807, sb=1, kupdate=0, range_cyclic=0 for_background=0
sync-2798  [006] 611323.335891: writeback_sched: work=fe58, task=task
sync-2798  [006] 611323.377341: writeback_queue: 253:16: pages=9223372036854775807, sb=1, kupdate=0, range_cyclic=0 for_background=0
sync-2798  [006] 611323.377346: writeback_sched: work=fe58, task=task

This also lays the foundation for adding more writeback tracing to
provide deeper insight into the whole writeback path.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/fs-writeback.c                |   45 ++++++++--
 include/trace/events/writeback.h |  171 ++++++++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 |    3 +
 3 files changed, 209 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/writeback.h

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f..0949dfd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/ftrace.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -62,6 +56,21 @@ struct bdi_work {
 	unsigned long state;		/* flag bits, see WS_* */
 };
 
+/*
+ * Include the creation of the trace points after defining the bdi_work and
+ * wb_writeback_args structures so that the definitions remain local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 enum {
 	WS_USED_B = 0,
 	WS_ONSTACK_B,
@@ -136,6 +145,8 @@ static void wb_work_complete(struct bdi_work *work)
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
 {
+	trace_writeback_clear(work);
+
 	/*
 	 * The caller has retrieved the work arguments from this work,
 	 * drop our reference. If this is the last ref, delete and free it
@@ -171,12 +182,16 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
 	 * If the default thread isn't there, make sure we add it. When
 	 * it gets created and wakes up, we'll run this work.
 	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	if (unlikely(list_empty_careful(&bdi->wb_list))) {
+		trace_writeback_sched(bdi, work, "default");
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
+	} else {
 		struct bdi_writeback *wb = &bdi->wb;
+		struct task_struct *task = wb->task;
 
-		if (wb->task)
+		trace_writeback_sched(bdi, work, task ? "task" : "notask");
+
+		if (task)
 			wake_up_process(wb->task);
 	}
 }
@@ -203,6 +218,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	if (work) {
 		bdi_work_init(work, args);
+		trace_writeback_queue(bdi, args);
 		bdi_queue_work(bdi, work);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
@@ -236,6 +252,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
 	bdi_work_init(&work, &args);
 	work.state |= WS_ONSTACK;
 
+	trace_writeback_queue(bdi, &args);
 	bdi_queue_work(bdi, &work);
 	bdi_wait_on_work_clear(&work);
 }
@@ -907,6 +924,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(work);
+
 		/*
 		 * If this isn't a data integrity operation, just notify
 		 * that we have seen this work and we are now starting it.
@@ -942,9 +961,13 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
+	trace_writeback_thread_start(1);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
 			last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
@@ -974,6 +997,8 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 		try_to_freeze();
 	}
 
+	trace_writeback_thread_start(0);
+
 	return 0;
 }
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..6f510fa
--- /dev/null
+++ b/include/trace/events/writeback.h
@@ -0,0 +1,171 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+
+TRACE_EVENT(writeback_queue,
+
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_args *args),
+
+	TP_ARGS(bdi, args),
+
+	TP_STRUCT__entry(
+		__array(char,		name,		16)
+		__field(long,		nr_pages)
+		__field(int,		sb)
+		__field(int,		sync_mode)
+		__field(int,		for_kupdate)
+		__field(int,		range_cyclic)
+		__field(int,		for_background)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->name, dev_name(bdi->dev), 16);
+		__entry->nr_pages	= args->nr_pages;
+		__entry->sb		= !!args->sb;
+		__entry->for_kupdate	= args->for_kupdate;
+		__entry->range_cyclic	= args->range_cyclic;
+		__entry->for_background	= args->for_background;
+	),
+
+	TP_printk("%s: pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d "
+		  "for_background=%d", __entry->name, __entry->nr_pages,
+			__entry->sb, __entry->for_kupdate,
+			__entry->range_cyclic, __entry->for_background)
+);
+
+TRACE_EVENT(writeback_sched,
+
+	TP_PROTO(struct backing_dev_info *bdi, struct bdi_work *work,
+		 const char *msg),
+
+	TP_ARGS(bdi, work, msg),
+
+	TP_STRUCT__entry(
+		__array(char,			name,		16)
+		__field(unsigned int,		work)
+		__array(char,			task,		8)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->name, dev_name(bdi->dev), 16);
+		__entry->work = (unsigned long) work & 0xffff;
+		snprintf(__entry->task, 8, "%s", msg);
+	),
+
+	TP_printk("work=%x, task=%s", __entry->work, __entry->task)
+);
+
+TRACE_EVENT(writeback_exec,
+
+	TP_PROTO(struct bdi_work *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field(unsigned int,	work)
+		__field(long,		nr_pages)
+		__field(int,		sb)
+		__field(int,		sync_mode)
+		__field(int,		for_kupdate)
+		__field(int,		range_cyclic)
+		__field(int,		for_background)
+	),
+
+	TP_fast_assign(
+		__entry->work		= (unsigned long) work & 0xffff;
+		__entry->nr_pages	= work->args.nr_pages;
+		__entry->sb		= !!work->args.sb;
+		__entry->for_kupdate	= work->args.for_kupdate;
+		__entry->range_cyclic	= work->args.range_cyclic;
+		__entry->for_background	= work->args.for_background;
+
+	),
+
+	TP_printk("work=%x pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d"
+		  " for_background=%d", __entry->work,
+			__entry->nr_pages, __entry->sb, __entry->for_kupdate,
+			__entry->range_cyclic, __entry->for_background)
+);
+
+TRACE_EVENT(writeback_clear,
+
+	TP_PROTO(struct bdi_work *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field(struct bdi_work *,	work)
+		__field(int,			refs)
+	),
+
+	TP_fast_assign(
+		__entry->work		= work;
+		__entry->refs		= atomic_read(&work->pending);
+	),
+
+	TP_printk("work=%p, refs=%d", __entry->work, __entry->refs)
+);
+
+TRACE_EVENT(writeback_pages_written,
+
+	TP_PROTO(long pages_written),
+
+	TP_ARGS(pages_written),
+
+	TP_STRUCT__entry(
+		__field(long,		pages)
+	),
+
+	TP_fast_assign(
+		__entry->pages		= pages_written;
+	),
+
+	TP_printk("%ld", __entry->pages)
+);
+
+
+TRACE_EVENT(writeback_thread_start,
+
+	TP_PROTO(int start),
+
+	TP_ARGS(start),
+
+	TP_STRUCT__entry(
+		__field(int,	start)
+	),
+
+	TP_fast_assign(
+		__entry->start = start;
+	),
+
+	TP_printk("%s", __entry->start ? "started" : "exited")
+);
+
+TRACE_EVENT(writeback_bdi_register,
+
+	TP_PROTO(const char *name, int start),
+
+	TP_ARGS(name, start),
+
+	TP_STRUCT__entry(
+		__array(char,	name,		16)
+		__field(int,	start)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->name, name, 16);
+		__entry->start = start;
+	),
+
+	TP_printk("%s: %s", __entry->name,
+			__entry->start ? "registered" : "unregistered")
+);
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a..1f7723b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -585,6 +586,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
+	trace_writeback_bdi_register(dev_name(dev), 1);
 exit:
 	return ret;
 }
@@ -647,6 +649,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		trace_writeback_bdi_register(dev_name(bdi->dev), 0);
 		bdi_prune_sb(bdi);
 
 		if (!bdi_cap_flush_forker(bdi))
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] writeback: Add tracing to balance_dirty_pages
  2010-06-21  1:25 [PATCH 0/3] writeback: tracing support Dave Chinner
  2010-06-21  1:25 ` [PATCH 1/3] writeback: initial " Dave Chinner
@ 2010-06-21  1:25 ` Dave Chinner
  2010-06-21  8:12   ` Christoph Hellwig
  2010-06-21  1:25 ` [PATCH 3/3] writeback: Add tracing to write_cache_pages Dave Chinner
  2010-06-24 12:12 ` [PATCH 0/3] writeback: tracing support Jens Axboe
  3 siblings, 1 reply; 12+ messages in thread
From: Dave Chinner @ 2010-06-21  1:25 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

From: Dave Chinner <dchinner@redhat.com>

Tracing high level background writeback events is good, but it doesn't
give the entire picture. Add IO dispatched by foreground throttling to the
writeback events.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/fs-writeback.c                |    5 ++
 include/trace/events/writeback.h |   80 ++++++++++++++++++++++++++++++++++++++
 mm/page-writeback.c              |    4 ++
 3 files changed, 89 insertions(+), 0 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0949dfd..6f777d6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -807,7 +807,11 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
+
+		trace_wbc_writeback_start(&wbc);
 		writeback_inodes_wb(wb, &wbc);
+		trace_wbc_writeback_written(&wbc);
+
 		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -835,6 +839,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		if (!list_empty(&wb->b_more_io))  {
 			inode = list_entry(wb->b_more_io.prev,
 						struct inode, i_list);
+			trace_wbc_writeback_wait(&wbc);
 			inode_wait_for_writeback(inode);
 		}
 		spin_unlock(&inode_lock);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6f510fa..7867c16 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -7,6 +7,9 @@
 #include <linux/backing-dev.h>
 #include <linux/writeback.h>
 
+struct wb_writeback_args;
+struct bdi_work;
+
 TRACE_EVENT(writeback_queue,
 
 	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_args *args),
@@ -165,6 +168,83 @@ TRACE_EVENT(writeback_bdi_register,
 	TP_printk("%s: %s", __entry->name,
 			__entry->start ? "registered" : "unregistered")
 );
+
+/* pass flags explicitly */
+DECLARE_EVENT_CLASS(wbc_class,
+	TP_PROTO(struct writeback_control *wbc),
+	TP_ARGS(wbc),
+	TP_STRUCT__entry(
+		__field(unsigned int, wbc)
+		__array(char, name, 16)
+		__field(long, nr_to_write)
+		__field(long, pages_skipped)
+		__field(int, sb)
+		__field(int, sync_mode)
+		__field(int, nonblocking)
+		__field(int, encountered_congestion)
+		__field(int, for_kupdate)
+		__field(int, for_background)
+		__field(int, for_reclaim)
+		__field(int, range_cyclic)
+		__field(int, more_io)
+		__field(unsigned long, older_than_this)
+		__field(long, range_start)
+		__field(long, range_end)
+	),
+
+	TP_fast_assign(
+		char *__name = "(none)";
+
+		__entry->wbc		= (unsigned long)wbc & 0xffff;
+		if (wbc->bdi)
+			strncpy(__entry->name, dev_name(wbc->bdi->dev), 16);
+		else
+			strncpy(__entry->name, __name, 16);
+		__entry->nr_to_write	= wbc->nr_to_write;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->sb		= !!wbc->sb;
+		__entry->sync_mode	= wbc->sync_mode;
+		__entry->for_kupdate	= wbc->for_kupdate;
+		__entry->for_background	= wbc->for_background;
+		__entry->for_reclaim	= wbc->for_reclaim;
+		__entry->range_cyclic	= wbc->range_cyclic;
+		__entry->more_io	= wbc->more_io;
+		__entry->older_than_this = wbc->older_than_this ?
+						*wbc->older_than_this : 0;
+		__entry->range_start	= (long)wbc->range_start;
+		__entry->range_end	= (long)wbc->range_end;
+	),
+
+	TP_printk("dev %s wbc=%x towrt=%ld skip=%ld sb=%d mode=%d kupd=%d "
+		"bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+		"start=0x%lx end=0x%lx",
+		__entry->name,
+		__entry->wbc,
+		__entry->nr_to_write,
+		__entry->pages_skipped,
+		__entry->sb,
+		__entry->sync_mode,
+		__entry->for_kupdate,
+		__entry->for_background,
+		__entry->for_reclaim,
+		__entry->range_cyclic,
+		__entry->more_io,
+		__entry->older_than_this,
+		__entry->range_start,
+		__entry->range_end)
+)
+
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+	TP_PROTO(struct writeback_control *wbc), \
+	TP_ARGS(wbc))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+
 #endif /* _TRACE_WRITEBACK_H */
 
 /* This part must be outside protection */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bbd396a..6c0ad0a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -536,11 +537,13 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * threshold otherwise wait until the disk writes catch
 		 * up.
 		 */
+		trace_wbc_balance_dirty_start(&wbc);
 		if (bdi_nr_reclaimable > bdi_thresh) {
 			writeback_inodes_wbc(&wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
+			trace_wbc_balance_dirty_written(&wbc);
 		}
 
 		/*
@@ -566,6 +569,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 		if (pages_written >= write_chunk)
 			break;		/* We've done our duty */
 
+		trace_wbc_balance_dirty_wait(&wbc);
 		__set_current_state(TASK_INTERRUPTIBLE);
 		io_schedule_timeout(pause);
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] writeback: Add tracing to write_cache_pages
  2010-06-21  1:25 [PATCH 0/3] writeback: tracing support Dave Chinner
  2010-06-21  1:25 ` [PATCH 1/3] writeback: initial " Dave Chinner
  2010-06-21  1:25 ` [PATCH 2/3] writeback: Add tracing to balance_dirty_pages Dave Chinner
@ 2010-06-21  1:25 ` Dave Chinner
  2010-06-24 12:12 ` [PATCH 0/3] writeback: tracing support Jens Axboe
  3 siblings, 0 replies; 12+ messages in thread
From: Dave Chinner @ 2010-06-21  1:25 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

From: Dave Chinner <dchinner@redhat.com>

Add a trace event to the ->writepage loop in write_cache_pages to give
visibility into how the ->writepage call is changing variables within the
writeback control structure. Of most interest is how wbc->nr_to_write changes
from call to call, especially with filesystems that write multiple pages
in ->writepage.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/trace/events/writeback.h |    1 +
 mm/page-writeback.c              |    1 +
 2 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 7867c16..6797c46 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -244,6 +244,7 @@ DEFINE_WBC_EVENT(wbc_writeback_wait);
 DEFINE_WBC_EVENT(wbc_balance_dirty_start);
 DEFINE_WBC_EVENT(wbc_balance_dirty_written);
 DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+DEFINE_WBC_EVENT(wbc_writepage);
 
 #endif /* _TRACE_WRITEBACK_H */
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6c0ad0a..1a361f0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -935,6 +935,7 @@ continue_unlock:
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
+			trace_wbc_writepage(wbc);
 			ret = (*writepage)(page, wbc, data);
 			if (unlikely(ret)) {
 				if (ret == AOP_WRITEPAGE_ACTIVATE) {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] writeback: initial tracing support
  2010-06-21  1:25 ` [PATCH 1/3] writeback: initial " Dave Chinner
@ 2010-06-21  8:09   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-06-21  8:09 UTC (permalink / raw)
  To: Dave Chinner; +Cc: axboe, linux-fsdevel

On Mon, Jun 21, 2010 at 11:25:24AM +1000, Dave Chinner wrote:
> +#include <linux/ftrace.h>

I think this should be <linux/tracepoint.h>.


I fear this might need some more rediffing after my latests writeback
patches.

> +	TP_fast_assign(
> +		strlcpy(__entry->name, dev_name(bdi->dev), 16);

For most other fs/block dev tracepoints we print the dev_t as
major:minor.  It would be good to have some consitency to match on
for the trace points.

> +		__entry->nr_pages	= args->nr_pages;
> +		__entry->sb		= !!args->sb;

It might be worth to store the dev_t of the superblock here?

> +	TP_printk("%s: pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d "
> +		  "for_background=%d", __entry->name, __entry->nr_pages,
> +			__entry->sb, __entry->for_kupdate,
> +			__entry->range_cyclic, __entry->for_background)

> +TRACE_EVENT(writeback_sched,
> +
> +	TP_PROTO(struct backing_dev_info *bdi, struct bdi_work *work,
> +		 const char *msg),
> +
> +	TP_ARGS(bdi, work, msg),
> +
> +	TP_STRUCT__entry(
> +		__array(char,			name,		16)
> +		__field(unsigned int,		work)
> +		__array(char,			task,		8)
> +	),

Instead of the task field I'd make this a calss with three instances
for the different types of wakeups.

> +
> +	TP_fast_assign(
> +		strlcpy(__entry->name, dev_name(bdi->dev), 16);
> +		__entry->work = (unsigned long) work & 0xffff;

The unsigned long allows to store the whole pointer.  Or we could just
store it as a void pointer and print it using %p to make the types
simpler.

> +	TP_PROTO(const char *name, int start),
> +
> +	TP_ARGS(name, start),
> +
> +	TP_STRUCT__entry(
> +		__array(char,	name,		16)
> +		__field(int,	start)
> +	),
> +
> +	TP_fast_assign(
> +		strlcpy(__entry->name, name, 16);
> +		__entry->start = start;
> +	),
> +
> +	TP_printk("%s: %s", __entry->name,
> +			__entry->start ? "registered" : "unregistered")

Again, the clean way would be to have class with two instances instead
of using one tracepoint for two different events and differenciate them
by a string.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] writeback: Add tracing to balance_dirty_pages
  2010-06-21  1:25 ` [PATCH 2/3] writeback: Add tracing to balance_dirty_pages Dave Chinner
@ 2010-06-21  8:12   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-06-21  8:12 UTC (permalink / raw)
  To: Dave Chinner; +Cc: axboe, linux-fsdevel

> +/* pass flags explicitly */

which flags?

> +	TP_fast_assign(
> +		char *__name = "(none)";
> +
> +		__entry->wbc		= (unsigned long)wbc & 0xffff;

The wbc generally is allocated on stack, does it reall make sense to
keep this?

> +		if (wbc->bdi)
> +			strncpy(__entry->name, dev_name(wbc->bdi->dev), 16);
> +		else
> +			strncpy(__entry->name, __name, 16);

> +		__entry->sb		= !!wbc->sb;

The bdi and sb fields are now gone from the writeback_control.  But
I think passing the bdi explicitly for the device name makes sense
here.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/3] writeback: tracing support
  2010-06-21  1:25 [PATCH 0/3] writeback: tracing support Dave Chinner
                   ` (2 preceding siblings ...)
  2010-06-21  1:25 ` [PATCH 3/3] writeback: Add tracing to write_cache_pages Dave Chinner
@ 2010-06-24 12:12 ` Jens Axboe
  2010-06-24 22:16   ` Dave Chinner
  3 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2010-06-24 12:12 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-fsdevel

On 2010-06-21 03:25, Dave Chinner wrote:
> Hi Jens,
> 
> These are the writeback tracing patches that have not made it to
> mainline yet.  Can you pull these into your writeback tree for
> 2.6.36?

Will you send an updated and rediffed version against the
current patches? Just generate against for-2.6.36 of the block
git tree [1].

[1] git://git.kernel.dk/linux-2.6-block.git

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/3] writeback: tracing support
  2010-06-24 12:12 ` [PATCH 0/3] writeback: tracing support Jens Axboe
@ 2010-06-24 22:16   ` Dave Chinner
  0 siblings, 0 replies; 12+ messages in thread
From: Dave Chinner @ 2010-06-24 22:16 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-fsdevel

On Thu, Jun 24, 2010 at 02:12:27PM +0200, Jens Axboe wrote:
> On 2010-06-21 03:25, Dave Chinner wrote:
> > Hi Jens,
> > 
> > These are the writeback tracing patches that have not made it to
> > mainline yet.  Can you pull these into your writeback tree for
> > 2.6.36?
> 
> Will you send an updated and rediffed version against the
> current patches? Just generate against for-2.6.36 of the block
> git tree [1].
> 
> [1] git://git.kernel.dk/linux-2.6-block.git

Yes, will do.


Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] writeback: Initial tracing support
  2010-07-07  3:24 [PATCH 0/3] writeback: Tracing support V3 Dave Chinner
@ 2010-07-07  3:24 ` Dave Chinner
  0 siblings, 0 replies; 12+ messages in thread
From: Dave Chinner @ 2010-07-07  3:24 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

Trace queue/sched/exec parts of the writeback loop. This provides
insight into when and why flusher threads are scheduled to run. e.g
a sync invocation leaves traces like:

     sync-[...]: writeback_queue: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
flush-8:0-[...]: writeback_exec: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0

This also lays the foundation for adding more writeback tracing to
provide deeper insight into the whole writeback path.

The original tracing code is from Jens Axboe, though this version is
a rewrite as a result of the code being traced changing
significantly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/fs-writeback.c                |   38 ++++++++++++---
 include/trace/events/writeback.h |   91 ++++++++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 |    3 +
 3 files changed, 124 insertions(+), 8 deletions(-)
 create mode 100644 include/trace/events/writeback.h

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c8471b3..73acab4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,6 +74,8 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
+	trace_writeback_queue(bdi, work);
+
 	spin_lock(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
@@ -74,6 +85,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 	 * it gets created and wakes up, we'll run this work.
 	 */
 	if (unlikely(!bdi->wb.task)) {
+		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
@@ -95,8 +107,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_writeback_nowork(bdi);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -751,6 +765,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -805,9 +821,13 @@ int bdi_writeback_thread(void *data)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bdi->state, BDI_pending);
 
+	trace_writeback_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
 			last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
@@ -845,6 +865,8 @@ int bdi_writeback_thread(void *data)
 	 */
 	if (!list_empty(&bdi->work_list))
 		wb_do_writeback(wb, 1);
+
+	trace_writeback_thread_stop(bdi);
 	return 0;
 }
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..562fcae
--- /dev/null
+++ b/include/trace/events/writeback.h
@@ -0,0 +1,91 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+
+struct wb_writeback_work;
+
+DECLARE_EVENT_CLASS(writeback_work_class,
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
+	TP_ARGS(bdi, work),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(long, nr_pages)
+		__field(dev_t, sb_dev)
+		__field(int, sync_mode)
+		__field(int, for_kupdate)
+		__field(int, range_cyclic)
+		__field(int, for_background)
+	),
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->nr_pages = work->nr_pages;
+		__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
+		__entry->sync_mode = work->sync_mode;
+		__entry->for_kupdate = work->for_kupdate;
+		__entry->range_cyclic = work->range_cyclic;
+		__entry->for_background	= work->for_background;
+	),
+	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+		  "kupdate=%d range_cyclic=%d background=%d",
+		  __entry->name,
+		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
+		  __entry->nr_pages,
+		  __entry->sync_mode,
+		  __entry->for_kupdate,
+		  __entry->range_cyclic,
+		  __entry->for_background
+	)
+);
+#define DEFINE_WRITEBACK_WORK_EVENT(name) \
+DEFINE_EVENT(writeback_work_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
+	TP_ARGS(bdi, work))
+DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
+
+TRACE_EVENT(writeback_pages_written,
+	TP_PROTO(long pages_written),
+	TP_ARGS(pages_written),
+	TP_STRUCT__entry(
+		__field(long,		pages)
+	),
+	TP_fast_assign(
+		__entry->pages		= pages_written;
+	),
+	TP_printk("%ld", __entry->pages)
+);
+
+DECLARE_EVENT_CLASS(writeback_class,
+	TP_PROTO(struct backing_dev_info *bdi),
+	TP_ARGS(bdi),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+	),
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+	),
+	TP_printk("bdi %s",
+		  __entry->name
+	)
+);
+#define DEFINE_WRITEBACK_EVENT(name) \
+DEFINE_EVENT(writeback_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi), \
+	TP_ARGS(bdi))
+
+DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
+DEFINE_WRITEBACK_EVENT(writeback_thread_start);
+DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bceac64..ac78a33 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -518,6 +519,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
+	trace_writeback_bdi_register(bdi);
 exit:
 	return ret;
 }
@@ -578,6 +580,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
 
 		if (!bdi_cap_flush_forker(bdi))
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] writeback: Initial tracing support
  2010-07-07  2:19   ` Christoph Hellwig
@ 2010-07-07  3:06     ` Dave Chinner
  0 siblings, 0 replies; 12+ messages in thread
From: Dave Chinner @ 2010-07-07  3:06 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, linux-fsdevel

On Tue, Jul 06, 2010 at 10:19:12PM -0400, Christoph Hellwig wrote:
> >  static void bdi_queue_work(struct backing_dev_info *bdi,
> >  		struct wb_writeback_work *work)
> >  {
> > +	trace_wb_sched_queue(bdi, work);
> 
> Okay, I'm getting more nitpicky with every repost, but the naming here
> seems odd.
> 
> First why the unreadle wb prefix instead of a proper writeback?
> And the sched in here seems rather useless, I'd just call it
> 
> 	trace_writeback_queue

Done.

> 
> > @@ -74,12 +85,16 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
> >  	 * it gets created and wakes up, we'll run this work.
> >  	 */
> >  	if (unlikely(!bdi->wb.task)) {
> > +		trace_wb_sched_default(bdi, work);
> 
> 	trace_writeback_no_thread
> 
> >  		wake_up_process(default_backing_dev_info.wb.task);
> >  	} else {
> >  		struct bdi_writeback *wb = &bdi->wb;
> >  
> > -		if (wb->task)
> > +		if (wb->task) {
> > +			trace_wb_sched_task(bdi, work);
> 
> 	trace_writeback_wake
> 
> then again I don't think we actually need this, if we got the
> trace_writeback_queue event, and no trace_writeback_no_thread it's
> implicit that we got here.

OK, fixed.

> >  			wake_up_process(wb->task);
> > +		} else
> > +			trace_wb_sched_notask(bdi, work);
> 
> This case can't actually happen as we checked for the task a few lines
> above.

Right, my bad.

> Might be worth to throw in preparation patch in your series
> to  something like:
> 
> 
> 	if (unlikely(!bdi->wb.task)) {
> 		wake_up_process(default_backing_dev_info.wb.task);
> -	} else {
> -		struct bdi_writeback *wb = &bdi->wb;
> -
> -		if (wb->task)
> -			wake_up_process(wb->task);
> -	}
> +		return;
> +	}
> +
> +	wake_up_process(bdi->wb.task);
> 
> >  	}
> >  }

Perhaps. making the above changes means it's not necessary...

> >  
> > @@ -95,8 +110,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> >  	 */
> >  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
> >  	if (!work) {
> > -		if (bdi->wb.task)
> > +		if (bdi->wb.task) {
> > +			trace_wb_sched_nowork(bdi, work);
> 
> 	trace_writeback_nowork
> 
> > +	TP_fast_assign(
> > +		strncpy(__entry->name, dev_name(bdi->dev), 32);
> > +		__entry->nr_pages = work ? work->nr_pages : 0;
> > +		__entry->sb_dev = work ? work->sb ? work->sb->s_dev : 0 : 0;
> > +		__entry->sync_mode = work ? work->sync_mode : -1;
> > +		__entry->for_kupdate = work ? work->for_kupdate : -1;
> > +		__entry->range_cyclic = work ? work->range_cyclic : -1;
> > +		__entry->for_background	= work ? work->for_background : -1;
> > +	),
> 
> If you had a separate class for those tracepoints that do not have
> a work item the code would be a lot simpler.

Yup, fixed.

I'll post a new version in a short while.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] writeback: Initial tracing support
  2010-07-06 23:41 ` [PATCH 1/3] writeback: Initial tracing support Dave Chinner
@ 2010-07-07  2:19   ` Christoph Hellwig
  2010-07-07  3:06     ` Dave Chinner
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2010-07-07  2:19 UTC (permalink / raw)
  To: Dave Chinner; +Cc: axboe, linux-fsdevel

>  static void bdi_queue_work(struct backing_dev_info *bdi,
>  		struct wb_writeback_work *work)
>  {
> +	trace_wb_sched_queue(bdi, work);

Okay, I'm getting more nitpicky with every repost, but the naming here
seems odd.

First why the unreadle wb prefix instead of a proper writeback?
And the sched in here seems rather useless, I'd just call it

	trace_writeback_queue

> @@ -74,12 +85,16 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
>  	 * it gets created and wakes up, we'll run this work.
>  	 */
>  	if (unlikely(!bdi->wb.task)) {
> +		trace_wb_sched_default(bdi, work);

	trace_writeback_no_thread

>  		wake_up_process(default_backing_dev_info.wb.task);
>  	} else {
>  		struct bdi_writeback *wb = &bdi->wb;
>  
> -		if (wb->task)
> +		if (wb->task) {
> +			trace_wb_sched_task(bdi, work);

	trace_writeback_wake

then again I don't think we actually need this, if we got the
trace_writeback_queue event, and no trace_writeback_no_thread it's
implicit that we got here.

>  			wake_up_process(wb->task);
> +		} else
> +			trace_wb_sched_notask(bdi, work);

This case can't actually happen as we checked for the task a few lines
above.  Might be worth to throw in preparation patch in your series
to  something like:


	if (unlikely(!bdi->wb.task)) {
		wake_up_process(default_backing_dev_info.wb.task);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
-	}
+		return;
+	}
+
+	wake_up_process(bdi->wb.task);

>  	}
>  }
>  
> @@ -95,8 +110,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
>  	 */
>  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
>  	if (!work) {
> -		if (bdi->wb.task)
> +		if (bdi->wb.task) {
> +			trace_wb_sched_nowork(bdi, work);

	trace_writeback_nowork

> +	TP_fast_assign(
> +		strncpy(__entry->name, dev_name(bdi->dev), 32);
> +		__entry->nr_pages = work ? work->nr_pages : 0;
> +		__entry->sb_dev = work ? work->sb ? work->sb->s_dev : 0 : 0;
> +		__entry->sync_mode = work ? work->sync_mode : -1;
> +		__entry->for_kupdate = work ? work->for_kupdate : -1;
> +		__entry->range_cyclic = work ? work->range_cyclic : -1;
> +		__entry->for_background	= work ? work->for_background : -1;
> +	),

If you had a separate class for those tracepoints that do not have
a work item the code would be a lot simpler.

> +DECLARE_EVENT_CLASS(wb_thread_class,
> +	TP_PROTO(struct backing_dev_info *bdi),
> +	TP_ARGS(bdi),
> +	TP_STRUCT__entry(
> +		__array(char, name, 32)
> +	),
> +	TP_fast_assign(
> +		strncpy(__entry->name, dev_name(bdi->dev), 32);
> +	),
> +	TP_printk("bdi %s",
> +		  __entry->name
> +	)

In fact you already have one.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] writeback: Initial tracing support
  2010-07-06 23:41 [PATCH 0/3] writeback: Tracing support V2 Dave Chinner
@ 2010-07-06 23:41 ` Dave Chinner
  2010-07-07  2:19   ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Chinner @ 2010-07-06 23:41 UTC (permalink / raw)
  To: axboe; +Cc: linux-fsdevel

Trace queue/sched/exec parts of the writeback loop. This provides
insight into when and why flusher threads are scheduled to run. e.g
a sync invocation leaves traces like:

     sync-[...]: wb_sched_queue: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
     sync-[...]: wb_sched_task: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
flush-8:0-[...]: wb_sched_exec: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0

This also lays the foundation for adding more writeback tracing to
provide deeper insight into the whole writeback path.

The original tracing code is from Jens Axboe, though this version is
a rewrite as a result of the code being traced changing
significantly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/fs-writeback.c                |   43 ++++++++++++++----
 include/trace/events/writeback.h |   93 ++++++++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 |    3 +
 3 files changed, 130 insertions(+), 9 deletions(-)
 create mode 100644 include/trace/events/writeback.h

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c8471b3..767898b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,6 +74,8 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
+	trace_wb_sched_queue(bdi, work);
+
 	spin_lock(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
@@ -74,12 +85,16 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 	 * it gets created and wakes up, we'll run this work.
 	 */
 	if (unlikely(!bdi->wb.task)) {
+		trace_wb_sched_default(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
 
-		if (wb->task)
+		if (wb->task) {
+			trace_wb_sched_task(bdi, work);
 			wake_up_process(wb->task);
+		} else
+			trace_wb_sched_notask(bdi, work);
 	}
 }
 
@@ -95,8 +110,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_wb_sched_nowork(bdi, work);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -751,6 +768,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_wb_sched_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -805,9 +824,13 @@ int bdi_writeback_thread(void *data)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bdi->state, BDI_pending);
 
+	trace_wb_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
 			last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
@@ -845,6 +868,8 @@ int bdi_writeback_thread(void *data)
 	 */
 	if (!list_empty(&bdi->work_list))
 		wb_do_writeback(wb, 1);
+
+	trace_wb_thread_stop(bdi);
 	return 0;
 }
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..d7b6f41
--- /dev/null
+++ b/include/trace/events/writeback.h
@@ -0,0 +1,93 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+
+struct wb_writeback_work;
+
+DECLARE_EVENT_CLASS(wb_sched_class,
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
+	TP_ARGS(bdi, work),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(long, nr_pages)
+		__field(dev_t, sb_dev)
+		__field(int, sync_mode)
+		__field(int, for_kupdate)
+		__field(int, range_cyclic)
+		__field(int, for_background)
+	),
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->nr_pages = work ? work->nr_pages : 0;
+		__entry->sb_dev = work ? work->sb ? work->sb->s_dev : 0 : 0;
+		__entry->sync_mode = work ? work->sync_mode : -1;
+		__entry->for_kupdate = work ? work->for_kupdate : -1;
+		__entry->range_cyclic = work ? work->range_cyclic : -1;
+		__entry->for_background	= work ? work->for_background : -1;
+	),
+	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+		  "kupdate=%d range_cyclic=%d background=%d",
+		  __entry->name,
+		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
+		  __entry->nr_pages,
+		  __entry->sync_mode,
+		  __entry->for_kupdate,
+		  __entry->range_cyclic,
+		  __entry->for_background
+	)
+);
+#define DEFINE_WB_SCHED_EVENT(name) \
+DEFINE_EVENT(wb_sched_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
+	TP_ARGS(bdi, work))
+DEFINE_WB_SCHED_EVENT(wb_sched_default);
+DEFINE_WB_SCHED_EVENT(wb_sched_queue);
+DEFINE_WB_SCHED_EVENT(wb_sched_task);
+DEFINE_WB_SCHED_EVENT(wb_sched_notask);
+DEFINE_WB_SCHED_EVENT(wb_sched_nowork);
+DEFINE_WB_SCHED_EVENT(wb_sched_exec);
+
+TRACE_EVENT(writeback_pages_written,
+	TP_PROTO(long pages_written),
+	TP_ARGS(pages_written),
+	TP_STRUCT__entry(
+		__field(long,		pages)
+	),
+	TP_fast_assign(
+		__entry->pages		= pages_written;
+	),
+	TP_printk("%ld", __entry->pages)
+);
+
+DECLARE_EVENT_CLASS(wb_thread_class,
+	TP_PROTO(struct backing_dev_info *bdi),
+	TP_ARGS(bdi),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+	),
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+	),
+	TP_printk("bdi %s",
+		  __entry->name
+	)
+);
+#define DEFINE_WB_THREAD_EVENT(name) \
+DEFINE_EVENT(wb_thread_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi), \
+	TP_ARGS(bdi))
+
+DEFINE_WB_THREAD_EVENT(wb_bdi_register);
+DEFINE_WB_THREAD_EVENT(wb_bdi_unregister);
+DEFINE_WB_THREAD_EVENT(wb_thread_start);
+DEFINE_WB_THREAD_EVENT(wb_thread_stop);
+
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bceac64..e0411a6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -518,6 +519,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
+	trace_wb_bdi_register(bdi);
 exit:
 	return ret;
 }
@@ -578,6 +580,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		trace_wb_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
 
 		if (!bdi_cap_flush_forker(bdi))
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-07-07  3:24 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-21  1:25 [PATCH 0/3] writeback: tracing support Dave Chinner
2010-06-21  1:25 ` [PATCH 1/3] writeback: initial " Dave Chinner
2010-06-21  8:09   ` Christoph Hellwig
2010-06-21  1:25 ` [PATCH 2/3] writeback: Add tracing to balance_dirty_pages Dave Chinner
2010-06-21  8:12   ` Christoph Hellwig
2010-06-21  1:25 ` [PATCH 3/3] writeback: Add tracing to write_cache_pages Dave Chinner
2010-06-24 12:12 ` [PATCH 0/3] writeback: tracing support Jens Axboe
2010-06-24 22:16   ` Dave Chinner
2010-07-06 23:41 [PATCH 0/3] writeback: Tracing support V2 Dave Chinner
2010-07-06 23:41 ` [PATCH 1/3] writeback: Initial tracing support Dave Chinner
2010-07-07  2:19   ` Christoph Hellwig
2010-07-07  3:06     ` Dave Chinner
2010-07-07  3:24 [PATCH 0/3] writeback: Tracing support V3 Dave Chinner
2010-07-07  3:24 ` [PATCH 1/3] writeback: Initial tracing support Dave Chinner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.