linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] mm: trace filemap add and del
@ 2012-11-08 19:54 Robert Jarzmik
  2012-11-20 23:57 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-08 19:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: Robert Jarzmik

Use the events API to trace filemap loading and
unloading of file pieces into the page cache.

This patch aims at tracing the eviction reload
cycle of executable and shared libraries pages in
a memory constrained environment.

The typical usage is to spot a specific device and
inode (for example /lib/libc.so) to see the eviction
cycles, and find out if frequently used code is
rather spread across many pages (bad) or coallesced
(good).

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
 include/trace/events/filemap.h |   79 ++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                   |    5 +++
 2 files changed, 84 insertions(+)
 create mode 100644 include/trace/events/filemap.h

diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
new file mode 100644
index 0000000..a8319e2
--- /dev/null
+++ b/include/trace/events/filemap.h
@@ -0,0 +1,79 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM filemap
+
+#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/device.h>
+#include <linux/kdev_t.h>
+
+TRACE_EVENT(mm_filemap_delete_from_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_no)
+		__field(unsigned long, pageofs)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_no = page->mapping->host->i_ino;
+		__entry->pageofs = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_no,
+		__entry->pageofs << PAGE_SHIFT)
+);
+
+TRACE_EVENT(mm_filemap_add_to_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_no)
+		__field(unsigned long, pageofs)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_no = page->mapping->host->i_ino;
+		__entry->pageofs = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_no,
+		__entry->pageofs)
+);
+
+#endif /* _TRACE_FILEMAP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index 3843445..9753b7c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,9 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/filemap.h>
+
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
@@ -113,6 +116,7 @@ void __delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	trace_mm_filemap_delete_from_page_cache(page);
 	/*
 	 * if we're uptodate, flush out into the cleancache, otherwise
 	 * invalidate any existing cleancache entries.  We can't leave
@@ -467,6 +471,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		} else {
 			page->mapping = NULL;
 			/* Leave page->index set: truncation relies upon it */
+			trace_mm_filemap_add_to_page_cache(page);
 			spin_unlock_irq(&mapping->tree_lock);
 			mem_cgroup_uncharge_cache_page(page);
 			page_cache_release(page);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-08 19:54 [RFC PATCH] mm: trace filemap add and del Robert Jarzmik
@ 2012-11-20 23:57 ` Andrew Morton
  2012-11-21  3:59   ` Dave Chinner
  2012-11-21 10:36   ` Robert Jarzmik
  2012-11-21  5:07 ` Hugh Dickins
  2012-11-23 14:10 ` [PATCH v2] " Robert Jarzmik
  2 siblings, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2012-11-20 23:57 UTC (permalink / raw)
  To: Robert Jarzmik; +Cc: linux-kernel

On Thu,  8 Nov 2012 20:54:10 +0100
Robert Jarzmik <robert.jarzmik@free.fr> wrote:

> Use the events API to trace filemap loading and
> unloading of file pieces into the page cache.
> 
> This patch aims at tracing the eviction reload
> cycle of executable and shared libraries pages in
> a memory constrained environment.
> 
> The typical usage is to spot a specific device and
> inode (for example /lib/libc.so) to see the eviction
> cycles, and find out if frequently used code is
> rather spread across many pages (bad) or coallesced
> (good).

I suppose that could be useful.

>
> ...
>
> +TRACE_EVENT(mm_filemap_delete_from_page_cache,
> +
> +	TP_PROTO(struct page *page),
> +
> +	TP_ARGS(page),
> +
> +	TP_STRUCT__entry(
> +		__field(struct page *, page)
> +		__field(unsigned long, i_no)

May as well call this i_ino - there's little benefit in using a
different identifier.

> +		__field(unsigned long, pageofs)

"index".

> +		__field(dev_t, s_dev)

Perhaps use super_block.s_id here

> +	),
> +
> +	TP_fast_assign(
> +		__entry->page = page;
> +		__entry->i_no = page->mapping->host->i_ino;
> +		__entry->pageofs = page->index;
> +		if (page->mapping->host->i_sb)
> +			__entry->s_dev = page->mapping->host->i_sb->s_dev;
> +		else
> +			__entry->s_dev = page->mapping->host->i_rdev;

and hence avoid all this stuff.

> +	),
> +
> +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
> +		__entry->page,
> +		page_to_pfn(__entry->page),
> +		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
> +		__entry->i_no,
> +		__entry->pageofs << PAGE_SHIFT)
> +);
> +
> +TRACE_EVENT(mm_filemap_add_to_page_cache,

Dittoes.



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-20 23:57 ` Andrew Morton
@ 2012-11-21  3:59   ` Dave Chinner
  2012-11-21 10:38     ` Robert Jarzmik
  2012-11-22 11:51     ` Robert Jarzmik
  2012-11-21 10:36   ` Robert Jarzmik
  1 sibling, 2 replies; 11+ messages in thread
From: Dave Chinner @ 2012-11-21  3:59 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Robert Jarzmik, linux-kernel

On Tue, Nov 20, 2012 at 03:57:35PM -0800, Andrew Morton wrote:
> On Thu,  8 Nov 2012 20:54:10 +0100
> Robert Jarzmik <robert.jarzmik@free.fr> wrote:
.....
> > +		__field(dev_t, s_dev)
> 
> Perhaps use super_block.s_id here
> 
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__entry->page = page;
> > +		__entry->i_no = page->mapping->host->i_ino;
> > +		__entry->pageofs = page->index;
> > +		if (page->mapping->host->i_sb)
> > +			__entry->s_dev = page->mapping->host->i_sb->s_dev;
> > +		else
> > +			__entry->s_dev = page->mapping->host->i_rdev;
> 
> and hence avoid all this stuff.

We actually have an informal convention for formating filesystem
trace events, and that is to use the device number....

> 
> > +	),
> > +
> > +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",

... and to prefix messages like:

	TP_printk("dev %d:%d ino 0x%llx ....
		  MAJOR(__entry->dev), MINOR(__entry->dev),

i.e. the start of the event message has all the identifying
information where it is easy to grep for and get all the events for
a specific dev/inode combination without even having to think about
it.

XFS, ext3/4, jbd/jdb2 and gfs2 follow this convention, so we should
keep propagating that pattern in the name of consistency, rather
than having different trace formats for different parts of the
VFS/FS layers...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-08 19:54 [RFC PATCH] mm: trace filemap add and del Robert Jarzmik
  2012-11-20 23:57 ` Andrew Morton
@ 2012-11-21  5:07 ` Hugh Dickins
  2012-11-21 10:41   ` Robert Jarzmik
  2012-11-23 14:10 ` [PATCH v2] " Robert Jarzmik
  2 siblings, 1 reply; 11+ messages in thread
From: Hugh Dickins @ 2012-11-21  5:07 UTC (permalink / raw)
  To: Robert Jarzmik; +Cc: linux-kernel

On Thu, 8 Nov 2012, Robert Jarzmik wrote:
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -467,6 +471,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
>  		} else {
>  			page->mapping = NULL;
>  			/* Leave page->index set: truncation relies upon it */
> +			trace_mm_filemap_add_to_page_cache(page);
>  			spin_unlock_irq(&mapping->tree_lock);
>  			mem_cgroup_uncharge_cache_page(page);
>  			page_cache_release(page);

I doubt if you really want your tracepoint sited just in this error path.

Hugh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-20 23:57 ` Andrew Morton
  2012-11-21  3:59   ` Dave Chinner
@ 2012-11-21 10:36   ` Robert Jarzmik
  1 sibling, 0 replies; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-21 10:36 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton <akpm@linux-foundation.org> writes:
>> +	TP_STRUCT__entry(
>> +		__field(struct page *, page)
>> +		__field(unsigned long, i_no)
>
> May as well call this i_ino - there's little benefit in using a
> different identifier.
Agreed for patch V2.
>
>> +		__field(unsigned long, pageofs)
>
> "index".
Agreed for patch V2.

>
>> +		__field(dev_t, s_dev)
>
> Perhaps use super_block.s_id here
If you imply by that that dereferencing page->mapping->host->i_sb and looking
for field s_dev, then I don't agree. Sometimes i_sb is NULL from what I recall,
especially in cases where the read page is from a journaling partition.

So unless I didn't understand you, I'll keep this part as well as the following,
to cover :
 - a mapping of an actual file
 - a mapping of a partition block

>
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->page = page;
>> +		__entry->i_no = page->mapping->host->i_ino;
>> +		__entry->pageofs = page->index;
>> +		if (page->mapping->host->i_sb)
>> +			__entry->s_dev = page->mapping->host->i_sb->s_dev;
>> +		else
>> +			__entry->s_dev = page->mapping->host->i_rdev;
>
> and hence avoid all this stuff.
See above.

>
>> +	),
>> +
>> +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
>> +		__entry->page,
>> +		page_to_pfn(__entry->page),
>> +		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
>> +		__entry->i_no,
>> +		__entry->pageofs << PAGE_SHIFT)
>> +);
>> +
>> +TRACE_EVENT(mm_filemap_add_to_page_cache,
>
Agreed for patch V2.

Thanks for the review.

-- 
Robert

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-21  3:59   ` Dave Chinner
@ 2012-11-21 10:38     ` Robert Jarzmik
  2012-11-22 11:51     ` Robert Jarzmik
  1 sibling, 0 replies; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-21 10:38 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Andrew Morton, linux-kernel

Dave Chinner <david@fromorbit.com> writes:
> We actually have an informal convention for formating filesystem
> trace events, and that is to use the device number....
>
>> 
>> > +	),
>> > +
>> > +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
>
> ... and to prefix messages like:
>
> 	TP_printk("dev %d:%d ino 0x%llx ....
> 		  MAJOR(__entry->dev), MINOR(__entry->dev),
Right, it's sensible. I'll include that for patch V2.

> XFS, ext3/4, jbd/jdb2 and gfs2 follow this convention, so we should
> keep propagating that pattern in the name of consistency, rather
> than having different trace formats for different parts of the
> VFS/FS layers...
Very true.

Cheers.

-- 
Robert

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-21  5:07 ` Hugh Dickins
@ 2012-11-21 10:41   ` Robert Jarzmik
  0 siblings, 0 replies; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-21 10:41 UTC (permalink / raw)
  To: Hugh Dickins; +Cc: linux-kernel

Hugh Dickins <hughd@google.com> writes:

> On Thu, 8 Nov 2012, Robert Jarzmik wrote:
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -467,6 +471,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
>>  		} else {
>>  			page->mapping = NULL;
>>  			/* Leave page->index set: truncation relies upon it */
>> +			trace_mm_filemap_add_to_page_cache(page);
>>  			spin_unlock_irq(&mapping->tree_lock);
>>  			mem_cgroup_uncharge_cache_page(page);
>>  			page_cache_release(page);
>
> I doubt if you really want your tracepoint sited just in this error path.

Urghh ... a git rebase mystified me.
In my original code, that tracepoint was 5 lines above, before the previous
spin_unlock_irq(), in the if branch, not the else branch.

Well spotted, I'll fix that for patch V2.

Cheers.

-- 
Robert

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-21  3:59   ` Dave Chinner
  2012-11-21 10:38     ` Robert Jarzmik
@ 2012-11-22 11:51     ` Robert Jarzmik
  2012-11-23  0:15       ` Dave Chinner
  1 sibling, 1 reply; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-22 11:51 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Andrew Morton, linux-kernel

Dave Chinner <david@fromorbit.com> writes:

> We actually have an informal convention for formating filesystem
> trace events, and that is to use the device number....
>
>> 
>> > +	),
>> > +
>> > +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
>
> ... and to prefix messages like:
>
> 	TP_printk("dev %d:%d ino 0x%llx ....
> 		  MAJOR(__entry->dev), MINOR(__entry->dev),
>
> i.e. the start of the event message has all the identifying
> information where it is easy to grep for and get all the events for
> a specific dev/inode combination without even having to think about
> it.

I cross-checked your proposition.
The "ino 0x%llx" looks wrong to me, because :
 - i_ino is "unsigned long", not "(unsigned) long long"

 - triggers a printk where "ino" looks really awfull (on a 32bits LE arm)
>  mm_filemap_add_to_page_cache: dev 0:2 ino 0xc05186e000000000 page=000a0737
>  pfn=0 ofs=3283861504

 - why print the inode number in hexadecimal format ???
   Doing a "ls -i" returns decimal format, "debugfs" returns decimal. What is
   the rational behind hexadecimal ?

I'd rather have : "dev %d:%d ino %lu page=0x%p pfn=%lu ofs=%lu".

-- 
Robert

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH] mm: trace filemap add and del
  2012-11-22 11:51     ` Robert Jarzmik
@ 2012-11-23  0:15       ` Dave Chinner
  0 siblings, 0 replies; 11+ messages in thread
From: Dave Chinner @ 2012-11-23  0:15 UTC (permalink / raw)
  To: Robert Jarzmik; +Cc: Andrew Morton, linux-kernel

On Thu, Nov 22, 2012 at 12:51:00PM +0100, Robert Jarzmik wrote:
> Dave Chinner <david@fromorbit.com> writes:
> 
> > We actually have an informal convention for formating filesystem
> > trace events, and that is to use the device number....
> >
> >> 
> >> > +	),
> >> > +
> >> > +	TP_printk("page=%p pfn=%lu blk=%d:%d inode+ofs=%lu+%lu",
> >
> > ... and to prefix messages like:
> >
> > 	TP_printk("dev %d:%d ino 0x%llx ....
> > 		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >
> > i.e. the start of the event message has all the identifying
> > information where it is easy to grep for and get all the events for
> > a specific dev/inode combination without even having to think about
> > it.
> 
> I cross-checked your proposition.
> The "ino 0x%llx" looks wrong to me, because :
>  - i_ino is "unsigned long", not "(unsigned) long long"

I just copied it from the XFS code, where the inode number is always
64 bits, even on 32 bit platforms.

>  - triggers a printk where "ino" looks really awfull (on a 32bits LE arm)
> >  mm_filemap_add_to_page_cache: dev 0:2 ino 0xc05186e000000000 page=000a0737
> >  pfn=0 ofs=3283861504

Then use the correct format specifier for a long. i.e. 0x%lx....

>  - why print the inode number in hexadecimal format ???

Because they are fair easier to parse and correlate by eye than
large decimal numbers.  Hexadecimal is frequently used because
boundaries in kernel and filesystem code typically fall on
powers-of-2 rather than powers-of-ten and hexadecimal makes it simple
to see those power-of-2 boundaries. Decimal tends to obfuscate
those boundaries, especially for large numbers....

e.g. with XFS inode numbers I can convert the inode number in
hex directly to it's location on disk in my head. e.g. decimal inode
numbers 137438980146 and 206158719668 don't look to have any real
meaning - they just look like large numbers. Convert them to hex and
you get 0x2000006802 and 0x3000046a14 repsectively.

For me, pattern recognition kicks in immediately and tells me that
the first inode is inode #2 at block offset 0x6800 in AG 32, and the
second is inode #20 at block 0x46a00 in AG 48, and I can immediately
correlate that to the filesystem geometry and storage layout and
know physically how far apart those inodes are.

IOWs, the inode numbers in hex are way more meaningful than in
decimal, and when I'm looking for needles in a haystack of
multi-gigabyte event traces (that's something I do every day), not
having to think about the relationship between two inodes is
extremely important....

>    Doing a "ls -i" returns decimal format, "debugfs" returns decimal. What is
>    the rational behind hexadecimal ?
> 
> I'd rather have : "dev %d:%d ino %lu page=0x%p pfn=%lu ofs=%lu".

Tracing is for debugging, therefore the format should be determined
by what makes life easier for those that are using the event traces
every day for debugging. Hence for anything that has defined power
of 2 boundaries like inode numbers, file offsets in the page cache,
etc, hexadecimal is much more meaningful to a programmer for
debugging purposes....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] mm: trace filemap add and del
  2012-11-08 19:54 [RFC PATCH] mm: trace filemap add and del Robert Jarzmik
  2012-11-20 23:57 ` Andrew Morton
  2012-11-21  5:07 ` Hugh Dickins
@ 2012-11-23 14:10 ` Robert Jarzmik
  2012-11-23 23:34   ` [PATCH v3] " Robert Jarzmik
  2 siblings, 1 reply; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-23 14:10 UTC (permalink / raw)
  To: Andrew Morton, Dave Chinner, Hugh Dickins
  Cc: linux-mm, linux-kernel, Robert Jarzmik

Use the events API to trace filemap loading and
unloading of file pieces into the page cache.

This patch aims at tracing the eviction reload
cycle of executable and shared libraries pages in
a memory constrained environment.

The typical usage is to spot a specific device and
inode (for example /lib/libc.so) to see the eviction
cycles, and find out if frequently used code is
rather spread across many pages (bad) or coallesced
(good).

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
Since V1:
 - included Andrew's comments
 - included Dave's comment
 - fixed according to Hugh's comment
---
 include/trace/events/filemap.h |   79 ++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                   |    5 +++
 2 files changed, 84 insertions(+)
 create mode 100644 include/trace/events/filemap.h

diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
new file mode 100644
index 0000000..529b80d
--- /dev/null
+++ b/include/trace/events/filemap.h
@@ -0,0 +1,79 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM filemap
+
+#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/device.h>
+#include <linux/kdev_t.h>
+
+TRACE_EVENT(mm_filemap_delete_from_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_ino)
+		__field(unsigned long, index)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_ino = page->mapping->host->i_ino;
+		__entry->index = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("dev %d:%d ino %lu page=%p pfn=%lu ofs=%lu",
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino,
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->index << PAGE_SHIFT)
+);
+
+TRACE_EVENT(mm_filemap_add_to_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_ino)
+		__field(unsigned long, index)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_ino = page->mapping->host->i_ino;
+		__entry->index = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("dev %d:%d ino %lu page=%p pfn=%lu ofs=%lu",
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino,
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->index << PAGE_SHIFT)
+);
+
+#endif /* _TRACE_FILEMAP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee7..1ee7cf6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,9 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/filemap.h>
+
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
@@ -113,6 +116,7 @@ void __delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	trace_mm_filemap_delete_from_page_cache(page);
 	/*
 	 * if we're uptodate, flush out into the cleancache, otherwise
 	 * invalidate any existing cleancache entries.  We can't leave
@@ -463,6 +467,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
+			trace_mm_filemap_add_to_page_cache(page);
 			spin_unlock_irq(&mapping->tree_lock);
 		} else {
 			page->mapping = NULL;
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3] mm: trace filemap add and del
  2012-11-23 14:10 ` [PATCH v2] " Robert Jarzmik
@ 2012-11-23 23:34   ` Robert Jarzmik
  0 siblings, 0 replies; 11+ messages in thread
From: Robert Jarzmik @ 2012-11-23 23:34 UTC (permalink / raw)
  To: Andrew Morton, Dave Chinner, Hugh Dickins
  Cc: linux-mm, linux-kernel, Robert Jarzmik

Use the events API to trace filemap loading and
unloading of file pieces into the page cache.

This patch aims at tracing the eviction reload
cycle of executable and shared libraries pages in
a memory constrained environment.

The typical usage is to spot a specific device and
inode (for example /lib/libc.so) to see the eviction
cycles, and find out if frequently used code is
rather spread across many pages (bad) or coallesced
(good).

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
Since V1:
 - included Andrew's comments
 - included Dave's comment
 - fixed according to Hugh's comment
Since V2:
 - amended inode print to hexadecimal format
---
 include/trace/events/filemap.h |   79 ++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                   |    5 +++
 2 files changed, 84 insertions(+)
 create mode 100644 include/trace/events/filemap.h

diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
new file mode 100644
index 0000000..2d36386
--- /dev/null
+++ b/include/trace/events/filemap.h
@@ -0,0 +1,79 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM filemap
+
+#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/device.h>
+#include <linux/kdev_t.h>
+
+TRACE_EVENT(mm_filemap_delete_from_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_ino)
+		__field(unsigned long, index)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_ino = page->mapping->host->i_ino;
+		__entry->index = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino,
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->index << PAGE_SHIFT)
+);
+
+TRACE_EVENT(mm_filemap_add_to_page_cache,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(unsigned long, i_ino)
+		__field(unsigned long, index)
+		__field(dev_t, s_dev)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->i_ino = page->mapping->host->i_ino;
+		__entry->index = page->index;
+		if (page->mapping->host->i_sb)
+			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		else
+			__entry->s_dev = page->mapping->host->i_rdev;
+	),
+
+	TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
+		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino,
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->index << PAGE_SHIFT)
+);
+
+#endif /* _TRACE_FILEMAP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee7..1ee7cf6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,9 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/filemap.h>
+
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
@@ -113,6 +116,7 @@ void __delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	trace_mm_filemap_delete_from_page_cache(page);
 	/*
 	 * if we're uptodate, flush out into the cleancache, otherwise
 	 * invalidate any existing cleancache entries.  We can't leave
@@ -463,6 +467,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
+			trace_mm_filemap_add_to_page_cache(page);
 			spin_unlock_irq(&mapping->tree_lock);
 		} else {
 			page->mapping = NULL;
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-11-23 23:34 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-08 19:54 [RFC PATCH] mm: trace filemap add and del Robert Jarzmik
2012-11-20 23:57 ` Andrew Morton
2012-11-21  3:59   ` Dave Chinner
2012-11-21 10:38     ` Robert Jarzmik
2012-11-22 11:51     ` Robert Jarzmik
2012-11-23  0:15       ` Dave Chinner
2012-11-21 10:36   ` Robert Jarzmik
2012-11-21  5:07 ` Hugh Dickins
2012-11-21 10:41   ` Robert Jarzmik
2012-11-23 14:10 ` [PATCH v2] " Robert Jarzmik
2012-11-23 23:34   ` [PATCH v3] " Robert Jarzmik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).