All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add the pagefault count into memcg stats.
@ 2011-03-27  0:14 Ying Han
  2011-03-28  0:07 ` KAMEZAWA Hiroyuki
  2011-03-29 15:30 ` Balbir Singh
  0 siblings, 2 replies; 5+ messages in thread
From: Ying Han @ 2011-03-27  0:14 UTC (permalink / raw)
  To: Balbir Singh, Daisuke Nishimura, KAMEZAWA Hiroyuki, Hugh Dickins,
	Tejun Heo, Mark Brown, Andrew Morton, Greg Thelen,
	Suleiman Souhlal
  Cc: linux-mm

Add two new stats to the per-memcg memory.stat file, which track the number of
page faults and the number of major page faults.

"pgfault"
"pgmajfault"

It is valuable to track these two stats, both for measuring an application's
performance and for measuring the efficiency of the kernel page reclaim path.

Functional test: check the total number of pgfault/pgmajfault of all
memcgs and compare with global vmstat value:

$ cat /proc/vmstat | grep fault
pgfault 1070751
pgmajfault 553

$ cat /dev/cgroup/memory.stat | grep fault
pgfault 1069962
pgmajfault 553
total_pgfault 1069966
total_pgmajfault 553

$ cat /dev/cgroup/A/memory.stat | grep fault
pgfault 199
pgmajfault 0
total_pgfault 199
total_pgmajfault 0

Performance test: run the page fault test (pft) with 16 threads faulting in 15G
of anon pages in a 16G container. No regression was noticed in "flt/cpu/s".

Sample output from pft:
TAG pft:anon-sys-default:
  Gb  Thr CLine   User     System     Wall    flt/cpu/s fault/wsec
  15   16   1     0.67s   232.11s    14.68s   16892.130 267796.518

$ ./ministat mmotm.txt mmotm_fault.txt
x mmotm.txt (w/o patch)
+ mmotm_fault.txt (w/ patch)
+-------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10     16682.962     17344.027     16913.524     16928.812      166.5362
+  10      16696.49      17480.09     16949.143     16951.448     223.56288
No difference proven at 95.0% confidence

Signed-off-by: Ying Han <yinghan@google.com>
---
 Documentation/cgroups/memory.txt |    4 +++
 fs/ncpfs/mmap.c                  |    2 +
 include/linux/memcontrol.h       |   22 +++++++++++++++
 mm/filemap.c                     |    1 +
 mm/memcontrol.c                  |   54 ++++++++++++++++++++++++++++++++++++++
 mm/memory.c                      |    2 +
 mm/shmem.c                       |    1 +
 7 files changed, 86 insertions(+), 0 deletions(-)

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index b6ed61c..2db6103 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -385,6 +385,8 @@ mapped_file	- # of bytes of mapped file (includes tmpfs/shmem)
 pgpgin		- # of pages paged in (equivalent to # of charging events).
 pgpgout		- # of pages paged out (equivalent to # of uncharging events).
 swap		- # of bytes of swap usage
+pgfault		- # of page faults.
+pgmajfault	- # of major page faults.
 inactive_anon	- # of bytes of anonymous memory and swap cache memory on
 		LRU list.
 active_anon	- # of bytes of anonymous and swap cache memory on active
@@ -406,6 +408,8 @@ total_mapped_file	- sum of all children's "cache"
 total_pgpgin		- sum of all children's "pgpgin"
 total_pgpgout		- sum of all children's "pgpgout"
 total_swap		- sum of all children's "swap"
+total_pgfault		- sum of all children's "pgfault"
+total_pgmajfault	- sum of all children's "pgmajfault"
 total_inactive_anon	- sum of all children's "inactive_anon"
 total_active_anon	- sum of all children's "active_anon"
 total_inactive_file	- sum of all children's "inactive_file"
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index a7c07b4..adb3f45 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -16,6 +16,7 @@
 #include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/fcntl.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
 	 * -- wli
 	 */
 	count_vm_event(PGMAJFAULT);
+	mem_cgroup_pgmajfault_from_mm(area->vm_mm);
 	return VM_FAULT_MAJOR;
 }
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5a5ce70..f771fc1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -147,6 +147,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
 
+void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val);
+void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val);
+void mem_cgroup_pgfault_from_mm(struct mm_struct *mm);
+void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
 #endif
@@ -354,6 +359,23 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head,
 {
 }
 
+static inline void mem_cgroup_pgfault(struct mem_cgroup *memcg,
+				      int val)
+{
+}
+
+static inline void mem_cgroup_pgmajfault(struct mem_cgroup *memcg,
+					 int val)
+{
+}
+
+static inline void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
+{
+}
+
+static inline void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/filemap.c b/mm/filemap.c
index a6cfecf..5dc5401 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1683,6 +1683,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
+		mem_cgroup_pgmajfault_from_mm(vma->vm_mm);
 		ret = VM_FAULT_MAJOR;
 retry_find:
 		page = find_get_page(mapping, offset);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4407dd0..63d66f1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -94,6 +94,8 @@ enum mem_cgroup_events_index {
 	MEM_CGROUP_EVENTS_PGPGIN,	/* # of pages paged in */
 	MEM_CGROUP_EVENTS_PGPGOUT,	/* # of pages paged out */
 	MEM_CGROUP_EVENTS_COUNT,	/* # of pages paged in/out */
+	MEM_CGROUP_EVENTS_PGFAULT,	/* # of page-faults */
+	MEM_CGROUP_EVENTS_PGMAJFAULT,	/* # of major page-faults */
 	MEM_CGROUP_EVENTS_NSTATS,
 };
 /*
@@ -585,6 +587,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 	this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
 }
 
+void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
+{
+	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
+}
+
+void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
+{
+	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
+}
+
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
 					    enum mem_cgroup_events_index idx)
 {
@@ -813,6 +825,40 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
 	return (mem == root_mem_cgroup);
 }
 
+void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
+{
+	struct mem_cgroup *mem;
+
+	if (!mm)
+		return;
+
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	if (unlikely(!mem))
+		goto out;
+	mem_cgroup_pgfault(mem, 1);
+
+out:
+	rcu_read_unlock();
+}
+
+void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
+{
+	struct mem_cgroup *mem;
+
+	if (!mm)
+		return;
+
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	if (unlikely(!mem))
+		goto out;
+	mem_cgroup_pgmajfault(mem, 1);
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(mem_cgroup_pgmajfault_from_mm);
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -3772,6 +3818,8 @@ enum {
 	MCS_PGPGIN,
 	MCS_PGPGOUT,
 	MCS_SWAP,
+	MCS_PGFAULT,
+	MCS_PGMAJFAULT,
 	MCS_INACTIVE_ANON,
 	MCS_ACTIVE_ANON,
 	MCS_INACTIVE_FILE,
@@ -3794,6 +3842,8 @@ struct {
 	{"pgpgin", "total_pgpgin"},
 	{"pgpgout", "total_pgpgout"},
 	{"swap", "total_swap"},
+	{"pgfault", "total_pgfault"},
+	{"pgmajfault", "total_pgmajfault"},
 	{"inactive_anon", "total_inactive_anon"},
 	{"active_anon", "total_active_anon"},
 	{"inactive_file", "total_inactive_file"},
@@ -3822,6 +3872,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 		val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
 		s->stat[MCS_SWAP] += val * PAGE_SIZE;
 	}
+	val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
+	s->stat[MCS_PGFAULT] += val;
+	val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
+	s->stat[MCS_PGMAJFAULT] += val;
 
 	/* per zone stat */
 	val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
diff --git a/mm/memory.c b/mm/memory.c
index 8617d39..0f7ebc9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2836,6 +2836,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* Had to read the page from swap area: Major fault */
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
+		mem_cgroup_pgmajfault_from_mm(mm);
 	} else if (PageHWPoison(page)) {
 		/*
 		 * hwpoisoned dirty swapcache pages are kept for killing
@@ -3375,6 +3376,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	__set_current_state(TASK_RUNNING);
 
 	count_vm_event(PGFAULT);
+	mem_cgroup_pgfault_from_mm(mm);
 
 	/* do counter updates before entering really critical section. */
 	check_sync_rss_stat(current);
diff --git a/mm/shmem.c b/mm/shmem.c
index ad8346b..5a82674 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1289,6 +1289,7 @@ repeat:
 			/* here we actually do the io */
 			if (type && !(*type & VM_FAULT_MAJOR)) {
 				__count_vm_event(PGMAJFAULT);
+				mem_cgroup_pgmajfault_from_mm(current->mm);
 				*type |= VM_FAULT_MAJOR;
 			}
 			spin_unlock(&info->lock);
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add the pagefault count into memcg stats.
  2011-03-27  0:14 [PATCH] Add the pagefault count into memcg stats Ying Han
@ 2011-03-28  0:07 ` KAMEZAWA Hiroyuki
  2011-03-28  3:16   ` Ying Han
  2011-03-29 15:30 ` Balbir Singh
  1 sibling, 1 reply; 5+ messages in thread
From: KAMEZAWA Hiroyuki @ 2011-03-28  0:07 UTC (permalink / raw)
  To: Ying Han
  Cc: Balbir Singh, Daisuke Nishimura, Hugh Dickins, Tejun Heo,
	Mark Brown, Andrew Morton, Greg Thelen, Suleiman Souhlal,
	linux-mm

On Sat, 26 Mar 2011 17:14:44 -0700
Ying Han <yinghan@google.com> wrote:

> Two new stats in per-memcg memory.stat which tracks the number of
> page faults and number of major page faults.
> 
> "pgfault"
> "pgmajfault"
> 
> It is valuable to track the two stats for both measuring application's
> performance as well as the efficiency of the kernel page reclaim path.
> 
> Functional test: check the total number of pgfault/pgmajfault of all
> memcgs and compare with global vmstat value:
> 
> $ cat /proc/vmstat | grep fault
> pgfault 1070751
> pgmajfault 553
> 
> $ cat /dev/cgroup/memory.stat | grep fault
> pgfault 1069962
> pgmajfault 553
> total_pgfault 1069966
> total_pgmajfault 553
> 
> $ cat /dev/cgroup/A/memory.stat | grep fault
> pgfault 199
> pgmajfault 0
> total_pgfault 199
> total_pgmajfault 0
> 
> Performance test: run page fault test(pft) wit 16 thread on faulting in 15G
> anon pages in 16G container. There is no regression noticed on the "flt/cpu/s"
> 
> Sample output from pft:
> TAG pft:anon-sys-default:
>   Gb  Thr CLine   User     System     Wall    flt/cpu/s fault/wsec
>   15   16   1     0.67s   232.11s    14.68s   16892.130 267796.518
> 
> $ ./ministat mmotm.txt mmotm_fault.txt
> x mmotm.txt (w/o patch)
> + mmotm_fault.txt (w/ patch)
> +-------------------------------------------------------------------------+
>     N           Min           Max        Median           Avg        Stddev
> x  10     16682.962     17344.027     16913.524     16928.812      166.5362
> +  10      16696.49      17480.09     16949.143     16951.448     223.56288
> No difference proven at 95.0% confidence
> 
> Signed-off-by: Ying Han <yinghan@google.com>

Hmm, maybe useful? (It would be good to describe how this differs from PGPGIN.)
In particular, you should show why this is more useful than a per-process pgfault count.
When I considered this before, I concluded that I would need per-process information
in the end... and so didn't add this.

Anyway, I have a request for the style of the function. (see below)


> ---
>  Documentation/cgroups/memory.txt |    4 +++
>  fs/ncpfs/mmap.c                  |    2 +
>  include/linux/memcontrol.h       |   22 +++++++++++++++
>  mm/filemap.c                     |    1 +
>  mm/memcontrol.c                  |   54 ++++++++++++++++++++++++++++++++++++++
>  mm/memory.c                      |    2 +
>  mm/shmem.c                       |    1 +
>  7 files changed, 86 insertions(+), 0 deletions(-)
> 
> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
> index b6ed61c..2db6103 100644
> --- a/Documentation/cgroups/memory.txt
> +++ b/Documentation/cgroups/memory.txt
> @@ -385,6 +385,8 @@ mapped_file	- # of bytes of mapped file (includes tmpfs/shmem)
>  pgpgin		- # of pages paged in (equivalent to # of charging events).
>  pgpgout		- # of pages paged out (equivalent to # of uncharging events).
>  swap		- # of bytes of swap usage
> +pgfault		- # of page faults.
> +pgmajfault	- # of major page faults.
>  inactive_anon	- # of bytes of anonymous memory and swap cache memory on
>  		LRU list.
>  active_anon	- # of bytes of anonymous and swap cache memory on active
> @@ -406,6 +408,8 @@ total_mapped_file	- sum of all children's "cache"
>  total_pgpgin		- sum of all children's "pgpgin"
>  total_pgpgout		- sum of all children's "pgpgout"
>  total_swap		- sum of all children's "swap"
> +total_pgfault		- sum of all children's "pgfault"
> +total_pgmajfault	- sum of all children's "pgmajfault"
>  total_inactive_anon	- sum of all children's "inactive_anon"
>  total_active_anon	- sum of all children's "active_anon"
>  total_inactive_file	- sum of all children's "inactive_file"
> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
> index a7c07b4..adb3f45 100644
> --- a/fs/ncpfs/mmap.c
> +++ b/fs/ncpfs/mmap.c
> @@ -16,6 +16,7 @@
>  #include <linux/mman.h>
>  #include <linux/string.h>
>  #include <linux/fcntl.h>
> +#include <linux/memcontrol.h>
>  
>  #include <asm/uaccess.h>
>  #include <asm/system.h>
> @@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
>  	 * -- wli
>  	 */
>  	count_vm_event(PGMAJFAULT);
> +	mem_cgroup_pgmajfault_from_mm(area->vm_mm);

Could you do this as  mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT) ?

<snip>

> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
> +{
> +	struct mem_cgroup *mem;
> +
> +	if (!mm)
> +		return;
> +
> +	rcu_read_lock();
> +	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
> +	if (unlikely(!mem))
> +		goto out;
> +	mem_cgroup_pgfault(mem, 1);
> +
> +out:
> +	rcu_read_unlock();
> +}
> +
> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
> +{
> +	struct mem_cgroup *mem;
> +
> +	if (!mm)
> +		return;
> +
> +	rcu_read_lock();
> +	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
> +	if (unlikely(!mem))
> +		goto out;
> +	mem_cgroup_pgmajfault(mem, 1);
> +out:
> +	rcu_read_unlock();
> +}
> +EXPORT_SYMBOL(mem_cgroup_pgmajfault_from_mm);
> +

Then you can handle the two functions above with a single function.


Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add the pagefault count into memcg stats.
  2011-03-28  0:07 ` KAMEZAWA Hiroyuki
@ 2011-03-28  3:16   ` Ying Han
  0 siblings, 0 replies; 5+ messages in thread
From: Ying Han @ 2011-03-28  3:16 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Balbir Singh, Daisuke Nishimura, Hugh Dickins, Tejun Heo,
	Mark Brown, Andrew Morton, Greg Thelen, Suleiman Souhlal,
	linux-mm

On Sun, Mar 27, 2011 at 5:07 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Sat, 26 Mar 2011 17:14:44 -0700
> Ying Han <yinghan@google.com> wrote:
>
>> Two new stats in per-memcg memory.stat which tracks the number of
>> page faults and number of major page faults.
>>
>> "pgfault"
>> "pgmajfault"
>>
>> It is valuable to track the two stats for both measuring application's
>> performance as well as the efficiency of the kernel page reclaim path.
>>
>> Functional test: check the total number of pgfault/pgmajfault of all
>> memcgs and compare with global vmstat value:
>>
>> $ cat /proc/vmstat | grep fault
>> pgfault 1070751
>> pgmajfault 553
>>
>> $ cat /dev/cgroup/memory.stat | grep fault
>> pgfault 1069962
>> pgmajfault 553
>> total_pgfault 1069966
>> total_pgmajfault 553
>>
>> $ cat /dev/cgroup/A/memory.stat | grep fault
>> pgfault 199
>> pgmajfault 0
>> total_pgfault 199
>> total_pgmajfault 0
>>
>> Performance test: run page fault test(pft) wit 16 thread on faulting in 15G
>> anon pages in 16G container. There is no regression noticed on the "flt/cpu/s"
>>
>> Sample output from pft:
>> TAG pft:anon-sys-default:
>>   Gb  Thr CLine   User     System     Wall    flt/cpu/s fault/wsec
>>   15   16   1     0.67s   232.11s    14.68s   16892.130 267796.518
>>
>> $ ./ministat mmotm.txt mmotm_fault.txt
>> x mmotm.txt (w/o patch)
>> + mmotm_fault.txt (w/ patch)
>> +-------------------------------------------------------------------------+
>>     N           Min           Max        Median           Avg        Stddev
>> x  10     16682.962     17344.027     16913.524     16928.812      166.5362
>> +  10      16696.49      17480.09     16949.143     16951.448     223.56288
>> No difference proven at 95.0% confidence
>>
>> Signed-off-by: Ying Han <yinghan@google.com>
>
> Hmm, maybe useful ? (It's good to describe what is difference with PGPGIN)
> Especially, you should show why this is useful than per process pgfault count.
> What I thought of this, I thought that I need per-process information, finally...
> and didn't add this.
>
> Anyway, I have a request for the style of the function. (see below)

Thanks for your comment, and I will post V2 shortly.

>
>
>> ---
>>  Documentation/cgroups/memory.txt |    4 +++
>>  fs/ncpfs/mmap.c                  |    2 +
>>  include/linux/memcontrol.h       |   22 +++++++++++++++
>>  mm/filemap.c                     |    1 +
>>  mm/memcontrol.c                  |   54 ++++++++++++++++++++++++++++++++++++++
>>  mm/memory.c                      |    2 +
>>  mm/shmem.c                       |    1 +
>>  7 files changed, 86 insertions(+), 0 deletions(-)
>>
>> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
>> index b6ed61c..2db6103 100644
>> --- a/Documentation/cgroups/memory.txt
>> +++ b/Documentation/cgroups/memory.txt
>> @@ -385,6 +385,8 @@ mapped_file       - # of bytes of mapped file (includes tmpfs/shmem)
>>  pgpgin               - # of pages paged in (equivalent to # of charging events).
>>  pgpgout              - # of pages paged out (equivalent to # of uncharging events).
>>  swap         - # of bytes of swap usage
>> +pgfault              - # of page faults.
>> +pgmajfault   - # of major page faults.
>>  inactive_anon        - # of bytes of anonymous memory and swap cache memory on
>>               LRU list.
>>  active_anon  - # of bytes of anonymous and swap cache memory on active
>> @@ -406,6 +408,8 @@ total_mapped_file - sum of all children's "cache"
>>  total_pgpgin         - sum of all children's "pgpgin"
>>  total_pgpgout                - sum of all children's "pgpgout"
>>  total_swap           - sum of all children's "swap"
>> +total_pgfault                - sum of all children's "pgfault"
>> +total_pgmajfault     - sum of all children's "pgmajfault"
>>  total_inactive_anon  - sum of all children's "inactive_anon"
>>  total_active_anon    - sum of all children's "active_anon"
>>  total_inactive_file  - sum of all children's "inactive_file"
>> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
>> index a7c07b4..adb3f45 100644
>> --- a/fs/ncpfs/mmap.c
>> +++ b/fs/ncpfs/mmap.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/mman.h>
>>  #include <linux/string.h>
>>  #include <linux/fcntl.h>
>> +#include <linux/memcontrol.h>
>>
>>  #include <asm/uaccess.h>
>>  #include <asm/system.h>
>> @@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
>>        * -- wli
>>        */
>>       count_vm_event(PGMAJFAULT);
>> +     mem_cgroup_pgmajfault_from_mm(area->vm_mm);
>
> Could you do this as  mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT) ?

will be included in V2.


>
> <snip>
>
>> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
>> +{
>> +     struct mem_cgroup *mem;
>> +
>> +     if (!mm)
>> +             return;
>> +
>> +     rcu_read_lock();
>> +     mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
>> +     if (unlikely(!mem))
>> +             goto out;
>> +     mem_cgroup_pgfault(mem, 1);
>> +
>> +out:
>> +     rcu_read_unlock();
>> +}
>> +
>> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
>> +{
>> +     struct mem_cgroup *mem;
>> +
>> +     if (!mm)
>> +             return;
>> +
>> +     rcu_read_lock();
>> +     mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
>> +     if (unlikely(!mem))
>> +             goto out;
>> +     mem_cgroup_pgmajfault(mem, 1);
>> +out:
>> +     rcu_read_unlock();
>> +}
>> +EXPORT_SYMBOL(mem_cgroup_pgmajfault_from_mm);
>> +
>
> Then, you can do above 2 in a function.

--Ying
>
>
> Thanks,
> -Kame
>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add the pagefault count into memcg stats.
  2011-03-27  0:14 [PATCH] Add the pagefault count into memcg stats Ying Han
  2011-03-28  0:07 ` KAMEZAWA Hiroyuki
@ 2011-03-29 15:30 ` Balbir Singh
  2011-03-29 17:30   ` Ying Han
  1 sibling, 1 reply; 5+ messages in thread
From: Balbir Singh @ 2011-03-29 15:30 UTC (permalink / raw)
  To: Ying Han
  Cc: Daisuke Nishimura, KAMEZAWA Hiroyuki, Hugh Dickins, Tejun Heo,
	Mark Brown, Andrew Morton, Greg Thelen, Suleiman Souhlal,
	linux-mm

* Ying Han <yinghan@google.com> [2011-03-26 17:14:44]:

> Two new stats in per-memcg memory.stat which tracks the number of
> page faults and number of major page faults.
> 
> "pgfault"
> "pgmajfault"
> 
> It is valuable to track the two stats for both measuring application's
> performance as well as the efficiency of the kernel page reclaim path.
> 
> Functional test: check the total number of pgfault/pgmajfault of all
> memcgs and compare with global vmstat value:
> 
> $ cat /proc/vmstat | grep fault
> pgfault 1070751
> pgmajfault 553
> 
> $ cat /dev/cgroup/memory.stat | grep fault
> pgfault 1069962
> pgmajfault 553
> total_pgfault 1069966
> total_pgmajfault 553
> 
> $ cat /dev/cgroup/A/memory.stat | grep fault
> pgfault 199
> pgmajfault 0
> total_pgfault 199
> total_pgmajfault 0
> 
> Performance test: run page fault test(pft) wit 16 thread on faulting in 15G
> anon pages in 16G container. There is no regression noticed on the "flt/cpu/s"
> 
> Sample output from pft:
> TAG pft:anon-sys-default:
>   Gb  Thr CLine   User     System     Wall    flt/cpu/s fault/wsec
>   15   16   1     0.67s   232.11s    14.68s   16892.130 267796.518
> 
> $ ./ministat mmotm.txt mmotm_fault.txt
> x mmotm.txt (w/o patch)
> + mmotm_fault.txt (w/ patch)
> +-------------------------------------------------------------------------+
>     N           Min           Max        Median           Avg        Stddev
> x  10     16682.962     17344.027     16913.524     16928.812      166.5362
> +  10      16696.49      17480.09     16949.143     16951.448     223.56288
> No difference proven at 95.0% confidence
> 
> Signed-off-by: Ying Han <yinghan@google.com>
> ---
>  Documentation/cgroups/memory.txt |    4 +++
>  fs/ncpfs/mmap.c                  |    2 +
>  include/linux/memcontrol.h       |   22 +++++++++++++++
>  mm/filemap.c                     |    1 +
>  mm/memcontrol.c                  |   54 ++++++++++++++++++++++++++++++++++++++
>  mm/memory.c                      |    2 +
>  mm/shmem.c                       |    1 +
>  7 files changed, 86 insertions(+), 0 deletions(-)
> 
> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
> index b6ed61c..2db6103 100644
> --- a/Documentation/cgroups/memory.txt
> +++ b/Documentation/cgroups/memory.txt
> @@ -385,6 +385,8 @@ mapped_file	- # of bytes of mapped file (includes tmpfs/shmem)
>  pgpgin		- # of pages paged in (equivalent to # of charging events).
>  pgpgout		- # of pages paged out (equivalent to # of uncharging events).
>  swap		- # of bytes of swap usage
> +pgfault		- # of page faults.
> +pgmajfault	- # of major page faults.
>  inactive_anon	- # of bytes of anonymous memory and swap cache memory on
>  		LRU list.
>  active_anon	- # of bytes of anonymous and swap cache memory on active
> @@ -406,6 +408,8 @@ total_mapped_file	- sum of all children's "cache"
>  total_pgpgin		- sum of all children's "pgpgin"
>  total_pgpgout		- sum of all children's "pgpgout"
>  total_swap		- sum of all children's "swap"
> +total_pgfault		- sum of all children's "pgfault"
> +total_pgmajfault	- sum of all children's "pgmajfault"
>  total_inactive_anon	- sum of all children's "inactive_anon"
>  total_active_anon	- sum of all children's "active_anon"
>  total_inactive_file	- sum of all children's "inactive_file"
> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
> index a7c07b4..adb3f45 100644
> --- a/fs/ncpfs/mmap.c
> +++ b/fs/ncpfs/mmap.c
> @@ -16,6 +16,7 @@
>  #include <linux/mman.h>
>  #include <linux/string.h>
>  #include <linux/fcntl.h>
> +#include <linux/memcontrol.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/system.h>
> @@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
>  	 * -- wli
>  	 */
>  	count_vm_event(PGMAJFAULT);
> +	mem_cgroup_pgmajfault_from_mm(area->vm_mm);
>  	return VM_FAULT_MAJOR;
>  }
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 5a5ce70..f771fc1 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -147,6 +147,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
>  						gfp_t gfp_mask);
>  u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
> 
> +void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val);
> +void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val);
> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm);
> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm);
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
>  #endif
> @@ -354,6 +359,23 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head,
>  {
>  }
> 
> +static inline void mem_cgroup_pgfault(struct mem_cgroup *memcg,
> +				      int val)
> +{
> +}
> +
> +static inline void mem_cgroup_pgmajfault(struct mem_cgroup *memcg,
> +					 int val)
> +{
> +}
> +
> +static inline void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
> +{
> +}
> +
> +static inline void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
> +{
> +}
>  #endif /* CONFIG_CGROUP_MEM_CONT */
> 
>  #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
> diff --git a/mm/filemap.c b/mm/filemap.c
> index a6cfecf..5dc5401 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1683,6 +1683,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  		/* No page in the page cache at all */
>  		do_sync_mmap_readahead(vma, ra, file, offset);
>  		count_vm_event(PGMAJFAULT);
> +		mem_cgroup_pgmajfault_from_mm(vma->vm_mm);
>  		ret = VM_FAULT_MAJOR;
>  retry_find:
>  		page = find_get_page(mapping, offset);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4407dd0..63d66f1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -94,6 +94,8 @@ enum mem_cgroup_events_index {
>  	MEM_CGROUP_EVENTS_PGPGIN,	/* # of pages paged in */
>  	MEM_CGROUP_EVENTS_PGPGOUT,	/* # of pages paged out */
>  	MEM_CGROUP_EVENTS_COUNT,	/* # of pages paged in/out */
> +	MEM_CGROUP_EVENTS_PGFAULT,	/* # of page-faults */
> +	MEM_CGROUP_EVENTS_PGMAJFAULT,	/* # of major page-faults */
>  	MEM_CGROUP_EVENTS_NSTATS,
>  };
>  /*
> @@ -585,6 +587,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
>  	this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
>  }
> 
> +void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
> +{
> +	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
> +}
> +
> +void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
> +{
> +	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
> +}
> +
>  static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
>  					    enum mem_cgroup_events_index idx)
>  {
> @@ -813,6 +825,40 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
>  	return (mem == root_mem_cgroup);
>  }
> 
> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
> +{
> +	struct mem_cgroup *mem;
> +
> +	if (!mm)
> +		return;
> +
> +	rcu_read_lock();
> +	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
> +	if (unlikely(!mem))
> +		goto out;

A lot of this can be reused -- just a minor nitpick. Maybe you can
combine this function and the one below.

> +	mem_cgroup_pgfault(mem, 1);
> +
> +out:
> +	rcu_read_unlock();
> +}
> +
> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
> +{
> +	struct mem_cgroup *mem;
> +
> +	if (!mm)
> +		return;
> +
> +	rcu_read_lock();
> +	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
> +	if (unlikely(!mem))
> +		goto out;
> +	mem_cgroup_pgmajfault(mem, 1);
> +out:
> +	rcu_read_unlock();
> +}
> +EXPORT_SYMBOL(mem_cgroup_pgmajfault_from_mm);
> +
>  /*
>   * Following LRU functions are allowed to be used without PCG_LOCK.
>   * Operations are called by routine of global LRU independently from memcg.
> @@ -3772,6 +3818,8 @@ enum {
>  	MCS_PGPGIN,
>  	MCS_PGPGOUT,
>  	MCS_SWAP,
> +	MCS_PGFAULT,
> +	MCS_PGMAJFAULT,
>  	MCS_INACTIVE_ANON,
>  	MCS_ACTIVE_ANON,
>  	MCS_INACTIVE_FILE,
> @@ -3794,6 +3842,8 @@ struct {
>  	{"pgpgin", "total_pgpgin"},
>  	{"pgpgout", "total_pgpgout"},
>  	{"swap", "total_swap"},
> +	{"pgfault", "total_pgfault"},
> +	{"pgmajfault", "total_pgmajfault"},
>  	{"inactive_anon", "total_inactive_anon"},
>  	{"active_anon", "total_active_anon"},
>  	{"inactive_file", "total_inactive_file"},
> @@ -3822,6 +3872,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
>  		val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
>  		s->stat[MCS_SWAP] += val * PAGE_SIZE;
>  	}
> +	val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
> +	s->stat[MCS_PGFAULT] += val;
> +	val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
> +	s->stat[MCS_PGMAJFAULT] += val;
> 
>  	/* per zone stat */
>  	val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
> diff --git a/mm/memory.c b/mm/memory.c
> index 8617d39..0f7ebc9 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2836,6 +2836,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
>  		/* Had to read the page from swap area: Major fault */
>  		ret = VM_FAULT_MAJOR;
>  		count_vm_event(PGMAJFAULT);
> +		mem_cgroup_pgmajfault_from_mm(mm);
>  	} else if (PageHWPoison(page)) {
>  		/*
>  		 * hwpoisoned dirty swapcache pages are kept for killing
> @@ -3375,6 +3376,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  	__set_current_state(TASK_RUNNING);
> 
>  	count_vm_event(PGFAULT);
> +	mem_cgroup_pgfault_from_mm(mm);
> 
>  	/* do counter updates before entering really critical section. */
>  	check_sync_rss_stat(current);
> diff --git a/mm/shmem.c b/mm/shmem.c
> index ad8346b..5a82674 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1289,6 +1289,7 @@ repeat:
>  			/* here we actually do the io */
>  			if (type && !(*type & VM_FAULT_MAJOR)) {
>  				__count_vm_event(PGMAJFAULT);
> +				mem_cgroup_pgmajfault_from_mm(current->mm);
>  				*type |= VM_FAULT_MAJOR;
>  			}
>  			spin_unlock(&info->lock);
> -- 
> 1.7.3.1
> 

-- 
	Three Cheers,
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add the pagefault count into memcg stats.
  2011-03-29 15:30 ` Balbir Singh
@ 2011-03-29 17:30   ` Ying Han
  0 siblings, 0 replies; 5+ messages in thread
From: Ying Han @ 2011-03-29 17:30 UTC (permalink / raw)
  To: balbir
  Cc: Daisuke Nishimura, KAMEZAWA Hiroyuki, Hugh Dickins, Tejun Heo,
	Mark Brown, Andrew Morton, Greg Thelen, Suleiman Souhlal,
	linux-mm

On Tue, Mar 29, 2011 at 8:30 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> * Ying Han <yinghan@google.com> [2011-03-26 17:14:44]:
>
>> Two new stats in per-memcg memory.stat which track the number of
>> page faults and the number of major page faults.
>>
>> "pgfault"
>> "pgmajfault"
>>
>> It is valuable to track the two stats for measuring both an application's
>> performance and the efficiency of the kernel page reclaim path.
>>
>> Functional test: check the total number of pgfault/pgmajfault of all
>> memcgs and compare with global vmstat value:
>>
>> $ cat /proc/vmstat | grep fault
>> pgfault 1070751
>> pgmajfault 553
>>
>> $ cat /dev/cgroup/memory.stat | grep fault
>> pgfault 1069962
>> pgmajfault 553
>> total_pgfault 1069966
>> total_pgmajfault 553
>>
>> $ cat /dev/cgroup/A/memory.stat | grep fault
>> pgfault 199
>> pgmajfault 0
>> total_pgfault 199
>> total_pgmajfault 0
>>
>> Performance test: run page fault test (pft) with 16 threads faulting in 15G
>> anon pages in a 16G container. There is no regression noticed on the "flt/cpu/s".
>>
>> Sample output from pft:
>> TAG pft:anon-sys-default:
>>   Gb  Thr CLine   User     System     Wall    flt/cpu/s fault/wsec
>>   15   16   1     0.67s   232.11s    14.68s   16892.130 267796.518
>>
>> $ ./ministat mmotm.txt mmotm_fault.txt
>> x mmotm.txt (w/o patch)
>> + mmotm_fault.txt (w/ patch)
>> +-------------------------------------------------------------------------+
>>     N           Min           Max        Median           Avg        Stddev
>> x  10     16682.962     17344.027     16913.524     16928.812      166.5362
>> +  10      16696.49      17480.09     16949.143     16951.448     223.56288
>> No difference proven at 95.0% confidence
>>
>> Signed-off-by: Ying Han <yinghan@google.com>
>> ---
>>  Documentation/cgroups/memory.txt |    4 +++
>>  fs/ncpfs/mmap.c                  |    2 +
>>  include/linux/memcontrol.h       |   22 +++++++++++++++
>>  mm/filemap.c                     |    1 +
>>  mm/memcontrol.c                  |   54 ++++++++++++++++++++++++++++++++++++++
>>  mm/memory.c                      |    2 +
>>  mm/shmem.c                       |    1 +
>>  7 files changed, 86 insertions(+), 0 deletions(-)
>>
>> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
>> index b6ed61c..2db6103 100644
>> --- a/Documentation/cgroups/memory.txt
>> +++ b/Documentation/cgroups/memory.txt
>> @@ -385,6 +385,8 @@ mapped_file       - # of bytes of mapped file (includes tmpfs/shmem)
>>  pgpgin               - # of pages paged in (equivalent to # of charging events).
>>  pgpgout              - # of pages paged out (equivalent to # of uncharging events).
>>  swap         - # of bytes of swap usage
>> +pgfault              - # of page faults.
>> +pgmajfault   - # of major page faults.
>>  inactive_anon        - # of bytes of anonymous memory and swap cache memory on
>>               LRU list.
>>  active_anon  - # of bytes of anonymous and swap cache memory on active
>> @@ -406,6 +408,8 @@ total_mapped_file - sum of all children's "cache"
>>  total_pgpgin         - sum of all children's "pgpgin"
>>  total_pgpgout                - sum of all children's "pgpgout"
>>  total_swap           - sum of all children's "swap"
>> +total_pgfault                - sum of all children's "pgfault"
>> +total_pgmajfault     - sum of all children's "pgmajfault"
>>  total_inactive_anon  - sum of all children's "inactive_anon"
>>  total_active_anon    - sum of all children's "active_anon"
>>  total_inactive_file  - sum of all children's "inactive_file"
>> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
>> index a7c07b4..adb3f45 100644
>> --- a/fs/ncpfs/mmap.c
>> +++ b/fs/ncpfs/mmap.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/mman.h>
>>  #include <linux/string.h>
>>  #include <linux/fcntl.h>
>> +#include <linux/memcontrol.h>
>>
>>  #include <asm/uaccess.h>
>>  #include <asm/system.h>
>> @@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
>>        * -- wli
>>        */
>>       count_vm_event(PGMAJFAULT);
>> +     mem_cgroup_pgmajfault_from_mm(area->vm_mm);
>>       return VM_FAULT_MAJOR;
>>  }
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 5a5ce70..f771fc1 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -147,6 +147,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
>>                                               gfp_t gfp_mask);
>>  u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
>>
>> +void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm);
>> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm);
>> +
>>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>  void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
>>  #endif
>> @@ -354,6 +359,23 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head,
>>  {
>>  }
>>
>> +static inline void mem_cgroup_pgfault(struct mem_cgroup *memcg,
>> +                                   int val)
>> +{
>> +}
>> +
>> +static inline void mem_cgroup_pgmajfault(struct mem_cgroup *memcg,
>> +                                      int val)
>> +{
>> +}
>> +
>> +static inline void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
>> +{
>> +}
>> +
>> +static inline void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
>> +{
>> +}
>>  #endif /* CONFIG_CGROUP_MEM_CONT */
>>
>>  #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index a6cfecf..5dc5401 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -1683,6 +1683,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>               /* No page in the page cache at all */
>>               do_sync_mmap_readahead(vma, ra, file, offset);
>>               count_vm_event(PGMAJFAULT);
>> +             mem_cgroup_pgmajfault_from_mm(vma->vm_mm);
>>               ret = VM_FAULT_MAJOR;
>>  retry_find:
>>               page = find_get_page(mapping, offset);
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 4407dd0..63d66f1 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -94,6 +94,8 @@ enum mem_cgroup_events_index {
>>       MEM_CGROUP_EVENTS_PGPGIN,       /* # of pages paged in */
>>       MEM_CGROUP_EVENTS_PGPGOUT,      /* # of pages paged out */
>>       MEM_CGROUP_EVENTS_COUNT,        /* # of pages paged in/out */
>> +     MEM_CGROUP_EVENTS_PGFAULT,      /* # of page-faults */
>> +     MEM_CGROUP_EVENTS_PGMAJFAULT,   /* # of major page-faults */
>>       MEM_CGROUP_EVENTS_NSTATS,
>>  };
>>  /*
>> @@ -585,6 +587,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
>>       this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
>>  }
>>
>> +void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
>> +}
>> +
>> +void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
>> +}
>> +
>>  static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
>>                                           enum mem_cgroup_events_index idx)
>>  {
>> @@ -813,6 +825,40 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
>>       return (mem == root_mem_cgroup);
>>  }
>>
>> +void mem_cgroup_pgfault_from_mm(struct mm_struct *mm)
>> +{
>> +     struct mem_cgroup *mem;
>> +
>> +     if (!mm)
>> +             return;
>> +
>> +     rcu_read_lock();
>> +     mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
>> +     if (unlikely(!mem))
>> +             goto out;
>
> A lot of this can be reused, just a minor nitpick. Maybe you can
> combine this function and the one below.

This has been fixed in V3 :)

--Ying
>
>> +     mem_cgroup_pgfault(mem, 1);
>> +
>> +out:
>> +     rcu_read_unlock();
>> +}
>> +
>> +void mem_cgroup_pgmajfault_from_mm(struct mm_struct *mm)
>> +{
>> +     struct mem_cgroup *mem;
>> +
>> +     if (!mm)
>> +             return;
>> +
>> +     rcu_read_lock();
>> +     mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
>> +     if (unlikely(!mem))
>> +             goto out;
>> +     mem_cgroup_pgmajfault(mem, 1);
>> +out:
>> +     rcu_read_unlock();
>> +}
>> +EXPORT_SYMBOL(mem_cgroup_pgmajfault_from_mm);
>> +
>>  /*
>>   * Following LRU functions are allowed to be used without PCG_LOCK.
>>   * Operations are called by routine of global LRU independently from memcg.
>> @@ -3772,6 +3818,8 @@ enum {
>>       MCS_PGPGIN,
>>       MCS_PGPGOUT,
>>       MCS_SWAP,
>> +     MCS_PGFAULT,
>> +     MCS_PGMAJFAULT,
>>       MCS_INACTIVE_ANON,
>>       MCS_ACTIVE_ANON,
>>       MCS_INACTIVE_FILE,
>> @@ -3794,6 +3842,8 @@ struct {
>>       {"pgpgin", "total_pgpgin"},
>>       {"pgpgout", "total_pgpgout"},
>>       {"swap", "total_swap"},
>> +     {"pgfault", "total_pgfault"},
>> +     {"pgmajfault", "total_pgmajfault"},
>>       {"inactive_anon", "total_inactive_anon"},
>>       {"active_anon", "total_active_anon"},
>>       {"inactive_file", "total_inactive_file"},
>> @@ -3822,6 +3872,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
>>               val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
>>               s->stat[MCS_SWAP] += val * PAGE_SIZE;
>>       }
>> +     val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
>> +     s->stat[MCS_PGFAULT] += val;
>> +     val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
>> +     s->stat[MCS_PGMAJFAULT] += val;
>>
>>       /* per zone stat */
>>       val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 8617d39..0f7ebc9 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2836,6 +2836,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
>>               /* Had to read the page from swap area: Major fault */
>>               ret = VM_FAULT_MAJOR;
>>               count_vm_event(PGMAJFAULT);
>> +             mem_cgroup_pgmajfault_from_mm(mm);
>>       } else if (PageHWPoison(page)) {
>>               /*
>>                * hwpoisoned dirty swapcache pages are kept for killing
>> @@ -3375,6 +3376,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>>       __set_current_state(TASK_RUNNING);
>>
>>       count_vm_event(PGFAULT);
>> +     mem_cgroup_pgfault_from_mm(mm);
>>
>>       /* do counter updates before entering really critical section. */
>>       check_sync_rss_stat(current);
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index ad8346b..5a82674 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1289,6 +1289,7 @@ repeat:
>>                       /* here we actually do the io */
>>                       if (type && !(*type & VM_FAULT_MAJOR)) {
>>                               __count_vm_event(PGMAJFAULT);
>> +                             mem_cgroup_pgmajfault_from_mm(current->mm);
>>                               *type |= VM_FAULT_MAJOR;
>>                       }
>>                       spin_unlock(&info->lock);
>> --
>> 1.7.3.1
>>
>
> --
>        Three Cheers,
>        Balbir
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-03-29 17:30 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-27  0:14 [PATCH] Add the pagefault count into memcg stats Ying Han
2011-03-28  0:07 ` KAMEZAWA Hiroyuki
2011-03-28  3:16   ` Ying Han
2011-03-29 15:30 ` Balbir Singh
2011-03-29 17:30   ` Ying Han

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.