All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V5] memcg: add memory.numastat api for numa statistics
@ 2011-05-20 22:01 Ying Han
  2011-05-20 23:21 ` Rik van Riel
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Ying Han @ 2011-05-20 22:01 UTC (permalink / raw)
  To: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Balbir Singh,
	Tejun Heo, Pavel Emelyanov, KAMEZAWA Hiroyuki, Andrew Morton,
	Li Zefan, Mel Gorman, Christoph Lameter, Johannes Weiner,
	Rik van Riel, Hugh Dickins, Michal Hocko, Dave Hansen,
	Zhu Yanhai
  Cc: linux-mm

The new API exports numa_maps per-memcg basis. This is a piece of useful
information where it exports per-memcg page distribution across real numa
nodes.

One of the usecase is evaluating application performance by combining this
information w/ the cpu allocation to the application.

The output of memory.numa_stat tries to follow a format similar to numa_maps,
like:

total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
unevictable=<total unevictable pages> N0=<node 0 pages> N1=<node 1 pages> ...

And we have per-node:
total = file + anon + unevictable

$ cat /dev/cgroup/memory/memory.numa_stat
total=250020 N0=87620 N1=52367 N2=45298 N3=64735
file=225232 N0=83402 N1=46160 N2=40522 N3=55148
anon=21053 N0=3424 N1=6207 N2=4776 N3=6646
unevictable=3735 N0=794 N1=0 N2=0 N3=2941

This patch is based on mmotm-2011-05-06-16-39

change v5..v4:
1. disable the API on non-NUMA kernels.

change v4..v3:
1. add per-node "unevictable" value.
2. change the functions to be static.

change v3..v2:
1. calculate the "total" based on the per-memcg lru size instead of rss+cache.
this makes the "total" value consistent with the per-node values that follow
after.

change v2..v1:
1. add also the file and anon pages on per-node distribution.

Signed-off-by: Ying Han <yinghan@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
---
 mm/memcontrol.c |  155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 155 insertions(+), 0 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e14677c..ced414b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1162,6 +1162,93 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
 	return MEM_CGROUP_ZSTAT(mz, lru);
 }
 
+#ifdef CONFIG_NUMA
+static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
+							int nid)
+{
+	unsigned long ret;
+
+	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
+		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
+
+	return ret;
+}
+
+static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
+{
+	u64 total = 0;
+	int nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY)
+		total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
+
+	return total;
+}
+
+static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
+							int nid)
+{
+	unsigned long ret;
+
+	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
+		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
+
+	return ret;
+}
+
+static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
+{
+	u64 total = 0;
+	int nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY)
+		total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
+
+	return total;
+}
+
+static unsigned long
+mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)
+{
+	return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
+}
+
+static unsigned long
+mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
+{
+	u64 total = 0;
+	int nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY)
+		total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);
+
+	return total;
+}
+
+static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+							int nid)
+{
+	enum lru_list l;
+	u64 total = 0;
+
+	for_each_lru(l)
+		total += mem_cgroup_get_zonestat_node(memcg, nid, l);
+
+	return total;
+}
+
+static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
+{
+	u64 total = 0;
+	int nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY)
+		total += mem_cgroup_node_nr_lru_pages(memcg, nid);
+
+	return total;
+}
+#endif /* CONFIG_NUMA */
+
 struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
 						      struct zone *zone)
 {
@@ -4048,6 +4135,51 @@ mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 		mem_cgroup_get_local_stat(iter, s);
 }
 
+#ifdef CONFIG_NUMA
+static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
+{
+	int nid;
+	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
+	unsigned long node_nr;
+	struct cgroup *cont = m->private;
+	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+
+	total_nr = mem_cgroup_nr_lru_pages(mem_cont);
+	seq_printf(m, "total=%lu", total_nr);
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
+		seq_printf(m, " N%d=%lu", nid, node_nr);
+	}
+	seq_putc(m, '\n');
+
+	file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
+	seq_printf(m, "file=%lu", file_nr);
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
+		seq_printf(m, " N%d=%lu", nid, node_nr);
+	}
+	seq_putc(m, '\n');
+
+	anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
+	seq_printf(m, "anon=%lu", anon_nr);
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
+		seq_printf(m, " N%d=%lu", nid, node_nr);
+	}
+	seq_putc(m, '\n');
+
+	unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
+	seq_printf(m, "unevictable=%lu", unevictable_nr);
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
+									nid);
+		seq_printf(m, " N%d=%lu", nid, node_nr);
+	}
+	seq_putc(m, '\n');
+	return 0;
+}
+#endif /* CONFIG_NUMA */
+
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 				 struct cgroup_map_cb *cb)
 {
@@ -4058,6 +4190,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	memset(&mystat, 0, sizeof(mystat));
 	mem_cgroup_get_local_stat(mem_cont, &mystat);
 
+
 	for (i = 0; i < NR_MCS_STAT; i++) {
 		if (i == MCS_SWAP && !do_swap_account)
 			continue;
@@ -4481,6 +4614,22 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 	return 0;
 }
 
+#ifdef CONFIG_NUMA
+static const struct file_operations mem_control_numa_stat_file_operations = {
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
+{
+	struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
+
+	file->f_op = &mem_control_numa_stat_file_operations;
+	return single_open(file, mem_control_numa_stat_show, cont);
+}
+#endif /* CONFIG_NUMA */
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -4544,6 +4693,12 @@ static struct cftype mem_cgroup_files[] = {
 		.unregister_event = mem_cgroup_oom_unregister_event,
 		.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
 	},
+#ifdef CONFIG_NUMA
+	{
+		.name = "numa_stat",
+		.open = mem_control_numa_stat_open,
+	},
+#endif
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-20 22:01 [PATCH V5] memcg: add memory.numastat api for numa statistics Ying Han
@ 2011-05-20 23:21 ` Rik van Riel
  2011-05-23  0:08 ` KAMEZAWA Hiroyuki
  2011-05-24 15:46 ` Balbir Singh
  2 siblings, 0 replies; 7+ messages in thread
From: Rik van Riel @ 2011-05-20 23:21 UTC (permalink / raw)
  To: Ying Han
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Balbir Singh,
	Tejun Heo, Pavel Emelyanov, KAMEZAWA Hiroyuki, Andrew Morton,
	Li Zefan, Mel Gorman, Christoph Lameter, Johannes Weiner,
	Hugh Dickins, Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

On 05/20/2011 06:01 PM, Ying Han wrote:
> The new API exports numa_maps per-memcg basis. This is a piece of useful
> information where it exports per-memcg page distribution across real numa
> nodes.
>
> One of the usecase is evaluating application performance by combining this
> information w/ the cpu allocation to the application.
>
> The output of the memory.numastat tries to follow w/ simiar format of numa_maps
> like:

> Signed-off-by: Ying Han<yinghan@google.com>
> Acked-by: KAMEZAWA Hiroyuki<kamezawa.hiroyu@jp.fujitsu.com>
> Acked-by: Daisuke Nishimura<nishimura@mxp.nes.nec.co.jp>

Acked-by: Rik van Riel<riel@redhat.com>

-- 
All rights reversed

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-20 22:01 [PATCH V5] memcg: add memory.numastat api for numa statistics Ying Han
  2011-05-20 23:21 ` Rik van Riel
@ 2011-05-23  0:08 ` KAMEZAWA Hiroyuki
  2011-05-23 20:53   ` Ying Han
  2011-05-24 15:46 ` Balbir Singh
  2 siblings, 1 reply; 7+ messages in thread
From: KAMEZAWA Hiroyuki @ 2011-05-23  0:08 UTC (permalink / raw)
  To: Ying Han
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Balbir Singh,
	Tejun Heo, Pavel Emelyanov, Andrew Morton, Li Zefan, Mel Gorman,
	Christoph Lameter, Johannes Weiner, Rik van Riel, Hugh Dickins,
	Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

On Fri, 20 May 2011 15:01:58 -0700
Ying Han <yinghan@google.com> wrote:

> The new API exports numa_maps per-memcg basis. This is a piece of useful
> information where it exports per-memcg page distribution across real numa
> nodes.
> 
> One of the usecase is evaluating application performance by combining this
> information w/ the cpu allocation to the application.
> 
> The output of the memory.numastat tries to follow w/ simiar format of numa_maps
> like:
> 
> total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
> file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
> anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> 

Ah, please update the Documentation as well, please.

Thanks,
-Kame


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-23  0:08 ` KAMEZAWA Hiroyuki
@ 2011-05-23 20:53   ` Ying Han
  0 siblings, 0 replies; 7+ messages in thread
From: Ying Han @ 2011-05-23 20:53 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Balbir Singh,
	Tejun Heo, Pavel Emelyanov, Andrew Morton, Li Zefan, Mel Gorman,
	Christoph Lameter, Johannes Weiner, Rik van Riel, Hugh Dickins,
	Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

On Sun, May 22, 2011 at 5:08 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Fri, 20 May 2011 15:01:58 -0700
> Ying Han <yinghan@google.com> wrote:
>
>> The new API exports numa_maps per-memcg basis. This is a piece of useful
>> information where it exports per-memcg page distribution across real numa
>> nodes.
>>
>> One of the usecase is evaluating application performance by combining this
>> information w/ the cpu allocation to the application.
>>
>> The output of the memory.numastat tries to follow w/ simiar format of numa_maps
>> like:
>>
>> total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
>> file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
>> anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
>> unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
>>
>
> Ah, please update Documentaion please.

Sure, will send out patch for the Documentation.

--Ying
>
> Thanks,
> -Kame
>
>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-20 22:01 [PATCH V5] memcg: add memory.numastat api for numa statistics Ying Han
  2011-05-20 23:21 ` Rik van Riel
  2011-05-23  0:08 ` KAMEZAWA Hiroyuki
@ 2011-05-24 15:46 ` Balbir Singh
  2011-05-24 16:54   ` Ying Han
  2 siblings, 1 reply; 7+ messages in thread
From: Balbir Singh @ 2011-05-24 15:46 UTC (permalink / raw)
  To: Ying Han
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Tejun Heo,
	Pavel Emelyanov, KAMEZAWA Hiroyuki, Andrew Morton, Li Zefan,
	Mel Gorman, Christoph Lameter, Johannes Weiner, Rik van Riel,
	Hugh Dickins, Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

* Ying Han <yinghan@google.com> [2011-05-20 15:01:58]:

> The new API exports numa_maps per-memcg basis. This is a piece of useful
> information where it exports per-memcg page distribution across real numa
> nodes.
> 
> One of the usecase is evaluating application performance by combining this
> information w/ the cpu allocation to the application.
> 
> The output of the memory.numastat tries to follow w/ simiar format of numa_maps
> like:
> 
> total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
> file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
> anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> 
> And we have per-node:
> total = file + anon + unevictable
> 
> $ cat /dev/cgroup/memory/memory.numa_stat
> total=250020 N0=87620 N1=52367 N2=45298 N3=64735
> file=225232 N0=83402 N1=46160 N2=40522 N3=55148
> anon=21053 N0=3424 N1=6207 N2=4776 N3=6646
> unevictable=3735 N0=794 N1=0 N2=0 N3=2941
> 
> This patch is based on mmotm-2011-05-06-16-39
> 
> change v5..v4:
> 1. disable the API non-NUMA kernel.
> 
> change v4..v3:
> 1. add per-node "unevictable" value.
> 2. change the functions to be static.
> 
> change v3..v2:
> 1. calculate the "total" based on the per-memcg lru size instead of rss+cache.
> this makes the "total" value to be consistant w/ the per-node values follows
> after.
> 
> change v2..v1:
> 1. add also the file and anon pages on per-node distribution.
> 
> Signed-off-by: Ying Han <yinghan@google.com>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
> ---
>  mm/memcontrol.c |  155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 155 insertions(+), 0 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index e14677c..ced414b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1162,6 +1162,93 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
>  	return MEM_CGROUP_ZSTAT(mz, lru);
>  }
> 
> +#ifdef CONFIG_NUMA
> +static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
> +							int nid)
> +{
> +	unsigned long ret;
> +
> +	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
> +		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
> +
> +	return ret;
> +}
> +
> +static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
> +{
> +	u64 total = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_HIGH_MEMORY)
> +		total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
> +
> +	return total;
> +}
> +
> +static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
> +							int nid)
> +{
> +	unsigned long ret;
> +
> +	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
> +		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
> +
> +	return ret;
> +}
> +
> +static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
> +{
> +	u64 total = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_HIGH_MEMORY)
> +		total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
> +
> +	return total;
> +}
> +
> +static unsigned long
> +mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)
> +{
> +	return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
> +}
> +
> +static unsigned long
> +mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
> +{
> +	u64 total = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_HIGH_MEMORY)
> +		total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);
> +
> +	return total;
> +}
> +
> +static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
> +							int nid)
> +{
> +	enum lru_list l;
> +	u64 total = 0;
> +
> +	for_each_lru(l)
> +		total += mem_cgroup_get_zonestat_node(memcg, nid, l);
> +
> +	return total;
> +}
> +
> +static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
> +{
> +	u64 total = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_HIGH_MEMORY)
> +		total += mem_cgroup_node_nr_lru_pages(memcg, nid);
> +
> +	return total;
> +}
> +#endif /* CONFIG_NUMA */
> +
>  struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
>  						      struct zone *zone)
>  {
> @@ -4048,6 +4135,51 @@ mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
>  		mem_cgroup_get_local_stat(iter, s);
>  }
> 
> +#ifdef CONFIG_NUMA
> +static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
> +{
> +	int nid;
> +	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
> +	unsigned long node_nr;
> +	struct cgroup *cont = m->private;
> +	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
> +
> +	total_nr = mem_cgroup_nr_lru_pages(mem_cont);
> +	seq_printf(m, "total=%lu", total_nr);
> +	for_each_node_state(nid, N_HIGH_MEMORY) {
> +		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
> +		seq_printf(m, " N%d=%lu", nid, node_nr);
> +	}
> +	seq_putc(m, '\n');
> +
> +	file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
> +	seq_printf(m, "file=%lu", file_nr);
> +	for_each_node_state(nid, N_HIGH_MEMORY) {
> +		node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
> +		seq_printf(m, " N%d=%lu", nid, node_nr);
> +	}
> +	seq_putc(m, '\n');
> +
> +	anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
> +	seq_printf(m, "anon=%lu", anon_nr);
> +	for_each_node_state(nid, N_HIGH_MEMORY) {
> +		node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
> +		seq_printf(m, " N%d=%lu", nid, node_nr);
> +	}
> +	seq_putc(m, '\n');
> +
> +	unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
> +	seq_printf(m, "unevictable=%lu", unevictable_nr);
> +	for_each_node_state(nid, N_HIGH_MEMORY) {
> +		node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
> +									nid);
> +		seq_printf(m, " N%d=%lu", nid, node_nr);
> +	}
> +	seq_putc(m, '\n');
> +	return 0;
> +}
> +#endif /* CONFIG_NUMA */
> +
>  static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
>  				 struct cgroup_map_cb *cb)
>  {
> @@ -4058,6 +4190,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
>  	memset(&mystat, 0, sizeof(mystat));
>  	mem_cgroup_get_local_stat(mem_cont, &mystat);
> 
> +
>  	for (i = 0; i < NR_MCS_STAT; i++) {
>  		if (i == MCS_SWAP && !do_swap_account)
>  			continue;
> @@ -4481,6 +4614,22 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
>  	return 0;
>  }
> 
> +#ifdef CONFIG_NUMA
> +static const struct file_operations mem_control_numa_stat_file_operations = {
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = single_release,
> +};
> +

Do we need this?


> +static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
> +{
> +	struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
> +
> +	file->f_op = &mem_control_numa_stat_file_operations;
> +	return single_open(file, mem_control_numa_stat_show, cont);
> +}
> +#endif /* CONFIG_NUMA */
> +
>  static struct cftype mem_cgroup_files[] = {
>  	{
>  		.name = "usage_in_bytes",
> @@ -4544,6 +4693,12 @@ static struct cftype mem_cgroup_files[] = {
>  		.unregister_event = mem_cgroup_oom_unregister_event,
>  		.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
>  	},
> +#ifdef CONFIG_NUMA
> +	{
> +		.name = "numa_stat",
> +		.open = mem_control_numa_stat_open,
> +	},
> +#endif

Can't we do this the way we do the stats file? Please see
mem_control_stat_show().

>  };
> 
>  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
> -- 
> 1.7.3.1
> 

-- 
	Three Cheers,
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-24 15:46 ` Balbir Singh
@ 2011-05-24 16:54   ` Ying Han
  2011-05-25  3:25     ` Balbir Singh
  0 siblings, 1 reply; 7+ messages in thread
From: Ying Han @ 2011-05-24 16:54 UTC (permalink / raw)
  To: balbir
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Tejun Heo,
	Pavel Emelyanov, KAMEZAWA Hiroyuki, Andrew Morton, Li Zefan,
	Mel Gorman, Christoph Lameter, Johannes Weiner, Rik van Riel,
	Hugh Dickins, Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

[-- Attachment #1: Type: text/plain, Size: 9456 bytes --]

On Tue, May 24, 2011 at 8:46 AM, Balbir Singh <balbir@linux.vnet.ibm.com>wrote:

> * Ying Han <yinghan@google.com> [2011-05-20 15:01:58]:
>
> > The new API exports numa_maps per-memcg basis. This is a piece of useful
> > information where it exports per-memcg page distribution across real numa
> > nodes.
> >
> > One of the usecase is evaluating application performance by combining
> this
> > information w/ the cpu allocation to the application.
> >
> > The output of the memory.numastat tries to follow w/ simiar format of
> numa_maps
> > like:
> >
> > total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
> > file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
> > anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> > unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
> >
> > And we have per-node:
> > total = file + anon + unevictable
> >
> > $ cat /dev/cgroup/memory/memory.numa_stat
> > total=250020 N0=87620 N1=52367 N2=45298 N3=64735
> > file=225232 N0=83402 N1=46160 N2=40522 N3=55148
> > anon=21053 N0=3424 N1=6207 N2=4776 N3=6646
> > unevictable=3735 N0=794 N1=0 N2=0 N3=2941
> >
> > This patch is based on mmotm-2011-05-06-16-39
> >
> > change v5..v4:
> > 1. disable the API non-NUMA kernel.
> >
> > change v4..v3:
> > 1. add per-node "unevictable" value.
> > 2. change the functions to be static.
> >
> > change v3..v2:
> > 1. calculate the "total" based on the per-memcg lru size instead of
> rss+cache.
> > this makes the "total" value to be consistant w/ the per-node values
> follows
> > after.
> >
> > change v2..v1:
> > 1. add also the file and anon pages on per-node distribution.
> >
> > Signed-off-by: Ying Han <yinghan@google.com>
> > Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
> > ---
> >  mm/memcontrol.c |  155
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 files changed, 155 insertions(+), 0 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index e14677c..ced414b 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1162,6 +1162,93 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct
> mem_cgroup *memcg,
> >       return MEM_CGROUP_ZSTAT(mz, lru);
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup
> *memcg,
> > +                                                     int nid)
> > +{
> > +     unsigned long ret;
> > +
> > +     ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
> > +             mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
> > +
> > +     return ret;
> > +}
> > +
> > +static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup
> *memcg)
> > +{
> > +     u64 total = 0;
> > +     int nid;
> > +
> > +     for_each_node_state(nid, N_HIGH_MEMORY)
> > +             total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
> > +
> > +     return total;
> > +}
> > +
> > +static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup
> *memcg,
> > +                                                     int nid)
> > +{
> > +     unsigned long ret;
> > +
> > +     ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
> > +             mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
> > +
> > +     return ret;
> > +}
> > +
> > +static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup
> *memcg)
> > +{
> > +     u64 total = 0;
> > +     int nid;
> > +
> > +     for_each_node_state(nid, N_HIGH_MEMORY)
> > +             total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
> > +
> > +     return total;
> > +}
> > +
> > +static unsigned long
> > +mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int
> nid)
> > +{
> > +     return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
> > +}
> > +
> > +static unsigned long
> > +mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
> > +{
> > +     u64 total = 0;
> > +     int nid;
> > +
> > +     for_each_node_state(nid, N_HIGH_MEMORY)
> > +             total += mem_cgroup_node_nr_unevictable_lru_pages(memcg,
> nid);
> > +
> > +     return total;
> > +}
> > +
> > +static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup
> *memcg,
> > +                                                     int nid)
> > +{
> > +     enum lru_list l;
> > +     u64 total = 0;
> > +
> > +     for_each_lru(l)
> > +             total += mem_cgroup_get_zonestat_node(memcg, nid, l);
> > +
> > +     return total;
> > +}
> > +
> > +static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
> > +{
> > +     u64 total = 0;
> > +     int nid;
> > +
> > +     for_each_node_state(nid, N_HIGH_MEMORY)
> > +             total += mem_cgroup_node_nr_lru_pages(memcg, nid);
> > +
> > +     return total;
> > +}
> > +#endif /* CONFIG_NUMA */
> > +
> >  struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup
> *memcg,
> >                                                     struct zone *zone)
> >  {
> > @@ -4048,6 +4135,51 @@ mem_cgroup_get_total_stat(struct mem_cgroup *mem,
> struct mcs_total_stat *s)
> >               mem_cgroup_get_local_stat(iter, s);
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
> > +{
> > +     int nid;
> > +     unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
> > +     unsigned long node_nr;
> > +     struct cgroup *cont = m->private;
> > +     struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
> > +
> > +     total_nr = mem_cgroup_nr_lru_pages(mem_cont);
> > +     seq_printf(m, "total=%lu", total_nr);
> > +     for_each_node_state(nid, N_HIGH_MEMORY) {
> > +             node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
> > +             seq_printf(m, " N%d=%lu", nid, node_nr);
> > +     }
> > +     seq_putc(m, '\n');
> > +
> > +     file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
> > +     seq_printf(m, "file=%lu", file_nr);
> > +     for_each_node_state(nid, N_HIGH_MEMORY) {
> > +             node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
> > +             seq_printf(m, " N%d=%lu", nid, node_nr);
> > +     }
> > +     seq_putc(m, '\n');
> > +
> > +     anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
> > +     seq_printf(m, "anon=%lu", anon_nr);
> > +     for_each_node_state(nid, N_HIGH_MEMORY) {
> > +             node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
> > +             seq_printf(m, " N%d=%lu", nid, node_nr);
> > +     }
> > +     seq_putc(m, '\n');
> > +
> > +     unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
> > +     seq_printf(m, "unevictable=%lu", unevictable_nr);
> > +     for_each_node_state(nid, N_HIGH_MEMORY) {
> > +             node_nr =
> mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
> > +
> nid);
> > +             seq_printf(m, " N%d=%lu", nid, node_nr);
> > +     }
> > +     seq_putc(m, '\n');
> > +     return 0;
> > +}
> > +#endif /* CONFIG_NUMA */
> > +
> >  static int mem_control_stat_show(struct cgroup *cont, struct cftype
> *cft,
> >                                struct cgroup_map_cb *cb)
> >  {
> > @@ -4058,6 +4190,7 @@ static int mem_control_stat_show(struct cgroup
> *cont, struct cftype *cft,
> >       memset(&mystat, 0, sizeof(mystat));
> >       mem_cgroup_get_local_stat(mem_cont, &mystat);
> >
> > +
> >       for (i = 0; i < NR_MCS_STAT; i++) {
> >               if (i == MCS_SWAP && !do_swap_account)
> >                       continue;
> > @@ -4481,6 +4614,22 @@ static int mem_cgroup_oom_control_write(struct
> cgroup *cgrp,
> >       return 0;
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static const struct file_operations
> mem_control_numa_stat_file_operations = {
> > +     .read = seq_read,
> > +     .llseek = seq_lseek,
> > +     .release = single_release,
> > +};
> > +
>
> Do we need this?
>
>
> > +static int mem_control_numa_stat_open(struct inode *unused, struct file
> *file)
> > +{
> > +     struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
> > +
> > +     file->f_op = &mem_control_numa_stat_file_operations;
> > +     return single_open(file, mem_control_numa_stat_show, cont);
> > +}
> > +#endif /* CONFIG_NUMA */
> > +
> >  static struct cftype mem_cgroup_files[] = {
> >       {
> >               .name = "usage_in_bytes",
> > @@ -4544,6 +4693,12 @@ static struct cftype mem_cgroup_files[] = {
> >               .unregister_event = mem_cgroup_oom_unregister_event,
> >               .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
> >       },
> > +#ifdef CONFIG_NUMA
> > +     {
> > +             .name = "numa_stat",
> > +             .open = mem_control_numa_stat_open,
> > +     },
> > +#endif
>
> Can't we do this the way we do the stats file? Please see
> mem_control_stat_show().
>

I looked that earlier but can not get the formating working as well as the
seq_*. Is there a particular reason we prefer one than the other?

Thanks
--Ying


>
> >  };
> >
> >  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
> > --
> > 1.7.3.1
> >
>
> --
>        Three Cheers,
>         Balbir
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom internet charges in Canada: sign
> http://stopthemeter.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

[-- Attachment #2: Type: text/html, Size: 12344 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V5] memcg: add memory.numastat api for numa statistics
  2011-05-24 16:54   ` Ying Han
@ 2011-05-25  3:25     ` Balbir Singh
  0 siblings, 0 replies; 7+ messages in thread
From: Balbir Singh @ 2011-05-25  3:25 UTC (permalink / raw)
  To: Ying Han
  Cc: KOSAKI Motohiro, Minchan Kim, Daisuke Nishimura, Tejun Heo,
	Pavel Emelyanov, KAMEZAWA Hiroyuki, Andrew Morton, Li Zefan,
	Mel Gorman, Christoph Lameter, Johannes Weiner, Rik van Riel,
	Hugh Dickins, Michal Hocko, Dave Hansen, Zhu Yanhai, linux-mm

* Ying Han <yinghan@google.com> [2011-05-24 09:54:43]:

> > >  static struct cftype mem_cgroup_files[] = {
> > >       {
> > >               .name = "usage_in_bytes",
> > > @@ -4544,6 +4693,12 @@ static struct cftype mem_cgroup_files[] = {
> > >               .unregister_event = mem_cgroup_oom_unregister_event,
> > >               .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
> > >       },
> > > +#ifdef CONFIG_NUMA
> > > +     {
> > > +             .name = "numa_stat",
> > > +             .open = mem_control_numa_stat_open,
> > > +     },
> > > +#endif
> >
> > Can't we do this the way we do the stats file? Please see
> > mem_control_stat_show().
> >
> 
> I looked that earlier but can not get the formating working as well as the
> seq_*. Is there a particular reason we prefer one than the other?
>

Fair enough, I wanted to avoid repeating what kernel/cgroup.c already
does in terms of formatting output.

 
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
 

-- 
	Three Cheers,
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-05-25 11:36 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-20 22:01 [PATCH V5] memcg: add memory.numastat api for numa statistics Ying Han
2011-05-20 23:21 ` Rik van Riel
2011-05-23  0:08 ` KAMEZAWA Hiroyuki
2011-05-23 20:53   ` Ying Han
2011-05-24 15:46 ` Balbir Singh
2011-05-24 16:54   ` Ying Han
2011-05-25  3:25     ` Balbir Singh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.