All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-10  8:42 Muchun Song
  2020-09-10 16:01   ` Shakeel Butt
  0 siblings, 1 reply; 15+ messages in thread
From: Muchun Song @ 2020-09-10  8:42 UTC (permalink / raw)
  To: hannes, mhocko, vdavydov.dev, akpm
  Cc: cgroups, linux-mm, linux-kernel, Muchun Song

In cgroup v1, we have a numa_stat interface. This is useful for
providing visibility into the NUMA locality information within a
memcg, since the pages are allowed to be allocated from any physical
node. One of the use cases is evaluating application performance by
combining this information with the application's CPU allocation.
But cgroup v2 has no such interface. So this patch adds the missing
information.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75cd1a1e66c8..c779673f29b2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
 	return false;
 }
 
+#ifdef CONFIG_NUMA
+static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
+					   unsigned int nid,
+					   enum node_stat_item idx)
+{
+	long x;
+	struct mem_cgroup_per_node *pn;
+	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+	VM_BUG_ON(nid >= nr_node_ids);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+#endif
+
 static char *memory_stat_format(struct mem_cgroup *memcg)
 {
 	struct seq_buf s;
 	int i;
+#ifdef CONFIG_NUMA
+	int nid;
+#endif
 
 	seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
 	if (!s.buffer)
@@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 	 * Current memory state:
 	 */
 
-	seq_buf_printf(&s, "anon %llu\n",
+	seq_buf_printf(&s, "anon %llu",
 		       (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
 		       PAGE_SIZE);
-	seq_buf_printf(&s, "file %llu\n",
+#ifdef CONFIG_NUMA
+	for_each_node_state(nid, N_MEMORY)
+		seq_buf_printf(&s, " N%d=%llu", nid,
+			       (u64)memcg_node_page_state(memcg, nid,
+							  NR_ANON_MAPPED) *
+			       PAGE_SIZE);
+#endif
+	seq_buf_putc(&s, '\n');
+
+	seq_buf_printf(&s, "file %llu",
 		       (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
 		       PAGE_SIZE);
+#ifdef CONFIG_NUMA
+	for_each_node_state(nid, N_MEMORY)
+		seq_buf_printf(&s, " N%d=%llu", nid,
+			       (u64)memcg_node_page_state(memcg, nid,
+							  NR_FILE_PAGES) *
+			       PAGE_SIZE);
+#endif
+	seq_buf_putc(&s, '\n');
+
 	seq_buf_printf(&s, "kernel_stack %llu\n",
 		       (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
 		       1024);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
  2020-09-10  8:42 [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2 Muchun Song
@ 2020-09-10 16:01   ` Shakeel Butt
  0 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-10 16:01 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
>
> In the cgroup v1, we have a numa_stat interface. This is useful for
> providing visibility into the numa locality information within an
> memcg since the pages are allowed to be allocated from any physical
> node. One of the use cases is evaluating application performance by
> combining this information with the application's CPU allocation.
> But the cgroup v2 does not. So this patch adds the missing information.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---

I am actually working on exposing this info on v2 as well.

>  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 75cd1a1e66c8..c779673f29b2 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
>         return false;
>  }
>
> +#ifdef CONFIG_NUMA
> +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> +                                          unsigned int nid,
> +                                          enum node_stat_item idx)
> +{
> +       long x;
> +       struct mem_cgroup_per_node *pn;
> +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> +
> +       VM_BUG_ON(nid >= nr_node_ids);
> +
> +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> +#ifdef CONFIG_SMP
> +       if (x < 0)
> +               x = 0;
> +#endif
> +       return x;
> +}
> +#endif
> +
>  static char *memory_stat_format(struct mem_cgroup *memcg)
>  {
>         struct seq_buf s;
>         int i;
> +#ifdef CONFIG_NUMA
> +       int nid;
> +#endif
>
>         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
>         if (!s.buffer)
> @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>          * Current memory state:
>          */
>

Let's not break the parsers of memory.stat. I would prefer a separate
interface like v1 i.e. memory.numa_stat.

> -       seq_buf_printf(&s, "anon %llu\n",
> +       seq_buf_printf(&s, "anon %llu",
>                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
>                        PAGE_SIZE);
> -       seq_buf_printf(&s, "file %llu\n",
> +#ifdef CONFIG_NUMA
> +       for_each_node_state(nid, N_MEMORY)
> +               seq_buf_printf(&s, " N%d=%llu", nid,
> +                              (u64)memcg_node_page_state(memcg, nid,
> +                                                         NR_ANON_MAPPED) *
> +                              PAGE_SIZE);
> +#endif
> +       seq_buf_putc(&s, '\n');
> +
> +       seq_buf_printf(&s, "file %llu",
>                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
>                        PAGE_SIZE);
> +#ifdef CONFIG_NUMA
> +       for_each_node_state(nid, N_MEMORY)
> +               seq_buf_printf(&s, " N%d=%llu", nid,
> +                              (u64)memcg_node_page_state(memcg, nid,
> +                                                         NR_FILE_PAGES) *
> +                              PAGE_SIZE);
> +#endif
> +       seq_buf_putc(&s, '\n');
> +

The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?

Also I think exposing slab_[un]reclaimable per node would be beneficial as well.

>         seq_buf_printf(&s, "kernel_stack %llu\n",
>                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
>                        1024);
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-10 16:01   ` Shakeel Butt
  0 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-10 16:01 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
>
> In the cgroup v1, we have a numa_stat interface. This is useful for
> providing visibility into the numa locality information within an
> memcg since the pages are allowed to be allocated from any physical
> node. One of the use cases is evaluating application performance by
> combining this information with the application's CPU allocation.
> But the cgroup v2 does not. So this patch adds the missing information.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---

I am actually working on exposing this info on v2 as well.

>  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 75cd1a1e66c8..c779673f29b2 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
>         return false;
>  }
>
> +#ifdef CONFIG_NUMA
> +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> +                                          unsigned int nid,
> +                                          enum node_stat_item idx)
> +{
> +       long x;
> +       struct mem_cgroup_per_node *pn;
> +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> +
> +       VM_BUG_ON(nid >= nr_node_ids);
> +
> +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> +#ifdef CONFIG_SMP
> +       if (x < 0)
> +               x = 0;
> +#endif
> +       return x;
> +}
> +#endif
> +
>  static char *memory_stat_format(struct mem_cgroup *memcg)
>  {
>         struct seq_buf s;
>         int i;
> +#ifdef CONFIG_NUMA
> +       int nid;
> +#endif
>
>         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
>         if (!s.buffer)
> @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>          * Current memory state:
>          */
>

Let's not break the parsers of memory.stat. I would prefer a separate
interface like v1 i.e. memory.numa_stat.

> -       seq_buf_printf(&s, "anon %llu\n",
> +       seq_buf_printf(&s, "anon %llu",
>                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
>                        PAGE_SIZE);
> -       seq_buf_printf(&s, "file %llu\n",
> +#ifdef CONFIG_NUMA
> +       for_each_node_state(nid, N_MEMORY)
> +               seq_buf_printf(&s, " N%d=%llu", nid,
> +                              (u64)memcg_node_page_state(memcg, nid,
> +                                                         NR_ANON_MAPPED) *
> +                              PAGE_SIZE);
> +#endif
> +       seq_buf_putc(&s, '\n');
> +
> +       seq_buf_printf(&s, "file %llu",
>                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
>                        PAGE_SIZE);
> +#ifdef CONFIG_NUMA
> +       for_each_node_state(nid, N_MEMORY)
> +               seq_buf_printf(&s, " N%d=%llu", nid,
> +                              (u64)memcg_node_page_state(memcg, nid,
> +                                                         NR_FILE_PAGES) *
> +                              PAGE_SIZE);
> +#endif
> +       seq_buf_putc(&s, '\n');
> +

The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?

Also I think exposing slab_[un]reclaimable per node would be beneficial as well.

>         seq_buf_printf(&s, "kernel_stack %llu\n",
>                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
>                        1024);
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
  2020-09-10 16:01   ` Shakeel Butt
  (?)
@ 2020-09-11  3:51     ` Muchun Song
  -1 siblings, 0 replies; 15+ messages in thread
From: Muchun Song @ 2020-09-11  3:51 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > In the cgroup v1, we have a numa_stat interface. This is useful for
> > providing visibility into the numa locality information within an
> > memcg since the pages are allowed to be allocated from any physical
> > node. One of the use cases is evaluating application performance by
> > combining this information with the application's CPU allocation.
> > But the cgroup v2 does not. So this patch adds the missing information.
> >
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
>
> I am actually working on exposing this info on v2 as well.
>
> >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 75cd1a1e66c8..c779673f29b2 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> >         return false;
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > +                                          unsigned int nid,
> > +                                          enum node_stat_item idx)
> > +{
> > +       long x;
> > +       struct mem_cgroup_per_node *pn;
> > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > +
> > +       VM_BUG_ON(nid >= nr_node_ids);
> > +
> > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > +#ifdef CONFIG_SMP
> > +       if (x < 0)
> > +               x = 0;
> > +#endif
> > +       return x;
> > +}
> > +#endif
> > +
> >  static char *memory_stat_format(struct mem_cgroup *memcg)
> >  {
> >         struct seq_buf s;
> >         int i;
> > +#ifdef CONFIG_NUMA
> > +       int nid;
> > +#endif
> >
> >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> >         if (!s.buffer)
> > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> >          * Current memory state:
> >          */
> >
>
> Let's not break the parsers of memory.stat. I would prefer a separate
> interface like v1 i.e. memory.numa_stat.

It is also a good idea to expose a new interface like memory.numa_stat.

>
> > -       seq_buf_printf(&s, "anon %llu\n",
> > +       seq_buf_printf(&s, "anon %llu",
> >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> >                        PAGE_SIZE);
> > -       seq_buf_printf(&s, "file %llu\n",
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_ANON_MAPPED) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
> > +       seq_buf_printf(&s, "file %llu",
> >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> >                        PAGE_SIZE);
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_FILE_PAGES) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
>
> The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?

If we want to expose the anon per node, we need to add inactive anon and
active anon together. Why not use NR_ANON_MAPPED directly?

>
> Also I think exposing slab_[un]reclaimable per node would be beneficial as well.

Yeah, I agree with you. Maybe kernel_stack and percpu also should
be exposed.

>
> >         seq_buf_printf(&s, "kernel_stack %llu\n",
> >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> >                        1024);
> > --
> > 2.20.1
> >



-- 
Yours,
Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11  3:51     ` Muchun Song
  0 siblings, 0 replies; 15+ messages in thread
From: Muchun Song @ 2020-09-11  3:51 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > In the cgroup v1, we have a numa_stat interface. This is useful for
> > providing visibility into the numa locality information within an
> > memcg since the pages are allowed to be allocated from any physical
> > node. One of the use cases is evaluating application performance by
> > combining this information with the application's CPU allocation.
> > But the cgroup v2 does not. So this patch adds the missing information.
> >
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
>
> I am actually working on exposing this info on v2 as well.
>
> >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 75cd1a1e66c8..c779673f29b2 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> >         return false;
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > +                                          unsigned int nid,
> > +                                          enum node_stat_item idx)
> > +{
> > +       long x;
> > +       struct mem_cgroup_per_node *pn;
> > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > +
> > +       VM_BUG_ON(nid >= nr_node_ids);
> > +
> > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > +#ifdef CONFIG_SMP
> > +       if (x < 0)
> > +               x = 0;
> > +#endif
> > +       return x;
> > +}
> > +#endif
> > +
> >  static char *memory_stat_format(struct mem_cgroup *memcg)
> >  {
> >         struct seq_buf s;
> >         int i;
> > +#ifdef CONFIG_NUMA
> > +       int nid;
> > +#endif
> >
> >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> >         if (!s.buffer)
> > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> >          * Current memory state:
> >          */
> >
>
> Let's not break the parsers of memory.stat. I would prefer a separate
> interface like v1 i.e. memory.numa_stat.

It is also a good idea to expose a new interface like memory.numa_stat.

>
> > -       seq_buf_printf(&s, "anon %llu\n",
> > +       seq_buf_printf(&s, "anon %llu",
> >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> >                        PAGE_SIZE);
> > -       seq_buf_printf(&s, "file %llu\n",
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_ANON_MAPPED) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
> > +       seq_buf_printf(&s, "file %llu",
> >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> >                        PAGE_SIZE);
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_FILE_PAGES) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
>
> The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?

If we want to expose the anon per node, we need to add inactive anon and
active anon together. Why not use NR_ANON_MAPPED directly?

>
> Also I think exposing slab_[un]reclaimable per node would be beneficial as well.

Yeah, I agree with you. Maybe kernel_stack and percpu also should
be exposed.

>
> >         seq_buf_printf(&s, "kernel_stack %llu\n",
> >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> >                        1024);
> > --
> > 2.20.1
> >



-- 
Yours,
Muchun


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11  3:51     ` Muchun Song
  0 siblings, 0 replies; 15+ messages in thread
From: Muchun Song @ 2020-09-11  3:51 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
> On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org> wrote:
> >
> > In the cgroup v1, we have a numa_stat interface. This is useful for
> > providing visibility into the numa locality information within an
> > memcg since the pages are allowed to be allocated from any physical
> > node. One of the use cases is evaluating application performance by
> > combining this information with the application's CPU allocation.
> > But the cgroup v2 does not. So this patch adds the missing information.
> >
> > Signed-off-by: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
> > ---
>
> I am actually working on exposing this info on v2 as well.
>
> >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 75cd1a1e66c8..c779673f29b2 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> >         return false;
> >  }
> >
> > +#ifdef CONFIG_NUMA
> > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > +                                          unsigned int nid,
> > +                                          enum node_stat_item idx)
> > +{
> > +       long x;
> > +       struct mem_cgroup_per_node *pn;
> > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > +
> > +       VM_BUG_ON(nid >= nr_node_ids);
> > +
> > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > +#ifdef CONFIG_SMP
> > +       if (x < 0)
> > +               x = 0;
> > +#endif
> > +       return x;
> > +}
> > +#endif
> > +
> >  static char *memory_stat_format(struct mem_cgroup *memcg)
> >  {
> >         struct seq_buf s;
> >         int i;
> > +#ifdef CONFIG_NUMA
> > +       int nid;
> > +#endif
> >
> >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> >         if (!s.buffer)
> > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> >          * Current memory state:
> >          */
> >
>
> Let's not break the parsers of memory.stat. I would prefer a separate
> interface like v1 i.e. memory.numa_stat.

It is also a good idea to expose a new interface like memory.numa_stat.

>
> > -       seq_buf_printf(&s, "anon %llu\n",
> > +       seq_buf_printf(&s, "anon %llu",
> >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> >                        PAGE_SIZE);
> > -       seq_buf_printf(&s, "file %llu\n",
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_ANON_MAPPED) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
> > +       seq_buf_printf(&s, "file %llu",
> >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> >                        PAGE_SIZE);
> > +#ifdef CONFIG_NUMA
> > +       for_each_node_state(nid, N_MEMORY)
> > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > +                              (u64)memcg_node_page_state(memcg, nid,
> > +                                                         NR_FILE_PAGES) *
> > +                              PAGE_SIZE);
> > +#endif
> > +       seq_buf_putc(&s, '\n');
> > +
>
> The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?

If we want to expose the anon per node, we need to add inactive anon and
active anon together. Why not use NR_ANON_MAPPED directly?

>
> Also I think exposing slab_[un]reclaimable per node would be beneficial as well.

Yeah, I agree with you. Maybe kernel_stack and percpu also should
be exposed.

>
> >         seq_buf_printf(&s, "kernel_stack %llu\n",
> >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> >                        1024);
> > --
> > 2.20.1
> >



-- 
Yours,
Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
  2020-09-11  3:51     ` Muchun Song
@ 2020-09-11 14:55       ` Shakeel Butt
  -1 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-11 14:55 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote:
>
> On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
> >
> > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> > >
> > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > providing visibility into the numa locality information within an
> > > memcg since the pages are allowed to be allocated from any physical
> > > node. One of the use cases is evaluating application performance by
> > > combining this information with the application's CPU allocation.
> > > But the cgroup v2 does not. So this patch adds the missing information.
> > >
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > ---
> >
> > I am actually working on exposing this info on v2 as well.
> >
> > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 75cd1a1e66c8..c779673f29b2 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > >         return false;
> > >  }
> > >
> > > +#ifdef CONFIG_NUMA
> > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > +                                          unsigned int nid,
> > > +                                          enum node_stat_item idx)
> > > +{
> > > +       long x;
> > > +       struct mem_cgroup_per_node *pn;
> > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > +
> > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > +
> > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > +#ifdef CONFIG_SMP
> > > +       if (x < 0)
> > > +               x = 0;
> > > +#endif
> > > +       return x;
> > > +}
> > > +#endif
> > > +
> > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > >  {
> > >         struct seq_buf s;
> > >         int i;
> > > +#ifdef CONFIG_NUMA
> > > +       int nid;
> > > +#endif
> > >
> > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > >         if (!s.buffer)
> > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > >          * Current memory state:
> > >          */
> > >
> >
> > Let's not break the parsers of memory.stat. I would prefer a separate
> > interface like v1 i.e. memory.numa_stat.
>
> It is also a good idea to expose a new interface like memory.numa_stat.
>
> >
> > > -       seq_buf_printf(&s, "anon %llu\n",
> > > +       seq_buf_printf(&s, "anon %llu",
> > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > >                        PAGE_SIZE);
> > > -       seq_buf_printf(&s, "file %llu\n",
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_ANON_MAPPED) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> > > +       seq_buf_printf(&s, "file %llu",
> > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > >                        PAGE_SIZE);
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_FILE_PAGES) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> >
> > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
>
> If we want to expose the anon per node, we need to add inactive anon and
> active anon together. Why not use NR_ANON_MAPPED directly?
>

Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The
shmem-related memory is on the anon LRUs but is not accounted in
NR_ANON_MAPPED.

Similarly, the file LRU can contain MADV_FREE pages, which are not
accounted in NR_FILE_PAGES.

> >
> > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
>
> Yeah, I agree with you. Maybe kernel_stack and percpu also should
> be exposed.
>
> >
> > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > >                        1024);
> > > --
> > > 2.20.1
> > >
>
>
>
> --
> Yours,
> Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 14:55       ` Shakeel Butt
  0 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-11 14:55 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote:
>
> On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
> >
> > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> > >
> > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > providing visibility into the numa locality information within an
> > > memcg since the pages are allowed to be allocated from any physical
> > > node. One of the use cases is evaluating application performance by
> > > combining this information with the application's CPU allocation.
> > > But the cgroup v2 does not. So this patch adds the missing information.
> > >
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > ---
> >
> > I am actually working on exposing this info on v2 as well.
> >
> > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 75cd1a1e66c8..c779673f29b2 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > >         return false;
> > >  }
> > >
> > > +#ifdef CONFIG_NUMA
> > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > +                                          unsigned int nid,
> > > +                                          enum node_stat_item idx)
> > > +{
> > > +       long x;
> > > +       struct mem_cgroup_per_node *pn;
> > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > +
> > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > +
> > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > +#ifdef CONFIG_SMP
> > > +       if (x < 0)
> > > +               x = 0;
> > > +#endif
> > > +       return x;
> > > +}
> > > +#endif
> > > +
> > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > >  {
> > >         struct seq_buf s;
> > >         int i;
> > > +#ifdef CONFIG_NUMA
> > > +       int nid;
> > > +#endif
> > >
> > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > >         if (!s.buffer)
> > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > >          * Current memory state:
> > >          */
> > >
> >
> > Let's not break the parsers of memory.stat. I would prefer a separate
> > interface like v1 i.e. memory.numa_stat.
>
> It is also a good idea to expose a new interface like memory.numa_stat.
>
> >
> > > -       seq_buf_printf(&s, "anon %llu\n",
> > > +       seq_buf_printf(&s, "anon %llu",
> > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > >                        PAGE_SIZE);
> > > -       seq_buf_printf(&s, "file %llu\n",
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_ANON_MAPPED) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> > > +       seq_buf_printf(&s, "file %llu",
> > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > >                        PAGE_SIZE);
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_FILE_PAGES) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> >
> > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
>
> If we want to expose the anon per node, we need to add inactive anon and
> active anon together. Why not use NR_ANON_MAPPED directly?
>

Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The
shmem related memory is on the anon LRUs but is not accounted for in
NR_ANON_MAPPED.

Similarly, the file LRU can contain MADV_FREE pages, which are not
accounted for in NR_FILE_PAGES.

> >
> > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
>
> Yeah, I agree with you. Maybe kernel_stack and percpu also should
> be exposed.
>
> >
> > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > >                        1024);
> > > --
> > > 2.20.1
> > >
>
>
>
> --
> Yours,
> Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
  2020-09-11 14:55       ` Shakeel Butt
@ 2020-09-11 15:47         ` Muchun Song
  -1 siblings, 0 replies; 15+ messages in thread
From: Muchun Song @ 2020-09-11 15:47 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 10:55 PM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
> > >
> > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> > > >
> > > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > > providing visibility into the numa locality information within an
> > > > memcg since the pages are allowed to be allocated from any physical
> > > > node. One of the use cases is evaluating application performance by
> > > > combining this information with the application's CPU allocation.
> > > > But the cgroup v2 does not. So this patch adds the missing information.
> > > >
> > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > > ---
> > >
> > > I am actually working on exposing this info on v2 as well.
> > >
> > > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > index 75cd1a1e66c8..c779673f29b2 100644
> > > > --- a/mm/memcontrol.c
> > > > +++ b/mm/memcontrol.c
> > > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > > >         return false;
> > > >  }
> > > >
> > > > +#ifdef CONFIG_NUMA
> > > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > > +                                          unsigned int nid,
> > > > +                                          enum node_stat_item idx)
> > > > +{
> > > > +       long x;
> > > > +       struct mem_cgroup_per_node *pn;
> > > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > > +
> > > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > > +
> > > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > > +#ifdef CONFIG_SMP
> > > > +       if (x < 0)
> > > > +               x = 0;
> > > > +#endif
> > > > +       return x;
> > > > +}
> > > > +#endif
> > > > +
> > > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > > >  {
> > > >         struct seq_buf s;
> > > >         int i;
> > > > +#ifdef CONFIG_NUMA
> > > > +       int nid;
> > > > +#endif
> > > >
> > > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > > >         if (!s.buffer)
> > > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > > >          * Current memory state:
> > > >          */
> > > >
> > >
> > > Let's not break the parsers of memory.stat. I would prefer a separate
> > > interface like v1 i.e. memory.numa_stat.
> >
> > It is also a good idea to expose a new interface like memory.numa_stat.
> >
> > >
> > > > -       seq_buf_printf(&s, "anon %llu\n",
> > > > +       seq_buf_printf(&s, "anon %llu",
> > > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > > >                        PAGE_SIZE);
> > > > -       seq_buf_printf(&s, "file %llu\n",
> > > > +#ifdef CONFIG_NUMA
> > > > +       for_each_node_state(nid, N_MEMORY)
> > > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > > +                                                         NR_ANON_MAPPED) *
> > > > +                              PAGE_SIZE);
> > > > +#endif
> > > > +       seq_buf_putc(&s, '\n');
> > > > +
> > > > +       seq_buf_printf(&s, "file %llu",
> > > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > > >                        PAGE_SIZE);
> > > > +#ifdef CONFIG_NUMA
> > > > +       for_each_node_state(nid, N_MEMORY)
> > > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > > +                                                         NR_FILE_PAGES) *
> > > > +                              PAGE_SIZE);
> > > > +#endif
> > > > +       seq_buf_putc(&s, '\n');
> > > > +
> > >
> > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
> >
> > If we want to expose the anon per node, we need to add inactive anon and
> > active anon together. Why not use NR_ANON_MAPPED directly?
> >
>
> Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The
> shmem related memory is on anon LRUs but not accounted in
> NR_ANON_MAPPED.
>
> Similarly file LRU can contain MADV_FREE pages which are not accounted
> in NR_FILE_PAGES.

I got it, thanks. Because the "stat" interface exposes the anon and
file information, I think that we should also expose anon and file
per node in "numa_stat" instead of the lru statistics. Maybe it is
even better to expose both sets of information.

>
> > >
> > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
> >
> > Yeah, I agree with you. Maybe kernel_stack and percpu also should
> > be exposed.
> >
> > >
> > > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > > >                        1024);
> > > > --
> > > > 2.20.1
> > > >
> >
> >
> >
> > --
> > Yours,
> > Muchun



-- 
Yours,
Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 15:47         ` Muchun Song
  0 siblings, 0 replies; 15+ messages in thread
From: Muchun Song @ 2020-09-11 15:47 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 10:55 PM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
> > >
> > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> > > >
> > > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > > providing visibility into the numa locality information within an
> > > > memcg since the pages are allowed to be allocated from any physical
> > > > node. One of the use cases is evaluating application performance by
> > > > combining this information with the application's CPU allocation.
> > > > But the cgroup v2 does not. So this patch adds the missing information.
> > > >
> > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > > ---
> > >
> > > I am actually working on exposing this info on v2 as well.
> > >
> > > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > index 75cd1a1e66c8..c779673f29b2 100644
> > > > --- a/mm/memcontrol.c
> > > > +++ b/mm/memcontrol.c
> > > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > > >         return false;
> > > >  }
> > > >
> > > > +#ifdef CONFIG_NUMA
> > > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > > +                                          unsigned int nid,
> > > > +                                          enum node_stat_item idx)
> > > > +{
> > > > +       long x;
> > > > +       struct mem_cgroup_per_node *pn;
> > > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > > +
> > > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > > +
> > > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > > +#ifdef CONFIG_SMP
> > > > +       if (x < 0)
> > > > +               x = 0;
> > > > +#endif
> > > > +       return x;
> > > > +}
> > > > +#endif
> > > > +
> > > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > > >  {
> > > >         struct seq_buf s;
> > > >         int i;
> > > > +#ifdef CONFIG_NUMA
> > > > +       int nid;
> > > > +#endif
> > > >
> > > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > > >         if (!s.buffer)
> > > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > > >          * Current memory state:
> > > >          */
> > > >
> > >
> > > Let's not break the parsers of memory.stat. I would prefer a separate
> > > interface like v1 i.e. memory.numa_stat.
> >
> > It is also a good idea to expose a new interface like memory.numa_stat.
> >
> > >
> > > > -       seq_buf_printf(&s, "anon %llu\n",
> > > > +       seq_buf_printf(&s, "anon %llu",
> > > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > > >                        PAGE_SIZE);
> > > > -       seq_buf_printf(&s, "file %llu\n",
> > > > +#ifdef CONFIG_NUMA
> > > > +       for_each_node_state(nid, N_MEMORY)
> > > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > > +                                                         NR_ANON_MAPPED) *
> > > > +                              PAGE_SIZE);
> > > > +#endif
> > > > +       seq_buf_putc(&s, '\n');
> > > > +
> > > > +       seq_buf_printf(&s, "file %llu",
> > > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > > >                        PAGE_SIZE);
> > > > +#ifdef CONFIG_NUMA
> > > > +       for_each_node_state(nid, N_MEMORY)
> > > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > > +                                                         NR_FILE_PAGES) *
> > > > +                              PAGE_SIZE);
> > > > +#endif
> > > > +       seq_buf_putc(&s, '\n');
> > > > +
> > >
> > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
> >
> > If we want to expose the anon per node, we need to add inactive anon and
> > active anon together. Why not use NR_ANON_MAPPED directly?
> >
>
> Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The
> shmem related memory is on anon LRUs but not accounted in
> NR_ANON_MAPPED.
>
> Similarly file LRU can contain MADV_FREE pages which are not accounted
> in NR_FILE_PAGES.

I got it, thanks. Because the "stat" interface exposes the anon and
file information, I think that we should also expose anon and file
per node in "numa_stat" instead of the lru statistics. Maybe it is
even better to expose both sets of information.

>
> > >
> > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
> >
> > Yeah, I agree with you. Maybe kernel_stack and percpu also should
> > be exposed.
> >
> > >
> > > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > > >                        1024);
> > > > --
> > > > 2.20.1
> > > >
> >
> >
> >
> > --
> > Yours,
> > Muchun



-- 
Yours,
Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
  2020-09-11 15:47         ` Muchun Song
  (?)
@ 2020-09-11 15:55           ` Shakeel Butt
  -1 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-11 15:55 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 8:48 AM Muchun Song <songmuchun@bytedance.com> wrote:
>
[snip]
>
> I got it, thanks. Because the "stat" interface exposes the anon and
> file information, I think that we should also expose anon and file
> per node in "numa_stat" instead of the lru statistics. Maybe it is
> even better to expose both sets of information.
>

Sure, go ahead and please do update the doc file as well in the next version.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 15:55           ` Shakeel Butt
  0 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-11 15:55 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 8:48 AM Muchun Song <songmuchun@bytedance.com> wrote:
>
[snip]
>
> I got it, thanks. Because the "stat" interface exposes the anon and
> file information, I think that we should also expose anon and file
> per node in "numa_stat" instead of the lru statistics. Maybe it is
> even better to expose both sets of information.
>

Sure, go ahead and please do update the doc file as well in the next version.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 15:55           ` Shakeel Butt
  0 siblings, 0 replies; 15+ messages in thread
From: Shakeel Butt @ 2020-09-11 15:55 UTC (permalink / raw)
  To: Muchun Song
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, Andrew Morton,
	Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 8:48 AM Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org> wrote:
>
[snip]
>
> I got it, thanks. Because the "stat" interface exposes the anon and
> file information, I think that we should also expose anon and file
> per node in "numa_stat" instead of the lru statistics. Maybe it is
> even better to expose both sets of information.
>

Sure, go ahead and please do update the doc file as well in the next version.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 21:51       ` Roman Gushchin
  0 siblings, 0 replies; 15+ messages in thread
From: Roman Gushchin @ 2020-09-11 21:51 UTC (permalink / raw)
  To: Muchun Song
  Cc: Shakeel Butt, Johannes Weiner, Michal Hocko, Vladimir Davydov,
	Andrew Morton, Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 11:51:42AM +0800, Muchun Song wrote:
> On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote:
> >
> > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote:
> > >
> > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > providing visibility into the numa locality information within an
> > > memcg since the pages are allowed to be allocated from any physical
> > > node. One of the use cases is evaluating application performance by
> > > combining this information with the application's CPU allocation.
> > > But the cgroup v2 does not. So this patch adds the missing information.
> > >
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > ---
> >
> > I am actually working on exposing this info on v2 as well.
> >
> > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 75cd1a1e66c8..c779673f29b2 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > >         return false;
> > >  }
> > >
> > > +#ifdef CONFIG_NUMA
> > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > +                                          unsigned int nid,
> > > +                                          enum node_stat_item idx)
> > > +{
> > > +       long x;
> > > +       struct mem_cgroup_per_node *pn;
> > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > +
> > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > +
> > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > +#ifdef CONFIG_SMP
> > > +       if (x < 0)
> > > +               x = 0;
> > > +#endif
> > > +       return x;
> > > +}
> > > +#endif
> > > +
> > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > >  {
> > >         struct seq_buf s;
> > >         int i;
> > > +#ifdef CONFIG_NUMA
> > > +       int nid;
> > > +#endif
> > >
> > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > >         if (!s.buffer)
> > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > >          * Current memory state:
> > >          */
> > >
> >
> > Let's not break the parsers of memory.stat. I would prefer a separate
> > interface like v1 i.e. memory.numa_stat.
> 
> It is also a good idea to expose a new interface like memory.numa_stat.
> 
> >
> > > -       seq_buf_printf(&s, "anon %llu\n",
> > > +       seq_buf_printf(&s, "anon %llu",
> > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > >                        PAGE_SIZE);
> > > -       seq_buf_printf(&s, "file %llu\n",
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_ANON_MAPPED) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> > > +       seq_buf_printf(&s, "file %llu",
> > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > >                        PAGE_SIZE);
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_FILE_PAGES) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> >
> > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
> 
> If we want to expose the anon per node, we need to add inactive anon and
> active anon together. Why not use NR_ANON_MAPPED directly?
> 
> >
> > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
> 
> Yeah, I agree with you. Maybe kernel_stack and percpu also should
> be exposed.

Percpu allocations are usually spread over multiple pages and numa nodes,
so there are no per-node percpu counters.

Thanks!

> 
> >
> > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > >                        1024);
> > > --
> > > 2.20.1
> > >
> 
> 
> 
> -- 
> Yours,
> Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [External] Re: [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2
@ 2020-09-11 21:51       ` Roman Gushchin
  0 siblings, 0 replies; 15+ messages in thread
From: Roman Gushchin @ 2020-09-11 21:51 UTC (permalink / raw)
  To: Muchun Song
  Cc: Shakeel Butt, Johannes Weiner, Michal Hocko, Vladimir Davydov,
	Andrew Morton, Cgroups, Linux MM, LKML

On Fri, Sep 11, 2020 at 11:51:42AM +0800, Muchun Song wrote:
> On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> >
> > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org> wrote:
> > >
> > > In the cgroup v1, we have a numa_stat interface. This is useful for
> > > providing visibility into the numa locality information within an
> > > memcg since the pages are allowed to be allocated from any physical
> > > node. One of the use cases is evaluating application performance by
> > > combining this information with the application's CPU allocation.
> > > But the cgroup v2 does not. So this patch adds the missing information.
> > >
> > > Signed-off-by: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
> > > ---
> >
> > I am actually working on exposing this info on v2 as well.
> >
> > >  mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 75cd1a1e66c8..c779673f29b2 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
> > >         return false;
> > >  }
> > >
> > > +#ifdef CONFIG_NUMA
> > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg,
> > > +                                          unsigned int nid,
> > > +                                          enum node_stat_item idx)
> > > +{
> > > +       long x;
> > > +       struct mem_cgroup_per_node *pn;
> > > +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> > > +
> > > +       VM_BUG_ON(nid >= nr_node_ids);
> > > +
> > > +       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > > +       x = atomic_long_read(&pn->lruvec_stat[idx]);
> > > +#ifdef CONFIG_SMP
> > > +       if (x < 0)
> > > +               x = 0;
> > > +#endif
> > > +       return x;
> > > +}
> > > +#endif
> > > +
> > >  static char *memory_stat_format(struct mem_cgroup *memcg)
> > >  {
> > >         struct seq_buf s;
> > >         int i;
> > > +#ifdef CONFIG_NUMA
> > > +       int nid;
> > > +#endif
> > >
> > >         seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> > >         if (!s.buffer)
> > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> > >          * Current memory state:
> > >          */
> > >
> >
> > Let's not break the parsers of memory.stat. I would prefer a separate
> > interface like v1 i.e. memory.numa_stat.
> 
> It is also a good idea to expose a new interface like memory.numa_stat.
> 
> >
> > > -       seq_buf_printf(&s, "anon %llu\n",
> > > +       seq_buf_printf(&s, "anon %llu",
> > >                        (u64)memcg_page_state(memcg, NR_ANON_MAPPED) *
> > >                        PAGE_SIZE);
> > > -       seq_buf_printf(&s, "file %llu\n",
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_ANON_MAPPED) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> > > +       seq_buf_printf(&s, "file %llu",
> > >                        (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
> > >                        PAGE_SIZE);
> > > +#ifdef CONFIG_NUMA
> > > +       for_each_node_state(nid, N_MEMORY)
> > > +               seq_buf_printf(&s, " N%d=%llu", nid,
> > > +                              (u64)memcg_node_page_state(memcg, nid,
> > > +                                                         NR_FILE_PAGES) *
> > > +                              PAGE_SIZE);
> > > +#endif
> > > +       seq_buf_putc(&s, '\n');
> > > +
> >
> > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES?
> 
> If we want to expose the anon per node, we need to add inactive anon and
> active anon together. Why not use NR_ANON_MAPPED directly?
> 
> >
> > Also I think exposing slab_[un]reclaimable per node would be beneficial as well.
> 
> Yeah, I agree with you. Maybe kernel_stack and percpu also should
> be exposed.

Percpu allocations are usually spread over multiple pages and numa nodes,
so there are no per-node percpu counters.

Thanks!

> 
> >
> > >         seq_buf_printf(&s, "kernel_stack %llu\n",
> > >                        (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
> > >                        1024);
> > > --
> > > 2.20.1
> > >
> 
> 
> 
> -- 
> Yours,
> Muchun

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-09-11 21:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-10  8:42 [PATCH] mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2 Muchun Song
2020-09-10 16:01 ` Shakeel Butt
2020-09-10 16:01   ` Shakeel Butt
2020-09-11  3:51   ` [External] " Muchun Song
2020-09-11  3:51     ` Muchun Song
2020-09-11  3:51     ` Muchun Song
2020-09-11 14:55     ` Shakeel Butt
2020-09-11 14:55       ` Shakeel Butt
2020-09-11 15:47       ` Muchun Song
2020-09-11 15:47         ` Muchun Song
2020-09-11 15:55         ` Shakeel Butt
2020-09-11 15:55           ` Shakeel Butt
2020-09-11 15:55           ` Shakeel Butt
2020-09-11 21:51     ` Roman Gushchin
2020-09-11 21:51       ` Roman Gushchin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.