linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
@ 2019-04-09 10:11 Yafang Shao
  2019-05-10 19:24 ` Dmitry Dolgov
  2019-07-12  1:10 ` Andrew Morton
  0 siblings, 2 replies; 6+ messages in thread
From: Yafang Shao @ 2019-04-09 10:11 UTC (permalink / raw)
  To: mhocko, akpm; +Cc: linux-mm, shaoyafang, Yafang Shao

We can use the exposed cgroup_ino to trace a specified cgroup.

For example,
step 1, get the inode of the specified cgroup
	$ ls -di /tmp/cgroupv2/foo
step 2, set this inode into tracepoint filter to trace this cgroup only
	(assume the inode is 11)
	$ cd /sys/kernel/debug/tracing/events/vmscan/
	$ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_begin/filter
	$ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_end/filter

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/vmscan.h | 71 +++++++++++++++++++++++++++++++++++--------
 mm/vmscan.c                   | 18 ++++++++---
 2 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index c27a563..3be0023 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -133,18 +133,43 @@
 );
 
 #ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
 
-	TP_PROTO(int order, gfp_t gfp_flags),
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
 
-	TP_ARGS(order, gfp_flags)
+	TP_ARGS(cgroup_ino, order, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, cgroup_ino)
+		__field(int, order)
+		__field(gfp_t, gfp_flags)
+	),
+
+	TP_fast_assign(
+		__entry->cgroup_ino	= cgroup_ino;
+		__entry->order		= order;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("cgroup_ino=%u order=%d gfp_flags=%s",
+		__entry->cgroup_ino, __entry->order,
+		show_gfp_flags(__entry->gfp_flags))
 );
 
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+	mm_vmscan_memcg_reclaim_begin,
 
-	TP_PROTO(int order, gfp_t gfp_flags),
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
 
-	TP_ARGS(order, gfp_flags)
+	TP_ARGS(cgroup_ino, order, gfp_flags)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+	mm_vmscan_memcg_softlimit_reclaim_begin,
+
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
+
+	TP_ARGS(cgroup_ino, order, gfp_flags)
 );
 #endif /* CONFIG_MEMCG */
 
@@ -173,18 +198,40 @@
 );
 
 #ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
 
-	TP_PROTO(unsigned long nr_reclaimed),
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
 
-	TP_ARGS(nr_reclaimed)
+	TP_ARGS(cgroup_ino, nr_reclaimed),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, cgroup_ino)
+		__field(unsigned long, nr_reclaimed)
+	),
+
+	TP_fast_assign(
+		__entry->cgroup_ino	= cgroup_ino;
+		__entry->nr_reclaimed	= nr_reclaimed;
+	),
+
+	TP_printk("cgroup_ino=%u nr_reclaimed=%lu",
+		__entry->cgroup_ino, __entry->nr_reclaimed)
 );
 
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+	mm_vmscan_memcg_reclaim_end,
 
-	TP_PROTO(unsigned long nr_reclaimed),
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
 
-	TP_ARGS(nr_reclaimed)
+	TP_ARGS(cgroup_ino, nr_reclaimed)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+	mm_vmscan_memcg_softlimit_reclaim_end,
+
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
+
+	TP_ARGS(cgroup_ino, nr_reclaimed)
 );
 #endif /* CONFIG_MEMCG */
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 347c9b3..15a9eb9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3268,8 +3268,10 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
-	trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
-						      sc.gfp_mask);
+	trace_mm_vmscan_memcg_softlimit_reclaim_begin(
+				cgroup_ino(memcg->css.cgroup),
+				sc.order,
+				sc.gfp_mask);
 
 	/*
 	 * NOTE: Although we can get the priority field, using it
@@ -3280,7 +3282,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 	 */
 	shrink_node_memcg(pgdat, memcg, &sc);
 
-	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+	trace_mm_vmscan_memcg_softlimit_reclaim_end(
+				cgroup_ino(memcg->css.cgroup),
+				sc.nr_reclaimed);
 
 	*nr_scanned = sc.nr_scanned;
 	return sc.nr_reclaimed;
@@ -3318,7 +3322,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 
 	zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 
-	trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
+	trace_mm_vmscan_memcg_reclaim_begin(
+				cgroup_ino(memcg->css.cgroup),
+				0, sc.gfp_mask);
 
 	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
@@ -3328,7 +3334,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+	trace_mm_vmscan_memcg_reclaim_end(
+				cgroup_ino(memcg->css.cgroup),
+				nr_reclaimed);
 
 	return nr_reclaimed;
 }
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
  2019-04-09 10:11 [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints Yafang Shao
@ 2019-05-10 19:24 ` Dmitry Dolgov
  2019-05-11  0:51   ` Yafang Shao
  2019-07-12  1:10 ` Andrew Morton
  1 sibling, 1 reply; 6+ messages in thread
From: Dmitry Dolgov @ 2019-05-10 19:24 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mhocko, akpm, linux-mm, shaoyafang

> On Tue, Apr 9, 2019 at 12:12 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> We can use the exposed cgroup_ino to trace specified cgroup.

As far as I see, this patch didn't make it through yet, but sounds like a
useful feature. It needs to be rebased, since mm_vmscan_memcg_reclaim_begin /
mm_vmscan_memcg_softlimit_reclaim_begin now have may_writepage and
classzone_idx, but overall looks good. I've checked it out with cgroup2 and
ftrace, works as expected.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
  2019-05-10 19:24 ` Dmitry Dolgov
@ 2019-05-11  0:51   ` Yafang Shao
  0 siblings, 0 replies; 6+ messages in thread
From: Yafang Shao @ 2019-05-11  0:51 UTC (permalink / raw)
  To: Dmitry Dolgov; +Cc: Michal Hocko, Andrew Morton, Linux MM, shaoyafang

On Sat, May 11, 2019 at 3:21 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
>
> > On Tue, Apr 9, 2019 at 12:12 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > We can use the exposed cgroup_ino to trace specified cgroup.
>
> As far as I see, this patch didn't make it through yet, but sounds like a
> useful feature. It needs to be rebased, since mm_vmscan_memcg_reclaim_begin /
> mm_vmscan_memcg_softlimit_reclaim_begin now have may_writepage and
> classzone_idx, but overall looks good. I've checked it out with cgroup2 and
> ftrace, works as expected.

Thanks for your feedback!
I will rebase it.

Thanks
Yafang


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
  2019-04-09 10:11 [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints Yafang Shao
  2019-05-10 19:24 ` Dmitry Dolgov
@ 2019-07-12  1:10 ` Andrew Morton
  2019-07-12 23:58   ` Yang Shi
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2019-07-12  1:10 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mhocko, linux-mm, shaoyafang, Johannes Weiner


Can we please get some review of this one?  It has been in -mm since
May 22, no issues that I've heard of.


From: Yafang Shao <laoar.shao@gmail.com>
Subject: mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints

We can use the exposed cgroup_ino to trace specified cgroup.

For example,
step 1, get the inode of the specified cgroup
	$ ls -di /tmp/cgroupv2/foo
step 2, set this inode into tracepoint filter to trace this cgroup only
	(assume the inode is 11)
	$ cd /sys/kernel/debug/tracing/events/vmscan/
	$ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_begin/filter
	$ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_end/filter

The reason I made this change is to trace a specific container.

Sometimes there are lots of containers on one host.  Some of them are
not important at all, so we don't care whether they are under memory
pressure.  Others are important, so we want to know if
these containers are doing memcg reclaim and how long this reclaim
takes.

Without this change, we don't know in which container the memcg
reclaim happened.

Link: http://lkml.kernel.org/r/1557649528-11676-1-git-send-email-laoar.shao@gmail.com
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <shaoyafang@didiglobal.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/trace/events/vmscan.h |   71 ++++++++++++++++++++++++++------
 mm/vmscan.c                   |   18 +++++---
 2 files changed, 72 insertions(+), 17 deletions(-)

--- a/include/trace/events/vmscan.h~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
+++ a/include/trace/events/vmscan.h
@@ -127,18 +127,43 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_be
 );
 
 #ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
 
-	TP_PROTO(int order, gfp_t gfp_flags),
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
 
-	TP_ARGS(order, gfp_flags)
+	TP_ARGS(cgroup_ino, order, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, cgroup_ino)
+		__field(int, order)
+		__field(gfp_t, gfp_flags)
+	),
+
+	TP_fast_assign(
+		__entry->cgroup_ino	= cgroup_ino;
+		__entry->order		= order;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("cgroup_ino=%u order=%d gfp_flags=%s",
+		__entry->cgroup_ino, __entry->order,
+		show_gfp_flags(__entry->gfp_flags))
 );
 
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+	     mm_vmscan_memcg_reclaim_begin,
 
-	TP_PROTO(int order, gfp_t gfp_flags),
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
 
-	TP_ARGS(order, gfp_flags)
+	TP_ARGS(cgroup_ino, order, gfp_flags)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
+	     mm_vmscan_memcg_softlimit_reclaim_begin,
+
+	TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
+
+	TP_ARGS(cgroup_ino, order, gfp_flags)
 );
 #endif /* CONFIG_MEMCG */
 
@@ -167,18 +192,40 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_en
 );
 
 #ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
 
-	TP_PROTO(unsigned long nr_reclaimed),
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
 
-	TP_ARGS(nr_reclaimed)
+	TP_ARGS(cgroup_ino, nr_reclaimed),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, cgroup_ino)
+		__field(unsigned long, nr_reclaimed)
+	),
+
+	TP_fast_assign(
+		__entry->cgroup_ino	= cgroup_ino;
+		__entry->nr_reclaimed	= nr_reclaimed;
+	),
+
+	TP_printk("cgroup_ino=%u nr_reclaimed=%lu",
+		__entry->cgroup_ino, __entry->nr_reclaimed)
 );
 
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+	     mm_vmscan_memcg_reclaim_end,
 
-	TP_PROTO(unsigned long nr_reclaimed),
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
 
-	TP_ARGS(nr_reclaimed)
+	TP_ARGS(cgroup_ino, nr_reclaimed)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
+	     mm_vmscan_memcg_softlimit_reclaim_end,
+
+	TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
+
+	TP_ARGS(cgroup_ino, nr_reclaimed)
 );
 #endif /* CONFIG_MEMCG */
 
--- a/mm/vmscan.c~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
+++ a/mm/vmscan.c
@@ -3191,8 +3191,10 @@ unsigned long mem_cgroup_shrink_node(str
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
-	trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
-						      sc.gfp_mask);
+	trace_mm_vmscan_memcg_softlimit_reclaim_begin(
+					cgroup_ino(memcg->css.cgroup),
+					sc.order,
+					sc.gfp_mask);
 
 	/*
 	 * NOTE: Although we can get the priority field, using it
@@ -3203,7 +3205,9 @@ unsigned long mem_cgroup_shrink_node(str
 	 */
 	shrink_node_memcg(pgdat, memcg, &sc, &lru_pages);
 
-	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+	trace_mm_vmscan_memcg_softlimit_reclaim_end(
+					cgroup_ino(memcg->css.cgroup),
+					sc.nr_reclaimed);
 
 	*nr_scanned = sc.nr_scanned;
 	return sc.nr_reclaimed;
@@ -3241,7 +3245,9 @@ unsigned long try_to_free_mem_cgroup_pag
 
 	zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 
-	trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
+	trace_mm_vmscan_memcg_reclaim_begin(
+				cgroup_ino(memcg->css.cgroup),
+				0, sc.gfp_mask);
 
 	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
@@ -3251,7 +3257,9 @@ unsigned long try_to_free_mem_cgroup_pag
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+	trace_mm_vmscan_memcg_reclaim_end(
+				cgroup_ino(memcg->css.cgroup),
+				nr_reclaimed);
 
 	return nr_reclaimed;
 }
_


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
  2019-07-12  1:10 ` Andrew Morton
@ 2019-07-12 23:58   ` Yang Shi
  2019-07-13  6:46     ` Yafang Shao
  0 siblings, 1 reply; 6+ messages in thread
From: Yang Shi @ 2019-07-12 23:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Yafang Shao, Michal Hocko, Linux MM, shaoyafang, Johannes Weiner

On Thu, Jul 11, 2019 at 6:10 PM Andrew Morton <akpm@linux-foundation.org> wrote:
>
>
> Can we please get some review of this one?  It has been in -mm since
> May 22, no issues that I've heard of.
>
>
> From: Yafang Shao <laoar.shao@gmail.com>
> Subject: mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
>
> We can use the exposed cgroup_ino to trace specified cgroup.
>
> For example,
> step 1, get the inode of the specified cgroup
>         $ ls -di /tmp/cgroupv2/foo
> step 2, set this inode into tracepoint filter to trace this cgroup only
>         (assume the inode is 11)
>         $ cd /sys/kernel/debug/tracing/events/vmscan/
>         $ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_begin/filter
>         $ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_end/filter
>
> The reason I made this change is to trace a specific container.

I'm wondering how useful this is. You could filter events by cgroup
with bpftrace easily. For example:

# bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup ==
cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n",
str(args->filename)); }':


>
> Sometimes there are lots of containers on one host.  Some of them are
> not important at all, so we don't care whether they are under memory
> pressure.  Others are important, so we want to know if
> these containers are doing memcg reclaim and how long this reclaim
> takes.
>
> Without this change, we don't know in which container the memcg
> reclaim happened.
>
> Link: http://lkml.kernel.org/r/1557649528-11676-1-git-send-email-laoar.shao@gmail.com
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: <shaoyafang@didiglobal.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>
>  include/trace/events/vmscan.h |   71 ++++++++++++++++++++++++++------
>  mm/vmscan.c                   |   18 +++++---
>  2 files changed, 72 insertions(+), 17 deletions(-)
>
> --- a/include/trace/events/vmscan.h~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
> +++ a/include/trace/events/vmscan.h
> @@ -127,18 +127,43 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_be
>  );
>
>  #ifdef CONFIG_MEMCG
> -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
> +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
>
> -       TP_PROTO(int order, gfp_t gfp_flags),
> +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, gfp_flags)
> +       TP_ARGS(cgroup_ino, order, gfp_flags),
> +
> +       TP_STRUCT__entry(
> +               __field(unsigned int, cgroup_ino)
> +               __field(int, order)
> +               __field(gfp_t, gfp_flags)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->cgroup_ino     = cgroup_ino;
> +               __entry->order          = order;
> +               __entry->gfp_flags      = gfp_flags;
> +       ),
> +
> +       TP_printk("cgroup_ino=%u order=%d gfp_flags=%s",
> +               __entry->cgroup_ino, __entry->order,
> +               show_gfp_flags(__entry->gfp_flags))
>  );
>
> -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
> +            mm_vmscan_memcg_reclaim_begin,
>
> -       TP_PROTO(int order, gfp_t gfp_flags),
> +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, gfp_flags)
> +       TP_ARGS(cgroup_ino, order, gfp_flags)
> +);
> +
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
> +            mm_vmscan_memcg_softlimit_reclaim_begin,
> +
> +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
> +
> +       TP_ARGS(cgroup_ino, order, gfp_flags)
>  );
>  #endif /* CONFIG_MEMCG */
>
> @@ -167,18 +192,40 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_en
>  );
>
>  #ifdef CONFIG_MEMCG
> -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
> +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
>
> -       TP_PROTO(unsigned long nr_reclaimed),
> +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
>
> -       TP_ARGS(nr_reclaimed)
> +       TP_ARGS(cgroup_ino, nr_reclaimed),
> +
> +       TP_STRUCT__entry(
> +               __field(unsigned int, cgroup_ino)
> +               __field(unsigned long, nr_reclaimed)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->cgroup_ino     = cgroup_ino;
> +               __entry->nr_reclaimed   = nr_reclaimed;
> +       ),
> +
> +       TP_printk("cgroup_ino=%u nr_reclaimed=%lu",
> +               __entry->cgroup_ino, __entry->nr_reclaimed)
>  );
>
> -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
> +            mm_vmscan_memcg_reclaim_end,
>
> -       TP_PROTO(unsigned long nr_reclaimed),
> +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
>
> -       TP_ARGS(nr_reclaimed)
> +       TP_ARGS(cgroup_ino, nr_reclaimed)
> +);
> +
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
> +            mm_vmscan_memcg_softlimit_reclaim_end,
> +
> +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
> +
> +       TP_ARGS(cgroup_ino, nr_reclaimed)
>  );
>  #endif /* CONFIG_MEMCG */
>
> --- a/mm/vmscan.c~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
> +++ a/mm/vmscan.c
> @@ -3191,8 +3191,10 @@ unsigned long mem_cgroup_shrink_node(str
>         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
>                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
>
> -       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> -                                                     sc.gfp_mask);
> +       trace_mm_vmscan_memcg_softlimit_reclaim_begin(
> +                                       cgroup_ino(memcg->css.cgroup),
> +                                       sc.order,
> +                                       sc.gfp_mask);
>
>         /*
>          * NOTE: Although we can get the priority field, using it
> @@ -3203,7 +3205,9 @@ unsigned long mem_cgroup_shrink_node(str
>          */
>         shrink_node_memcg(pgdat, memcg, &sc, &lru_pages);
>
> -       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
> +       trace_mm_vmscan_memcg_softlimit_reclaim_end(
> +                                       cgroup_ino(memcg->css.cgroup),
> +                                       sc.nr_reclaimed);
>
>         *nr_scanned = sc.nr_scanned;
>         return sc.nr_reclaimed;
> @@ -3241,7 +3245,9 @@ unsigned long try_to_free_mem_cgroup_pag
>
>         zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
>
> -       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
> +       trace_mm_vmscan_memcg_reclaim_begin(
> +                               cgroup_ino(memcg->css.cgroup),
> +                               0, sc.gfp_mask);
>
>         psi_memstall_enter(&pflags);
>         noreclaim_flag = memalloc_noreclaim_save();
> @@ -3251,7 +3257,9 @@ unsigned long try_to_free_mem_cgroup_pag
>         memalloc_noreclaim_restore(noreclaim_flag);
>         psi_memstall_leave(&pflags);
>
> -       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
> +       trace_mm_vmscan_memcg_reclaim_end(
> +                               cgroup_ino(memcg->css.cgroup),
> +                               nr_reclaimed);
>
>         return nr_reclaimed;
>  }
> _
>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
  2019-07-12 23:58   ` Yang Shi
@ 2019-07-13  6:46     ` Yafang Shao
  0 siblings, 0 replies; 6+ messages in thread
From: Yafang Shao @ 2019-07-13  6:46 UTC (permalink / raw)
  To: Yang Shi
  Cc: Andrew Morton, Michal Hocko, Linux MM, Yafang Shao, Johannes Weiner

On Sat, Jul 13, 2019 at 7:58 AM Yang Shi <shy828301@gmail.com> wrote:
>
> On Thu, Jul 11, 2019 at 6:10 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> >
> > Can we please get some review of this one?  It has been in -mm since
> > May 22, no issues that I've heard of.
> >
> >
> > From: Yafang Shao <laoar.shao@gmail.com>
> > Subject: mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints
> >
> > We can use the exposed cgroup_ino to trace specified cgroup.
> >
> > For example,
> > step 1, get the inode of the specified cgroup
> >         $ ls -di /tmp/cgroupv2/foo
> > step 2, set this inode into tracepoint filter to trace this cgroup only
> >         (assume the inode is 11)
> >         $ cd /sys/kernel/debug/tracing/events/vmscan/
> >         $ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_begin/filter
> >         $ echo 'cgroup_ino == 11' > mm_vmscan_memcg_reclaim_end/filter
> >
> > The reason I made this change is to trace a specific container.
>
> I'm wondering how useful this is. You could filter events by cgroup
> with bpftrace easily. For example:
>
> # bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup ==
> cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n",
> str(args->filename)); }':
>

It seems bpftrace gets the cgroupid from the current task and then
compares the task's cgroupid with the specified cgroupid?
While in the memcg reclaim, the pages in a memcg may be reclaimed by a
process in other memcgs, i.e. the parent memcg,
so we can't use the process's memcg as the filter.

The way to use bpftrace here would be to use a kprobe, I guess.
But as the tracepoint is already there, we can make a small change to enhance it.

Thanks
Yafang

>
> >
> > Sometimes there are lots of containers on one host.  Some of them are
> > not important at all, so we don't care whether they are under memory
> > pressure.  Others are important, so we want to know if
> > these containers are doing memcg reclaim and how long this reclaim
> > takes.
> >
> > Without this change, we don't know in which container the memcg
> > reclaim happened.
> >
> > Link: http://lkml.kernel.org/r/1557649528-11676-1-git-send-email-laoar.shao@gmail.com
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > Cc: Michal Hocko <mhocko@suse.com>
> > Cc: <shaoyafang@didiglobal.com>
> > Cc: Johannes Weiner <hannes@cmpxchg.org>
> > Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> > ---
> >
> >  include/trace/events/vmscan.h |   71 ++++++++++++++++++++++++++------
> >  mm/vmscan.c                   |   18 +++++---
> >  2 files changed, 72 insertions(+), 17 deletions(-)
> >
> > --- a/include/trace/events/vmscan.h~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
> > +++ a/include/trace/events/vmscan.h
> > @@ -127,18 +127,43 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_be
> >  );
> >
> >  #ifdef CONFIG_MEMCG
> > -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
> > +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
> >
> > -       TP_PROTO(int order, gfp_t gfp_flags),
> > +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, gfp_flags)
> > +       TP_ARGS(cgroup_ino, order, gfp_flags),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(unsigned int, cgroup_ino)
> > +               __field(int, order)
> > +               __field(gfp_t, gfp_flags)
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->cgroup_ino     = cgroup_ino;
> > +               __entry->order          = order;
> > +               __entry->gfp_flags      = gfp_flags;
> > +       ),
> > +
> > +       TP_printk("cgroup_ino=%u order=%d gfp_flags=%s",
> > +               __entry->cgroup_ino, __entry->order,
> > +               show_gfp_flags(__entry->gfp_flags))
> >  );
> >
> > -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
> > +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
> > +            mm_vmscan_memcg_reclaim_begin,
> >
> > -       TP_PROTO(int order, gfp_t gfp_flags),
> > +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, gfp_flags)
> > +       TP_ARGS(cgroup_ino, order, gfp_flags)
> > +);
> > +
> > +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template,
> > +            mm_vmscan_memcg_softlimit_reclaim_begin,
> > +
> > +       TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags),
> > +
> > +       TP_ARGS(cgroup_ino, order, gfp_flags)
> >  );
> >  #endif /* CONFIG_MEMCG */
> >
> > @@ -167,18 +192,40 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_en
> >  );
> >
> >  #ifdef CONFIG_MEMCG
> > -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
> > +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
> >
> > -       TP_PROTO(unsigned long nr_reclaimed),
> > +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
> >
> > -       TP_ARGS(nr_reclaimed)
> > +       TP_ARGS(cgroup_ino, nr_reclaimed),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(unsigned int, cgroup_ino)
> > +               __field(unsigned long, nr_reclaimed)
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->cgroup_ino     = cgroup_ino;
> > +               __entry->nr_reclaimed   = nr_reclaimed;
> > +       ),
> > +
> > +       TP_printk("cgroup_ino=%u nr_reclaimed=%lu",
> > +               __entry->cgroup_ino, __entry->nr_reclaimed)
> >  );
> >
> > -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
> > +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
> > +            mm_vmscan_memcg_reclaim_end,
> >
> > -       TP_PROTO(unsigned long nr_reclaimed),
> > +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
> >
> > -       TP_ARGS(nr_reclaimed)
> > +       TP_ARGS(cgroup_ino, nr_reclaimed)
> > +);
> > +
> > +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template,
> > +            mm_vmscan_memcg_softlimit_reclaim_end,
> > +
> > +       TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed),
> > +
> > +       TP_ARGS(cgroup_ino, nr_reclaimed)
> >  );
> >  #endif /* CONFIG_MEMCG */
> >
> > --- a/mm/vmscan.c~mm-vmscan-expose-cgroup_ino-for-memcg-reclaim-tracepoints
> > +++ a/mm/vmscan.c
> > @@ -3191,8 +3191,10 @@ unsigned long mem_cgroup_shrink_node(str
> >         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
> >                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
> >
> > -       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> > -                                                     sc.gfp_mask);
> > +       trace_mm_vmscan_memcg_softlimit_reclaim_begin(
> > +                                       cgroup_ino(memcg->css.cgroup),
> > +                                       sc.order,
> > +                                       sc.gfp_mask);
> >
> >         /*
> >          * NOTE: Although we can get the priority field, using it
> > @@ -3203,7 +3205,9 @@ unsigned long mem_cgroup_shrink_node(str
> >          */
> >         shrink_node_memcg(pgdat, memcg, &sc, &lru_pages);
> >
> > -       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
> > +       trace_mm_vmscan_memcg_softlimit_reclaim_end(
> > +                                       cgroup_ino(memcg->css.cgroup),
> > +                                       sc.nr_reclaimed);
> >
> >         *nr_scanned = sc.nr_scanned;
> >         return sc.nr_reclaimed;
> > @@ -3241,7 +3245,9 @@ unsigned long try_to_free_mem_cgroup_pag
> >
> >         zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
> >
> > -       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
> > +       trace_mm_vmscan_memcg_reclaim_begin(
> > +                               cgroup_ino(memcg->css.cgroup),
> > +                               0, sc.gfp_mask);
> >
> >         psi_memstall_enter(&pflags);
> >         noreclaim_flag = memalloc_noreclaim_save();
> > @@ -3251,7 +3257,9 @@ unsigned long try_to_free_mem_cgroup_pag
> >         memalloc_noreclaim_restore(noreclaim_flag);
> >         psi_memstall_leave(&pflags);
> >
> > -       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
> > +       trace_mm_vmscan_memcg_reclaim_end(
> > +                               cgroup_ino(memcg->css.cgroup),
> > +                               nr_reclaimed);
> >
> >         return nr_reclaimed;
> >  }
> > _
> >


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-13  6:47 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-09 10:11 [PATCH] mm/vmscan: expose cgroup_ino for memcg reclaim tracepoints Yafang Shao
2019-05-10 19:24 ` Dmitry Dolgov
2019-05-11  0:51   ` Yafang Shao
2019-07-12  1:10 ` Andrew Morton
2019-07-12 23:58   ` Yang Shi
2019-07-13  6:46     ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).