linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf tool: add skip_not_exec_file_map_events option
@ 2022-08-17 13:43 tcwzxx
  2022-08-17 18:13 ` Ian Rogers
  0 siblings, 1 reply; 5+ messages in thread
From: tcwzxx @ 2022-08-17 13:43 UTC (permalink / raw)
  To: peterz, mingo, acme, alexander.shishkin, jolsa
  Cc: linux-perf-users, linux-kernel, tcwzxx

When generate the flame graph, the perf-script subcommand will
process all mmap event and add them to the rbtree. The 240,000
mmap region takes about 5 minutes, which is not useful for flame
graph. So add the skip-not-exec-file-map-events option to skip
not PROT_EXEC flag memory regions.

Signed-off-by: tcwzxx <tcwzxx@gmail.com>
---
 tools/perf/builtin-report.c | 2 ++
 tools/perf/builtin-script.c | 3 +++
 tools/perf/util/machine.c   | 3 +++
 tools/perf/util/map.c       | 5 +++++
 tools/perf/util/map.h       | 2 ++
 5 files changed, 15 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 91ed41cc7d88..c28eb9450a66 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1364,6 +1364,8 @@ int cmd_report(int argc, const char **argv)
 		    "Disable raw trace ordering"),
 	OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
 		    "Do not display empty (or dummy) events in the output"),
+	OPT_BOOLEAN(0, "skip-not-exec-file-map_events", &skip_not_exec_file_map_events,
+		    "skip not exec map events"),
 	OPT_END()
 	};
 	struct perf_data data = {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 13580a9c50b8..e3f4e5e654c9 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -32,6 +32,7 @@
 #include "util/time-utils.h"
 #include "util/path.h"
 #include "util/event.h"
+#include "util/map.h"
 #include "ui/ui.h"
 #include "print_binary.h"
 #include "archinsn.h"
@@ -3936,6 +3937,8 @@ int cmd_script(int argc, const char **argv)
 		    "Guest code can be found in hypervisor process"),
 	OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
 		    "Enable LBR callgraph stitching approach"),
+	OPT_BOOLEAN(0, "skip-not-exec-map_events", &skip_not_exec_file_map_events,
+		    "skip not exec map events"),
 	OPTS_EVSWITCH(&script.evswitch),
 	OPT_END()
 	};
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2a16cae28407..21dde9f9935c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1904,6 +1904,9 @@ int machine__process_mmap2_event(struct machine *machine,
 	if (thread == NULL)
 		goto out_problem;
 
+	if (skip_not_exec_file_map_events && !(event->mmap2.prot & PROT_EXEC))
+		goto out_problem;
+
 	map = map__new(machine, event->mmap2.start,
 			event->mmap2.len, event->mmap2.pgoff,
 			&dso_id, event->mmap2.prot,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e0aa4a254583..2b51ca012c91 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -16,6 +16,8 @@
 #include "thread.h"
 #include "vdso.h"
 
+bool skip_not_exec_file_map_events;
+
 static inline int is_android_lib(const char *filename)
 {
 	return strstarts(filename, "/data/app-lib/") ||
@@ -168,6 +170,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 		if (dso == NULL)
 			goto out_delete;
 
+		if (skip_not_exec_file_map_events && dso__type(dso, machine) == DSO__TYPE_UNKNOWN)
+			goto out_delete;
+
 		map__init(map, start, start + len, pgoff, dso);
 
 		if (anon || no_dso) {
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 3dcfe06db6b3..67b0e0f9f0ae 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -11,6 +11,8 @@
 #include <stdbool.h>
 #include <linux/types.h>
 
+extern bool skip_not_exec_file_map_events;
+
 struct dso;
 struct maps;
 struct machine;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf tool: add skip_not_exec_file_map_events option
  2022-08-17 13:43 [PATCH] perf tool: add skip_not_exec_file_map_events option tcwzxx
@ 2022-08-17 18:13 ` Ian Rogers
  2022-08-18  3:26   ` lika you
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Rogers @ 2022-08-17 18:13 UTC (permalink / raw)
  To: tcwzxx
  Cc: peterz, mingo, acme, alexander.shishkin, jolsa, linux-perf-users,
	linux-kernel

On Wed, Aug 17, 2022 at 6:43 AM tcwzxx <tcwzxx@gmail.com> wrote:
>
> When generate the flame graph, the perf-script subcommand will
> process all mmap event and add them to the rbtree. The 240,000
> mmap region takes about 5 minutes, which is not useful for flame
> graph. So add the skip-not-exec-file-map-events option to skip
> not PROT_EXEC flag memory regions.
>
> Signed-off-by: tcwzxx <tcwzxx@gmail.com>

Could you provide more context? A reproduction?

When we synthesize mmap events we drop non-executable pages:
https://github.com/torvalds/linux/blob/master/tools/perf/util/synthetic-events.c#L466

Similarly in the kernel for the "dummy" event:
https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L8258

Thanks,
Ian

> ---
>  tools/perf/builtin-report.c | 2 ++
>  tools/perf/builtin-script.c | 3 +++
>  tools/perf/util/machine.c   | 3 +++
>  tools/perf/util/map.c       | 5 +++++
>  tools/perf/util/map.h       | 2 ++
>  5 files changed, 15 insertions(+)
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 91ed41cc7d88..c28eb9450a66 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -1364,6 +1364,8 @@ int cmd_report(int argc, const char **argv)
>                     "Disable raw trace ordering"),
>         OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
>                     "Do not display empty (or dummy) events in the output"),
> +       OPT_BOOLEAN(0, "skip-not-exec-file-map_events", &skip_not_exec_file_map_events,
> +                   "skip not exec map events"),
>         OPT_END()
>         };
>         struct perf_data data = {
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 13580a9c50b8..e3f4e5e654c9 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -32,6 +32,7 @@
>  #include "util/time-utils.h"
>  #include "util/path.h"
>  #include "util/event.h"
> +#include "util/map.h"
>  #include "ui/ui.h"
>  #include "print_binary.h"
>  #include "archinsn.h"
> @@ -3936,6 +3937,8 @@ int cmd_script(int argc, const char **argv)
>                     "Guest code can be found in hypervisor process"),
>         OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
>                     "Enable LBR callgraph stitching approach"),
> +       OPT_BOOLEAN(0, "skip-not-exec-map_events", &skip_not_exec_file_map_events,
> +                   "skip not exec map events"),
>         OPTS_EVSWITCH(&script.evswitch),
>         OPT_END()
>         };
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 2a16cae28407..21dde9f9935c 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -1904,6 +1904,9 @@ int machine__process_mmap2_event(struct machine *machine,
>         if (thread == NULL)
>                 goto out_problem;
>
> +       if (skip_not_exec_file_map_events && !(event->mmap2.prot & PROT_EXEC))
> +               goto out_problem;
> +
>         map = map__new(machine, event->mmap2.start,
>                         event->mmap2.len, event->mmap2.pgoff,
>                         &dso_id, event->mmap2.prot,
> diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> index e0aa4a254583..2b51ca012c91 100644
> --- a/tools/perf/util/map.c
> +++ b/tools/perf/util/map.c
> @@ -16,6 +16,8 @@
>  #include "thread.h"
>  #include "vdso.h"
>
> +bool skip_not_exec_file_map_events;
> +
>  static inline int is_android_lib(const char *filename)
>  {
>         return strstarts(filename, "/data/app-lib/") ||
> @@ -168,6 +170,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
>                 if (dso == NULL)
>                         goto out_delete;
>
> +               if (skip_not_exec_file_map_events && dso__type(dso, machine) == DSO__TYPE_UNKNOWN)
> +                       goto out_delete;
> +
>                 map__init(map, start, start + len, pgoff, dso);
>
>                 if (anon || no_dso) {
> diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> index 3dcfe06db6b3..67b0e0f9f0ae 100644
> --- a/tools/perf/util/map.h
> +++ b/tools/perf/util/map.h
> @@ -11,6 +11,8 @@
>  #include <stdbool.h>
>  #include <linux/types.h>
>
> +extern bool skip_not_exec_file_map_events;
> +
>  struct dso;
>  struct maps;
>  struct machine;
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf tool: add skip_not_exec_file_map_events option
  2022-08-17 18:13 ` Ian Rogers
@ 2022-08-18  3:26   ` lika you
  2022-08-18 15:00     ` Ian Rogers
  0 siblings, 1 reply; 5+ messages in thread
From: lika you @ 2022-08-18  3:26 UTC (permalink / raw)
  To: Ian Rogers
  Cc: peterz, mingo, acme, alexander.shishkin, jolsa, linux-perf-users,
	linux-kernel

I'm so sorry for forgetting to reply to all. Here is the reply again.

Thanks for the reply.

The background is we have two types of tasks running on the same host.The high
priority one which is CPU overhead and the low priority which is IO overhead.
The high priority task has mmap many files as shared memory. The low priority
task may load multi TB data from SSD at once time which will cause the high
priority task file page cache to be swapped out. So we mmap all files with the
PROT_EXEC flag to prevent hot page cache to be swapped out. That cause
too many executable memory regions without symbols on it.

The trick is implementate here.
https://github.com/torvalds/linux/blob/master/mm/vmscan.c#L2572

Thanks again

On Thu, 18 Aug 2022 at 02:13, Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Aug 17, 2022 at 6:43 AM tcwzxx <tcwzxx@gmail.com> wrote:
> >
> > When generate the flame graph, the perf-script subcommand will
> > process all mmap event and add them to the rbtree. The 240,000
> > mmap region takes about 5 minutes, which is not useful for flame
> > graph. So add the skip-not-exec-file-map-events option to skip
> > not PROT_EXEC flag memory regions.
> >
> > Signed-off-by: tcwzxx <tcwzxx@gmail.com>
>
> Could you provide more context? A reproduction?
>
> When we synthesize mmap events we drop non-executable pages:
> https://github.com/torvalds/linux/blob/master/tools/perf/util/synthetic-events.c#L466
>
> Similarly in the kernel for the "dummy" event:
> https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L8258
>
> Thanks,
> Ian
>
> > ---
> >  tools/perf/builtin-report.c | 2 ++
> >  tools/perf/builtin-script.c | 3 +++
> >  tools/perf/util/machine.c   | 3 +++
> >  tools/perf/util/map.c       | 5 +++++
> >  tools/perf/util/map.h       | 2 ++
> >  5 files changed, 15 insertions(+)
> >
> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index 91ed41cc7d88..c28eb9450a66 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -1364,6 +1364,8 @@ int cmd_report(int argc, const char **argv)
> >                     "Disable raw trace ordering"),
> >         OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
> >                     "Do not display empty (or dummy) events in the output"),
> > +       OPT_BOOLEAN(0, "skip-not-exec-file-map_events", &skip_not_exec_file_map_events,
> > +                   "skip not exec map events"),
> >         OPT_END()
> >         };
> >         struct perf_data data = {
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index 13580a9c50b8..e3f4e5e654c9 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -32,6 +32,7 @@
> >  #include "util/time-utils.h"
> >  #include "util/path.h"
> >  #include "util/event.h"
> > +#include "util/map.h"
> >  #include "ui/ui.h"
> >  #include "print_binary.h"
> >  #include "archinsn.h"
> > @@ -3936,6 +3937,8 @@ int cmd_script(int argc, const char **argv)
> >                     "Guest code can be found in hypervisor process"),
> >         OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
> >                     "Enable LBR callgraph stitching approach"),
> > +       OPT_BOOLEAN(0, "skip-not-exec-map_events", &skip_not_exec_file_map_events,
> > +                   "skip not exec map events"),
> >         OPTS_EVSWITCH(&script.evswitch),
> >         OPT_END()
> >         };
> > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > index 2a16cae28407..21dde9f9935c 100644
> > --- a/tools/perf/util/machine.c
> > +++ b/tools/perf/util/machine.c
> > @@ -1904,6 +1904,9 @@ int machine__process_mmap2_event(struct machine *machine,
> >         if (thread == NULL)
> >                 goto out_problem;
> >
> > +       if (skip_not_exec_file_map_events && !(event->mmap2.prot & PROT_EXEC))
> > +               goto out_problem;
> > +
> >         map = map__new(machine, event->mmap2.start,
> >                         event->mmap2.len, event->mmap2.pgoff,
> >                         &dso_id, event->mmap2.prot,
> > diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> > index e0aa4a254583..2b51ca012c91 100644
> > --- a/tools/perf/util/map.c
> > +++ b/tools/perf/util/map.c
> > @@ -16,6 +16,8 @@
> >  #include "thread.h"
> >  #include "vdso.h"
> >
> > +bool skip_not_exec_file_map_events;
> > +
> >  static inline int is_android_lib(const char *filename)
> >  {
> >         return strstarts(filename, "/data/app-lib/") ||
> > @@ -168,6 +170,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> >                 if (dso == NULL)
> >                         goto out_delete;
> >
> > +               if (skip_not_exec_file_map_events && dso__type(dso, machine) == DSO__TYPE_UNKNOWN)
> > +                       goto out_delete;
> > +
> >                 map__init(map, start, start + len, pgoff, dso);
> >
> >                 if (anon || no_dso) {
> > diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> > index 3dcfe06db6b3..67b0e0f9f0ae 100644
> > --- a/tools/perf/util/map.h
> > +++ b/tools/perf/util/map.h
> > @@ -11,6 +11,8 @@
> >  #include <stdbool.h>
> >  #include <linux/types.h>
> >
> > +extern bool skip_not_exec_file_map_events;
> > +
> >  struct dso;
> >  struct maps;
> >  struct machine;
> > --
> > 2.34.1
> >

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf tool: add skip_not_exec_file_map_events option
  2022-08-18  3:26   ` lika you
@ 2022-08-18 15:00     ` Ian Rogers
  2022-08-19  3:18       ` zhizhi xu
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Rogers @ 2022-08-18 15:00 UTC (permalink / raw)
  To: lika you
  Cc: peterz, mingo, acme, alexander.shishkin, jolsa, linux-perf-users,
	linux-kernel

On Wed, Aug 17, 2022 at 8:24 PM lika you <tcwzxx@gmail.com> wrote:
>
> I'm so sorry for forgetting to reply to all. Here is the reply again.
>
> Thanks for the reply.
>
> The background is we have two types of tasks running on the same host.The high
> priority one which is CPU overhead and the low priority which is IO overhead.
> The high priority task has mmap many files as shared memory. The low priority
> task may load multi TB data from SSD at once time which will cause the high
> priority task file page cache to be swapped out. So we mmap all files with the
> PROT_EXEC flag to prevent hot page cache to be swapped out. That cause
> too many executable memory regions without symbols on it.
>
> The trick is implementate here.
> https://github.com/torvalds/linux/blob/master/mm/vmscan.c#L2572
>
> Thanks again

Thanks. So you are making data pages executable for the sake of
getting "better chances to stay in memory under moderate memory
pressure." Having lots of executable pages in your program isn't
great, I'm reminded of efforts to stop stacks from being executable.
This also feels like a case where madvise should be being used, for
example, the MADV_WILLNEED option. Given this, I'm not sure supporting
this case in the perf tool makes sense.

> On Thu, 18 Aug 2022 at 02:13, Ian Rogers <irogers@google.com> wrote:
> >
> > On Wed, Aug 17, 2022 at 6:43 AM tcwzxx <tcwzxx@gmail.com> wrote:
> > >
> > > When generate the flame graph, the perf-script subcommand will
> > > process all mmap event and add them to the rbtree. The 240,000
> > > mmap region takes about 5 minutes, which is not useful for flame
> > > graph. So add the skip-not-exec-file-map-events option to skip
> > > not PROT_EXEC flag memory regions.
> > >
> > > Signed-off-by: tcwzxx <tcwzxx@gmail.com>
> >
> > Could you provide more context? A reproduction?
> >
> > When we synthesize mmap events we drop non-executable pages:
> > https://github.com/torvalds/linux/blob/master/tools/perf/util/synthetic-events.c#L466
> >
> > Similarly in the kernel for the "dummy" event:
> > https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L8258
> >
> > Thanks,
> > Ian
> >
> > > ---
> > >  tools/perf/builtin-report.c | 2 ++
> > >  tools/perf/builtin-script.c | 3 +++
> > >  tools/perf/util/machine.c   | 3 +++
> > >  tools/perf/util/map.c       | 5 +++++
> > >  tools/perf/util/map.h       | 2 ++
> > >  5 files changed, 15 insertions(+)
> > >
> > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > > index 91ed41cc7d88..c28eb9450a66 100644
> > > --- a/tools/perf/builtin-report.c
> > > +++ b/tools/perf/builtin-report.c
> > > @@ -1364,6 +1364,8 @@ int cmd_report(int argc, const char **argv)
> > >                     "Disable raw trace ordering"),
> > >         OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
> > >                     "Do not display empty (or dummy) events in the output"),
> > > +       OPT_BOOLEAN(0, "skip-not-exec-file-map_events", &skip_not_exec_file_map_events,
> > > +                   "skip not exec map events"),
> > >         OPT_END()
> > >         };
> > >         struct perf_data data = {
> > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > > index 13580a9c50b8..e3f4e5e654c9 100644
> > > --- a/tools/perf/builtin-script.c
> > > +++ b/tools/perf/builtin-script.c
> > > @@ -32,6 +32,7 @@
> > >  #include "util/time-utils.h"
> > >  #include "util/path.h"
> > >  #include "util/event.h"
> > > +#include "util/map.h"
> > >  #include "ui/ui.h"
> > >  #include "print_binary.h"
> > >  #include "archinsn.h"
> > > @@ -3936,6 +3937,8 @@ int cmd_script(int argc, const char **argv)
> > >                     "Guest code can be found in hypervisor process"),
> > >         OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
> > >                     "Enable LBR callgraph stitching approach"),
> > > +       OPT_BOOLEAN(0, "skip-not-exec-map_events", &skip_not_exec_file_map_events,
> > > +                   "skip not exec map events"),
> > >         OPTS_EVSWITCH(&script.evswitch),
> > >         OPT_END()
> > >         };
> > > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > > index 2a16cae28407..21dde9f9935c 100644
> > > --- a/tools/perf/util/machine.c
> > > +++ b/tools/perf/util/machine.c
> > > @@ -1904,6 +1904,9 @@ int machine__process_mmap2_event(struct machine *machine,
> > >         if (thread == NULL)
> > >                 goto out_problem;
> > >
> > > +       if (skip_not_exec_file_map_events && !(event->mmap2.prot & PROT_EXEC))
> > > +               goto out_problem;

Do you mean to drop all executable pages with the flag here or just
those with the DSO__TYPE_UNKNOWN (as below)? It reads that all
executable pages will be dropped.

Thanks,
Ian

> > > +
> > >         map = map__new(machine, event->mmap2.start,
> > >                         event->mmap2.len, event->mmap2.pgoff,
> > >                         &dso_id, event->mmap2.prot,
> > > diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> > > index e0aa4a254583..2b51ca012c91 100644
> > > --- a/tools/perf/util/map.c
> > > +++ b/tools/perf/util/map.c
> > > @@ -16,6 +16,8 @@
> > >  #include "thread.h"
> > >  #include "vdso.h"
> > >
> > > +bool skip_not_exec_file_map_events;
> > > +
> > >  static inline int is_android_lib(const char *filename)
> > >  {
> > >         return strstarts(filename, "/data/app-lib/") ||
> > > @@ -168,6 +170,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> > >                 if (dso == NULL)
> > >                         goto out_delete;
> > >
> > > +               if (skip_not_exec_file_map_events && dso__type(dso, machine) == DSO__TYPE_UNKNOWN)
> > > +                       goto out_delete;
> > > +
> > >                 map__init(map, start, start + len, pgoff, dso);
> > >
> > >                 if (anon || no_dso) {
> > > diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> > > index 3dcfe06db6b3..67b0e0f9f0ae 100644
> > > --- a/tools/perf/util/map.h
> > > +++ b/tools/perf/util/map.h
> > > @@ -11,6 +11,8 @@
> > >  #include <stdbool.h>
> > >  #include <linux/types.h>
> > >
> > > +extern bool skip_not_exec_file_map_events;
> > > +
> > >  struct dso;
> > >  struct maps;
> > >  struct machine;
> > > --
> > > 2.34.1
> > >

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf tool: add skip_not_exec_file_map_events option
  2022-08-18 15:00     ` Ian Rogers
@ 2022-08-19  3:18       ` zhizhi xu
  0 siblings, 0 replies; 5+ messages in thread
From: zhizhi xu @ 2022-08-19  3:18 UTC (permalink / raw)
  To: Ian Rogers
  Cc: peterz, mingo, acme, alexander.shishkin, jolsa, linux-perf-users,
	linux-kernel

On Thu, 18 Aug 2022 at 23:00, Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Aug 17, 2022 at 8:24 PM lika you <tcwzxx@gmail.com> wrote:
> >
> > I'm so sorry for forgetting to reply to all. Here is the reply again.
> >
> > Thanks for the reply.
> >
> > The background is we have two types of tasks running on the same host.The high
> > priority one which is CPU overhead and the low priority which is IO overhead.
> > The high priority task has mmap many files as shared memory. The low priority
> > task may load multi TB data from SSD at once time which will cause the high
> > priority task file page cache to be swapped out. So we mmap all files with the
> > PROT_EXEC flag to prevent hot page cache to be swapped out. That cause
> > too many executable memory regions without symbols on it.
> >
> > The trick is implementate here.
> > https://github.com/torvalds/linux/blob/master/mm/vmscan.c#L2572
> >
> > Thanks again
>
> Thanks. So you are making data pages executable for the sake of
> getting "better chances to stay in memory under moderate memory
> pressure." Having lots of executable pages in your program isn't
> great, I'm reminded of efforts to stop stacks from being executable.
> This also feels like a case where madvise should be being used, for
> example, the MADV_WILLNEED option. Given this, I'm not sure supporting
> this case in the perf tool makes sense.
>
> > On Thu, 18 Aug 2022 at 02:13, Ian Rogers <irogers@google.com> wrote:
> > >
> > > On Wed, Aug 17, 2022 at 6:43 AM tcwzxx <tcwzxx@gmail.com> wrote:
> > > >
> > > > When generate the flame graph, the perf-script subcommand will
> > > > process all mmap event and add them to the rbtree. The 240,000
> > > > mmap region takes about 5 minutes, which is not useful for flame
> > > > graph. So add the skip-not-exec-file-map-events option to skip
> > > > not PROT_EXEC flag memory regions.
> > > >
> > > > Signed-off-by: tcwzxx <tcwzxx@gmail.com>
> > >
> > > Could you provide more context? A reproduction?
> > >
> > > When we synthesize mmap events we drop non-executable pages:
> > > https://github.com/torvalds/linux/blob/master/tools/perf/util/synthetic-events.c#L466
> > >
> > > Similarly in the kernel for the "dummy" event:
> > > https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L8258
> > >
> > > Thanks,
> > > Ian
> > >
> > > > ---
> > > >  tools/perf/builtin-report.c | 2 ++
> > > >  tools/perf/builtin-script.c | 3 +++
> > > >  tools/perf/util/machine.c   | 3 +++
> > > >  tools/perf/util/map.c       | 5 +++++
> > > >  tools/perf/util/map.h       | 2 ++
> > > >  5 files changed, 15 insertions(+)
> > > >
> > > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > > > index 91ed41cc7d88..c28eb9450a66 100644
> > > > --- a/tools/perf/builtin-report.c
> > > > +++ b/tools/perf/builtin-report.c
> > > > @@ -1364,6 +1364,8 @@ int cmd_report(int argc, const char **argv)
> > > >                     "Disable raw trace ordering"),
> > > >         OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
> > > >                     "Do not display empty (or dummy) events in the output"),
> > > > +       OPT_BOOLEAN(0, "skip-not-exec-file-map_events", &skip_not_exec_file_map_events,
> > > > +                   "skip not exec map events"),
> > > >         OPT_END()
> > > >         };
> > > >         struct perf_data data = {
> > > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > > > index 13580a9c50b8..e3f4e5e654c9 100644
> > > > --- a/tools/perf/builtin-script.c
> > > > +++ b/tools/perf/builtin-script.c
> > > > @@ -32,6 +32,7 @@
> > > >  #include "util/time-utils.h"
> > > >  #include "util/path.h"
> > > >  #include "util/event.h"
> > > > +#include "util/map.h"
> > > >  #include "ui/ui.h"
> > > >  #include "print_binary.h"
> > > >  #include "archinsn.h"
> > > > @@ -3936,6 +3937,8 @@ int cmd_script(int argc, const char **argv)
> > > >                     "Guest code can be found in hypervisor process"),
> > > >         OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
> > > >                     "Enable LBR callgraph stitching approach"),
> > > > +       OPT_BOOLEAN(0, "skip-not-exec-map_events", &skip_not_exec_file_map_events,
> > > > +                   "skip not exec map events"),
> > > >         OPTS_EVSWITCH(&script.evswitch),
> > > >         OPT_END()
> > > >         };
> > > > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > > > index 2a16cae28407..21dde9f9935c 100644
> > > > --- a/tools/perf/util/machine.c
> > > > +++ b/tools/perf/util/machine.c
> > > > @@ -1904,6 +1904,9 @@ int machine__process_mmap2_event(struct machine *machine,
> > > >         if (thread == NULL)
> > > >                 goto out_problem;
> > > >
> > > > +       if (skip_not_exec_file_map_events && !(event->mmap2.prot & PROT_EXEC))
> > > > +               goto out_problem;
>
> Do you mean to drop all executable pages with the flag here or just
> those with the DSO__TYPE_UNKNOWN (as below)? It reads that all
> executable pages will be dropped.
>
> Thanks,
> Ian
>

Thank you for your advice.
I mean remove non-executable pages and non-ELF files

!(event->mmap2.prot & PROT_EXEC)

Thanks

> > > > +
> > > >         map = map__new(machine, event->mmap2.start,
> > > >                         event->mmap2.len, event->mmap2.pgoff,
> > > >                         &dso_id, event->mmap2.prot,
> > > > diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> > > > index e0aa4a254583..2b51ca012c91 100644
> > > > --- a/tools/perf/util/map.c
> > > > +++ b/tools/perf/util/map.c
> > > > @@ -16,6 +16,8 @@
> > > >  #include "thread.h"
> > > >  #include "vdso.h"
> > > >
> > > > +bool skip_not_exec_file_map_events;
> > > > +
> > > >  static inline int is_android_lib(const char *filename)
> > > >  {
> > > >         return strstarts(filename, "/data/app-lib/") ||
> > > > @@ -168,6 +170,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> > > >                 if (dso == NULL)
> > > >                         goto out_delete;
> > > >
> > > > +               if (skip_not_exec_file_map_events && dso__type(dso, machine) == DSO__TYPE_UNKNOWN)
> > > > +                       goto out_delete;
> > > > +
> > > >                 map__init(map, start, start + len, pgoff, dso);
> > > >
> > > >                 if (anon || no_dso) {
> > > > diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> > > > index 3dcfe06db6b3..67b0e0f9f0ae 100644
> > > > --- a/tools/perf/util/map.h
> > > > +++ b/tools/perf/util/map.h
> > > > @@ -11,6 +11,8 @@
> > > >  #include <stdbool.h>
> > > >  #include <linux/types.h>
> > > >
> > > > +extern bool skip_not_exec_file_map_events;
> > > > +
> > > >  struct dso;
> > > >  struct maps;
> > > >  struct machine;
> > > > --
> > > > 2.34.1
> > > >

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-08-19  3:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-17 13:43 [PATCH] perf tool: add skip_not_exec_file_map_events option tcwzxx
2022-08-17 18:13 ` Ian Rogers
2022-08-18  3:26   ` lika you
2022-08-18 15:00     ` Ian Rogers
2022-08-19  3:18       ` zhizhi xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).