All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 0/4]  Avoid some large stack allocations
@ 2023-05-27  3:43 Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 1/4] perf sched: Avoid " Ian Rogers
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Ian Rogers @ 2023-05-27  3:43 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Ian Rogers, Adrian Hunter, linux-perf-users, linux-kernel

Following on cleaning up .data and .bss in:
https://lore.kernel.org/lkml/20230526183401.2326121-1-irogers@google.com/
Look for some probably too large stack allocations with -Wstack-usage=20000
and pahole.

Don't attempt to cleanup variable length arrays like in:
```
util/header.c: In function ‘write_cache’:
util/header.c:1269:12: warning: stack usage might be unbounded [-Wstack-usage=]
 1269 | static int write_cache(struct feat_fd *ff,
      |            ^~~~~~~~~~~
```

Also leave two allocations relating to session/event processing:
```
util/auxtrace.c: In function ‘auxtrace_queues__add_indexed_event’:
util/auxtrace.c:424:12: warning: stack usage is 65616 bytes [-Wstack-usage=]
  424 | static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/session.c: In function ‘perf_session__peek_events’:
util/session.c:1822:5: warning: stack usage is 65648 bytes [-Wstack-usage=]
 1822 | int perf_session__peek_events(struct perf_session *session, u64 offset,
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~
```

The biggest win is for perf inject where 128kb becomes lazily
allocated when aux or guest data is encountered.

Ian Rogers (4):
  perf sched: Avoid large stack allocations
  perf script: Remove some large stack allocations
  perf inject: Lazily allocate event_copy
  perf inject: Lazily allocate guest_event event_buf

 tools/perf/builtin-inject.c | 31 +++++++++++++++++++++++++------
 tools/perf/builtin-sched.c  | 26 ++++++++++++++++++++++----
 tools/perf/builtin-script.c | 17 +++++++++++++----
 3 files changed, 60 insertions(+), 14 deletions(-)

-- 
2.41.0.rc0.172.g3f132b7071-goog


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v1 1/4] perf sched: Avoid large stack allocations
  2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
@ 2023-05-27  3:43 ` Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 2/4] perf script: Remove some " Ian Rogers
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2023-05-27  3:43 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Ian Rogers, Adrian Hunter, linux-perf-users, linux-kernel

Commit 5ded57ac1bdb ("perf inject: Remove static variables") moved
static variables to local, however, in this case 3 MAX_CPUS (4096)
sized arrays were moved onto the stack making the stack frame quite
large. Avoid the stack usage by dynamically allocating the arrays.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-sched.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cc4ba506e119..2aeb3c8ac396 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -193,8 +193,8 @@ struct perf_sched {
  * weird events, such as a task being switched away that is not current.
  */
 	struct perf_cpu	 max_cpu;
-	u32		 curr_pid[MAX_CPUS];
-	struct thread	 *curr_thread[MAX_CPUS];
+	u32		 *curr_pid;
+	struct thread	 **curr_thread;
 	char		 next_shortname1;
 	char		 next_shortname2;
 	unsigned int	 replay_repeat;
@@ -224,7 +224,7 @@ struct perf_sched {
 	u64		 run_avg;
 	u64		 all_runtime;
 	u64		 all_count;
-	u64		 cpu_last_switched[MAX_CPUS];
+	u64		 *cpu_last_switched;
 	struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
 	struct list_head sort_list, cmp_pid;
 	bool force;
@@ -3599,7 +3599,22 @@ int cmd_sched(int argc, const char **argv)
 
 	mutex_init(&sched.start_work_mutex);
 	mutex_init(&sched.work_done_wait_mutex);
-	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
+	sched.curr_thread = calloc(MAX_CPUS, sizeof(*sched.curr_thread));
+	if (!sched.curr_thread) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	sched.cpu_last_switched = calloc(MAX_CPUS, sizeof(*sched.cpu_last_switched));
+	if (!sched.cpu_last_switched) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	sched.curr_pid = malloc(MAX_CPUS * sizeof(*sched.curr_pid));
+	if (!sched.curr_pid) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; i < MAX_CPUS; i++)
 		sched.curr_pid[i] = -1;
 
 	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
@@ -3668,6 +3683,9 @@ int cmd_sched(int argc, const char **argv)
 	}
 
 out:
+	free(sched.curr_pid);
+	free(sched.cpu_last_switched);
+	free(sched.curr_thread);
 	mutex_destroy(&sched.start_work_mutex);
 	mutex_destroy(&sched.work_done_wait_mutex);
 
-- 
2.41.0.rc0.172.g3f132b7071-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v1 2/4] perf script: Remove some large stack allocations
  2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 1/4] perf sched: Avoid " Ian Rogers
@ 2023-05-27  3:43 ` Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 3/4] perf inject: Lazily allocate event_copy Ian Rogers
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2023-05-27  3:43 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Ian Rogers, Adrian Hunter, linux-perf-users, linux-kernel

Some char buffers are stack allocated but in total they come to
24kb. Avoid Wstack-usage warnings by moving the arrays to being
dynamically allocated.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-script.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 029d5a597233..3875c4b72db1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3299,14 +3299,21 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
 				  int unset __maybe_unused)
 {
 	struct dirent *script_dirent, *lang_dirent;
-	char scripts_path[MAXPATHLEN];
+	char *buf, *scripts_path, *script_path, *lang_path, *first_half;
 	DIR *scripts_dir, *lang_dir;
-	char script_path[MAXPATHLEN];
-	char lang_path[MAXPATHLEN];
 	struct script_desc *desc;
-	char first_half[BUFSIZ];
 	char *script_root;
 
+	buf = malloc(3 * MAXPATHLEN + BUFSIZ);
+	if (!buf) {
+		pr_err("malloc failed\n");
+		exit(-1);
+	}
+	scripts_path = buf;
+	script_path = buf + MAXPATHLEN;
+	lang_path = buf + 2 * MAXPATHLEN;
+	first_half = buf + 3 * MAXPATHLEN;
+
 	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
 	scripts_dir = opendir(scripts_path);
@@ -3315,6 +3322,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
 			"open(%s) failed.\n"
 			"Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
 			scripts_path);
+		free(buf);
 		exit(-1);
 	}
 
@@ -3345,6 +3353,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
 			desc->half_liner ? desc->half_liner : "");
 	}
 
+	free(buf);
 	exit(0);
 }
 
-- 
2.41.0.rc0.172.g3f132b7071-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v1 3/4] perf inject: Lazily allocate event_copy
  2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 1/4] perf sched: Avoid " Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 2/4] perf script: Remove some " Ian Rogers
@ 2023-05-27  3:43 ` Ian Rogers
  2023-05-27  3:43 ` [PATCH v1 4/4] perf inject: Lazily allocate guest_event event_buf Ian Rogers
  2023-06-12 19:01 ` [PATCH v1 0/4] Avoid some large stack allocations Arnaldo Carvalho de Melo
  4 siblings, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2023-05-27  3:43 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Ian Rogers, Adrian Hunter, linux-perf-users, linux-kernel

The event_copy is 64kb (PERF_SAMPLE_SIZE_MAX) and stack allocated in
struct perf_inject. It is used for aux events that may not exist in a
file. Make the array allocation lazy to cut down on the stack usage.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-inject.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 61766eead4f4..da8c89fefa3a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -122,7 +122,7 @@ struct perf_inject {
 	u64			aux_id;
 	struct list_head	samples;
 	struct itrace_synth_opts itrace_synth_opts;
-	char			event_copy[PERF_SAMPLE_MAX_SIZE];
+	char			*event_copy;
 	struct perf_file_section secs[HEADER_FEAT_BITS];
 	struct guest_session	guest_session;
 	struct strlist		*known_build_ids;
@@ -320,8 +320,14 @@ perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
 {
 	size_t sz1 = sample->aux_sample.data - (void *)event;
 	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
-	union perf_event *ev = (union perf_event *)inject->event_copy;
+	union perf_event *ev;
 
+	if (inject->event_copy == NULL) {
+		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
+		if (!inject->event_copy)
+			return ERR_PTR(-ENOMEM);
+	}
+	ev = (union perf_event *)inject->event_copy;
 	if (sz1 > event->header.size || sz2 > event->header.size ||
 	    sz1 + sz2 > event->header.size ||
 	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
@@ -357,8 +363,11 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
 
 	build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 
-	if (inject->itrace_synth_opts.set && sample->aux_sample.size)
+	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
 		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
+		if (IS_ERR(event))
+			return PTR_ERR(event);
+	}
 
 	return perf_event__repipe_synth(tool, event);
 }
@@ -2389,5 +2398,6 @@ int cmd_inject(int argc, const char **argv)
 	if (!inject.in_place_update)
 		perf_data__close(&inject.output);
 	free(inject.itrace_synth_opts.vm_tm_corr_args);
+	free(inject.event_copy);
 	return ret;
 }
-- 
2.41.0.rc0.172.g3f132b7071-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v1 4/4] perf inject: Lazily allocate guest_event event_buf
  2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
                   ` (2 preceding siblings ...)
  2023-05-27  3:43 ` [PATCH v1 3/4] perf inject: Lazily allocate event_copy Ian Rogers
@ 2023-05-27  3:43 ` Ian Rogers
  2023-06-12 19:01 ` [PATCH v1 0/4] Avoid some large stack allocations Arnaldo Carvalho de Melo
  4 siblings, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2023-05-27  3:43 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Ian Rogers, Adrian Hunter, linux-perf-users, linux-kernel

The event_buf is 64kb (PERF_SAMPLE_SIZE_MAX) and stack allocated in
struct perf_inject. It is used for guest events that may not exist in
a file. Make the array allocation lazy to cut down on the stack usage.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-inject.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index da8c89fefa3a..d68d25575b6c 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -47,7 +47,7 @@
 struct guest_event {
 	struct perf_sample		sample;
 	union perf_event		*event;
-	char				event_buf[PERF_SAMPLE_MAX_SIZE];
+	char				*event_buf;
 };
 
 struct guest_id {
@@ -1372,11 +1372,19 @@ static void guest_session__convert_time(struct guest_session *gs, u64 guest_time
 
 static int guest_session__fetch(struct guest_session *gs)
 {
-	void *buf = gs->ev.event_buf;
-	struct perf_event_header *hdr = buf;
+	void *buf;
+	struct perf_event_header *hdr;
 	size_t hdr_sz = sizeof(*hdr);
 	ssize_t ret;
 
+	buf = gs->ev.event_buf;
+	if (!buf) {
+		buf = malloc(PERF_SAMPLE_MAX_SIZE);
+		if (!buf)
+			return -ENOMEM;
+		gs->ev.event_buf = buf;
+	}
+	hdr = buf;
 	ret = readn(gs->tmp_fd, buf, hdr_sz);
 	if (ret < 0)
 		return ret;
@@ -2399,5 +2407,6 @@ int cmd_inject(int argc, const char **argv)
 		perf_data__close(&inject.output);
 	free(inject.itrace_synth_opts.vm_tm_corr_args);
 	free(inject.event_copy);
+	free(inject.guest_session.ev.event_buf);
 	return ret;
 }
-- 
2.41.0.rc0.172.g3f132b7071-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v1 0/4]  Avoid some large stack allocations
  2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
                   ` (3 preceding siblings ...)
  2023-05-27  3:43 ` [PATCH v1 4/4] perf inject: Lazily allocate guest_event event_buf Ian Rogers
@ 2023-06-12 19:01 ` Arnaldo Carvalho de Melo
  4 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2023-06-12 19:01 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Adrian Hunter, linux-perf-users,
	linux-kernel

Em Fri, May 26, 2023 at 08:43:18PM -0700, Ian Rogers escreveu:
> Following on cleaning up .data and .bss in:
> https://lore.kernel.org/lkml/20230526183401.2326121-1-irogers@google.com/
> Look for some probably too large stack allocations with -Wstack-usage=20000
> and pahole.

Thanks, applied.

- Arnaldo

 
> Don't attempt to cleanup variable length arrays like in:
> ```
> util/header.c: In function ‘write_cache’:
> util/header.c:1269:12: warning: stack usage might be unbounded [-Wstack-usage=]
>  1269 | static int write_cache(struct feat_fd *ff,
>       |            ^~~~~~~~~~~
> ```
> 
> Also leave two allocations relating to session/event processing:
> ```
> util/auxtrace.c: In function ‘auxtrace_queues__add_indexed_event’:
> util/auxtrace.c:424:12: warning: stack usage is 65616 bytes [-Wstack-usage=]
>   424 | static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
>       |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> util/session.c: In function ‘perf_session__peek_events’:
> util/session.c:1822:5: warning: stack usage is 65648 bytes [-Wstack-usage=]
>  1822 | int perf_session__peek_events(struct perf_session *session, u64 offset,
>       |     ^~~~~~~~~~~~~~~~~~~~~~~~~
> ```
> 
> The biggest win is for perf inject where 128kb becomes lazily
> allocated when aux or guest data is encountered.
> 
> Ian Rogers (4):
>   perf sched: Avoid large stack allocations
>   perf script: Remove some large stack allocations
>   perf inject: Lazily allocate event_copy
>   perf inject: Lazily allocate guest_event event_buf
> 
>  tools/perf/builtin-inject.c | 31 +++++++++++++++++++++++++------
>  tools/perf/builtin-sched.c  | 26 ++++++++++++++++++++++----
>  tools/perf/builtin-script.c | 17 +++++++++++++----
>  3 files changed, 60 insertions(+), 14 deletions(-)
> 
> -- 
> 2.41.0.rc0.172.g3f132b7071-goog
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-06-12 19:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-27  3:43 [PATCH v1 0/4] Avoid some large stack allocations Ian Rogers
2023-05-27  3:43 ` [PATCH v1 1/4] perf sched: Avoid " Ian Rogers
2023-05-27  3:43 ` [PATCH v1 2/4] perf script: Remove some " Ian Rogers
2023-05-27  3:43 ` [PATCH v1 3/4] perf inject: Lazily allocate event_copy Ian Rogers
2023-05-27  3:43 ` [PATCH v1 4/4] perf inject: Lazily allocate guest_event event_buf Ian Rogers
2023-06-12 19:01 ` [PATCH v1 0/4] Avoid some large stack allocations Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.