linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/4] perf tools: record aarch64 registers automatically
@ 2021-02-10 13:22 Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong

On arm64, automatically record all the registers if the frame pointer
mode is on. They will be used to do a dwarf unwind to find the caller
of the leaf frame if the frame pointer was omitted.

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
 tools/perf/arch/arm64/util/machine.c | 7 +++++++
 tools/perf/builtin-record.c          | 7 +++++++
 tools/perf/util/callchain.h          | 2 ++
 3 files changed, 16 insertions(+)

diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d41b27e781d3..8a8bab5f4e68 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -4,6 +4,8 @@
 #include <string.h>
 #include "debug.h"
 #include "symbol.h"
+#include "callchain.h"
+#include "record.h"
 
 /* On arm64, kernel text segment start at high memory address,
  * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -25,3 +27,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
 		p->end = c->start;
 	pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
 }
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+	opts->sample_user_regs = arch__user_reg_mask();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bb10e9863bd..a5161f54b838 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2243,6 +2243,10 @@ static int record__parse_mmap_pages(const struct option *opt,
 	return ret;
 }
 
+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
 static int parse_control_option(const struct option *opt,
 				const char *str,
 				int unset __maybe_unused)
@@ -2810,6 +2814,9 @@ int cmd_record(int argc, const char **argv)
 	/* Enable ignoring missing threads when -u/-p option is defined. */
 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
 
+	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+		arch__add_leaf_frame_record_opts(&rec->opts);
+
 	err = -ENOMEM;
 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..77fba053c677 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
 }
 #endif
 
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
 char *callchain_list__sym_name(struct callchain_list *cl,
 			       char *bf, size_t bfsize, bool show_dso);
 char *callchain_node__scnprintf_value(struct callchain_node *node,
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames
  2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
  2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong

Add a mechanism for platforms to inject stack frames for the leaf
frame caller if dwarf or other post-processing mechanisms provide
enough information to determine that a frame is missing.

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
 tools/perf/util/machine.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 522ea3236bcc..40082d70eec1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2671,6 +2671,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
 	return err;
 }
 
+static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
+		struct thread *thread __maybe_unused)
+{
+	return 0;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct evsel *evsel,
@@ -2687,6 +2693,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
+	u64 leaf_frame_caller;
+	int pos;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -2811,6 +2819,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 			continue;
 		}
 
+		pos = callchain_param.order == ORDER_CALLEE ? 2 : chain_nr - 2;
+
+		if (i == pos) {
+			leaf_frame_caller = get_leaf_frame_caller(sample, thread);
+
+			if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+				err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, leaf_frame_caller,
+					       false, NULL, NULL, 0);
+				if (err)
+					return (err < 0) ? err : 0;
+			}
+		}
+
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
 				       false, NULL, NULL, 0);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64
  2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
  2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong

On arm64, enable dwarf_callchain_users which will be needed
to do a dwarf unwind in order to get the caller of the leaf frame.

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
 tools/perf/builtin-report.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a845d6cac09..93661a3eaeb1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)
 
 	callchain_param_setup(sample_type);
 
+	if (callchain_param.record_mode == CALLCHAIN_FP &&
+			strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
+		dwarf_callchain_users = true;
+
 	if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
 		ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
 			    "Please apply --call-graph lbr when recording.\n");
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 4/4] perf tools: determine if LR is the return address
  2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
  2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
  2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong

On arm64, in frame pointer mode (e.g. perf record --call-graph fp),
use dwarf unwind info to check if the link register is the return
address in order to inject it into the frame pointer stack.

Write the following application:

	int a = 10;

	void f2(void)
	{
		for (int i = 0; i < 1000000; i++)
			a *= a;
	}

	void f1()
	{
		for (int i = 0; i < 10; i++)
			f2();
	}

	int main (void)
	{
		f1();
		return 0;
	}

with the following compilation flags:
	gcc -fno-omit-frame-pointer -fno-inline -O2

The compiler omits the frame pointer for f2 on arm. This is a problem
with any leaf call: for example, an application with many different
calls to malloc() would always omit the calling frame, even if it
can be determined.

	./perf record --call-graph fp ./a.out
	./perf report

currently gives the following stack:

0xffffea52f361
_start
__libc_start_main
main
f2

After this change, perf report correctly shows f1() calling f2(),
even though it was missing from the frame pointer unwind:

	./perf report

0xffffea52f361
_start
__libc_start_main
main
f1
f2

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
 tools/perf/util/Build                         |  1 +
 .../util/arm-frame-pointer-unwind-support.c   | 44 +++++++++++++++++++
 .../util/arm-frame-pointer-unwind-support.h   |  7 +++
 tools/perf/util/machine.c                     |  9 ++--
 4 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.c
 create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..2009d5f02972 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm-frame-pointer-unwind-support.o
 perf-y += annotate.o
 perf-y += block-info.o
 perf-y += block-range.o
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.c b/tools/perf/util/arm-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..964efd08e72e
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#include "arch/arm64/include/perf_regs.h"
+#include "event.h"
+#include "arm-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "unwind.h"
+
+struct entries {
+	u64 stack[2];
+	size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+	return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+		&& sample->user_regs.mask == PERF_REGS_MASK;
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+	struct entries *entries = arg;
+
+	entries->stack[entries->length++] = entry->ip;
+	return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
+{
+	int ret;
+
+	struct entries entries = {{0, 0}, 0};
+
+	if (!get_leaf_frame_caller_enabled(sample))
+		return 0;
+
+	ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+
+	if (ret || entries.length != 2)
+		return ret;
+
+	return callchain_param.order == ORDER_CALLER ?
+		entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..16dc03fa9abe
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 40082d70eec1..bc6147e46c89 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -34,6 +34,7 @@
 #include "bpf-event.h"
 #include <internal/lib.h> // page_size
 #include "cgroup.h"
+#include "arm-frame-pointer-unwind-support.h"
 
 #include <linux/ctype.h>
 #include <symbol/kallsyms.h>
@@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
 	return err;
 }
 
-static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
-		struct thread *thread __maybe_unused)
+static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
 {
-	return 0;
+	if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0)
+		return get_leaf_frame_caller_aarch64(sample, thread);
+	else
+		return 0;
 }
 
 static int thread__resolve_callchain_sample(struct thread *thread,
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames
  2021-02-12 17:03 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
@ 2021-02-12 17:03 ` Alexandre Truong
  0 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-12 17:03 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong

Add a mechanism for platforms to inject stack frames for the leaf
frame caller if dwarf or other post-processing mechanisms provide
enough information to determine that a frame is missing.

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
 tools/perf/util/machine.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ab8a6b3e801d..7f03ffa016b0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2671,6 +2671,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
 	return err;
 }
 
+static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
+		struct thread *thread __maybe_unused)
+{
+	return 0;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct evsel *evsel,
@@ -2687,6 +2693,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
+	u64 leaf_frame_caller;
+	int pos;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -2811,6 +2819,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 			continue;
 		}
 
+		pos = callchain_param.order == ORDER_CALLEE ? 2 : chain_nr - 2;
+
+		if (i == pos) {
+			leaf_frame_caller = get_leaf_frame_caller(sample, thread);
+
+			if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+				err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, leaf_frame_caller,
+					       false, NULL, NULL, 0);
+				if (err)
+					return (err < 0) ? err : 0;
+			}
+		}
+
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
 				       false, NULL, NULL, 0);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-02-12 17:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
2021-02-12 17:03 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
2021-02-12 17:03 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).