* [PATCH v2 1/4] perf tools: record aarch64 registers automatically
@ 2021-02-10 13:22 Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong
On arm64, automatically record all the registers if the frame pointer
mode is on. They will be used to do a dwarf unwind to find the caller
of the leaf frame if the frame pointer was omitted.
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
tools/perf/arch/arm64/util/machine.c | 7 +++++++
tools/perf/builtin-record.c | 7 +++++++
tools/perf/util/callchain.h | 2 ++
3 files changed, 16 insertions(+)
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d41b27e781d3..8a8bab5f4e68 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -4,6 +4,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"
/* On arm64, kernel text segment start at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -25,3 +27,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
}
+
+/*
+ * arm64 hook invoked by perf record when frame-pointer call graphs are
+ * enabled: request the user registers reported by arch__user_reg_mask()
+ * so that a DWARF unwind can later look for the caller of a leaf frame.
+ *
+ * The mask is OR-ed in so that any bits already set in
+ * opts->sample_user_regs are kept instead of being overwritten.
+ */
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+	opts->sample_user_regs |= arch__user_reg_mask();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bb10e9863bd..a5161f54b838 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2243,6 +2243,10 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}
+/*
+ * Default (weak) implementation: does nothing. An architecture can
+ * provide its own non-weak definition to adjust the record options.
+ */
+void __weak
+arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2810,6 +2814,9 @@ int cmd_record(int argc, const char **argv)
/* Enable ignoring missing threads when -u/-p option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..77fba053c677 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
--
2.23.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames
2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong
Add a mechanism for platforms to inject a stack frame for the caller of
the leaf frame when DWARF or another post-processing mechanism provides
enough information to determine that this frame is missing.
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
tools/perf/util/machine.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 522ea3236bcc..40082d70eec1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2671,6 +2671,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
+static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
+ struct thread *thread __maybe_unused)
+{
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2687,6 +2693,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
+ int pos;
if (chain)
chain_nr = chain->nr;
@@ -2811,6 +2819,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
}
+ pos = callchain_param.order == ORDER_CALLEE ? 2 : chain_nr - 2;
+
+ if (i == pos) {
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread);
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
--
2.23.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64
2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong
On arm64, enable dwarf_callchain_users which will be needed
to do a dwarf unwind in order to get the caller of the leaf frame.
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
tools/perf/builtin-report.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a845d6cac09..93661a3eaeb1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -405,6 +405,10 @@ static int report__setup_sample_type(struct report *rep)
callchain_param_setup(sample_type);
+ if (callchain_param.record_mode == CALLCHAIN_FP &&
+ strncmp(rep->session->header.env.arch, "aarch64", 7) == 0)
+ dwarf_callchain_users = true;
+
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
"Please apply --call-graph lbr when recording.\n");
--
2.23.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 4/4] perf tools: determine if LR is the return address
2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
@ 2021-02-10 13:22 ` Alexandre Truong
2 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-10 13:22 UTC (permalink / raw)
To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong
On arm64, in frame pointer mode (e.g. perf record --call-graph fp),
use DWARF unwind info to check whether the link register holds the
return address, so that it can be injected into the frame pointer stack.
Write the following application:
int a = 10;
void f2(void)
{
for (int i = 0; i < 1000000; i++)
a *= a;
}
void f1()
{
for (int i = 0; i < 10; i++)
f2();
}
int main (void)
{
f1();
return 0;
}
with the following compilation flags:
gcc -fno-omit-frame-pointer -fno-inline -O2
The compiler omits the frame pointer for f2 on arm. This is a problem
with any leaf call: for example, in an application with many different
calls to malloc(), the frame that called malloc() would always be
missing from the call chain, even though it can be determined.
./perf record --call-graph fp ./a.out
./perf report
currently gives the following stack:
0xffffea52f361
_start
__libc_start_main
main
f2
After this change, perf report correctly shows f1() calling f2(),
even though it was missing from the frame pointer unwind:
./perf report
0xffffea52f361
_start
__libc_start_main
main
f1
f2
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
tools/perf/util/Build | 1 +
.../util/arm-frame-pointer-unwind-support.c | 44 +++++++++++++++++++
.../util/arm-frame-pointer-unwind-support.h | 7 +++
tools/perf/util/machine.c | 9 ++--
4 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.c
create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.h
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..2009d5f02972 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.c b/tools/perf/util/arm-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..964efd08e72e
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#include "arch/arm64/include/perf_regs.h"
+#include "event.h"
+#include "arm-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "unwind.h"
+
+/* Small fixed-size buffer that add_entry() fills with unwound IPs. */
+struct entries {
+	u64 stack[2];		/* instruction pointers, in callback order */
+	size_t length;		/* number of stack[] slots filled so far */
+};
+
+/*
+ * The leaf-frame lookup is attempted only for frame-pointer call chains
+ * whose sample carries user registers recorded with exactly
+ * PERF_REGS_MASK.
+ */
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+	if (callchain_param.record_mode != CALLCHAIN_FP)
+		return false;
+
+	if (!sample->user_regs.regs)
+		return false;
+
+	return sample->user_regs.mask == PERF_REGS_MASK;
+}
+
+/*
+ * Callback handed to unwind__get_entries(): store the IP of @entry in
+ * the struct entries passed through @arg. Always returns 0.
+ */
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+	struct entries *entries = arg;
+	const size_t capacity = sizeof(entries->stack) / sizeof(entries->stack[0]);
+
+	/* Never write past stack[]; any surplus entry is dropped. */
+	if (entries->length < capacity)
+		entries->stack[entries->length++] = entry->ip;
+
+	return 0;
+}
+
+/*
+ * Unwind up to two frames from @sample via unwind__get_entries() and
+ * return the entry selected by the callchain order, which is meant to be
+ * the caller of the leaf frame. Returns 0 when the lookup is disabled for
+ * this sample, when unwinding fails, or when fewer than two entries were
+ * collected.
+ */
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
+{
+	struct entries entries = {{0, 0}, 0};
+	int ret;
+
+	if (!get_leaf_frame_caller_enabled(sample))
+		return 0;
+
+	ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+
+	/*
+	 * Report "no caller" on every failure. Returning ret here would
+	 * convert a non-zero int (e.g. a negative error code) into a u64
+	 * that the caller would treat as a valid address.
+	 */
+	if (ret || entries.length != 2)
+		return 0;
+
+	return callchain_param.order == ORDER_CALLER ?
+		entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..16dc03fa9abe
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 40082d70eec1..bc6147e46c89 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -34,6 +34,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
-static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
- struct thread *thread __maybe_unused)
+/* Only aarch64 has a leaf-frame caller lookup; everything else yields 0. */
+static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
{
- return 0;
+	const char *arch = thread->maps->machine->env->arch;
+
+	if (strncmp(arch, "aarch64", 7) != 0)
+		return 0;
+
+	return get_leaf_frame_caller_aarch64(sample, thread);
}
static int thread__resolve_callchain_sample(struct thread *thread,
--
2.23.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 1/4] perf tools: record aarch64 registers automatically
@ 2021-02-12 17:03 Alexandre Truong
0 siblings, 0 replies; 5+ messages in thread
From: Alexandre Truong @ 2021-02-12 17:03 UTC (permalink / raw)
To: linux-kernel, linux-perf-users; +Cc: Alexandre Truong
On arm64, automatically record all the registers if the frame pointer
mode is on. They will be used to do a dwarf unwind to find the caller
of the leaf frame if the frame pointer was omitted.
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
---
tools/perf/arch/arm64/util/machine.c | 7 +++++++
tools/perf/builtin-record.c | 7 +++++++
tools/perf/util/callchain.h | 2 ++
3 files changed, 16 insertions(+)
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 40c5e0b5bda8..bf2f9c447867 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"
/* On arm64, kernel text segment start at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
+
+/*
+ * arm64 hook invoked by perf record when frame-pointer call graphs are
+ * enabled: request the user registers reported by arch__user_reg_mask()
+ * so that a DWARF unwind can later look for the caller of a leaf frame.
+ *
+ * The mask is OR-ed in so that any bits already set in
+ * opts->sample_user_regs are kept instead of being overwritten.
+ */
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+	opts->sample_user_regs |= arch__user_reg_mask();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8a0127d4fb52..496307ef490e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2244,6 +2244,10 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}
+/*
+ * Default (weak) implementation: does nothing. An architecture can
+ * provide its own non-weak definition to adjust the record options.
+ */
+void __weak
+arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2813,6 +2817,9 @@ int cmd_record(int argc, const char **argv)
/* Enable ignoring missing threads when -u/-p option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..77fba053c677 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
--
2.23.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-02-12 17:05 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10 13:22 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 2/4] perf tools: add a mechanism to inject stack frames Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 3/4] perf tools: enable dwarf_callchain_users on aarch64 Alexandre Truong
2021-02-10 13:22 ` [PATCH v2 4/4] perf tools: determine if LR is the return address Alexandre Truong
2021-02-12 17:03 [PATCH v2 1/4] perf tools: record aarch64 registers automatically Alexandre Truong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).