All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Stephane Eranian <eranian@google.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 11/11] perf script: Add support for printing assembler
Date: Tue, 29 Mar 2016 20:41:29 -0300	[thread overview]
Message-ID: <1459294889-12148-12-git-send-email-acme@kernel.org> (raw)
In-Reply-To: <1459294889-12148-1-git-send-email-acme@kernel.org>

From: Andi Kleen <ak@linux.intel.com>

When dumping PT traces with perf script, it is very useful to see the
assembler for each sample, so that the control flow is easy to
follow.

Since using objdump from perf script is difficult and inefficient, this
patch uses the udis86 library to implement assembler output.  The
library can be downloaded from http://udis86.sourceforge.net/

The library is probed as an external dependency in the usual way. Then
'perf script' calls into it when needed, and handles callbacks to
resolve symbols.

  % perf record -e intel_pt//u true
  % perf script -F sym,symoff,ip,asm --itrace=i0ns | head
     7fc7188b4190 _start+0x0	mov %rsp, %rdi
     7fc7188b4193 _start+0x3	call _dl_start
     7fc7188b7710 _dl_start+0x0	push %rbp
     7fc7188b7711 _dl_start+0x1	mov %rsp, %rbp
     7fc7188b7714 _dl_start+0x4	push %r15
     7fc7188b7716 _dl_start+0x6	push %r14
     7fc7188b7718 _dl_start+0x8	push %r13
     7fc7188b771a _dl_start+0xa	push %r12
     7fc7188b771c _dl_start+0xc	mov %rdi, %r12
     7fc7188b771f _dl_start+0xf	push %rbx

Current issues:

- Some jump references do not get resolved to symbols.
- The current udis86 release does not support STAC/CLAC, which are used
  in the kernel, but there is a pending patch for it.

v2: Fix address resolution. Port to latest acme/perf/core

Committer note:

To test intel_pt one needs to make sure VT-x isn't active, i.e.
stopping KVM guests on the test machine, as described by Andi Kleen at
http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-3-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |   4 +-
 tools/perf/builtin-script.c              | 107 +++++++++++++++++++++++++++++--
 2 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 22ef3933342a..f2b81d837799 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-	srcline, period, iregs, brstack, brstacksym, flags.
+	srcline, period, iregs, brstack, brstacksym, flags, asm.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
@@ -185,6 +185,8 @@ OPTIONS
 
 	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
 
+	When asm is specified the assembler instruction of each sample is printed in disassembled form.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3770c3dffe5e..323572e72706 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -25,6 +25,10 @@
 #include "asm/bug.h"
 #include "util/mem-events.h"
 
+#ifdef HAVE_UDIS86
+#include <udis86.h>
+#endif
+
 static char const		*script_name;
 static char const		*generate_script_lang;
 static bool			debug_mode;
@@ -62,6 +66,7 @@ enum perf_output_field {
 	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
 	PERF_OUTPUT_WEIGHT	    = 1U << 18,
 	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
+	PERF_OUTPUT_ASM		    = 1U << 20,
 };
 
 struct output_option {
@@ -88,6 +93,7 @@ struct output_option {
 	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
 	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
 	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
+	{.str = "asm", .field = PERF_OUTPUT_ASM},
 };
 
 /* default set to maintain compatibility with current format */
@@ -282,7 +288,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected. Hence, no address to lookup the source line number.\n");
 		return -EINVAL;
 	}
-
+	if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
+		pr_err("Display of assembler requested but sample IP is not\n"
+		       "selected.\n");
+		return -EINVAL;
+	}
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
 		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
 					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -421,6 +431,88 @@ static void print_sample_iregs(struct perf_sample *sample,
 	}
 }
 
+#ifdef HAVE_UDIS86
+
+struct perf_ud {
+	ud_t ud_obj;
+	struct thread *thread;
+	u8 cpumode;
+	int cpu;
+};
+
+static const char *dis_resolve(struct ud *u, uint64_t addr, int64_t *off)
+{
+	struct perf_ud *ud = container_of(u, struct perf_ud, ud_obj);
+	struct addr_location al;
+
+	memset(&al, 0, sizeof(struct addr_location));
+
+	thread__find_addr_map(ud->thread, ud->cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map)
+		thread__find_addr_map(ud->thread, ud->cpumode, MAP__VARIABLE,
+					addr, &al);
+	al.cpu = ud->cpu;
+	al.sym = NULL;
+
+	if (al.map)
+		al.sym = map__find_symbol(al.map, al.addr, NULL);
+
+	if (!al.sym)
+		return NULL;
+
+	if (al.addr < al.sym->end)
+		*off = al.addr - al.sym->start;
+	else
+		*off = al.addr - al.map->start - al.sym->start;
+	return al.sym->name;
+}
+#endif
+
+static void print_sample_asm(struct perf_sample *sample __maybe_unused,
+			     struct thread *thread __maybe_unused,
+			     struct perf_event_attr *attr __maybe_unused,
+			     struct addr_location *al __maybe_unused,
+			     struct machine *machine __maybe_unused)
+{
+#ifdef HAVE_UDIS86
+	static bool ud_initialized = false;
+	static struct perf_ud ud;
+	u8 buffer[32];
+	int len;
+	u64 offset;
+
+	if (!ud_initialized) {
+		ud_initialized = true;
+		ud_init(&ud.ud_obj);
+		ud_set_syntax(&ud.ud_obj, UD_SYN_ATT);
+		ud_set_sym_resolver(&ud.ud_obj, dis_resolve);
+	}
+	ud.thread = thread;
+	ud.cpumode = sample->cpumode;
+	ud.cpu = sample->cpu;
+
+	if (!al->map || !al->map->dso)
+		return;
+	if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
+		return;
+
+	/* Load maps to ensure dso->is_64_bit has been updated */
+	map__load(al->map, machine->symbol_filter);
+
+	offset = al->map->map_ip(al->map, sample->ip);
+	len = dso__data_read_offset(al->map->dso, machine,
+				    offset, buffer, 32);
+	if (len <= 0)
+		return;
+
+	ud_set_mode(&ud.ud_obj, al->map->dso->is_64_bit ? 64 : 32);
+	ud_set_pc(&ud.ud_obj, sample->ip);
+	ud_set_input_buffer(&ud.ud_obj, buffer, len);
+	ud_disassemble(&ud.ud_obj);
+	printf("\t%s", ud_insn_asm(&ud.ud_obj));
+#endif
+}
+
 static void print_sample_start(struct perf_sample *sample,
 			       struct thread *thread,
 			       struct perf_evsel *evsel)
@@ -739,7 +831,8 @@ static size_t data_src__printf(u64 data_src)
 
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
-			  struct addr_location *al)
+			  struct addr_location *al,
+			  struct machine *machine)
 {
 	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->attr;
@@ -767,7 +860,7 @@ static void process_event(struct perf_script *script,
 
 	if (is_bts_event(attr)) {
 		print_sample_bts(sample, evsel, thread, al);
-		return;
+		goto print_rest;
 	}
 
 	if (PRINT_FIELD(TRACE))
@@ -796,6 +889,7 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(IREGS))
 		print_sample_iregs(sample, attr);
 
+print_rest:
 	if (PRINT_FIELD(BRSTACK))
 		print_sample_brstack(sample);
 	else if (PRINT_FIELD(BRSTACKSYM))
@@ -804,6 +898,9 @@ static void process_event(struct perf_script *script,
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
 
+	if (PRINT_FIELD(ASM))
+		print_sample_asm(sample, thread, attr, al, machine);
+
 	printf("\n");
 }
 
@@ -910,7 +1007,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (scripting_ops)
 		scripting_ops->process_event(event, sample, evsel, &al);
 	else
-		process_event(scr, sample, evsel, &al);
+		process_event(scr, sample, evsel, &al, machine);
 
 out_put:
 	addr_location__put(&al);
@@ -2010,7 +2107,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "comma separated output fields prepend with 'type:'. "
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-		     "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
+		     "addr,symoff,period,iregs,brstack,brstacksym,flags,asm", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
-- 
2.5.5

      parent reply	other threads:[~2016-03-29 23:42 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-29 23:41 [GIT PULL 00/11] perf/core improvements and fixes Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 01/11] perf mem: Add -U/-K (--all-user/--all-kernel) options Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 02/11] perf tools: Make hists__collapse_insert_entry static Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 03/11] perf tools: Make -f/--force option documentation consistent across tools Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 04/11] perf tests: Add test to check for event times Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 05/11] perf config: Remove duplicated set_buildid_dir calls Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 06/11] perf config: Rework buildid_dir_command_config to perf_buildid_config Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 07/11] perf config: Rename 'v' to 'home' in set_buildid_dir() Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 08/11] perf script perl: Perl scripts now get a backtrace, like the python ones Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 09/11] perf tools: Add support for skipping itrace instructions Arnaldo Carvalho de Melo
2016-03-29 23:41 ` [PATCH 10/11] perf tools: Add probing for udev86 library Arnaldo Carvalho de Melo
2016-03-30 10:43   ` Ingo Molnar
2016-03-30 13:36     ` Arnaldo Carvalho de Melo
2016-03-30 13:52       ` Ingo Molnar
2016-03-30 14:10         ` Arnaldo Carvalho de Melo
2016-03-30 14:47           ` Arnaldo Carvalho de Melo
2016-03-31  6:34             ` Ingo Molnar
2016-03-30 15:08         ` Andi Kleen
2016-03-31  6:49           ` Ingo Molnar
2016-03-30 14:42     ` Andi Kleen
2016-03-29 23:41 ` Arnaldo Carvalho de Melo [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1459294889-12148-12-git-send-email-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.