All of lore.kernel.org
 help / color / mirror / Atom feed
* New attempt at adding an disassembler to perf
@ 2017-01-19  1:41 Andi Kleen
  2017-01-19  1:41 ` [PATCH 1/5] perf, tools: Add probing for xed Andi Kleen
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel

A native disassembler in perf is very useful, in particular with perf script to trace 
instruction streams, but also for other analysis. Previously I attempted
to do this using the udis86 library, but that was rejected because:
- udis86 was no longer maintained and lacked recent instructions
- udis86 is dynamically linked and adds a runtime dependency.
Doing this needs a full disassembler, not just a decoder, so the existing
instruction decoder cannot be used without major changes.

This patchkit addresses these issues.  Intel recently released an open source version
of the XED disassembler library, which is used in many other Intel software products.
It is very well maintained, up to date, and supports static linking, so there is no
runtime dependency. This version adds XED support to perf, and uses it to implement
assembler output in perf script. It also fixes a range of issues in the previous
version; see the individual change logs.

Available in 
git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git perf/xed-6

v1: First post of XED version
v2: Change probing to not be default and support XED_DIR. Other cleanups
based on review.
v3: Use FEATURE_TESTS_BASIC for probing

-Andi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/5] perf, tools: Add probing for xed
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
@ 2017-01-19  1:41 ` Andi Kleen
  2017-01-19  1:41 ` [PATCH 2/5] perf, tools: Add one liner warning for disabled features Andi Kleen
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add autoprobing for the xed disassembler library.

Can be downloaded from https://github.com/intelxed/xed

v2: Hide. Require XED=1 to enable. Add XED_DIR
v3: Remove -lxed from probe all. Don't touch FEATURE_DISPLAY.
v4: Move to FEATURE_TESTS_BASIC
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/build/Makefile.feature   |  4 +++-
 tools/build/feature/Makefile   |  6 +++++-
 tools/build/feature/test-all.c |  5 +++++
 tools/build/feature/test-xed.c |  9 +++++++++
 tools/perf/Makefile.config     | 16 ++++++++++++++++
 tools/perf/Makefile.perf       |  3 +++
 6 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 tools/build/feature/test-xed.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index e3fb5ecbdcb6..c5012b8a49fe 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -63,7 +63,8 @@ FEATURE_TESTS_BASIC :=                  \
         lzma                            \
         get_cpuid                       \
         bpf                             \
-        sdt
+        sdt				\
+        xed
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
 # of all feature tests
@@ -140,6 +141,7 @@ ifeq ($(feature-all), 1)
   $(call feature_check,compile-x32)
   $(call feature_check,bionic)
   $(call feature_check,libbabeltrace)
+  $(call feature_check,xed)
 else
   $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
 endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b564a2eea039..4f1aa82b867a 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -48,7 +48,8 @@ FILES=                                          \
          test-get_cpuid.bin                     \
          test-sdt.bin                           \
          test-cxx.bin                           \
-         test-jvmti.bin
+         test-jvmti.bin				\
+         test-xed.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -123,6 +124,9 @@ $(OUTPUT)test-numa_num_possible_cpus.bin:
 $(OUTPUT)test-libunwind.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-xed.bin:
+	$(BUILD) -lxed
+
 $(OUTPUT)test-libunwind-debug-frame.bin:
 	$(BUILD) -lelf
 $(OUTPUT)test-libunwind-x86.bin:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 699e43627397..4a6dd1d1ff49 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -149,6 +149,10 @@
 # include "test-sdt.c"
 #undef main
 
+#define main main_test_xed
+# include "test-xed.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -183,6 +187,7 @@ int main(int argc, char *argv[])
 	main_test_bpf();
 	main_test_libcrypto();
 	main_test_sdt();
+	main_test_xed();
 
 	return 0;
 }
diff --git a/tools/build/feature/test-xed.c b/tools/build/feature/test-xed.c
new file mode 100644
index 000000000000..ef9aebf1559d
--- /dev/null
+++ b/tools/build/feature/test-xed.c
@@ -0,0 +1,9 @@
+#include <xed/xed-interface.h>
+#include <xed/xed-decode.h>
+#include <xed/xed-decoded-inst-api.h>
+
+int main(void)
+{
+	xed_tables_init();
+	return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 03cf947755b9..9996027e7a52 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -684,6 +684,22 @@ ifndef NO_ZLIB
   endif
 endif
 
+ifdef XED
+  ifdef XED_DIR
+    FEATURE_CHECK_CFLAGS-xed := -I $(XED_DIR)/include
+    # override for lib64?
+    FEATURE_CHECK_LDFLAGS-xed := -L $(XED_DIR)/lib -lxed
+  else
+    FEATURE_CHECK_CFLAGS-xed :=
+    FEATURE_CHECK_LDFLAGS-xed := -lxed
+  endif
+  $(call feature_check,xed)
+  ifeq ($(feature-xed), 1)
+    EXTLIBS += -lxed
+    $(call detected,CONFIG_XED)
+  endif
+endif
+
 ifndef NO_LZMA
   ifeq ($(feature-lzma), 1)
     CFLAGS += -DHAVE_LZMA_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4da19b6ba94a..f6760309edd3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -84,6 +84,9 @@ include ../scripts/utilities.mak
 # Define NO_SDT if you do not want to define SDT event in perf tools,
 # note that it doesn't disable SDT scanning support.
 #
+# Define XED=1 to build WITH the XED disassembler for perf script
+# Can also set XED_DIR=/path to set XED directory.
+#
 # Define FEATURES_DUMP to provide features detection dump file
 # and bypass the feature detection
 #
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/5] perf, tools: Add one liner warning for disabled features
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
  2017-01-19  1:41 ` [PATCH 1/5] perf, tools: Add probing for xed Andi Kleen
@ 2017-01-19  1:41 ` Andi Kleen
  2017-01-19  1:41 ` [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library Andi Kleen
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add a one liner warning for perf features that need to be enabled
explicitly by the user, so that they know they are missing something.
Currently enabled for XED and BABELTRACE.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Makefile.config | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 9996027e7a52..39b90ffd9412 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -65,6 +65,8 @@ ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
+DISABLED_FEATURES =
+
 ifeq ($(LIBUNWIND_LIBS),)
   NO_LIBUNWIND := 1
 endif
@@ -698,6 +700,8 @@ ifdef XED
     EXTLIBS += -lxed
     $(call detected,CONFIG_XED)
   endif
+else
+  DISABLED_FEATURES += XED
 endif
 
 ifndef NO_LZMA
@@ -772,6 +776,8 @@ ifdef LIBBABELTRACE
   else
     msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
   endif
+else
+  DISABLED_FEATURES += LIBBABELTRACE
 endif
 
 ifndef NO_AUXTRACE
@@ -843,6 +849,10 @@ ifdef LIBCLANGLLVM
   endif
 endif
 
+ifneq ($(DISABLED_FEATURES),)
+  $(warning Disabled features, need explicit enabling by user: $(DISABLED_FEATURES))
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   template_dir
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
  2017-01-19  1:41 ` [PATCH 1/5] perf, tools: Add probing for xed Andi Kleen
  2017-01-19  1:41 ` [PATCH 2/5] perf, tools: Add one liner warning for disabled features Andi Kleen
@ 2017-01-19  1:41 ` Andi Kleen
  2017-01-19  1:41 ` [PATCH 4/5] perf, tools, script: Add support for printing assembler Andi Kleen
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add a generic disassembler function for x86 using the XED library,
and a fallback function for architectures that don't implement one.
Other architectures can implement their own disassembler functions.

The previous version of this patch used udis86, but was
rejected because udis86 was unmaintained and a runtime dependency.
Using the recently released xed avoids both of these problems:
- XED is well maintained, up to date, and used by many Intel tools
- XED is linked statically so there is no runtime dependency.

The XED library can be downloaded from http://github.com/intelxed/xed

v2: Clean up includes.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/arch/x86/util/Build |  1 +
 tools/perf/arch/x86/util/dis.c | 86 ++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/Build          |  1 +
 tools/perf/util/dis.c          | 15 ++++++++
 tools/perf/util/dis.h          | 20 ++++++++++
 5 files changed, 123 insertions(+)
 create mode 100644 tools/perf/arch/x86/util/dis.c
 create mode 100644 tools/perf/util/dis.c
 create mode 100644 tools/perf/util/dis.h

diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f46ef0d..93490009ea6a 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -14,3 +14,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt.o
 libperf-$(CONFIG_AUXTRACE) += intel-bts.o
+libperf-$(CONFIG_XED) += dis.o
diff --git a/tools/perf/arch/x86/util/dis.c b/tools/perf/arch/x86/util/dis.c
new file mode 100644
index 000000000000..39703512fe17
--- /dev/null
+++ b/tools/perf/arch/x86/util/dis.c
@@ -0,0 +1,86 @@
+/* Disassembler using the XED library */
+#include "perf.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/dis.h"
+
+#include <xed/xed-interface.h>
+#include <xed/xed-decode.h>
+#include <xed/xed-decoded-inst-api.h>
+
+static int dis_resolve(xed_uint64_t addr, char *buf, xed_uint32_t buflen,
+		xed_uint64_t *off, void *data)
+{
+	struct perf_dis *x = data;
+	struct addr_location al;
+
+	memset(&al, 0, sizeof(struct addr_location));
+
+	thread__find_addr_map(x->thread, x->cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map)
+		thread__find_addr_map(x->thread, x->cpumode, MAP__VARIABLE,
+					addr, &al);
+	al.cpu = x->cpu;
+	al.sym = NULL;
+
+	if (al.map)
+		al.sym = map__find_symbol(al.map, al.addr);
+
+	if (!al.sym)
+		return 0;
+
+	if (al.addr < al.sym->end)
+		*off = al.addr - al.sym->start;
+	else
+		*off = al.addr - al.map->start - al.sym->start;
+	snprintf(buf, buflen, "%s", al.sym->name);
+	return 1;
+}
+
+/* x must be set up earlier */
+char *disas_inst(struct perf_dis *x, uint64_t ip, u8 *inbuf, int inlen,
+		 int *lenp)
+{
+	xed_decoded_inst_t inst;
+	xed_print_info_t info;
+	xed_error_enum_t err;
+	static bool init;
+
+	if (!init) {
+		xed_tables_init();
+		init = true;
+	}
+
+	if (lenp)
+		*lenp = 0;
+
+	xed_init_print_info(&info);
+	info.syntax = XED_SYNTAX_ATT;
+	info.disassembly_callback = dis_resolve;
+	info.context = x;
+
+	xed_decoded_inst_zero(&inst);
+	if (x->is64bit)
+		xed_decoded_inst_set_mode(&inst, XED_MACHINE_MODE_LONG_64,
+				XED_ADDRESS_WIDTH_64b);
+	else
+		xed_decoded_inst_set_mode(&inst, XED_MACHINE_MODE_LEGACY_32,
+				XED_ADDRESS_WIDTH_32b);
+
+	err = xed_decode(&inst, (uint8_t *)inbuf, inlen);
+	if (err != XED_ERROR_NONE) {
+		snprintf(x->out, sizeof(x->out), "err: %s for %d bytes",
+				xed_error_enum_t2str(err), inlen);
+		return x->out;
+	}
+	if (lenp)
+		*lenp = xed_decoded_inst_get_length(&inst);
+	info.p = &inst;
+	info.buf = x->out;
+	info.blen = sizeof(x->out);
+	info.runtime_address = ip;
+	if (!xed_format_generic(&info))
+		strcpy(x->out, "err: cannot format");
+	return x->out;
+}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5da376bc1afc..cdaeb4764fee 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -88,6 +88,7 @@ libperf-y += mem-events.o
 libperf-y += vsprintf.o
 libperf-y += drv_configs.o
 libperf-y += time-utils.o
+libperf-y += dis.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
diff --git a/tools/perf/util/dis.c b/tools/perf/util/dis.c
new file mode 100644
index 000000000000..89c9b84051b8
--- /dev/null
+++ b/tools/perf/util/dis.c
@@ -0,0 +1,15 @@
+#include "perf.h"
+#include "dis.h"
+#include "util.h"
+
+/* Fallback for architectures with no disassembler */
+
+__weak char *disas_inst(struct perf_dis *x, uint64_t ip __maybe_unused,
+		u8 *inbuf __maybe_unused, int inlen __maybe_unused,
+		int *lenp)
+{
+	if (lenp)
+		*lenp = 0;
+	strcpy(x->out, "?");
+	return x->out;
+}
diff --git a/tools/perf/util/dis.h b/tools/perf/util/dis.h
new file mode 100644
index 000000000000..ffda324cbc1a
--- /dev/null
+++ b/tools/perf/util/dis.h
@@ -0,0 +1,20 @@
+#ifndef DIS_H
+#define DIS_H 1
+
+#define MAXINSN 15
+
+struct perf_dis {
+	/* Initialized by callers: */
+	struct thread *thread;
+	u8 cpumode;
+	int cpu;
+	bool is64bit;
+	/* Temporary */
+	char out[256];
+};
+
+char *disas_inst(struct perf_dis *x, uint64_t ip, u8 *inbuf, int inlen,
+		 int *lenp);
+
+
+#endif
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/5] perf, tools, script: Add support for printing assembler
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
                   ` (2 preceding siblings ...)
  2017-01-19  1:41 ` [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library Andi Kleen
@ 2017-01-19  1:41 ` Andi Kleen
  2017-01-23 19:49   ` Arnaldo Carvalho de Melo
  2017-01-19  1:41 ` [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks Andi Kleen
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen, adrian.hunter

From: Andi Kleen <ak@linux.intel.com>

When dumping PT traces with perf script it is very useful to see the
assembler for each sample, so that it is easily possible to follow
the control flow.

As using objdump from perf script is difficult and inefficient, this
patch uses the Intel xed library to implement assembler output.
The library can be downloaded from http://github.com/intelxed/xed

The previous version of this patch used udis86, but was
rejected because udis86 was unmaintained and a runtime dependency.
Using the recently released xed avoids both of these problems:
- XED is well maintained and used by many Intel tools
- XED is linked statically so there is no runtime dependency.

The library is probed as an external dependency in the usual way. Then perf
script calls into it when needed, and handles callbacks to resolve
symbols.

% perf record -e intel_pt//u true
% perf script -F sym,symoff,ip,asm --itrace=i0ns | head
     7fc7188b4190 _start+0x0	mov %rsp, %rdi
     7fc7188b4193 _start+0x3	call _dl_start
     7fc7188b7710 _dl_start+0x0	push %rbp
     7fc7188b7711 _dl_start+0x1	mov %rsp, %rbp
     7fc7188b7714 _dl_start+0x4	push %r15
     7fc7188b7716 _dl_start+0x6	push %r14
     7fc7188b7718 _dl_start+0x8	push %r13
     7fc7188b771a _dl_start+0xa	push %r12
     7fc7188b771c _dl_start+0xc	mov %rdi, %r12
     7fc7188b771f _dl_start+0xf	push %rbx

v2:
Converted to use XED instead of udis86.
Separate disassembler interface into separate arch specific file.
Lots of cleanups and improvements.

Cc: adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-script.txt |  4 +-
 tools/perf/builtin-script.c              | 72 +++++++++++++++++++++++++++-----
 2 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4ed5f239ba7d..497989ea9768 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, asm.
         callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -198,6 +198,8 @@ OPTIONS
 
 	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
 
+	When asm is specified the assembler instruction of each sample is printed in disassembled form.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c0783b4f7b6c..7a09c4f7df3f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,7 @@
 #include <linux/time64.h>
 #include "asm/bug.h"
 #include "util/mem-events.h"
+#include "util/dis.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -69,6 +70,7 @@ enum perf_output_field {
 	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
+	PERF_OUTPUT_ASM		    = 1U << 23,
 };
 
 struct output_option {
@@ -98,6 +100,7 @@ struct output_option {
 	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
+	{.str = "asm", .field = PERF_OUTPUT_ASM},
 };
 
 /* default set to maintain compatibility with current format */
@@ -292,7 +295,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected. Hence, no address to lookup the source line number.\n");
 		return -EINVAL;
 	}
-
+	if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
+		pr_err("Display of assembler requested but sample IP is not\n"
+		       "selected.\n");
+		return -EINVAL;
+	}
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
 		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
 					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -436,6 +443,39 @@ static void print_sample_iregs(struct perf_sample *sample,
 	}
 }
 
+static void print_sample_asm(union perf_event *event,
+			     struct perf_sample *sample,
+			     struct thread *thread,
+			     struct addr_location *al,
+			     struct machine *machine)
+{
+	struct perf_dis x;
+	u8 buffer[32];
+	int len;
+	u64 offset;
+
+	x.thread = thread;
+	x.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	x.cpu = sample->cpu;
+
+	if (!al->map || !al->map->dso)
+		return;
+	if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
+		return;
+
+	/* Load maps to ensure dso->is_64_bit has been updated */
+	map__load(al->map);
+	x.is64bit = al->map->dso->is_64_bit;
+
+	offset = al->map->map_ip(al->map, sample->ip);
+	len = dso__data_read_offset(al->map->dso, machine,
+				    offset, buffer, MAXINSN);
+	if (len <= 0)
+		return;
+
+	printf("\t%s", disas_inst(&x, sample->ip, buffer, len, NULL));
+}
+
 static void print_sample_start(struct perf_sample *sample,
 			       struct thread *thread,
 			       struct perf_evsel *evsel)
@@ -631,8 +671,12 @@ static void print_sample_callindent(struct perf_sample *sample,
 		printf("%*s", spacing - len, "");
 }
 
-static void print_insn(struct perf_sample *sample,
-		       struct perf_event_attr *attr)
+static void print_insn(union perf_event *event,
+		       struct perf_sample *sample,
+		       struct perf_event_attr *attr,
+		       struct thread *thread,
+		       struct addr_location *al,
+		       struct machine *machine)
 {
 	if (PRINT_FIELD(INSNLEN))
 		printf(" ilen: %d", sample->insn_len);
@@ -643,12 +687,16 @@ static void print_insn(struct perf_sample *sample,
 		for (i = 0; i < sample->insn_len; i++)
 			printf(" %02x", (unsigned char)sample->insn[i]);
 	}
+	if (PRINT_FIELD(ASM))
+		print_sample_asm(event, sample, thread, al, machine);
 }
 
-static void print_sample_bts(struct perf_sample *sample,
+static void print_sample_bts(union perf_event *event,
+			     struct perf_sample *sample,
 			     struct perf_evsel *evsel,
 			     struct thread *thread,
-			     struct addr_location *al)
+			     struct addr_location *al,
+			     struct machine *machine)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	bool print_srcline_last = false;
@@ -689,7 +737,7 @@ static void print_sample_bts(struct perf_sample *sample,
 	if (print_srcline_last)
 		map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
 
-	print_insn(sample, attr);
+	print_insn(event, sample, attr, thread, al, machine);
 
 	printf("\n");
 }
@@ -871,7 +919,9 @@ static size_t data_src__printf(u64 data_src)
 
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
-			  struct addr_location *al)
+			  struct addr_location *al,
+			  struct machine *machine,
+			  union perf_event *event)
 {
 	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->attr;
@@ -898,7 +948,7 @@ static void process_event(struct perf_script *script,
 		print_sample_flags(sample->flags);
 
 	if (is_bts_event(attr)) {
-		print_sample_bts(sample, evsel, thread, al);
+		print_sample_bts(event, sample, evsel, thread, al, machine);
 		return;
 	}
 
@@ -936,7 +986,7 @@ static void process_event(struct perf_script *script,
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
-	print_insn(sample, attr);
+	print_insn(event, sample, attr, thread, al, machine);
 	printf("\n");
 }
 
@@ -1046,7 +1096,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (scripting_ops)
 		scripting_ops->process_event(event, sample, evsel, &al);
 	else
-		process_event(scr, sample, evsel, &al);
+		process_event(scr, sample, evsel, &al, machine, event);
 
 out_put:
 	addr_location__put(&al);
@@ -2152,7 +2202,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen", parse_output_fields),
+		     "bpf-output,callindent,insn,insnlen,asm", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
                   ` (3 preceding siblings ...)
  2017-01-19  1:41 ` [PATCH 4/5] perf, tools, script: Add support for printing assembler Andi Kleen
@ 2017-01-19  1:41 ` Andi Kleen
  2017-01-24 18:54   ` Arnaldo Carvalho de Melo
  2017-01-19 15:36 ` New attempt at adding an disassembler to perf Jiri Olsa
  2017-01-20 13:22 ` Jiri Olsa
  6 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2017-01-19  1:41 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Implement printing full disassembled sequences for branch stacks in perf
script. This allows directly printing hot paths for individual samples,
together with branch misprediction and cycle count / IPC information if
available (on Skylake systems). This only works when no special branch
filters are specified.

% perf record -b ...
% perf script -F brstackasm
...
        000055b55d1147d0        pushq  %rbp
        000055b55d1147d1        pushq  %r15
        000055b55d1147d3        pushq  %r14
        000055b55d1147d5        pushq  %r13
        000055b55d1147d7        pushq  %r12
        000055b55d1147d9        pushq  %rbx
        000055b55d1147da        sub $0x18, %rsp
        000055b55d1147de        mov %r8, %r13
        000055b55d1147e1        mov %rcx, %rbp
        000055b55d1147e4        mov %rdx, %r14
        000055b55d1147e7        mov %rsi, %r15
        000055b55d1147ea        mov %rdi, %rbx
        000055b55d1147ed        movl  $0x0, 0xc(%rsp)
        000055b55d1147f5        movq  (%rbp), %rax
        000055b55d1147f9        test $0x1, %al
        000055b55d1147fb        jnz 0x55b55d114890              # PRED 4 cycles 3.75 IPC
        000055b55d114890        mov %eax, %ecx
        000055b55d114892        and $0x3, %ecx
        000055b55d114895        cmp $0x1, %rcx
        000055b55d114899        jnz 0x55b55d1148f8
        000055b55d11489b        movq  -0x1(%rax), %rcx
        000055b55d11489f        cmpb  $0x81, 0xb(%rcx)
        000055b55d1148a3        jnz 0x55b55d1148fe              # PRED 1 cycles 6.00 IPC
...

Occasionally the path does not reach up to the sample IP, as the LBRs
may be frozen before executing a final jump. In this case we print
a special message.

v2:
Use low level abstracted disassembler interface.
Print symbols and source lines as labels.
Print first jump in LBR too.
Patch up blocks with filtered ring transfers.
Show IPC
Lots of cleanups and improvements.
v3:
Print special message for branches frozen early.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-script.txt |  13 +-
 tools/perf/builtin-script.c              | 267 +++++++++++++++++++++++++++++++
 2 files changed, 278 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 497989ea9768..15a80815941e 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, asm.
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, asm, brstackasm,
         callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -189,17 +189,22 @@ OPTIONS
 	i.e., -F "" is not allowed.
 
 	The brstack output includes branch related information with raw addresses using the
-	/v/v/v/v/ syntax in the following order:
+	/v/v/v/v/cycles syntax in the following order:
 	FROM: branch source instruction
 	TO  : branch target instruction
         M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
 	X/- : X=branch inside a transactional region, -=not in transaction region or not supported
 	A/- : A=TSX abort entry, -=not aborted region or not supported
+	cycles
 
 	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
 
 	When asm is specified the assembler instruction of each sample is printed in disassembled form.
 
+	When brstackasm is specified the full assembler sequences of branch sequences for each sample
+	are printed. This is the full execution path leading to the sample. This is only supported when the
+	sample was recorded with perf record -b or -j any.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
@@ -301,6 +306,10 @@ include::itrace.txt[]
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+--max-blocks::
+	Set the maximum number of program blocks to print with brstackasm for
+	each sample.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7a09c4f7df3f..01097e12162d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -43,6 +43,7 @@ static bool			nanosecs;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 static struct perf_stat_config	stat_config;
+static int			max_blocks;
 
 unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
 
@@ -71,6 +72,7 @@ enum perf_output_field {
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 	PERF_OUTPUT_ASM		    = 1U << 23,
+	PERF_OUTPUT_BRSTACKASM	    = 1U << 24,
 };
 
 struct output_option {
@@ -101,6 +103,7 @@ struct output_option {
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 	{.str = "asm", .field = PERF_OUTPUT_ASM},
+	{.str = "brstackasm", .field = PERF_OUTPUT_BRSTACKASM},
 };
 
 /* default set to maintain compatibility with current format */
@@ -300,6 +303,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
+	if (PRINT_FIELD(BRSTACKASM) &&
+	    !(perf_evlist__combined_branch_type(session->evlist) &
+	      PERF_SAMPLE_BRANCH_ANY)) {
+		pr_err("Display of branch stack assembler requested, but non all-branch filter set\n");
+		return -EINVAL;
+	}
+
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
 		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
 					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -586,6 +596,259 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 	}
 }
 
+#define MAXBB 16384UL
+
+static int grab_bb(u8 *buffer, u64 start, u64 end,
+		    struct machine *machine, struct thread *thread,
+		    bool *is64bit, u8 *cpumode, bool last)
+{
+	int offset, len;
+	struct addr_location al;
+	bool kernel;
+
+	if (!start || !end)
+		return 0;
+
+	kernel = machine__kernel_ip(machine, start);
+	if (kernel)
+		*cpumode = PERF_RECORD_MISC_KERNEL;
+	else
+		*cpumode = PERF_RECORD_MISC_USER;
+
+	/*
+	 * Block overlaps between kernel and user.
+	 * This can happen due to ring filtering
+	 * On Intel CPUs the entry into the kernel is filtered,
+	 * but the exit is not. Let the caller patch it up.
+	 */
+	if (kernel != machine__kernel_ip(machine, end)) {
+		printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n",
+				start, end);
+		return -ENXIO;
+	}
+
+	memset(&al, 0, sizeof(al));
+	if (end - start > MAXBB - MAXINSN) {
+		if (last)
+			printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n",
+					start, end);
+		else
+			printf("\tblock %" PRIx64 "-%" PRIx64 " (%ld) too long to dump\n",
+					start, end, end - start);
+		return 0;
+	}
+
+	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
+	if (!al.map || !al.map->dso) {
+		printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n",
+				start, end);
+		return 0;
+	}
+	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
+		printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n",
+				start, end);
+		return 0;
+	}
+
+	/* Load maps to ensure dso->is_64_bit has been updated */
+	map__load(al.map);
+
+	offset = al.map->map_ip(al.map, start);
+	len = dso__data_read_offset(al.map->dso, machine,
+				    offset, (u8 *)buffer,
+				    end - start + MAXINSN);
+
+	*is64bit = al.map->dso->is_64_bit;
+	if (len <= 0)
+		printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
+			start, end);
+	return len;
+}
+
+static void print_jump(uint64_t ip, struct branch_entry *en,
+		       struct perf_dis *x, u8 *inbuf, int len,
+		       int insn)
+{
+	printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s",
+	       ip,
+	       disas_inst(x, ip, inbuf, len, NULL),
+	       en->flags.predicted ? " PRED" : "",
+	       en->flags.mispred ? " MISPRED" : "",
+	       en->flags.in_tx ? " INTX" : "",
+	       en->flags.abort ? " ABORT" : "");
+	if (en->flags.cycles) {
+		printf(" %d cycles", en->flags.cycles);
+		if (insn)
+			printf(" %.2f IPC", (float)insn / en->flags.cycles);
+	}
+	putchar('\n');
+}
+
+static void print_ip_sym(struct thread *thread,
+			 u8 cpumode, int cpu,
+			 uint64_t addr,
+			 struct symbol **lastsym,
+			 struct perf_event_attr *attr)
+{
+	struct addr_location al;
+	int off;
+
+	memset(&al, 0, sizeof(struct addr_location));
+
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map)
+		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
+				      addr, &al);
+	if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
+		return;
+
+	al.cpu = cpu;
+	al.sym = NULL;
+	if (al.map)
+		al.sym = map__find_symbol(al.map, al.addr);
+
+	if (!al.sym)
+		return;
+
+	if (al.addr < al.sym->end)
+		off = al.addr - al.sym->start;
+	else
+		off = al.addr - al.map->start - al.sym->start;
+	printf("\t%s", al.sym->name);
+	if (off)
+		printf("%+d", off);
+	putchar(':');
+	if (PRINT_FIELD(SRCLINE))
+		map__fprintf_srcline(al.map, al.addr, "\t", stdout);
+	putchar('\n');
+	*lastsym = al.sym;
+}
+
+static void print_sample_brstackasm(struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr,
+				    struct machine *machine)
+{
+	struct branch_stack *br = sample->branch_stack;
+	u64 start, end;
+	int i, insn;
+	struct perf_dis x;
+	u8 buffer[MAXBB];
+	int len;
+	int nr;
+	unsigned off;
+	int ilen;
+	struct symbol *lastsym = NULL;
+
+	if (!(br && br->nr))
+		return;
+	nr = br->nr;
+	if (max_blocks && nr > max_blocks + 1)
+		nr = max_blocks + 1;
+
+	x.thread = thread;
+	x.cpu = sample->cpu;
+
+	putchar('\n');
+
+	/* Handle first from jump, of which we don't know the entry. */
+	len = grab_bb(buffer, br->entries[nr-1].from,
+			br->entries[nr-1].from,
+			machine, thread, &x.is64bit, &x.cpumode, false);
+	if (len > 0) {
+		print_ip_sym(thread, x.cpumode, x.cpu,
+			     br->entries[nr - 1].from,
+			     &lastsym, attr);
+		print_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+			&x, buffer, len, 0);
+	}
+
+	/* Print all blocks */
+	for (i = nr - 2; i >= 0; i--) {
+		if (br->entries[i].from || br->entries[i].to)
+			pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
+				 br->entries[i].from,
+				 br->entries[i].to);
+		start = br->entries[i + 1].to;
+		end = br->entries[i].from;
+
+		len = grab_bb(buffer, start, end,
+				machine, thread, &x.is64bit,
+				&x.cpumode, false);
+		/* Patch up missing kernel transfers due to ring filters */
+		if (len == -ENXIO && i > 0) {
+			end = br->entries[--i].from;
+			pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n",
+					start, end);
+			len = grab_bb(buffer, start, end,
+				      machine, thread, &x.is64bit,
+				      &x.cpumode, false);
+		}
+		if (len <= 0)
+			continue;
+
+		insn = 0;
+		for (off = 0;; off += ilen) {
+			uint64_t ip = start + off;
+
+			print_ip_sym(thread, x.cpumode, x.cpu,
+				     ip,
+				     &lastsym, attr);
+			if (ip == end) {
+				print_jump(ip, &br->entries[i], &x,
+					   buffer + off,
+					   len - off, insn);
+				break;
+			} else {
+				printf("\t%016" PRIx64 "\t%s\n", ip,
+					disas_inst(&x, ip, buffer + off,
+						   len - off, &ilen));
+				if (ilen == 0)
+					break;
+				insn++;
+			}
+		}
+	}
+
+	/*
+	 * Hit the branch? In this case we are already done, and the target
+	 * has not been executed yet.
+	 */
+	if (br->entries[0].from == sample->ip)
+		return;
+	if (br->entries[0].flags.abort)
+		return;
+
+	/*
+	 * Print final block upto sample
+	 */
+	start = br->entries[0].to;
+	end = sample->ip;
+	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit,
+			&x.cpumode, true);
+	print_ip_sym(thread, x.cpumode, x.cpu,
+		     start,
+		     &lastsym, attr);
+	if (len <= 0) {
+		/* Print at least last IP if basic block did not work */
+		len = grab_bb(buffer, sample->ip, sample->ip,
+				machine, thread, &x.is64bit, &x.cpumode,
+				false);
+		if (len <= 0)
+			return;
+
+		printf("\t%016" PRIx64 "\t%s\n", sample->ip,
+			disas_inst(&x, sample->ip, buffer, len, NULL));
+		return;
+	}
+	for (off = 0; off <= end - start; off += ilen) {
+		printf("\t%016" PRIx64 "\t%s\n", start + off,
+				disas_inst(&x, start + off, buffer + off,
+					   len - off, &ilen));
+		if (ilen == 0)
+			break;
+	}
+}
 
 static void print_sample_addr(struct perf_sample *sample,
 			  struct thread *thread,
@@ -689,6 +952,8 @@ static void print_insn(union perf_event *event,
 	}
 	if (PRINT_FIELD(ASM))
 		print_sample_asm(event, sample, thread, al, machine);
+	if (PRINT_FIELD(BRSTACKASM))
+		print_sample_brstackasm(sample, thread, attr, machine);
 }
 
 static void print_sample_bts(union perf_event *event,
@@ -2231,6 +2496,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
 		    "Show context switch events (if recorded)"),
 	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_INTEGER(0, "max-blocks", &max_blocks,
+		    "Maximum number of code blocks to dump with brstackasm"),
 	OPT_BOOLEAN(0, "ns", &nanosecs,
 		    "Use 9 decimal places when displaying time"),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: New attempt at adding an disassembler to perf
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
                   ` (4 preceding siblings ...)
  2017-01-19  1:41 ` [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks Andi Kleen
@ 2017-01-19 15:36 ` Jiri Olsa
  2017-01-19 16:54   ` Andi Kleen
  2017-01-20 13:22 ` Jiri Olsa
  6 siblings, 1 reply; 14+ messages in thread
From: Jiri Olsa @ 2017-01-19 15:36 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, jolsa, linux-kernel

On Wed, Jan 18, 2017 at 05:41:45PM -0800, Andi Kleen wrote:
> A native disassembler in perf is very useful, in particular with perf script to trace 
> instruction streams, but also for other analysis. Previously I attempted
> to do this using the udis86 library, but that was rejected because:
> - udis86 was not maintained anymore and lacking recent instructions
> - udis86 is dynamically linked and gives a runtime dependency.
> Doing this needs a full disassembler, not just a decoder, so the existing
> instruction decoder cannot be used without major changes.
> 
> This patchkit addresses these issues.  Intel recently released an open source version
> of the XED disassembler library, which is used in many other Intel software.
> It is very well maintained, uptodate, and supports static linking, so there is no
> runtime dependency. This version adds XED support to perf, and uses it to implement
> assembler output in perf script. It also fixes a range of issues in the previous
> version, see the individual change logs.
> 
> Available in 
> git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git perf/xed-6

i dont see that...

[jolsa@krava perf]$ git branch -r | grep xed-
  ak/perf/xed-3
  ak/perf/xed-4

jirka

> 
> v1: First post of XED version
> v2: Change probing to not be default and support XED_DIR. Other cleanups
> based on review.
> v3: Use FEATURE_FLAGS_BASIC for probing
> 
> -Andi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: New attempt at adding an disassembler to perf
  2017-01-19 15:36 ` New attempt at adding an disassembler to perf Jiri Olsa
@ 2017-01-19 16:54   ` Andi Kleen
  0 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-19 16:54 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Andi Kleen, acme, jolsa, linux-kernel

> 
> [jolsa@krava perf]$ git branch -r | grep xed-
>   ak/perf/xed-3
>   ak/perf/xed-4

Pushed.

-Andi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: New attempt at adding an disassembler to perf
  2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
                   ` (5 preceding siblings ...)
  2017-01-19 15:36 ` New attempt at adding an disassembler to perf Jiri Olsa
@ 2017-01-20 13:22 ` Jiri Olsa
  6 siblings, 0 replies; 14+ messages in thread
From: Jiri Olsa @ 2017-01-20 13:22 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, jolsa, linux-kernel

On Wed, Jan 18, 2017 at 05:41:45PM -0800, Andi Kleen wrote:
> A native disassembler in perf is very useful, in particular with perf script to trace 
> instruction streams, but also for other analysis. Previously I attempted
> to do this using the udis86 library, but that was rejected because:
> - udis86 was not maintained anymore and lacking recent instructions
> - udis86 is dynamically linked and gives a runtime dependency.
> Doing this needs a full disassembler, not just a decoder, so the existing
> instruction decoder cannot be used without major changes.
> 
> This patchkit addresses these issues.  Intel recently released an open source version
> of the XED disassembler library, which is used in many other Intel software.
> It is very well maintained, uptodate, and supports static linking, so there is no
> runtime dependency. This version adds XED support to perf, and uses it to implement
> assembler output in perf script. It also fixes a range of issues in the previous
> version, see the individual change logs.
> 
> Available in 
> git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git perf/xed-6

the perf/xed-6 branch looks good to me..
not sure if that matches what was posted ;-)

Acked-by: Jiri Olsa <jolsa@kernel.org>

thanks,
jirka

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/5] perf, tools, script: Add support for printing assembler
  2017-01-19  1:41 ` [PATCH 4/5] perf, tools, script: Add support for printing assembler Andi Kleen
@ 2017-01-23 19:49   ` Arnaldo Carvalho de Melo
  2017-01-23 19:55     ` Andi Kleen
  0 siblings, 1 reply; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-01-23 19:49 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jiri Olsa, linux-kernel, Andi Kleen, Adrian Hunter

Em Wed, Jan 18, 2017 at 05:41:49PM -0800, Andi Kleen escreveu:
> From: Andi Kleen <ak@linux.intel.com>
> 
> When dumping PT traces with perf script it is very useful to see the
> assembler for each sample, so that it is easily possible to follow
> the control flow.
> 
> As using objdump is difficult and inefficient from perf script this
> patch uses the Intel xed library to implement assembler output.
> The library can be downloaded from http://github.com/intelxed/xed

What I have, and there are multiple changes to make this build, is
available at the tmp.perf/xed branch of my tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git

https://git.kernel.org/cgit/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/xed

So, while testing this I noticed some differences between what objdump -d
produces and what is produced with this patch, see below. Where they differ I
added the bytes from the objdump output; I still have to look up the tables
manually to check each case, and some seem harmless.

Do you know if there is any tool comparing the output of objdump -d to what is
produced by a similar xed based tool?

Disassembly of perf_evsel__enable() bits referenced in the samples
collected for this test:

  'perf script' with this patch:                     DIFF   objdump -d

  4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>
  4b84d0  add $0x1, %r14                                                   add    $0x1,%r14
  4b84d4  cmp %r14d, %ebx                                                  cmp    %r14d,%ebx
  4b84d7  jle 0x4b8530 <perf_evsel__enable+0xd0>                           jle    4b8530 <perf_evsel__enable+0xd0>
  4b8530  add $0x1, %r12                                                   add    $0x1,%r12
  4b8534  cmp %r12d, %r13d                                                 cmp    %r12d,%r13d
  4b8537  jnle 0x4b84c2 <perf_evsel__enable+0x62>    7f 89                 jg     4b84c2 <perf_evsel__enable+0x62>
  4b84c2  xor %r14d, %r14d                                                 xor    %r14d,%r14d
  4b84c5  test %ebx, %ebx                                                  test   %ebx,%ebx
  4b84c7  jnle 0x4b84d9 <perf_evsel__enable+0x79>    7f 10                 jg     4b84d9 <perf_evsel__enable+0x79>
  4b84d9  movq  0x90(%r15), %rax                     49 8b 87 90 00 00 00  mov    0x90(%r15),%rax
  4b84e0  mov %r12, %rdx                                                   mov    %r12,%rdx
  4b84e3  mov %r14, %rcx                                                   mov    %r14,%rcx
  4b84e6  mov $0x2400, %esi                                                mov    $0x2400,%esi
  4b84eb  imulq  (%rax), %rdx                        48 0f af 10           imul   (%rax),%rdx
  4b84ef  imulq  0x8(%rax), %rcx                     48 0f af 48 08        imul   0x8(%rax),%rcx
  4b84f4  add %rdx, %rax                                                   add    %rdx,%rax
  4b84f7  xor %edx, %edx                                                   xor    %edx,%edx
  4b84f9  movl  0x18(%rcx,%rax,1), %edi              8b 7c 01 18           mov    0x18(%rcx,%rax,1),%edi
  4b84fd  xor %eax, %eax                                                   xor    %eax,%eax
  4b84ff  callq  0x42d990 <ioctl@plt>                                      callq  42d990 <ioctl@plt>
  4b8504  test %eax, %eax                                                  test   %eax,%eax
  4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>

Built today using xed from:

https://github.com/intelxed/xed

[acme@jouet xed]$ git log --oneline -5
4507b57ba629 rebase tests to account for new operand sorting
c7c1777216f5 generator: xed operand ordering sort was wrong
eb45a282de28 convert LOOPNE/E comments to XED COMMENT field
22427e1a4027 generator.py: duplicate iform check now checks isa-set conflicts
4a0a09a1542e Disambiguate iforms for VFPCLASS{PD,PS} mem forms, suffix with VL
[acme@jouet xed]$
 
> The previous version of this patch used udis86, but was
> rejected because udis86 was unmaintained and a runtime dependency.
> Using the recently released xed avoids both of these problems:
> - XED is well maintained and used by many Intel tools
> - XED is linked statically so there is no runtime dependency.
> 
> The library is probed as an external dependency in the usual way. Then perf
> script calls into it when needed, and handles callbacks to resolve
> symbols.
> 
> % perf record -e intel_pt//u true
> % perf script -F sym,symoff,ip,asm --itrace=i0ns | head
>      7fc7188b4190 _start+0x0	mov %rsp, %rdi
>      7fc7188b4193 _start+0x3	call _dl_start
>      7fc7188b7710 _dl_start+0x0	push %rbp
>      7fc7188b7711 _dl_start+0x1	mov %rsp, %rbp
>      7fc7188b7714 _dl_start+0x4	push %r15
>      7fc7188b7716 _dl_start+0x6	push %r14
>      7fc7188b7718 _dl_start+0x8	push %r13
>      7fc7188b771a _dl_start+0xa	push %r12
>      7fc7188b771c _dl_start+0xc	mov %rdi, %r12
>      7fc7188b771f _dl_start+0xf	push %rbx
> 
> v2:
> Converted to use XED instead of udis86.
> Separate disassembler interface into separate arch specific file.
> Lots of cleanups and improvements.
> 
> Cc: adrian.hunter@intel.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/Documentation/perf-script.txt |  4 +-
>  tools/perf/builtin-script.c              | 72 +++++++++++++++++++++++++++-----
>  2 files changed, 64 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> index 4ed5f239ba7d..497989ea9768 100644
> --- a/tools/perf/Documentation/perf-script.txt
> +++ b/tools/perf/Documentation/perf-script.txt
> @@ -116,7 +116,7 @@ OPTIONS
>  --fields::
>          Comma separated list of fields to print. Options are:
>          comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
> -        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
> +        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, asm.
>          callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
>          to indicate to which event type the field list applies.
>          e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
> @@ -198,6 +198,8 @@ OPTIONS
>  
>  	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
>  
> +	When asm is specified the assembler instruction of each sample is printed in disassembled form.
> +
>  -k::
>  --vmlinux=<file>::
>          vmlinux pathname
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c0783b4f7b6c..7a09c4f7df3f 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -28,6 +28,7 @@
>  #include <linux/time64.h>
>  #include "asm/bug.h"
>  #include "util/mem-events.h"
> +#include "util/dis.h"
>  
>  static char const		*script_name;
>  static char const		*generate_script_lang;
> @@ -69,6 +70,7 @@ enum perf_output_field {
>  	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
>  	PERF_OUTPUT_INSN	    = 1U << 21,
>  	PERF_OUTPUT_INSNLEN	    = 1U << 22,
> +	PERF_OUTPUT_ASM		    = 1U << 23,
>  };
>  
>  struct output_option {
> @@ -98,6 +100,7 @@ struct output_option {
>  	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
>  	{.str = "insn", .field = PERF_OUTPUT_INSN},
>  	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
> +	{.str = "asm", .field = PERF_OUTPUT_ASM},
>  };
>  
>  /* default set to maintain compatibility with current format */
> @@ -292,7 +295,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
>  		       "selected. Hence, no address to lookup the source line number.\n");
>  		return -EINVAL;
>  	}
> -
> +	if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
> +		pr_err("Display of assembler requested but sample IP is not\n"
> +		       "selected.\n");
> +		return -EINVAL;
> +	}
>  	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
>  		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
>  					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
> @@ -436,6 +443,39 @@ static void print_sample_iregs(struct perf_sample *sample,
>  	}
>  }
>  
> +static void print_sample_asm(union perf_event *event,
> +			     struct perf_sample *sample,
> +			     struct thread *thread,
> +			     struct addr_location *al,
> +			     struct machine *machine)
> +{
> +	struct perf_dis x;
> +	u8 buffer[32];
> +	int len;
> +	u64 offset;
> +
> +	x.thread = thread;
> +	x.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
> +	x.cpu = sample->cpu;
> +
> +	if (!al->map || !al->map->dso)
> +		return;
> +	if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
> +		return;
> +
> +	/* Load maps to ensure dso->is_64_bit has been updated */
> +	map__load(al->map);
> +	x.is64bit = al->map->dso->is_64_bit;
> +
> +	offset = al->map->map_ip(al->map, sample->ip);
> +	len = dso__data_read_offset(al->map->dso, machine,
> +				    offset, buffer, MAXINSN);
> +	if (len <= 0)
> +		return;
> +
> +	printf("\t%s", disas_inst(&x, sample->ip, buffer, len, NULL));
> +}
> +
>  static void print_sample_start(struct perf_sample *sample,
>  			       struct thread *thread,
>  			       struct perf_evsel *evsel)
> @@ -631,8 +671,12 @@ static void print_sample_callindent(struct perf_sample *sample,
>  		printf("%*s", spacing - len, "");
>  }
>  
> -static void print_insn(struct perf_sample *sample,
> -		       struct perf_event_attr *attr)
> +static void print_insn(union perf_event *event,
> +		       struct perf_sample *sample,
> +		       struct perf_event_attr *attr,
> +		       struct thread *thread,
> +		       struct addr_location *al,
> +		       struct machine *machine)
>  {
>  	if (PRINT_FIELD(INSNLEN))
>  		printf(" ilen: %d", sample->insn_len);
> @@ -643,12 +687,16 @@ static void print_insn(struct perf_sample *sample,
>  		for (i = 0; i < sample->insn_len; i++)
>  			printf(" %02x", (unsigned char)sample->insn[i]);
>  	}
> +	if (PRINT_FIELD(ASM))
> +		print_sample_asm(event, sample, thread, al, machine);
>  }
>  
> -static void print_sample_bts(struct perf_sample *sample,
> +static void print_sample_bts(union perf_event *event,
> +			     struct perf_sample *sample,
>  			     struct perf_evsel *evsel,
>  			     struct thread *thread,
> -			     struct addr_location *al)
> +			     struct addr_location *al,
> +			     struct machine *machine)
>  {
>  	struct perf_event_attr *attr = &evsel->attr;
>  	bool print_srcline_last = false;
> @@ -689,7 +737,7 @@ static void print_sample_bts(struct perf_sample *sample,
>  	if (print_srcline_last)
>  		map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
>  
> -	print_insn(sample, attr);
> +	print_insn(event, sample, attr, thread, al, machine);
>  
>  	printf("\n");
>  }
> @@ -871,7 +919,9 @@ static size_t data_src__printf(u64 data_src)
>  
>  static void process_event(struct perf_script *script,
>  			  struct perf_sample *sample, struct perf_evsel *evsel,
> -			  struct addr_location *al)
> +			  struct addr_location *al,
> +			  struct machine *machine,
> +			  union perf_event *event)
>  {
>  	struct thread *thread = al->thread;
>  	struct perf_event_attr *attr = &evsel->attr;
> @@ -898,7 +948,7 @@ static void process_event(struct perf_script *script,
>  		print_sample_flags(sample->flags);
>  
>  	if (is_bts_event(attr)) {
> -		print_sample_bts(sample, evsel, thread, al);
> +		print_sample_bts(event, sample, evsel, thread, al, machine);
>  		return;
>  	}
>  
> @@ -936,7 +986,7 @@ static void process_event(struct perf_script *script,
>  
>  	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
>  		print_sample_bpf_output(sample);
> -	print_insn(sample, attr);
> +	print_insn(event, sample, attr, thread, al, machine);
>  	printf("\n");
>  }
>  
> @@ -1046,7 +1096,7 @@ static int process_sample_event(struct perf_tool *tool,
>  	if (scripting_ops)
>  		scripting_ops->process_event(event, sample, evsel, &al);
>  	else
> -		process_event(scr, sample, evsel, &al);
> +		process_event(scr, sample, evsel, &al, machine, event);
>  
>  out_put:
>  	addr_location__put(&al);
> @@ -2152,7 +2202,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
>  		     "Valid types: hw,sw,trace,raw. "
>  		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
>  		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
> -		     "bpf-output,callindent,insn,insnlen", parse_output_fields),
> +		     "bpf-output,callindent,insn,insnlen,asm", parse_output_fields),
>  	OPT_BOOLEAN('a', "all-cpus", &system_wide,
>  		    "system-wide collection from all CPUs"),
>  	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> -- 
> 2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/5] perf, tools, script: Add support for printing assembler
  2017-01-23 19:49   ` Arnaldo Carvalho de Melo
@ 2017-01-23 19:55     ` Andi Kleen
  2017-01-23 20:06       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2017-01-23 19:55 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andi Kleen, Jiri Olsa, linux-kernel, Andi Kleen, Adrian Hunter

> Do you know if there is any tool comparing the output of objdump -d to what is
> produced by a similar xed based tool?

I'm not aware of such a tool, but one could be written using the "xed" tool
in the xed distribution. However, I would trust xed over objdump:
it is used widely in Intel tools, with likely far more testing
than binutils.

>   4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>
>   4b84d0  add $0x1, %r14                                                   add    $0x1,%r14
>   4b84d4  cmp %r14d, %ebx                                                  cmp    %r14d,%ebx
>   4b84d7  jle 0x4b8530 <perf_evsel__enable+0xd0>                           jle    4b8530 <perf_evsel__enable+0xd0>
>   4b8530  add $0x1, %r12                                                   add    $0x1,%r12
>   4b8534  cmp %r12d, %r13d                                                 cmp    %r12d,%r13d
>   4b8537  jnle 0x4b84c2 <perf_evsel__enable+0x62>    7f 89                 jg     4b84c2 <perf_evsel__enable+0x62>
>   4b84c2  xor %r14d, %r14d                                                 xor    %r14d,%r14d
>   4b84c5  test %ebx, %ebx                                                  test   %ebx,%ebx
>   4b84c7  jnle 0x4b84d9 <perf_evsel__enable+0x79>    7f 10                 jg     4b84d9 <perf_evsel__enable+0x79>
>   4b84d9  movq  0x90(%r15), %rax                     49 8b 87 90 00 00 00  mov    0x90(%r15),%rax
>   4b84e0  mov %r12, %rdx                                                   mov    %r12,%rdx
>   4b84e3  mov %r14, %rcx                                                   mov    %r14,%rcx
>   4b84e6  mov $0x2400, %esi                                                mov    $0x2400,%esi
>   4b84eb  imulq  (%rax), %rdx                        48 0f af 10           imul   (%rax),%rdx
>   4b84ef  imulq  0x8(%rax), %rcx                     48 0f af 48 08        imul   0x8(%rax),%rcx
>   4b84f4  add %rdx, %rax                                                   add    %rdx,%rax
>   4b84f7  xor %edx, %edx                                                   xor    %edx,%edx
>   4b84f9  movl  0x18(%rcx,%rax,1), %edi              8b 7c 01 18           mov    0x18(%rcx,%rax,1),%edi
>   4b84fd  xor %eax, %eax                                                   xor    %eax,%eax
>   4b84ff  callq  0x42d990 <ioctl@plt>                                      callq  42d990 <ioctl@plt>
>   4b8504  test %eax, %eax                                                  test   %eax,%eax
>   4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>

Yes, all the differences are OK. They are just synonyms for the same instructions.

-Andi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/5] perf, tools, script: Add support for printing assembler
  2017-01-23 19:55     ` Andi Kleen
@ 2017-01-23 20:06       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-01-23 20:06 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jiri Olsa, linux-kernel, Andi Kleen, Adrian Hunter

Em Mon, Jan 23, 2017 at 11:55:21AM -0800, Andi Kleen escreveu:
> > Do you know if there is any tool comparing the output of objdump -d to what is
> > produced by a similar xed based tool?
> 
> I'm not aware of such a tool, but could be written using the "xed" tool
> in the xed distribution. However I would trust xed over objdump,

It would go both ways, and I was not implying which one is "better",
just checking differences in the output.

> it is used widely in Intel tools with likely far more testing
> than binutils
> 
> >   4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>
> >   4b84d0  add $0x1, %r14                                                   add    $0x1,%r14
> >   4b84d4  cmp %r14d, %ebx                                                  cmp    %r14d,%ebx
> >   4b84d7  jle 0x4b8530 <perf_evsel__enable+0xd0>                           jle    4b8530 <perf_evsel__enable+0xd0>
> >   4b8530  add $0x1, %r12                                                   add    $0x1,%r12
> >   4b8534  cmp %r12d, %r13d                                                 cmp    %r12d,%r13d
> >   4b8537  jnle 0x4b84c2 <perf_evsel__enable+0x62>    7f 89                 jg     4b84c2 <perf_evsel__enable+0x62>
> >   4b84c2  xor %r14d, %r14d                                                 xor    %r14d,%r14d
> >   4b84c5  test %ebx, %ebx                                                  test   %ebx,%ebx
> >   4b84c7  jnle 0x4b84d9 <perf_evsel__enable+0x79>    7f 10                 jg     4b84d9 <perf_evsel__enable+0x79>
> >   4b84d9  movq  0x90(%r15), %rax                     49 8b 87 90 00 00 00  mov    0x90(%r15),%rax
> >   4b84e0  mov %r12, %rdx                                                   mov    %r12,%rdx
> >   4b84e3  mov %r14, %rcx                                                   mov    %r14,%rcx
> >   4b84e6  mov $0x2400, %esi                                                mov    $0x2400,%esi
> >   4b84eb  imulq  (%rax), %rdx                        48 0f af 10           imul   (%rax),%rdx
> >   4b84ef  imulq  0x8(%rax), %rcx                     48 0f af 48 08        imul   0x8(%rax),%rcx
> >   4b84f4  add %rdx, %rax                                                   add    %rdx,%rax
> >   4b84f7  xor %edx, %edx                                                   xor    %edx,%edx
> >   4b84f9  movl  0x18(%rcx,%rax,1), %edi              8b 7c 01 18           mov    0x18(%rcx,%rax,1),%edi
> >   4b84fd  xor %eax, %eax                                                   xor    %eax,%eax
> >   4b84ff  callq  0x42d990 <ioctl@plt>                                      callq  42d990 <ioctl@plt>
> >   4b8504  test %eax, %eax                                                  test   %eax,%eax
> >   4b8506  jz 0x4b84d0 <perf_evsel__enable+0x70>      74 c8                 je     4b84d0 <perf_evsel__enable+0x70>
> 
> Yes all the differences are ok. It's just synonyms of the instructions.
> 
> -Andi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks
  2017-01-19  1:41 ` [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks Andi Kleen
@ 2017-01-24 18:54   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-01-24 18:54 UTC (permalink / raw)
  To: Andi Kleen; +Cc: jolsa, linux-kernel, Andi Kleen

Em Wed, Jan 18, 2017 at 05:41:50PM -0800, Andi Kleen escreveu:
> +	memset(&al, 0, sizeof(al));
> +	if (end - start > MAXBB - MAXINSN) {
> +		if (last)
> +			printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n",
> +					start, end);
> +		else
> +			printf("\tblock %" PRIx64 "-%" PRIx64 " (%ld) too long to dump\n",
> +					start, end, end - start);
> +		return 0;

Those are multiline if/else blocks, so one has to use {}; additionally,
(end - start) should be printed with a PRI macro, just like the other bits. Fixing...

This actually broke the build in some distros, like fedora:24
crossbuilding to ARC-uClibc.

- Arnaldo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library
  2017-01-10  1:02 New attempt at adding an disassembler to perf v2 Andi Kleen
@ 2017-01-10  1:02 ` Andi Kleen
  0 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2017-01-10  1:02 UTC (permalink / raw)
  To: acme; +Cc: jolsa, mingo, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add a generic disassembler function for x86 using the XED library,
and a fallback function for architectures that don't implement one.
Other architectures can implement their own disassembler functions.

The previous version of this patch used udis86, but was
rejected because udis86 was unmaintained and a runtime dependency.
Using the recently released xed avoids both of these problems:
- XED is well maintained, up to date, and used by many Intel tools
- XED is linked statically so there is no runtime dependency.

The XED library can be downloaded from http://github.com/intelxed/xed

v2: Clean up includes.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/arch/x86/util/Build |  1 +
 tools/perf/arch/x86/util/dis.c | 86 ++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/Build          |  1 +
 tools/perf/util/dis.c          | 15 ++++++++
 tools/perf/util/dis.h          | 20 ++++++++++
 5 files changed, 123 insertions(+)
 create mode 100644 tools/perf/arch/x86/util/dis.c
 create mode 100644 tools/perf/util/dis.c
 create mode 100644 tools/perf/util/dis.h

diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f46ef0d..93490009ea6a 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -14,3 +14,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt.o
 libperf-$(CONFIG_AUXTRACE) += intel-bts.o
+libperf-$(CONFIG_XED) += dis.o
diff --git a/tools/perf/arch/x86/util/dis.c b/tools/perf/arch/x86/util/dis.c
new file mode 100644
index 000000000000..39703512fe17
--- /dev/null
+++ b/tools/perf/arch/x86/util/dis.c
@@ -0,0 +1,86 @@
+/* Disassembler using the XED library */
+#include "perf.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/dis.h"
+
+#include <xed/xed-interface.h>
+#include <xed/xed-decode.h>
+#include <xed/xed-decoded-inst-api.h>
+
+static int dis_resolve(xed_uint64_t addr, char *buf, xed_uint32_t buflen,
+		xed_uint64_t *off, void *data)
+{
+	struct perf_dis *x = data;
+	struct addr_location al;
+
+	memset(&al, 0, sizeof(struct addr_location));
+
+	thread__find_addr_map(x->thread, x->cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map)
+		thread__find_addr_map(x->thread, x->cpumode, MAP__VARIABLE,
+					addr, &al);
+	al.cpu = x->cpu;
+	al.sym = NULL;
+
+	if (al.map)
+		al.sym = map__find_symbol(al.map, al.addr);
+
+	if (!al.sym)
+		return 0;
+
+	if (al.addr < al.sym->end)
+		*off = al.addr - al.sym->start;
+	else
+		*off = al.addr - al.map->start - al.sym->start;
+	snprintf(buf, buflen, "%s", al.sym->name);
+	return 1;
+}
+
+/* x must be set up earlier */
+char *disas_inst(struct perf_dis *x, uint64_t ip, u8 *inbuf, int inlen,
+		 int *lenp)
+{
+	xed_decoded_inst_t inst;
+	xed_print_info_t info;
+	xed_error_enum_t err;
+	static bool init;
+
+	if (!init) {
+		xed_tables_init();
+		init = true;
+	}
+
+	if (lenp)
+		*lenp = 0;
+
+	xed_init_print_info(&info);
+	info.syntax = XED_SYNTAX_ATT;
+	info.disassembly_callback = dis_resolve;
+	info.context = x;
+
+	xed_decoded_inst_zero(&inst);
+	if (x->is64bit)
+		xed_decoded_inst_set_mode(&inst, XED_MACHINE_MODE_LONG_64,
+				XED_ADDRESS_WIDTH_64b);
+	else
+		xed_decoded_inst_set_mode(&inst, XED_MACHINE_MODE_LEGACY_32,
+				XED_ADDRESS_WIDTH_32b);
+
+	err = xed_decode(&inst, (uint8_t *)inbuf, inlen);
+	if (err != XED_ERROR_NONE) {
+		snprintf(x->out, sizeof(x->out), "err: %s for %d bytes",
+				xed_error_enum_t2str(err), inlen);
+		return x->out;
+	}
+	if (lenp)
+		*lenp = xed_decoded_inst_get_length(&inst);
+	info.p = &inst;
+	info.buf = x->out;
+	info.blen = sizeof(x->out);
+	info.runtime_address = ip;
+	if (!xed_format_generic(&info))
+		strcpy(x->out, "err: cannot format");
+	return x->out;
+}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 3840e3a87057..393000501579 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -88,6 +88,7 @@ libperf-y += mem-events.o
 libperf-y += vsprintf.o
 libperf-y += drv_configs.o
 libperf-y += time-utils.o
+libperf-y += dis.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
diff --git a/tools/perf/util/dis.c b/tools/perf/util/dis.c
new file mode 100644
index 000000000000..89c9b84051b8
--- /dev/null
+++ b/tools/perf/util/dis.c
@@ -0,0 +1,15 @@
+#include "perf.h"
+#include "dis.h"
+#include "util.h"
+
+/* Fallback for architectures with no disassembler */
+
+__weak char *disas_inst(struct perf_dis *x, uint64_t ip __maybe_unused,
+		u8 *inbuf __maybe_unused, int inlen __maybe_unused,
+		int *lenp)
+{
+	if (lenp)
+		*lenp = 0;
+	strcpy(x->out, "?");
+	return x->out;
+}
diff --git a/tools/perf/util/dis.h b/tools/perf/util/dis.h
new file mode 100644
index 000000000000..ffda324cbc1a
--- /dev/null
+++ b/tools/perf/util/dis.h
@@ -0,0 +1,20 @@
+#ifndef DIS_H
+#define DIS_H 1
+
+#define MAXINSN 15
+
+struct perf_dis {
+	/* Initialized by callers: */
+	struct thread *thread;
+	u8 cpumode;
+	int cpu;
+	bool is64bit;
+	/* Temporary */
+	char out[256];
+};
+
+char *disas_inst(struct perf_dis *x, uint64_t ip, u8 *inbuf, int inlen,
+		 int *lenp);
+
+
+#endif
-- 
2.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-01-24 18:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-19  1:41 New attempt at adding an disassembler to perf Andi Kleen
2017-01-19  1:41 ` [PATCH 1/5] perf, tools: Add probing for xed Andi Kleen
2017-01-19  1:41 ` [PATCH 2/5] perf, tools: Add one liner warning for disabled features Andi Kleen
2017-01-19  1:41 ` [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library Andi Kleen
2017-01-19  1:41 ` [PATCH 4/5] perf, tools, script: Add support for printing assembler Andi Kleen
2017-01-23 19:49   ` Arnaldo Carvalho de Melo
2017-01-23 19:55     ` Andi Kleen
2017-01-23 20:06       ` Arnaldo Carvalho de Melo
2017-01-19  1:41 ` [PATCH 5/5] perf, tools, script: Add brstackasm output for branch stacks Andi Kleen
2017-01-24 18:54   ` Arnaldo Carvalho de Melo
2017-01-19 15:36 ` New attempt at adding an disassembler to perf Jiri Olsa
2017-01-19 16:54   ` Andi Kleen
2017-01-20 13:22 ` Jiri Olsa
  -- strict thread matches above, loose matches on Subject: below --
2017-01-10  1:02 New attempt at adding an disassembler to perf v2 Andi Kleen
2017-01-10  1:02 ` [PATCH 3/5] perf, tools: Add disassembler for x86 using the XED library Andi Kleen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.