All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] perf report: Implement visual marker for macro fusion in annotate
@ 2017-06-19  2:55 Jin Yao
  2017-06-19  2:55 ` [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser Jin Yao
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Jin Yao @ 2017-06-19  2:55 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

Macro fusion merges two instructions to a single micro-op. Intel
core platform performs this hardware optimization under limited
circumstances. For example, CMP + JCC can be "fused" and executed
/retired together. While with sampling this can result in the
sample sometimes being on the JCC and sometimes on the CMP.
So for the fused instruction pair, they could be considered
together.

In general, the fused instruction pairs are:

cmp/test/add/sub/and/inc/dec + jcc.

This patch series marks the case clearly by joining the fused
instruction pair in the arrow of the jump.

For example:

       │   ┌──cmpl   $0x0,argp_program_version_hook
 81.93 │   │──je     20
       │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
       │   │↓ jne    29
       │   │↓ jmp    43
 11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

Change-log:
-----------
v2: According to Arnaldo's comments, remove the weak function and
    use an arch-specific function instead to check fused instruction
    pair.

v1: Inital post

Jin Yao (3):
  perf util: Return arch from symbol__disassemble and save it in browser
  perf util: Check for fused instruction
  perf report: Implement visual marker for macro fusion in annotate

 tools/perf/arch/x86/annotate/instructions.c | 18 ++++++++++++++++
 tools/perf/builtin-top.c                    |  2 +-
 tools/perf/ui/browser.c                     | 27 ++++++++++++++++++++++++
 tools/perf/ui/browser.h                     |  2 ++
 tools/perf/ui/browsers/annotate.c           | 32 ++++++++++++++++++++++++++++-
 tools/perf/ui/gtk/annotate.c                |  3 ++-
 tools/perf/util/annotate.c                  | 25 ++++++++++++++++++++--
 tools/perf/util/annotate.h                  |  6 +++++-
 8 files changed, 109 insertions(+), 6 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser
  2017-06-19  2:55 [PATCH v2 0/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
@ 2017-06-19  2:55 ` Jin Yao
  2017-06-20  9:02   ` [tip:perf/core] perf annotate: Return arch from symbol__disassemble() " tip-bot for Jin Yao
  2017-06-19  2:55 ` [PATCH v2 2/3] perf util: Check for fused instruction Jin Yao
  2017-06-19  2:55 ` [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
  2 siblings, 1 reply; 10+ messages in thread
From: Jin Yao @ 2017-06-19  2:55 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

In annotate browser, we will add support to check fused instructions.
While this is x86-specific feature so we need the annotate browser
to know what the arch it runs on.

symbol__disassemble has figured out the arch. This patch just lets
the arch return from symbol__disassemble and save the arch in
annotate browser.

Change-log:
-----------
This is a new function in v2 series.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/builtin-top.c          |  2 +-
 tools/perf/ui/browsers/annotate.c |  6 +++++-
 tools/perf/ui/gtk/annotate.c      |  3 ++-
 tools/perf/util/annotate.c        | 10 ++++++++--
 tools/perf/util/annotate.h        |  4 +++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 10b6362..2bcfa46 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__disassemble(sym, map, NULL, 0);
+	err = symbol__disassemble(sym, map, NULL, 0, NULL);
 	if (err == 0) {
 out_assign:
 		top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 7a03389..27f41f2 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -46,12 +46,15 @@ static struct annotate_browser_opt {
 	.jump_arrows	= true,
 };
 
+struct arch;
+
 struct annotate_browser {
 	struct ui_browser b;
 	struct rb_root	  entries;
 	struct rb_node	  *curr_hot;
 	struct disasm_line  *selection;
 	struct disasm_line  **offsets;
+	struct arch	    *arch;
 	int		    nr_events;
 	u64		    start;
 	int		    nr_asm_entries;
@@ -1070,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 		  (nr_pcnt - 1);
 	}
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  sizeof_bdl, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index e99ba86..d903fd4 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  0, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1367d7e..f21a105 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1371,7 +1371,9 @@ static const char *annotate__norm_arch(const char *arch_name)
 	return normalize_arch((char *)arch_name);
 }
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch)
 {
 	struct dso *dso = map->dso;
 	char command[PATH_MAX * 2];
@@ -1397,6 +1399,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	if (arch == NULL)
 		return -ENOTSUP;
 
+	if (parch)
+		*parch = arch;
+
 	if (arch->init) {
 		err = arch->init(arch);
 		if (err) {
@@ -1914,7 +1919,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
-	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
+	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				0, NULL) < 0)
 		return -1;
 
 	len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e..2105503 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch);
 
 enum symbol_disassemble_errno {
 	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] perf util: Check for fused instruction
  2017-06-19  2:55 [PATCH v2 0/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
  2017-06-19  2:55 ` [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser Jin Yao
@ 2017-06-19  2:55 ` Jin Yao
  2017-06-19  2:55 ` [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
  2 siblings, 0 replies; 10+ messages in thread
From: Jin Yao @ 2017-06-19  2:55 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

Macro fusion merges two instructions to a single micro-op. Intel
core platform performs this hardware optimization under limited
circumstances.

For example, CMP + JCC can be "fused" and executed /retired
together. While with sampling this can result in the sample
sometimes being on the JCC and sometimes on the CMP.
So for the fused instruction pair, they could be considered
together.

In general, the fused instruction pairs are:

cmp/test/add/sub/and/inc/dec + jcc.

This patch adds an x86-specific function which checks if 2
instructions are in a "fused" pair. For non-x86 arch, the
function is just NULL.

Change-log:
-----------
v2: Remove the origial weak function. Arnaldo points out
    that doing it as a weak function that will be overridden
    by the host arch doesn't work. So now it's implemented
    as an arch-specific function.

v1: Initial post

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/arch/x86/annotate/instructions.c | 18 ++++++++++++++++++
 tools/perf/util/annotate.c                  | 10 ++++++++++
 tools/perf/util/annotate.h                  |  1 +
 3 files changed, 29 insertions(+)

diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index c1625f2..bc6272c 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -76,3 +76,21 @@ static struct ins x86__instructions[] = {
 	{ .name = "xbeginq",	.ops = &jump_ops, },
 	{ .name = "retq",	.ops = &ret_ops,  },
 };
+
+static bool x86__ins_is_fused(const char *ins1, const char *ins2)
+{
+	if (strstr(ins2, "jmp"))
+		return false;
+
+	if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+	     strstr(ins1, "test") ||
+	     strstr(ins1, "add") ||
+	     strstr(ins1, "sub") ||
+	     strstr(ins1, "and") ||
+	     strstr(ins1, "inc") ||
+	     strstr(ins1, "dec")) {
+		return true;
+	}
+
+	return false;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f21a105..bb40158 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -48,6 +48,7 @@ struct arch {
 	bool		initialized;
 	void		*priv;
 	int		(*init)(struct arch *arch);
+	bool		(*ins_is_fused)(const char *insn1, const char *insn2);
 	struct		{
 		char comment_char;
 		char skip_functions_char;
@@ -129,6 +130,7 @@ static struct arch architectures[] = {
 		.name = "x86",
 		.instructions = x86__instructions,
 		.nr_instructions = ARRAY_SIZE(x86__instructions),
+		.ins_is_fused = x86__ins_is_fused,
 		.objdump =  {
 			.comment_char = '#',
 		},
@@ -171,6 +173,14 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
 	return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+	if (!arch || !arch->ins_is_fused)
+		return false;
+
+	return arch->ins_is_fused(ins1, ins2);
+}
+
 static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
 {
 	char *endptr, *tok, *name;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 2105503..eb6ec40 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -53,6 +53,7 @@ bool ins__is_jump(const struct ins *ins);
 bool ins__is_call(const struct ins *ins);
 bool ins__is_ret(const struct ins *ins);
 int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
 
 struct annotation;
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-19  2:55 [PATCH v2 0/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
  2017-06-19  2:55 ` [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser Jin Yao
  2017-06-19  2:55 ` [PATCH v2 2/3] perf util: Check for fused instruction Jin Yao
@ 2017-06-19  2:55 ` Jin Yao
  2017-06-19 17:35   ` Arnaldo Carvalho de Melo
  2 siblings, 1 reply; 10+ messages in thread
From: Jin Yao @ 2017-06-19  2:55 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

For marking the fused instructions clearly, This patch adds a
line before the first instruction of pair and joins it with the
arrow of the jump.

For example, when je is selected in annotate view, the line
before cmpl is displayed and joins the arrow of je.

       │   ┌──cmpl   $0x0,argp_program_version_hook
 81.93 │   │──je     20
       │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
       │   │↓ jne    29
       │   │↓ jmp    43
 11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

That means the cmpl+je is fused instruction pair and they should
be considered together.

Change-log:
----------
v2: No more changes, just uses a new function "ins__is_fused"
    to check if the instructions are fused.

v1: Initial post

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/ui/browser.c           | 27 +++++++++++++++++++++++++++
 tools/perf/ui/browser.h           |  2 ++
 tools/perf/ui/browsers/annotate.c | 26 ++++++++++++++++++++++++++
 tools/perf/util/annotate.c        |  5 +++++
 tools/perf/util/annotate.h        |  1 +
 5 files changed, 61 insertions(+)

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index a4d3762..acba636 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -738,6 +738,33 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
 		__ui_browser__line_arrow_down(browser, column, start, end);
 }
 
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+			    unsigned int row, bool arrow_down)
+{
+	unsigned int end_row;
+
+	if (row >= browser->top_idx)
+		end_row = row - browser->top_idx;
+	else
+		return;
+
+	SLsmg_set_char_set(1);
+
+	if (arrow_down) {
+		ui_browser__gotorc(browser, end_row, column - 1);
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column);
+		SLsmg_draw_hline(2);
+		ui_browser__gotorc(browser, end_row + 1, column - 1);
+		SLsmg_draw_vline(1);
+	} else {
+		ui_browser__gotorc(browser, end_row, column);
+		SLsmg_draw_hline(2);
+	}
+
+	SLsmg_set_char_set(0);
+}
+
 void ui_browser__init(void)
 {
 	int i = 0;
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index be3b70e..a12eff7 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -43,6 +43,8 @@ void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
 void ui_browser__write_graph(struct ui_browser *browser, int graph);
 void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
 			      u64 start, u64 end);
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+			    unsigned int row, bool arrow_down);
 void __ui_browser__show_title(struct ui_browser *browser, const char *title);
 void ui_browser__show_title(struct ui_browser *browser, const char *title);
 int ui_browser__show(struct ui_browser *browser, const char *title,
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 27f41f2..89f54c4 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -272,6 +272,25 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy
 	return true;
 }
 
+static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
+{
+	struct disasm_line *pos = list_prev_entry(cursor, node);
+	const char *name;
+
+	if (!pos)
+		return false;
+
+	if (ins__is_lock(&pos->ins))
+		name = pos->ops.locked.ins.name;
+	else
+		name = pos->ins.name;
+
+	if (!name || !cursor->ins.name)
+		return false;
+
+	return ins__is_fused(ab->arch, name, cursor->ins.name);
+}
+
 static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 {
 	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -307,6 +326,13 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
 	__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
 				 from, to);
+
+	if (is_fused(ab, cursor)) {
+		ui_browser__mark_fused(browser,
+				       pcnt_width + 3 + ab->addr_width,
+				       from - 1,
+				       to > from ? true : false);
+	}
 }
 
 static unsigned int annotate_browser__refresh(struct ui_browser *browser)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index bb40158..bbbcf91 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -512,6 +512,11 @@ bool ins__is_ret(const struct ins *ins)
 	return ins->ops == &ret_ops;
 }
 
+bool ins__is_lock(const struct ins *ins)
+{
+	return ins->ops == &lock_ops;
+}
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
 	const struct ins *ins = insp;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index eb6ec40..a3f2173 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -52,6 +52,7 @@ struct ins_ops {
 bool ins__is_jump(const struct ins *ins);
 bool ins__is_call(const struct ins *ins);
 bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
 int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-19  2:55 ` [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
@ 2017-06-19 17:35   ` Arnaldo Carvalho de Melo
  2017-06-19 19:13     ` Arnaldo Carvalho de Melo
  2017-06-20  1:25     ` Jin, Yao
  0 siblings, 2 replies; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-19 17:35 UTC (permalink / raw)
  To: Jin Yao
  Cc: Jiri Olsa, Peter Zijlstra, mingo, alexander.shishkin,
	linux-kernel, ak, kan.liang, yao.jin

Em Mon, Jun 19, 2017 at 10:55:58AM +0800, Jin Yao escreveu:
> For marking the fused instructions clearly, This patch adds a
> line before the first instruction of pair and joins it with the
> arrow of the jump.
> 
> For example, when je is selected in annotate view, the line
> before cmpl is displayed and joins the arrow of je.
> 
>        │   ┌──cmpl   $0x0,argp_program_version_hook
>  81.93 │   │──je     20
>        │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
>        │   │↓ jne    29
>        │   │↓ jmp    43
>  11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

Ok, thanks for making this per-arch! Some comments:

I think we should have this marked permanently, i.e. not just when we go
to the jump line, something like this (testing here in a t450s
broadwell, function hc_find_func, /usr/lib64/liblzma.so.5.2.2):

It is like this now, when we are not on the jne jump line:

  0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
       │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
  1.06 │ 70:   mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
 77.98 │ 74:   cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
       │     ↑ jne    70                                                                                                                                     ▒
  0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
  0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒

I think it should be augmented to:

  0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
       │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
  1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
 77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
       │     ↑ jne    70                                                                                                                                     ▒
  0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
  0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒

I.e. no arrow, the two instructions that end up as one micro-op being
connected.

And then this:

        │   ┌──cmpl   $0x0,argp_program_version_hook
  81.93 │   │──je     20
        │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
        │   │↓ jne    29
        │   │↓ jmp    43
  11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

Would look better as:

        │   ┌──cmpl   $0x0,argp_program_version_hook
  81.93 │   ├──je     20
        │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
        │   │↓ jne    29
        │   │↓ jmp    43
  11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

Patch below, please test/ack :-)

This was the low hanging fruit, having the:

  1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
 77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒

Marker always there, not just when we have the cursor on top of one of
those lines remains to be coded.

But you state:

 ------------
    Macro fusion merges two instructions to a single micro-op. Intel core
    platform performs this hardware optimization under limited
    circumstances.
 ------------

"Intel core", what about older arches, etc, don't you have to look at:

# cpudesc : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz
# cpuid : GenuineIntel,6,61,4

present in the perf.data header (or in the running system, for things
like 'perf top') to make sure that this is a machine where such "macro
fusion" takes place?

- Arnaldo

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index acba636bd165..9ef7677ae14f 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -756,8 +756,10 @@ void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
 		ui_browser__gotorc(browser, end_row, column);
 		SLsmg_draw_hline(2);
 		ui_browser__gotorc(browser, end_row + 1, column - 1);
-		SLsmg_draw_vline(1);
+		SLsmg_write_char(SLSMG_LTEE_CHAR);
 	} else {
+		ui_browser__gotorc(browser, end_row, column - 1);
+		SLsmg_write_char(SLSMG_LTEE_CHAR);
 		ui_browser__gotorc(browser, end_row, column);
 		SLsmg_draw_hline(2);
 	}

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-19 17:35   ` Arnaldo Carvalho de Melo
@ 2017-06-19 19:13     ` Arnaldo Carvalho de Melo
  2017-06-20  1:25     ` Jin, Yao
  1 sibling, 0 replies; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-19 19:13 UTC (permalink / raw)
  To: Jin Yao
  Cc: Jiri Olsa, Peter Zijlstra, mingo, alexander.shishkin,
	linux-kernel, ak, kan.liang, yao.jin

Em Mon, Jun 19, 2017 at 02:35:29PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Jun 19, 2017 at 10:55:58AM +0800, Jin Yao escreveu:
> 
> Marker always there, not just when we have the cursor on top of one of
> those lines remains to be coded.
> 
> But you state:
> 
>  ------------
>     Macro fusion merges two instructions to a single micro-op. Intel core
>     platform performs this hardware optimization under limited
>     circumstances.
>  ------------
> 
> "Intel core", what about older arches, etc, don't you have to look at:
> 
> # cpudesc : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz
> # cpuid : GenuineIntel,6,61,4
> 
> present in the perf.data header (or in the running system, for things
> like 'perf top') to make sure that this is a machine where such "macro
> fusion" takes place?

Ok, I have the patches that need this discussion to get to a conclusion
on a separate patch, tmp.perf/annotate, the first patch, the one that
returns the 'struct arch' for the browser to use arch specific stuff is
in perf/core and can go to Ingo now.

- Arnaldo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-19 17:35   ` Arnaldo Carvalho de Melo
  2017-06-19 19:13     ` Arnaldo Carvalho de Melo
@ 2017-06-20  1:25     ` Jin, Yao
  2017-06-20  1:37       ` Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 10+ messages in thread
From: Jin, Yao @ 2017-06-20  1:25 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Peter Zijlstra, mingo, alexander.shishkin,
	linux-kernel, ak, kan.liang, yao.jin


> Ok, thanks for making this per-arch! Some comments:
>
> I think we should have this marked permanently, i.e. not just when we go
> to the jump line, something like this (testing here in a t450s
> broadwell, function hc_find_func, /usr/lib64/liblzma.so.5.2.2):
>
> It is like this now, when we are not on the jne jump line:
>
>    0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
>         │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
>    1.06 │ 70:   mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
>   77.98 │ 74:   cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
>         │     ↑ jne    70                                                                                                                                     ▒
>    0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
>    0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒
>
> I think it should be augmented to:
>
>    0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
>         │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
>    1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
>   77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
>         │     ↑ jne    70                                                                                                                                     ▒
>    0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
>    0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒
>
> I.e. no arrow, the two instructions that end up as one micro-op being
> connected.

The fused instruction pairs are:
cmp + jcc
test + jcc
add + jcc
sub + jcc
and + jcc
inc + jcc
dec + jcc

Mov and cmp are not the fused instruction pair. So we don't need to 
connect mov and cmp. I guess what Arnaldo wants is to connect two fused 
instructions even we don't go to the jcc line. For example: a line is 
connected between cmp and jne in above case.

I have thought about that. While the visualization may be not very good 
because the original arrow before jne would be overwritten. So now I 
just implement a way that joins the jump arrow when we go to the jcc 
line. Another consideration is the fused instruction pairs are very 
common instructions in code, if we mark them all, there may be too much.

> And then this:
>
>          │   ┌──cmpl   $0x0,argp_program_version_hook
>    81.93 │   │──je     20
>          │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
>          │   │↓ jne    29
>          │   │↓ jmp    43
>    11.47 │20:└─→cmpxch %esi,0x38a999(%rip)
>
> Would look better as:
>
>          │   ┌──cmpl   $0x0,argp_program_version_hook
>    81.93 │   ├──je     20
>          │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
>          │   │↓ jne    29
>          │   │↓ jmp    43
>    11.47 │20:└─→cmpxch %esi,0x38a999(%rip)
>
> Patch below, please test/ack :-)

I have tested. It's better! There is no space in the line. Thanks!

> This was the low hanging fruit, having the:
>
>    1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
>   77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
>
> Marker always there, not just when we have the cursor on top of one of
> those lines remains to be coded.

My comment is as above.

> But you state:
>
>   ------------
>      Macro fusion merges two instructions to a single micro-op. Intel core
>      platform performs this hardware optimization under limited
>      circumstances.
>   ------------
>
> "Intel core", what about older arches, etc, don't you have to look at:
>
> # cpudesc : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz
> # cpuid : GenuineIntel,6,61,4
>
> present in the perf.data header (or in the running system, for things
> like 'perf top') to make sure that this is a machine where such "macro
> fusion" takes place?
>
> - Arnaldo

Reference for macro fusion is the optimization guide,
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html
2.3.2.1
— In Intel microarchitecture code name Nehalem: CMP, TEST.
— In Intel microarchitecture code name Sandy Bridge: CMP, TEST, ADD, 
SUB, AND, INC, DEC
— These instructions can fuse if The first source / destination operand 
is a register.

The second source operand (if exists) is one of: immediate, register, or 
non RIP-relative memory.
The second instruction of the macro-fusable pair is a conditional branch.

We probably don't need the full rules, just a simple test for 
CMP/TEST/ADD/SUB/AND/INC/DEC and second instruction a Jcc condition 
branch. Also I don't think we need to distinguish Nehalem/Sandy Bridge 
and other core platforms. A simple test may be acceptable.

Thanks!
Jin Yao

> diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
> index acba636bd165..9ef7677ae14f 100644
> --- a/tools/perf/ui/browser.c
> +++ b/tools/perf/ui/browser.c
> @@ -756,8 +756,10 @@ void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
>   		ui_browser__gotorc(browser, end_row, column);
>   		SLsmg_draw_hline(2);
>   		ui_browser__gotorc(browser, end_row + 1, column - 1);
> -		SLsmg_draw_vline(1);
> +		SLsmg_write_char(SLSMG_LTEE_CHAR);
>   	} else {
> +		ui_browser__gotorc(browser, end_row, column - 1);
> +		SLsmg_write_char(SLSMG_LTEE_CHAR);
>   		ui_browser__gotorc(browser, end_row, column);
>   		SLsmg_draw_hline(2);
>   	}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-20  1:25     ` Jin, Yao
@ 2017-06-20  1:37       ` Arnaldo Carvalho de Melo
  2017-06-20  1:54         ` Jin, Yao
  0 siblings, 1 reply; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-20  1:37 UTC (permalink / raw)
  To: Jin, Yao
  Cc: Jiri Olsa, Peter Zijlstra, mingo, alexander.shishkin,
	linux-kernel, ak, kan.liang, yao.jin

Em Tue, Jun 20, 2017 at 09:25:35AM +0800, Jin, Yao escreveu:
> 
> > Ok, thanks for making this per-arch! Some comments:
> > 
> > I think we should have this marked permanently, i.e. not just when we go
> > to the jump line, something like this (testing here in a t450s
> > broadwell, function hc_find_func, /usr/lib64/liblzma.so.5.2.2):
> > 
> > It is like this now, when we are not on the jne jump line:
> > 
> >    0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
> >         │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
> >    1.06 │ 70:   mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
> >   77.98 │ 74:   cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
> >         │     ↑ jne    70                                                                                                                                     ▒
> >    0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
> >    0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒
> > 
> > I think it should be augmented to:
> > 
> >    0.71 │       mov    %r14d,%r10d                                                                                                                            ▒
> >         │       movzbl (%rdx,%r10,1),%ebp                                                                                                                     ▒
> >    1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
> >   77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
> >         │     ↑ jne    70                                                                                                                                     ▒
> >    0.85 │       movzbl (%rdx),%r10d                                                                                                                           ▒
> >    0.99 │       cmp    %r10b,(%rbx)                                                                                                                           ▒
> > 
> > I.e. no arrow, the two instructions that end up as one micro-op being
> > connected.
> 
> The fused instruction pairs are:
> cmp + jcc
> test + jcc
> add + jcc
> sub + jcc
> and + jcc
> inc + jcc
> dec + jcc
> 
> Mov and cmp are not the fused instruction pair. So we don't need to connect

Right, my bad, what I was trying to say was to have a marker for fused
instructions, not just when we go with the cursor over it like with this
patchset.

> mov and cmp. I guess what Arnaldo wants is to connect two fused instructions
> even we don't go to the jcc line. For example: a line is connected between
> cmp and jne in above case.

Right
 
> I have thought about that. While the visualization may be not very good
> because the original arrow before jne would be overwritten. So now I just
> implement a way that joins the jump arrow when we go to the jcc line.
> Another consideration is the fused instruction pairs are very common
> instructions in code, if we mark them all, there may be too much.

perhaps

> > And then this:
> > 
> >          │   ┌──cmpl   $0x0,argp_program_version_hook
> >    81.93 │   │──je     20
> >          │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
> >          │   │↓ jne    29
> >          │   │↓ jmp    43
> >    11.47 │20:└─→cmpxch %esi,0x38a999(%rip)
> > 
> > Would look better as:
> > 
> >          │   ┌──cmpl   $0x0,argp_program_version_hook
> >    81.93 │   ├──je     20
> >          │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
> >          │   │↓ jne    29
> >          │   │↓ jmp    43
> >    11.47 │20:└─→cmpxch %esi,0x38a999(%rip)
> > 
> > Patch below, please test/ack :-)
> 
> I have tested. It's better! There is no space in the line. Thanks!
> 
> > This was the low hanging fruit, having the:
> > 
> >    1.06 │ 70: ┌─mov    (%r9,%rcx,4),%ecx                                                                                                                      ◆
> >   77.98 │ 74: └─cmp    %bpl,(%rbx,%r10,1)                                                                                                                     ▒
> > 
> > Marker always there, not just when we have the cursor on top of one of
> > those lines remains to be coded.
> 
> My comment is as above.
> 
> > But you state:
> > 
> >   ------------
> >      Macro fusion merges two instructions to a single micro-op. Intel core
> >      platform performs this hardware optimization under limited
> >      circumstances.
> >   ------------
> > 
> > "Intel core", what about older arches, etc, don't you have to look at:
> > 
> > # cpudesc : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz
> > # cpuid : GenuineIntel,6,61,4
> > 
> > present in the perf.data header (or in the running system, for things
> > like 'perf top') to make sure that this is a machine where such "macro
> > fusion" takes place?
> > 
> > - Arnaldo
> 
> Reference for macro fusion is the optimization guide,
> http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html
> 2.3.2.1
> — In Intel microarchitecture code name Nehalem: CMP, TEST.
> — In Intel microarchitecture code name Sandy Bridge: CMP, TEST, ADD, SUB,
> AND, INC, DEC
> — These instructions can fuse if The first source / destination operand is a
> register.
> 
> The second source operand (if exists) is one of: immediate, register, or non
> RIP-relative memory.
> The second instruction of the macro-fusable pair is a conditional branch.
> 
> We probably don't need the full rules, just a simple test for
> CMP/TEST/ADD/SUB/AND/INC/DEC and second instruction a Jcc condition branch.
> Also I don't think we need to distinguish Nehalem/Sandy Bridge and other
> core platforms. A simple test may be acceptable.

Humm, then we need to make sure somehow that this may or may not be
happening, with the above rules and optimization guide URL and pages
mentioned in the documentation.

I think that as we improve the disassembler, the more precise we can go
the better. If we know that the machine is x86 _and_ Nehalem, then we
should do this fusing visual cue onlyu for CMP and TEST, etc.

- Arnaldo
 
> Thanks!
> Jin Yao
> 
> > diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
> > index acba636bd165..9ef7677ae14f 100644
> > --- a/tools/perf/ui/browser.c
> > +++ b/tools/perf/ui/browser.c
> > @@ -756,8 +756,10 @@ void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
> >   		ui_browser__gotorc(browser, end_row, column);
> >   		SLsmg_draw_hline(2);
> >   		ui_browser__gotorc(browser, end_row + 1, column - 1);
> > -		SLsmg_draw_vline(1);
> > +		SLsmg_write_char(SLSMG_LTEE_CHAR);
> >   	} else {
> > +		ui_browser__gotorc(browser, end_row, column - 1);
> > +		SLsmg_write_char(SLSMG_LTEE_CHAR);
> >   		ui_browser__gotorc(browser, end_row, column);
> >   		SLsmg_draw_hline(2);
> >   	}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate
  2017-06-20  1:37       ` Arnaldo Carvalho de Melo
@ 2017-06-20  1:54         ` Jin, Yao
  0 siblings, 0 replies; 10+ messages in thread
From: Jin, Yao @ 2017-06-20  1:54 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Peter Zijlstra, mingo, alexander.shishkin,
	linux-kernel, ak, kan.liang, yao.jin


>> Reference for macro fusion is the optimization guide,
>> http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html
>> 2.3.2.1
>> — In Intel microarchitecture code name Nehalem: CMP, TEST.
>> — In Intel microarchitecture code name Sandy Bridge: CMP, TEST, ADD, SUB,
>> AND, INC, DEC
>> — These instructions can fuse if The first source / destination operand is a
>> register.
>>
>> The second source operand (if exists) is one of: immediate, register, or non
>> RIP-relative memory.
>> The second instruction of the macro-fusable pair is a conditional branch.
>>
>> We probably don't need the full rules, just a simple test for
>> CMP/TEST/ADD/SUB/AND/INC/DEC and second instruction a Jcc condition branch.
>> Also I don't think we need to distinguish Nehalem/Sandy Bridge and other
>> core platforms. A simple test may be acceptable.
> Humm, then we need to make sure somehow that this may or may not be
> happening, with the above rules and optimization guide URL and pages
> mentioned in the documentation.
>
> I think that as we improve the disassembler, the more precise we can go
> the better. If we know that the machine is x86 _and_ Nehalem, then we
> should do this fusing visual cue onlyu for CMP and TEST, etc.
>
> - Arnaldo
>   

I will add checking for Nehalem (CMP, TEST). For other newer Intel CPUs 
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).

Thanks
Jin Yao

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:perf/core] perf annotate: Return arch from symbol__disassemble() and save it in browser
  2017-06-19  2:55 ` [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser Jin Yao
@ 2017-06-20  9:02   ` tip-bot for Jin Yao
  0 siblings, 0 replies; 10+ messages in thread
From: tip-bot for Jin Yao @ 2017-06-20  9:02 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: kan.liang, mingo, jolsa, linux-kernel, acme, ak, yao.jin,
	alexander.shishkin, peterz, tglx, hpa

Commit-ID:  dcaa394807ac219d8597d25bad3fe1bc6c86123b
Gitweb:     http://git.kernel.org/tip/dcaa394807ac219d8597d25bad3fe1bc6c86123b
Author:     Jin Yao <yao.jin@linux.intel.com>
AuthorDate: Mon, 19 Jun 2017 10:55:56 +0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 19 Jun 2017 15:27:09 -0300

perf annotate: Return arch from symbol__disassemble() and save it in browser

In annotate browser, we will add support to check fused instructions.
While this is x86-specific feature so we need the annotate browser to
know what the arch it runs on.

symbol__disassemble() has figured out the arch. This patch just lets the
arch return from symbol__disassemble and save the arch in annotate
browser.

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1497840958-4759-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c          |  2 +-
 tools/perf/ui/browsers/annotate.c |  6 +++++-
 tools/perf/ui/gtk/annotate.c      |  3 ++-
 tools/perf/util/annotate.c        | 10 ++++++++--
 tools/perf/util/annotate.h        |  4 +++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 10b6362..2bcfa46 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__disassemble(sym, map, NULL, 0);
+	err = symbol__disassemble(sym, map, NULL, 0, NULL);
 	if (err == 0) {
 out_assign:
 		top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 7a03389..27f41f2 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -46,12 +46,15 @@ static struct annotate_browser_opt {
 	.jump_arrows	= true,
 };
 
+struct arch;
+
 struct annotate_browser {
 	struct ui_browser b;
 	struct rb_root	  entries;
 	struct rb_node	  *curr_hot;
 	struct disasm_line  *selection;
 	struct disasm_line  **offsets;
+	struct arch	    *arch;
 	int		    nr_events;
 	u64		    start;
 	int		    nr_asm_entries;
@@ -1070,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 		  (nr_pcnt - 1);
 	}
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  sizeof_bdl, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index e99ba86..d903fd4 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  0, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ddbd56d..be1caab 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1379,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name)
 	return normalize_arch((char *)arch_name);
 }
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch)
 {
 	struct dso *dso = map->dso;
 	char command[PATH_MAX * 2];
@@ -1405,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	if (arch == NULL)
 		return -ENOTSUP;
 
+	if (parch)
+		*parch = arch;
+
 	if (arch->init) {
 		err = arch->init(arch);
 		if (err) {
@@ -1901,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
-	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
+	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				0, NULL) < 0)
 		return -1;
 
 	len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e..2105503 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch);
 
 enum symbol_disassemble_errno {
 	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-06-20  9:05 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-19  2:55 [PATCH v2 0/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
2017-06-19  2:55 ` [PATCH v2 1/3] perf util: Return arch from symbol__disassemble and save it in browser Jin Yao
2017-06-20  9:02   ` [tip:perf/core] perf annotate: Return arch from symbol__disassemble() " tip-bot for Jin Yao
2017-06-19  2:55 ` [PATCH v2 2/3] perf util: Check for fused instruction Jin Yao
2017-06-19  2:55 ` [PATCH v2 3/3] perf report: Implement visual marker for macro fusion in annotate Jin Yao
2017-06-19 17:35   ` Arnaldo Carvalho de Melo
2017-06-19 19:13     ` Arnaldo Carvalho de Melo
2017-06-20  1:25     ` Jin, Yao
2017-06-20  1:37       ` Arnaldo Carvalho de Melo
2017-06-20  1:54         ` Jin, Yao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.