linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] perf tools: perf diff for different binaries
@ 2014-10-31 12:06 kan.liang
  2014-10-31 20:03 ` Andi Kleen
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: kan.liang @ 2014-10-31 12:06 UTC (permalink / raw)
  To: acme, jolsa; +Cc: linux-kernel, andi, Kan Liang

From: Kan Liang <kan.liang@intel.com>

perf diff can display the differential profile between two perf.data
files. However, the perf.data files have to come from same binaries.

The patch introduced a key "symbol_name" for --sort option, so the user
can compare the perf.data files from different binaries, even different
kernel version. If symbol_name is set, perf diff display the
differential profile of functions.
It could help the user quickly locate the scaling regression caused by
new binaries/kernels.

Here is an example. I have two version of binaries. One is running on
kernel 3.16. The other is running on kernel 3.17.

Using perf record, I got two perf.data files, v1_1_6perf.data (version
one in 3.16) and v2_1_7perf.data (version two in 3.17).

If I run old perf diff with --sort dso,symbol, we cannot get any useful
information for the user space binary. Furthermore, it failed to compare
the kernel function. E.g. native_write_msr_safe.

perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data

 Event 'cycles'

 Baseline           Ratio  Shared Object      Symbol
 ........  ..............  .................
....................................

                            [kernel.kallsyms]  [k]
__update_entity_load_avg_contrib
                            [kernel.kallsyms]  [k] _raw_spin_lock
                            [kernel.kallsyms]  [k] apic_timer_interrupt
                            [kernel.kallsyms]  [k] hrtimer_interrupt
     0.01%                  [kernel.kallsyms]  [k] native_write_msr_safe
                            [kernel.kallsyms]  [k] native_write_msr_safe
     0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
     0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
     0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
     0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
     0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
     0.01%                  [kernel.kallsyms]  [k] update_vsyscall
     0.05%                  [unknown]          [.] 0x0000000000400540
     0.04%                  [unknown]          [.] 0x0000000000400541
     0.03%                  [unknown]          [.] 0x000000000040054b
     0.04%                  [unknown]          [.] 0x0000000000400552
    33.55%                  [unknown]          [.] 0x0000000000400554
     1.22%                  [unknown]          [.] 0x000000000040055a
     8.00%                  [unknown]          [.] 0x000000000040055e
     0.02%                  [unknown]          [.] 0x0000000000400562
     8.41%                  [unknown]          [.] 0x0000000000400564
    48.13%                  [unknown]          [.] 0x000000000040056b
     0.16%                  [unknown]          [.] 0x0000000000400570
     0.17%                  [unknown]          [.] 0x0000000000400571
                            [unknown]          [.] 0x0000000000400580
                            [unknown]          [.] 0x0000000000400581
     0.01%                  [unknown]          [.] 0x0000000000400583
     0.01%                  [unknown]          [.] 0x0000000000400588
                            [unknown]          [.] 0x000000000040058b
     0.01%     1240.990221  [unknown]          [.] 0x000000000040058d
                            [unknown]          [.] 0x0000000000400590
     0.06%                  [unknown]          [.] 0x0000000000400591
                            [unknown]          [.] 0x0000000000400593
     0.04%                  [unknown]          [.] 0x0000000000400595
     0.01%     1240.603148  [unknown]          [.] 0x0000000000400597
                            [unknown]          [.] 0x000000000040059b
                            [unknown]          [.] 0x000000000040059d
                            [unknown]          [.] 0x00000000004005a1
                            [unknown]          [.] 0x00000000004005a5
                            [unknown]          [.] 0x00000000004005a7
                            [unknown]          [.] 0x00000000004005a8
                            [unknown]          [.] 0x00000000004005aa
                            [unknown]          [.] 0x00000000004005ba
                            [unknown]          [.] 0x00000000004005bf
                            [unknown]          [.] 0x00000000004005c4
                            [unknown]          [.] 0x00000000004005c9
                            [unknown]          [.] 0x00000000004005ce
                            [unknown]          [.] 0x00000000004005d2
                            [unknown]          [.] 0x00000000004005d6
                            [unknown]          [.] 0x00000000004005d8
                            [unknown]          [.] 0x00000000004005f5

With the key "symbol_name", we can easily get the differential profile
from different binary versions and different kernel versions.

./perf diff -s dso,symbol_name --compute ratio v1_1_6perf.data v2_1_7perf.data

 Event 'cycles'

 Baseline           Ratio  Shared Object      Symbol Name
 ........  ..............  .................
....................................

                            [kernel.kallsyms]  [k]
__update_entity_load_avg_contrib
                            [kernel.kallsyms]  [k] _raw_spin_lock
                            [kernel.kallsyms]  [k] apic_timer_interrupt
                            [kernel.kallsyms]  [k] hrtimer_interrupt
     0.01%        1.998522  [kernel.kallsyms]  [k] native_write_msr_safe
     0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
     0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
     0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
     0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
     0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
     0.01%                  [kernel.kallsyms]  [k] update_vsyscall
                            tchain_edit        [.] f1
     0.14%        3.913444  tchain_edit        [.] f2
    99.82%        1.005478  tchain_edit        [.] f3

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 tools/perf/builtin-diff.c |  5 ++++-
 tools/perf/util/sort.c    | 26 ++++++++++++++++++++++++++
 tools/perf/util/sort.h    |  3 +++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..71183c1 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -743,7 +743,7 @@ static const struct option options[] = {
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+		   "sort by key(s): pid, comm, dso, symbol, symbol_name, parent, cpu, srcline, ..."
 		   " Please refer the man page for the complete list."),
 	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
 		   "separator for columns, no spaces will be added between "
@@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (setup_sorting() < 0)
 		usage_with_options(diff_usage, options);
 
+	if (sort__has_sym_name)
+		tool.mmap2 = perf_event__process_mmap2;
+
 	setup_pager();
 
 	sort__setup_elide(NULL);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9402885..1226e37 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -20,6 +20,7 @@ int		have_ignore_callees = 0;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
+int		sort__has_sym_name = 0;
 int		sort__has_dso = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
@@ -280,6 +281,27 @@ struct sort_entry sort_sym = {
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
+static int64_t
+sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *sym_name_l, *sym_name_r;
+
+	if (!left->ms.sym || !right->ms.sym)
+		return cmp_null(right->ms.sym, left->ms.sym);
+
+	sym_name_l = left->ms.sym->name;
+	sym_name_r = right->ms.sym->name;
+
+	return strcmp(sym_name_l, sym_name_r);
+}
+
+struct sort_entry sort_sym_name = {
+	.se_header	= "Symbol Name",
+	.se_cmp		= sort__sym_name_cmp,
+	.se_sort	= sort__sym_sort,
+	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_width_idx	= HISTC_SYMBOL,
+};
 /* --sort srcline */
 
 static int64_t
@@ -1170,6 +1192,7 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_COMM, "comm", sort_comm),
 	DIM(SORT_DSO, "dso", sort_dso),
 	DIM(SORT_SYM, "symbol", sort_sym),
+	DIM(SORT_SYM_NAME, "symbol_name", sort_sym_name),
 	DIM(SORT_PARENT, "parent", sort_parent),
 	DIM(SORT_CPU, "cpu", sort_cpu),
 	DIM(SORT_SRCLINE, "srcline", sort_srcline),
@@ -1430,6 +1453,8 @@ int sort_dimension__add(const char *tok)
 			sort__has_parent = 1;
 		} else if (sd->entry == &sort_sym) {
 			sort__has_sym = 1;
+		} else if (sd->entry == &sort_sym_name) {
+			sort__has_sym_name = 1;
 		} else if (sd->entry == &sort_dso) {
 			sort__has_dso = 1;
 		}
@@ -1809,6 +1834,7 @@ void reset_output_field(void)
 	sort__need_collapse = 0;
 	sort__has_parent = 0;
 	sort__has_sym = 0;
+	sort__has_sym_name = 0;
 	sort__has_dso = 0;
 
 	field_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..9b43c6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,10 +34,12 @@ extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
+extern int sort__has_sym_name;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
 extern struct sort_entry sort_sym;
+extern struct sort_entry sort_sym_name;
 extern struct sort_entry sort_parent;
 extern struct sort_entry sort_dso_from;
 extern struct sort_entry sort_dso_to;
@@ -161,6 +163,7 @@ enum sort_type {
 	SORT_COMM,
 	SORT_DSO,
 	SORT_SYM,
+	SORT_SYM_NAME,
 	SORT_PARENT,
 	SORT_CPU,
 	SORT_SRCLINE,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-10-31 12:06 [PATCH 1/1] perf tools: perf diff for different binaries kan.liang
@ 2014-10-31 20:03 ` Andi Kleen
  2014-11-03 10:40 ` Jiri Olsa
  2014-11-04  5:41 ` Namhyung Kim
  2 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2014-10-31 20:03 UTC (permalink / raw)
  To: kan.liang; +Cc: acme, jolsa, linux-kernel, andi

> 
> If I run old perf diff with --sort dso,symbol, we cannot get any useful
> information for the user space binary. Furthermore, it failed to compare
> the kernel function. E.g. native_write_msr_safe.
> 
> perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data

The new option needs to be documented in the manpage

(with a paragraph explaining that it allows comparison
between different versions of the binary)

-Andi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-10-31 12:06 [PATCH 1/1] perf tools: perf diff for different binaries kan.liang
  2014-10-31 20:03 ` Andi Kleen
@ 2014-11-03 10:40 ` Jiri Olsa
  2014-11-03 14:52   ` Liang, Kan
  2014-11-04  5:41 ` Namhyung Kim
  2 siblings, 1 reply; 11+ messages in thread
From: Jiri Olsa @ 2014-11-03 10:40 UTC (permalink / raw)
  To: kan.liang; +Cc: acme, jolsa, linux-kernel, andi

On Fri, Oct 31, 2014 at 05:06:12AM -0700, kan.liang@intel.com wrote:

SNIP

>     99.82%        1.005478  tchain_edit        [.] f3
> 
> Signed-off-by: Kan Liang <kan.liang@intel.com>
> ---
>  tools/perf/builtin-diff.c |  5 ++++-
>  tools/perf/util/sort.c    | 26 ++++++++++++++++++++++++++
>  tools/perf/util/sort.h    |  3 +++
>  3 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 25114c9..71183c1 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -743,7 +743,7 @@ static const struct option options[] = {
>  	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
>  		   "only consider these symbols"),
>  	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> -		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> +		   "sort by key(s): pid, comm, dso, symbol, symbol_name, parent, cpu, srcline, ..."
>  		   " Please refer the man page for the complete list."),
>  	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
>  		   "separator for columns, no spaces will be added between "
> @@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
>  	if (setup_sorting() < 0)
>  		usage_with_options(diff_usage, options);
>  
> +	if (sort__has_sym_name)
> +		tool.mmap2 = perf_event__process_mmap2;

why is the mmap2 callback set only for sort__has_sym_name?
Shouldn't we use/define it directly in the tool's definition?

jirka

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-03 10:40 ` Jiri Olsa
@ 2014-11-03 14:52   ` Liang, Kan
  0 siblings, 0 replies; 11+ messages in thread
From: Liang, Kan @ 2014-11-03 14:52 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: acme, jolsa, linux-kernel, andi



> > @@ -1164,6 +1164,9 @@ int cmd_diff(int argc, const char **argv, const
> char *prefix __maybe_unused)
> >  	if (setup_sorting() < 0)
> >  		usage_with_options(diff_usage, options);
> >
> > +	if (sort__has_sym_name)
> > +		tool.mmap2 = perf_event__process_mmap2;
> 
> why is the mmap2 callback set only for sort__has_sym_name?
> Shouldn't we use/define it directly in the tool's definition?
> 

If we define it in tools definition, perf diff will display the differential profile
between functions (Not previous userspace IPs).
For same binary, IPs are same. So I think it's better to compare between IPs.
For different binaries, IPs change. But most of the function names are same for
most binary update case. So we'd better compare between functions.
I want to keep both IP and function compare.
So only new sort key "symble_name" set mmap2 which implies function compare.
If we only need to compare between IPs, we don't need to set mmap2.

Thanks,
Kan

> jirka

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-10-31 12:06 [PATCH 1/1] perf tools: perf diff for different binaries kan.liang
  2014-10-31 20:03 ` Andi Kleen
  2014-11-03 10:40 ` Jiri Olsa
@ 2014-11-04  5:41 ` Namhyung Kim
  2014-11-04 17:07   ` Liang, Kan
  2 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2014-11-04  5:41 UTC (permalink / raw)
  To: kan.liang; +Cc: acme, jolsa, linux-kernel, andi

Hi kan,

On Fri, 31 Oct 2014 05:06:12 -0700, kan liang wrote:
> From: Kan Liang <kan.liang@intel.com>
>
> perf diff can display the differential profile between two perf.data
> files. However, the perf.data files have to come from same binaries.
>
> The patch introduced a key "symbol_name" for --sort option, so the user
> can compare the perf.data files from different binaries, even different
> kernel version. If symbol_name is set, perf diff display the
> differential profile of functions.
> It could help the user quickly locate the scaling regression caused by
> new binaries/kernels.
>
> Here is an example. I have two version of binaries. One is running on
> kernel 3.16. The other is running on kernel 3.17.
>
> Using perf record, I got two perf.data files, v1_1_6perf.data (version
> one in 3.16) and v2_1_7perf.data (version two in 3.17).
>
> If I run old perf diff with --sort dso,symbol, we cannot get any useful
> information for the user space binary. Furthermore, it failed to compare
> the kernel function. E.g. native_write_msr_safe.
>
> perf diff -s dso,symbol --compute ratio v1_1_6perf.data v2_1_7perf.data
>
>  Event 'cycles'
>
>  Baseline           Ratio  Shared Object      Symbol
>  ........  ..............  .................
> ....................................
>
>                             [kernel.kallsyms]  [k]
> __update_entity_load_avg_contrib
>                             [kernel.kallsyms]  [k] _raw_spin_lock
>                             [kernel.kallsyms]  [k] apic_timer_interrupt
>                             [kernel.kallsyms]  [k] hrtimer_interrupt
>      0.01%                  [kernel.kallsyms]  [k] native_write_msr_safe
>                             [kernel.kallsyms]  [k] native_write_msr_safe
>      0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
>      0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
>      0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
>      0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
>      0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
>      0.01%                  [kernel.kallsyms]  [k] update_vsyscall
>      0.05%                  [unknown]          [.] 0x0000000000400540
>      0.04%                  [unknown]          [.] 0x0000000000400541
>      0.03%                  [unknown]          [.] 0x000000000040054b
>      0.04%                  [unknown]          [.] 0x0000000000400552
>     33.55%                  [unknown]          [.] 0x0000000000400554
>      1.22%                  [unknown]          [.] 0x000000000040055a
>      8.00%                  [unknown]          [.] 0x000000000040055e
>      0.02%                  [unknown]          [.] 0x0000000000400562
>      8.41%                  [unknown]          [.] 0x0000000000400564
>     48.13%                  [unknown]          [.] 0x000000000040056b
>      0.16%                  [unknown]          [.] 0x0000000000400570
>      0.17%                  [unknown]          [.] 0x0000000000400571
>                             [unknown]          [.] 0x0000000000400580
>                             [unknown]          [.] 0x0000000000400581
>      0.01%                  [unknown]          [.] 0x0000000000400583
>      0.01%                  [unknown]          [.] 0x0000000000400588
>                             [unknown]          [.] 0x000000000040058b
>      0.01%     1240.990221  [unknown]          [.] 0x000000000040058d
>                             [unknown]          [.] 0x0000000000400590
>      0.06%                  [unknown]          [.] 0x0000000000400591
>                             [unknown]          [.] 0x0000000000400593
>      0.04%                  [unknown]          [.] 0x0000000000400595
>      0.01%     1240.603148  [unknown]          [.] 0x0000000000400597
>                             [unknown]          [.] 0x000000000040059b
>                             [unknown]          [.] 0x000000000040059d
>                             [unknown]          [.] 0x00000000004005a1
>                             [unknown]          [.] 0x00000000004005a5
>                             [unknown]          [.] 0x00000000004005a7
>                             [unknown]          [.] 0x00000000004005a8
>                             [unknown]          [.] 0x00000000004005aa
>                             [unknown]          [.] 0x00000000004005ba
>                             [unknown]          [.] 0x00000000004005bf
>                             [unknown]          [.] 0x00000000004005c4
>                             [unknown]          [.] 0x00000000004005c9
>                             [unknown]          [.] 0x00000000004005ce
>                             [unknown]          [.] 0x00000000004005d2
>                             [unknown]          [.] 0x00000000004005d6
>                             [unknown]          [.] 0x00000000004005d8
>                             [unknown]          [.] 0x00000000004005f5
>
> With the key "symbol_name", we can easily get the differential profile
> from different binary versions and different kernel versions.
>
> ./perf diff -s dso,symbol_name --compute ratio v1_1_6perf.data v2_1_7perf.data
>
>  Event 'cycles'
>
>  Baseline           Ratio  Shared Object      Symbol Name
>  ........  ..............  .................
> ....................................
>
>                             [kernel.kallsyms]  [k]
> __update_entity_load_avg_contrib
>                             [kernel.kallsyms]  [k] _raw_spin_lock
>                             [kernel.kallsyms]  [k] apic_timer_interrupt
>                             [kernel.kallsyms]  [k] hrtimer_interrupt
>      0.01%        1.998522  [kernel.kallsyms]  [k] native_write_msr_safe
>      0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
>      0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
>      0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
>      0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
>      0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
>      0.01%                  [kernel.kallsyms]  [k] update_vsyscall
>                             tchain_edit        [.] f1
>      0.14%        3.913444  tchain_edit        [.] f2
>     99.82%        1.005478  tchain_edit        [.] f3

Hmm.. I think it should be a default behavior for perf diff, otherwise
-s symbol is almost meaningless IMHO.  What about setting the
sort_sym.se_collapse in data_process() so that hists__match() can use
symbol names?

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-04  5:41 ` Namhyung Kim
@ 2014-11-04 17:07   ` Liang, Kan
  2014-11-05  6:32     ` Namhyung Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Liang, Kan @ 2014-11-04 17:07 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: acme, jolsa, linux-kernel, andi

Hi Namhyung,

> >                             tchain_edit        [.] f1
> >      0.14%        3.913444  tchain_edit        [.] f2
> >     99.82%        1.005478  tchain_edit        [.] f3
> 
> Hmm.. I think it should be a default behavior for perf diff, otherwise -s
> symbol is almost meaningless IMHO. 

I think we need both instruction level and function level diff.
For debugging scaling issue, I think we need to do deeper analysis for some
cache or lock issue. The function level is too high granularity.  

The new option can be used to debug scaling regression issue.
If the binary/kernel is updated, it doesn't make sense to compare the
symbol address, since it should be changed. So comparing the function
should be more useful.


> What about setting the
> sort_sym.se_collapse in data_process() so that hists__match() can use
> symbol names?

Yes, we can set it if we only do function level diff. But I'd like to keep
both. So I defined two sort keys.
"symbol" means "symbol address executed at the time of sample "
"symbol_name" means "name of function executed at the time of sample"

Thanks,
Kan
> 
> Thanks,
> Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-04 17:07   ` Liang, Kan
@ 2014-11-05  6:32     ` Namhyung Kim
  2014-11-05 17:28       ` Liang, Kan
  0 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2014-11-05  6:32 UTC (permalink / raw)
  To: Liang, Kan; +Cc: acme, jolsa, linux-kernel, andi

Hi Kan,

On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> Hi Namhyung,
>
>> >                             tchain_edit        [.] f1
>> >      0.14%        3.913444  tchain_edit        [.] f2
>> >     99.82%        1.005478  tchain_edit        [.] f3
>> 
>> Hmm.. I think it should be a default behavior for perf diff, otherwise -s
>> symbol is almost meaningless IMHO. 
>
> I think we need both instruction level and function level diff.
> For debugging scaling issue, I think we need to do deeper analysis for some
> cache or lock issue. The function level is too high granularity.  
>
> The new option can be used to debug scaling regression issue.
> If the binary/kernel is updated, it doesn't make sense to compare the
> symbol address, since it should be changed. So comparing the function
> should be more useful.
>
>
>> What about setting the
>> sort_sym.se_collapse in data_process() so that hists__match() can use
>> symbol names?
>
> Yes, we can set it if we only do function level diff. But I'd like to keep
> both. So I defined two sort keys.
> "symbol" means "symbol address executed at the time of sample "
> "symbol_name" means "name of function executed at the time of sample"

Hmm.. I don't think the symbol sort key provides the instruction level
diff that you want.  If it finds a symbol it just use the start address
of the symbol, not the exact address of the sample.  Am I missing
something?

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-05  6:32     ` Namhyung Kim
@ 2014-11-05 17:28       ` Liang, Kan
  2014-11-06  7:32         ` Namhyung Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Liang, Kan @ 2014-11-05 17:28 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: acme, jolsa, linux-kernel, andi


> Hi Kan,
> 
> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> > Hi Namhyung,
> >
> >> >                             tchain_edit        [.] f1
> >> >      0.14%        3.913444  tchain_edit        [.] f2
> >> >     99.82%        1.005478  tchain_edit        [.] f3
> >>
> >> Hmm.. I think it should be a default behavior for perf diff,
> >> otherwise -s symbol is almost meaningless IMHO.
> >
> > I think we need both instruction level and function level diff.
> > For debugging scaling issue, I think we need to do deeper analysis for
> > some cache or lock issue. The function level is too high granularity.
> >
> > The new option can be used to debug scaling regression issue.
> > If the binary/kernel is updated, it doesn't make sense to compare the
> > symbol address, since it should be changed. So comparing the function
> > should be more useful.
> >
> >
> >> What about setting the
> >> sort_sym.se_collapse in data_process() so that hists__match() can use
> >> symbol names?
> >
> > Yes, we can set it if we only do function level diff. But I'd like to
> > keep both. So I defined two sort keys.
> > "symbol" means "symbol address executed at the time of sample "
> > "symbol_name" means "name of function executed at the time of
> sample"
> 
> Hmm.. I don't think the symbol sort key provides the instruction level diff
> that you want.  If it finds a symbol it just use the start address of the
> symbol, not the exact address of the sample.  Am I missing something?
> 

No, the meaning of symbol in perf diff is different as in perf report. It uses
the exact address of the sample.

Here is current perf diff result and objdump fragment of the first binary.
You can see the exact address of the sample in function f3 was used, not the
start address.
33.55%                  [unknown]          [.] 0x0000000000400554
48.13%                  [unknown]          [.] 0x000000000040056b

[perf-test]#./perf diff -s dso,symbol --compute ratio ../../../v1_1_6perf.data
../../../v2_1_7perf.data
# Event 'cycles'
#
# Baseline           Ratio  Shared Object      Symbol
# ........  ..............  .................  .........................
#
                            [kernel.kallsyms]  [k] __update_cpu_load
                            [kernel.kallsyms]  [k] mod_timer
     0.01%                  [kernel.kallsyms]  [k] native_write_msr_safe
     0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
     0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
                            [kernel.kallsyms]  [k] rt_mutex_slowlock
     0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
     0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
     0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
     0.01%                  [kernel.kallsyms]  [k] update_vsyscall
                            [kernel.kallsyms]  [k] wait_for_common
                            [kernel.kallsyms]  [k] 0x000000000028482a
     0.05%                  [unknown]          [.] 0x0000000000400540
     0.04%                  [unknown]          [.] 0x0000000000400541
     0.03%                  [unknown]          [.] 0x000000000040054b
     0.04%                  [unknown]          [.] 0x0000000000400552
    33.55%                  [unknown]          [.] 0x0000000000400554
     1.22%                  [unknown]          [.] 0x000000000040055a
     8.00%                  [unknown]          [.] 0x000000000040055e
     0.02%                  [unknown]          [.] 0x0000000000400562
     8.41%                  [unknown]          [.] 0x0000000000400564
    48.13%                  [unknown]          [.] 0x000000000040056b
     0.16%                  [unknown]          [.] 0x0000000000400570
     0.17%                  [unknown]          [.] 0x0000000000400571
                            [unknown]          [.] 0x0000000000400580
                            [unknown]          [.] 0x0000000000400581
     0.01%                  [unknown]          [.] 0x0000000000400583
     0.01%                  [unknown]          [.] 0x0000000000400588
                            [unknown]          [.] 0x000000000040058b
     0.01%     1240.990221  [unknown]          [.] 0x000000000040058d
                            [unknown]          [.] 0x0000000000400590
     0.06%                  [unknown]          [.] 0x0000000000400591
                            [unknown]          [.] 0x0000000000400593
     0.04%                  [unknown]          [.] 0x0000000000400595
     0.01%     1240.603148  [unknown]          [.] 0x0000000000400597
                            [unknown]          [.] 0x000000000040059b
                            [unknown]          [.] 0x000000000040059d
                            [unknown]          [.] 0x00000000004005a1
                            [unknown]          [.] 0x00000000004005a5
                            [unknown]          [.] 0x00000000004005a7
                            [unknown]          [.] 0x00000000004005a8
                            [unknown]          [.] 0x00000000004005aa
                            [unknown]          [.] 0x00000000004005ba
                            [unknown]          [.] 0x00000000004005bf
                            [unknown]          [.] 0x00000000004005c4
                            [unknown]          [.] 0x00000000004005c9
                            [unknown]          [.] 0x00000000004005ce
                            [unknown]          [.] 0x00000000004005d2
                            [unknown]          [.] 0x00000000004005d6
                            [unknown]          [.] 0x00000000004005d8
                            [unknown]          [.] 0x00000000004005f5


[perf-test]#objdump -d tchain_edit_1

0000000000400540 <f3>:
  400540:       55                      push   %rbp
  400541:       48 89 e5                mov    %rsp,%rbp
  400544:       c7 45 f8 e8 03 00 00    movl   $0x3e8,-0x8(%rbp)
  40054b:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)
  400552:       eb 14                   jmp    400568 <f3+0x28>
  400554:       8b 45 fc                mov    -0x4(%rbp),%eax
  400557:       83 e0 01                and    $0x1,%eax
  40055a:       85 c0                   test   %eax,%eax
  40055c:       74 06                   je     400564 <f3+0x24>
  40055e:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
  400562:       eb 04                   jmp    400568 <f3+0x28>
  400564:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
  400568:       8b 45 fc                mov    -0x4(%rbp),%eax
  40056b:       3b 45 f8                cmp    -0x8(%rbp),%eax
  40056e:       7c e4                   jl     400554 <f3+0x14>
  400570:       5d                      pop    %rbp
  400571:       c3                      retq


Thanks,
Kan

> Thanks,
> Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-05 17:28       ` Liang, Kan
@ 2014-11-06  7:32         ` Namhyung Kim
  2014-11-06 14:16           ` Liang, Kan
  0 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2014-11-06  7:32 UTC (permalink / raw)
  To: Liang, Kan; +Cc: acme, jolsa, linux-kernel, andi

Hi Kan,

On Thu, Nov 6, 2014 at 2:28 AM, Liang, Kan <kan.liang@intel.com> wrote:
>
>> Hi Kan,
>>
>> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
>> >> What about setting the
>> >> sort_sym.se_collapse in data_process() so that hists__match() can use
>> >> symbol names?
>> >
>> > Yes, we can set it if we only do function level diff. But I'd like to
>> > keep both. So I defined two sort keys.
>> > "symbol" means "symbol address executed at the time of sample "
>> > "symbol_name" means "name of function executed at the time of
>> sample"
>>
>> Hmm.. I don't think the symbol sort key provides the instruction level diff
>> that you want.  If it finds a symbol it just use the start address of the
>> symbol, not the exact address of the sample.  Am I missing something?
>>
>
> No, the meaning of symbol in perf diff is different as in perf report. It uses
> the exact address of the sample.

Hmm.. are you sure?  I think it uses same code..


>
> Here is current perf diff result and objdump fragment of the first binary.
> You can see the exact address of the sample in function f3 was used, not the
> start address.
> 33.55%                  [unknown]          [.] 0x0000000000400554
> 48.13%                  [unknown]          [.] 0x000000000040056b

But I think it's because it failed to find a symbol (f3) for some
reason.  The symbol sort key will use the ip address only if it didn't
find a symbol for the address.  Please see sort__sym_cmp (and
_sort__sym_cmp too).

Thanks,
Namhyung


>
> [perf-test]#./perf diff -s dso,symbol --compute ratio ../../../v1_1_6perf.data
> ../../../v2_1_7perf.data
> # Event 'cycles'
> #
> # Baseline           Ratio  Shared Object      Symbol
> # ........  ..............  .................  .........................
> #
>                             [kernel.kallsyms]  [k] __update_cpu_load
>                             [kernel.kallsyms]  [k] mod_timer
>      0.01%                  [kernel.kallsyms]  [k] native_write_msr_safe
>      0.01%                  [kernel.kallsyms]  [k] notifier_call_chain
>      0.01%                  [kernel.kallsyms]  [k] perf_event_task_tick
>                             [kernel.kallsyms]  [k] rt_mutex_slowlock
>      0.01%                  [kernel.kallsyms]  [k] run_posix_cpu_timers
>      0.01%                  [kernel.kallsyms]  [k] run_timer_softirq
>      0.01%                  [kernel.kallsyms]  [k] trigger_load_balance
>      0.01%                  [kernel.kallsyms]  [k] update_vsyscall
>                             [kernel.kallsyms]  [k] wait_for_common
>                             [kernel.kallsyms]  [k] 0x000000000028482a
>      0.05%                  [unknown]          [.] 0x0000000000400540
>      0.04%                  [unknown]          [.] 0x0000000000400541
>      0.03%                  [unknown]          [.] 0x000000000040054b
>      0.04%                  [unknown]          [.] 0x0000000000400552
>     33.55%                  [unknown]          [.] 0x0000000000400554
>      1.22%                  [unknown]          [.] 0x000000000040055a
>      8.00%                  [unknown]          [.] 0x000000000040055e
>      0.02%                  [unknown]          [.] 0x0000000000400562
>      8.41%                  [unknown]          [.] 0x0000000000400564
>     48.13%                  [unknown]          [.] 0x000000000040056b
>      0.16%                  [unknown]          [.] 0x0000000000400570
>      0.17%                  [unknown]          [.] 0x0000000000400571
>                             [unknown]          [.] 0x0000000000400580
>                             [unknown]          [.] 0x0000000000400581
>      0.01%                  [unknown]          [.] 0x0000000000400583
>      0.01%                  [unknown]          [.] 0x0000000000400588
>                             [unknown]          [.] 0x000000000040058b
>      0.01%     1240.990221  [unknown]          [.] 0x000000000040058d
>                             [unknown]          [.] 0x0000000000400590
>      0.06%                  [unknown]          [.] 0x0000000000400591
>                             [unknown]          [.] 0x0000000000400593
>      0.04%                  [unknown]          [.] 0x0000000000400595
>      0.01%     1240.603148  [unknown]          [.] 0x0000000000400597
>                             [unknown]          [.] 0x000000000040059b
>                             [unknown]          [.] 0x000000000040059d
>                             [unknown]          [.] 0x00000000004005a1
>                             [unknown]          [.] 0x00000000004005a5
>                             [unknown]          [.] 0x00000000004005a7
>                             [unknown]          [.] 0x00000000004005a8
>                             [unknown]          [.] 0x00000000004005aa
>                             [unknown]          [.] 0x00000000004005ba
>                             [unknown]          [.] 0x00000000004005bf
>                             [unknown]          [.] 0x00000000004005c4
>                             [unknown]          [.] 0x00000000004005c9
>                             [unknown]          [.] 0x00000000004005ce
>                             [unknown]          [.] 0x00000000004005d2
>                             [unknown]          [.] 0x00000000004005d6
>                             [unknown]          [.] 0x00000000004005d8
>                             [unknown]          [.] 0x00000000004005f5
>
>
> [perf-test]#objdump -d tchain_edit_1
>
> 0000000000400540 <f3>:
>   400540:       55                      push   %rbp
>   400541:       48 89 e5                mov    %rsp,%rbp
>   400544:       c7 45 f8 e8 03 00 00    movl   $0x3e8,-0x8(%rbp)
>   40054b:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)
>   400552:       eb 14                   jmp    400568 <f3+0x28>
>   400554:       8b 45 fc                mov    -0x4(%rbp),%eax
>   400557:       83 e0 01                and    $0x1,%eax
>   40055a:       85 c0                   test   %eax,%eax
>   40055c:       74 06                   je     400564 <f3+0x24>
>   40055e:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
>   400562:       eb 04                   jmp    400568 <f3+0x28>
>   400564:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
>   400568:       8b 45 fc                mov    -0x4(%rbp),%eax
>   40056b:       3b 45 f8                cmp    -0x8(%rbp),%eax
>   40056e:       7c e4                   jl     400554 <f3+0x14>
>   400570:       5d                      pop    %rbp
>   400571:       c3                      retq
>
>
> Thanks,
> Kan
>
>> Thanks,
>> Namhyung



-- 
Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-06  7:32         ` Namhyung Kim
@ 2014-11-06 14:16           ` Liang, Kan
  2014-11-10  5:00             ` Namhyung Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Liang, Kan @ 2014-11-06 14:16 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: acme, jolsa, linux-kernel, andi

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 7196 bytes --]

> Hi Kan,
> 
> On Thu, Nov 6, 2014 at 2:28 AM, Liang, Kan <kan.liang@intel.com> wrote:
> >
> >> Hi Kan,
> >>
> >> On Tue, 4 Nov 2014 17:07:43 +0000, Kan Liang wrote:
> >> >> What about setting the
> >> >> sort_sym.se_collapse in data_process() so that hists__match() can
> >> >> use symbol names?
> >> >
> >> > Yes, we can set it if we only do function level diff. But I'd like
> >> > to keep both. So I defined two sort keys.
> >> > "symbol" means "symbol address executed at the time of sample "
> >> > "symbol_name" means "name of function executed at the time of
> >> sample"
> >>
> >> Hmm.. I don't think the symbol sort key provides the instruction
> >> level diff that you want.  If it finds a symbol it just use the start
> >> address of the symbol, not the exact address of the sample.  Am I
> missing something?
> >>
> >
> > No, the meaning of symbol in perf diff is different as in perf report.
> > It uses the exact address of the sample.
> 
> Hmm.. are you sure?  I think it uses same code..
> 
Yes, they uses the same code except event_op mmap2.
> 
> >
> > Here is current perf diff result and objdump fragment of the first binary.
> > You can see the exact address of the sample in function f3 was used,
> > not the start address.
> > 33.55%                  [unknown]          [.] 0x0000000000400554
> > 48.13%                  [unknown]          [.] 0x000000000040056b
> 
> But I think it's because it failed to find a symbol (f3) for some reason.  The
> symbol sort key will use the ip address only if it didn't find a symbol for the
> address.  Please see sort__sym_cmp (and _sort__sym_cmp too).
> 

The diff code doesn’t define event_op mmap2, so it fails to get the symbol.
You are right, it’s ip address. The meaning of symbol for diff and report should
be same.

But I still think the author intends to compare the ip address. Low granularity is
still useful for debugging the scaling issue. So we'd better keep both of them,
and give ip address sorting a proper key name. 

"ip" means "IP address executed at the time of sample "
"symbol" means "name of function executed at the time of sample"

What about the attached patch?

Thanks,
Kan

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 25114c9..3063fbd 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 static struct perf_tool tool = {
        .sample = diff__process_sample_event,
        .mmap   = perf_event__process_mmap,
+       .mmap2  = perf_event__process_mmap2,
        .comm   = perf_event__process_comm,
        .exit   = perf_event__process_exit,
        .fork   = perf_event__process_fork,
@@ -743,7 +744,7 @@ static const struct option options[] = {
        OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
                   "only consider these symbols"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-                  "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+                  "sort by key(s): pid, comm, dso, symbol, ip, parent, cpu, srcline, ..."
                   " Please refer the man page for the complete list."),
        OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                   "separator for columns, no spaces will be added between "
@@ -1164,6 +1165,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
        if (setup_sorting() < 0)
                usage_with_options(diff_usage, options);

+       if (sort__has_ip)
+               tool.mmap2 = NULL;
+
        setup_pager();

        sort__setup_elide(NULL);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9402885..a59f389 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -20,6 +20,7 @@ int           have_ignore_callees = 0;
 int            sort__need_collapse = 0;
 int            sort__has_parent = 0;
 int            sort__has_sym = 0;
+int            sort__has_ip = 0;
 int            sort__has_dso = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;

@@ -272,9 +273,32 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
                                         he->level, bf, size, width);
 }

+static int64_t
+sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       const char *sym_name_l, *sym_name_r;
+
+       if (!left->ms.sym || !right->ms.sym)
+               return cmp_null(right->ms.sym, left->ms.sym);
+
+       sym_name_l = left->ms.sym->name;
+       sym_name_r = right->ms.sym->name;
+
+       return strcmp(sym_name_l, sym_name_r);
+}
+
 struct sort_entry sort_sym = {
        .se_header      = "Symbol",
        .se_cmp         = sort__sym_cmp,
+       .se_collapse    = sort__sym_name_cmp,
+       .se_sort        = sort__sym_sort,
+       .se_snprintf    = hist_entry__sym_snprintf,
+       .se_width_idx   = HISTC_SYMBOL,
+};
+
+struct sort_entry sort_ip = {
+       .se_header      = "IP address",
+       .se_cmp         = sort__sym_cmp,
        .se_sort        = sort__sym_sort,
        .se_snprintf    = hist_entry__sym_snprintf,
        .se_width_idx   = HISTC_SYMBOL,
@@ -1170,6 +1194,7 @@ static struct sort_dimension common_sort_dimensions[] = {
        DIM(SORT_COMM, "comm", sort_comm),
        DIM(SORT_DSO, "dso", sort_dso),
        DIM(SORT_SYM, "symbol", sort_sym),
+       DIM(SORT_IP, "ip", sort_ip),
        DIM(SORT_PARENT, "parent", sort_parent),
        DIM(SORT_CPU, "cpu", sort_cpu),
        DIM(SORT_SRCLINE, "srcline", sort_srcline),
@@ -1430,6 +1455,8 @@ int sort_dimension__add(const char *tok)
                        sort__has_parent = 1;
                } else if (sd->entry == &sort_sym) {
                        sort__has_sym = 1;
+               } else if (sd->entry == &sort_ip) {
+                       sort__has_ip = 1;
                } else if (sd->entry == &sort_dso) {
                        sort__has_dso = 1;
                }
@@ -1809,6 +1836,7 @@ void reset_output_field(void)
        sort__need_collapse = 0;
        sort__has_parent = 0;
        sort__has_sym = 0;
+       sort__has_ip = 0;
        sort__has_dso = 0;

        field_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..f00900a 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,10 +34,12 @@ extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
+extern int sort__has_ip;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
 extern struct sort_entry sort_sym;
+extern struct sort_entry sort_ip;
 extern struct sort_entry sort_parent;
 extern struct sort_entry sort_dso_from;
 extern struct sort_entry sort_dso_to;
@@ -161,6 +163,7 @@ enum sort_type {
        SORT_COMM,
        SORT_DSO,
        SORT_SYM,
+       SORT_IP,
        SORT_PARENT,
        SORT_CPU,
        SORT_SRCLINE,
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/1] perf tools: perf diff for different binaries
  2014-11-06 14:16           ` Liang, Kan
@ 2014-11-10  5:00             ` Namhyung Kim
  0 siblings, 0 replies; 11+ messages in thread
From: Namhyung Kim @ 2014-11-10  5:00 UTC (permalink / raw)
  To: Liang, Kan; +Cc: acme, jolsa, linux-kernel, andi

Hi Kan,

On Thu, 6 Nov 2014 14:16:23 +0000, Kan Liang wrote:
> The diff code doesn’t define event_op mmap2, so it fails to get the symbol.

Looks like a bug in perf diff code.  (It's too easy to miss... :-/ )


> You are right, it’s ip address. The meaning of symbol for diff and report should
> be same.

Agreed.


>
> But I still think the author intends to compare the ip address. Low granularity is
> still useful for debugging the scaling issue. So we'd better keep both of them,
> and give ip address sorting a proper key name. 
>
> "ip" means "IP address executed at the time of sample "
> "symbol" means "name of function executed at the time of sample"

I think the term 'IP address' is confusing since users might expect
network address.


>
> What about the attached patch?
>
> Thanks,
> Kan
>
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 25114c9..3063fbd 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
>  static struct perf_tool tool = {
>         .sample = diff__process_sample_event,
>         .mmap   = perf_event__process_mmap,
> +       .mmap2  = perf_event__process_mmap2,
>         .comm   = perf_event__process_comm,
>         .exit   = perf_event__process_exit,
>         .fork   = perf_event__process_fork,

Please send it as a separate fix.


> @@ -743,7 +744,7 @@ static const struct option options[] = {
>         OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
>                    "only consider these symbols"),
>         OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
> -                  "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
> +                  "sort by key(s): pid, comm, dso, symbol, ip, parent, cpu, srcline, ..."

With this, you also need to update the documentation.


>                    " Please refer the man page for the complete list."),
>         OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
>                    "separator for columns, no spaces will be added between "
> @@ -1164,6 +1165,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
>         if (setup_sorting() < 0)
>                 usage_with_options(diff_usage, options);
>
> +       if (sort__has_ip)
> +               tool.mmap2 = NULL;
> +

I don't think this is the right thing to do.  And I guess you need to
identify symbols anyway.


>         setup_pager();
>
>         sort__setup_elide(NULL);
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 9402885..a59f389 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -20,6 +20,7 @@ int           have_ignore_callees = 0;
>  int            sort__need_collapse = 0;
>  int            sort__has_parent = 0;
>  int            sort__has_sym = 0;
> +int            sort__has_ip = 0;

Why is this needed?


>  int            sort__has_dso = 0;
>  enum sort_mode sort__mode = SORT_MODE__NORMAL;
>
> @@ -272,9 +273,32 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
>                                          he->level, bf, size, width);
>  }
>
> +static int64_t
> +sort__sym_name_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +       const char *sym_name_l, *sym_name_r;
> +
> +       if (!left->ms.sym || !right->ms.sym)
> +               return cmp_null(right->ms.sym, left->ms.sym);
> +
> +       sym_name_l = left->ms.sym->name;
> +       sym_name_r = right->ms.sym->name;
> +
> +       return strcmp(sym_name_l, sym_name_r);
> +}

Looks like doing same thing as sort__sym_sort().


> +
>  struct sort_entry sort_sym = {
>         .se_header      = "Symbol",
>         .se_cmp         = sort__sym_cmp,
> +       .se_collapse    = sort__sym_name_cmp,

This will change the behavior of perf report somewhat.


> +       .se_sort        = sort__sym_sort,
> +       .se_snprintf    = hist_entry__sym_snprintf,
> +       .se_width_idx   = HISTC_SYMBOL,
> +};
> +
> +struct sort_entry sort_ip = {
> +       .se_header      = "IP address",
> +       .se_cmp         = sort__sym_cmp,

I think what you need is "symbol+offset" comparison so the .se_cmp
should compare hist_entry->ip (ie. mapped addr) instead of sym->start.

But I'm still not sure how it's meaningful since a bit of change will
result in changing offsets so that it cannot find a match.

Thanks,
Namhyung


>         .se_sort        = sort__sym_sort,
>         .se_snprintf    = hist_entry__sym_snprintf,
>         .se_width_idx   = HISTC_SYMBOL,
> @@ -1170,6 +1194,7 @@ static struct sort_dimension common_sort_dimensions[] = {
>         DIM(SORT_COMM, "comm", sort_comm),
>         DIM(SORT_DSO, "dso", sort_dso),
>         DIM(SORT_SYM, "symbol", sort_sym),
> +       DIM(SORT_IP, "ip", sort_ip),
>         DIM(SORT_PARENT, "parent", sort_parent),
>         DIM(SORT_CPU, "cpu", sort_cpu),
>         DIM(SORT_SRCLINE, "srcline", sort_srcline),
> @@ -1430,6 +1455,8 @@ int sort_dimension__add(const char *tok)
>                         sort__has_parent = 1;
>                 } else if (sd->entry == &sort_sym) {
>                         sort__has_sym = 1;
> +               } else if (sd->entry == &sort_ip) {
> +                       sort__has_ip = 1;
>                 } else if (sd->entry == &sort_dso) {
>                         sort__has_dso = 1;
>                 }
> @@ -1809,6 +1836,7 @@ void reset_output_field(void)
>         sort__need_collapse = 0;
>         sort__has_parent = 0;
>         sort__has_sym = 0;
> +       sort__has_ip = 0;
>         sort__has_dso = 0;
>
>         field_order = NULL;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index c03e4ff..f00900a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -34,10 +34,12 @@ extern int have_ignore_callees;
>  extern int sort__need_collapse;
>  extern int sort__has_parent;
>  extern int sort__has_sym;
> +extern int sort__has_ip;
>  extern enum sort_mode sort__mode;
>  extern struct sort_entry sort_comm;
>  extern struct sort_entry sort_dso;
>  extern struct sort_entry sort_sym;
> +extern struct sort_entry sort_ip;
>  extern struct sort_entry sort_parent;
>  extern struct sort_entry sort_dso_from;
>  extern struct sort_entry sort_dso_to;
> @@ -161,6 +163,7 @@ enum sort_type {
>         SORT_COMM,
>         SORT_DSO,
>         SORT_SYM,
> +       SORT_IP,
>         SORT_PARENT,
>         SORT_CPU,
>         SORT_SRCLINE,

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-11-10  5:00 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-31 12:06 [PATCH 1/1] perf tools: perf diff for different binaries kan.liang
2014-10-31 20:03 ` Andi Kleen
2014-11-03 10:40 ` Jiri Olsa
2014-11-03 14:52   ` Liang, Kan
2014-11-04  5:41 ` Namhyung Kim
2014-11-04 17:07   ` Liang, Kan
2014-11-05  6:32     ` Namhyung Kim
2014-11-05 17:28       ` Liang, Kan
2014-11-06  7:32         ` Namhyung Kim
2014-11-06 14:16           ` Liang, Kan
2014-11-10  5:00             ` Namhyung Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).