linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf pt: Mark PT return events as "return"
@ 2016-06-10 22:55 Andi Kleen
  2016-06-10 22:55 ` [PATCH 2/2] perf script: Support callindent Andi Kleen
  2016-06-16 12:38 ` [PATCH 1/2] perf pt: Mark PT return events as "return" Adrian Hunter
  0 siblings, 2 replies; 7+ messages in thread
From: Andi Kleen @ 2016-06-10 22:55 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, jolsa, Andi Kleen, adrian.hunter

From: Andi Kleen <ak@linux.intel.com>

With perf script --itrace=cr we can synthesize calls and returns out of
a PT log. However both calls and returns are marked with the same event,
called branches. This makes it difficult to read and post process,
because calls and returns are somewhat different.

Create a separate return event and mark the returns as return.

Cc: adrian.hunter@intel.com
v2:
Add extra filter for returns.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/intel-pt.c | 61 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 137196990012..c72b9074e86e 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -82,9 +82,13 @@ struct intel_pt {
 	u64 instructions_id;
 
 	bool sample_branches;
+	bool sample_returns;
 	u32 branches_filter;
 	u64 branches_sample_type;
+	u64 returns_sample_type;
+	u32 returns_filter;
 	u64 branches_id;
+	u64 returns_id;
 
 	bool sample_transactions;
 	u64 transactions_sample_type;
@@ -960,7 +964,9 @@ static int intel_pt_inject_event(union perf_event *event,
 	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
 }
 
-static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq,
+					bool is_return,
+					u32 filter)
 {
 	int ret;
 	struct intel_pt *pt = ptq->pt;
@@ -971,7 +977,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 		struct branch_entry	entries;
 	} dummy_bs;
 
-	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+	if (filter && !(filter & ptq->flags))
 		return 0;
 
 	if (pt->synth_opts.initial_skip &&
@@ -990,8 +996,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.pid = ptq->pid;
 	sample.tid = ptq->tid;
 	sample.addr = ptq->state->to_ip;
-	sample.id = ptq->pt->branches_id;
-	sample.stream_id = ptq->pt->branches_id;
+	if (is_return) {
+		sample.id = ptq->pt->returns_id;
+		sample.stream_id = ptq->pt->returns_id;
+	} else {
+		sample.id = ptq->pt->branches_id;
+		sample.stream_id = ptq->pt->branches_id;
+	}
 	sample.period = 1;
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
@@ -1014,6 +1025,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
+					    is_return ?
+					    pt->returns_sample_type :
 					    pt->branches_sample_type,
 					    pt->synth_needs_swap);
 		if (ret)
@@ -1241,7 +1254,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
 
 	if (pt->sample_branches) {
-		err = intel_pt_synth_branch_sample(ptq);
+		err = intel_pt_synth_branch_sample(ptq, false,
+						   pt->branches_filter);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_returns) {
+		err = intel_pt_synth_branch_sample(ptq, true,
+						   pt->returns_filter);
 		if (err)
 			return err;
 	}
@@ -1956,7 +1977,33 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		}
 		pt->sample_branches = true;
 		pt->branches_sample_type = attr.sample_type;
-		pt->branches_id = id;
+		pt->branches_id = id++;
+	}
+	if (pt->synth_opts.returns) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
+		pr_debug("Synthesizing 'return' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+			 id, (u64)attr.sample_type);
+		err = intel_pt_synth_event(session, &attr, id);
+		if (err) {
+			pr_err("%s: failed to synthesize 'return' event type\n",
+			       __func__);
+			return err;
+		}
+		pt->sample_returns = true;
+		pt->returns_sample_type = attr.sample_type;
+		pt->returns_id = id;
+		evlist__for_each(evlist, evsel) {
+			if (evsel->id && evsel->id[0] == pt->returns_id) {
+				if (evsel->name)
+					zfree(&evsel->name);
+				evsel->name = strdup("return");
+				break;
+			}
+		}
 	}
 
 	pt->synth_needs_swap = evsel->needs_swap;
@@ -2155,7 +2202,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
 				       PERF_IP_FLAG_TRACE_END;
 	if (pt->synth_opts.returns)
-		pt->branches_filter |= PERF_IP_FLAG_RETURN |
+		pt->returns_filter |= PERF_IP_FLAG_RETURN |
 				       PERF_IP_FLAG_TRACE_BEGIN;
 
 	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] perf script: Support callindent
  2016-06-10 22:55 [PATCH 1/2] perf pt: Mark PT return events as "return" Andi Kleen
@ 2016-06-10 22:55 ` Andi Kleen
  2016-06-16 12:41   ` Adrian Hunter
  2016-06-16 12:38 ` [PATCH 1/2] perf pt: Mark PT return events as "return" Adrian Hunter
  1 sibling, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2016-06-10 22:55 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, jolsa, Andi Kleen, adrian.hunter

From: Andi Kleen <ak@linux.intel.com>

When printing PT instruction traces with perf script
it is rather useful to see some indentation for the call tree. This
patch adds a new callindent field to perf script that prints
spaces for the function call stack depth.

We already have code to track the function call stack for PT,
previously used for the database export. We can reuse that code
directly with minor modifications.

The resulting output is not quite as nice as ftrace yet, but
a lot better than what was there before.

Note there are some corner cases when the thread stack code gets confused
and prints incorrect indentation. Even with that it is fairly useful.

When displaying kernel code traces it is recommended to run as root, as
otherwise perf doesn't understand the kernel addresses properly, and may not
reset the call stack correctly on kernel boundaries.

Example output:

$ perf record -a -e intel_pt// sleep 1
$ perf script --ns -F callindent,time,comm,pid,sym,addr,event --itrace=cr
...
         swapper     0 126327.044742970:             return:      => ffffffff810aa999 cpu_idle_loop
         swapper     0 126327.044742970:           branches:          => ffffffff81525400 cpuidle_reflect
         swapper     0 126327.044742970:             return:              => ffffffff81525400 cpuidle_reflect
         swapper     0 126327.044742970:           branches:          => ffffffff810aa9ce cpu_idle_loop
         swapper     0 126327.044742970:             return:      => ffffffff810aa9ce cpu_idle_loop
         swapper     0 126327.044742970:           branches:          => ffffffff810cb0b0 rcu_idle_exit
         swapper     0 126327.044742970:             return:              => ffffffff810cb0b0 rcu_idle_exit
         swapper     0 126327.044742970:           branches:                  => ffffffff810c99d0 rcu_eqs_exit_common.isra.43
         swapper     0 126327.044742970:             return:                      => ffffffff810c99d0 rcu_eqs_exit_common.isra.43
         swapper     0 126327.044742970:           branches:                  => ffffffff810cb124 rcu_idle_exit
         swapper     0 126327.044742970:             return:              => ffffffff810cb124 rcu_idle_exit
         swapper     0 126327.044742970:           branches:          => ffffffff810aa719 cpu_idle_loop
         swapper     0 126327.044742970:             return:      => ffffffff810aa719 cpu_idle_loop

Cc: adrian.hunter@intel.com
v2: Move get_main_thread in separate patch. Add thread__put.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/builtin-script.c    | 46 ++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/thread-stack.c |  7 +++++++
 tools/perf/util/thread-stack.h |  1 +
 3 files changed, 54 insertions(+)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 46011235af5d..2973c097ec69 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -21,6 +21,7 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/thread-stack.h"
 #include <linux/bitmap.h>
 #include <linux/stringify.h>
 #include "asm/bug.h"
@@ -63,6 +64,7 @@ enum perf_output_field {
 	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
 	PERF_OUTPUT_WEIGHT	    = 1U << 18,
 	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
+	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
 };
 
 struct output_option {
@@ -89,6 +91,7 @@ struct output_option {
 	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
 	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
 	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
+	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -261,6 +264,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					PERF_OUTPUT_WEIGHT))
 		return -EINVAL;
 
+	if (PRINT_FIELD(CALLINDENT) && !PRINT_FIELD(ADDR)) {
+		pr_err("Display of callindent requested, but no branch address\n");
+		return -EINVAL;
+	}
+
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
 		pr_err("Display of symbols requested but neither sample IP nor "
 			   "sample address\nis selected. Hence, no addresses to convert "
@@ -562,6 +570,41 @@ static void print_sample_addr(struct perf_sample *sample,
 	}
 }
 
+static int dummy_call_return(struct call_return *cr __maybe_unused,
+				     void *arg __maybe_unused)
+{
+	return 0;
+}
+
+static void print_sample_callindent(struct perf_sample *sample,
+				    struct perf_evsel *evsel,
+				    struct thread *thread,
+				    struct addr_location *al)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	if (sample_addr_correlates_sym(attr)) {
+		static struct call_return_processor *crp;
+		struct addr_location addr_al;
+		struct thread *main_thread;
+		struct comm *comm;
+
+		if (!crp)
+			crp = call_return_processor__new(dummy_call_return,
+							 NULL);
+		thread__resolve(thread, &addr_al, sample);
+		main_thread = thread__main_thread(al->machine, thread);
+		if (main_thread) {
+			comm = machine__thread_exec_comm(al->machine,
+							 main_thread);
+			thread__put(main_thread);
+		}
+		thread_stack__process(thread, comm, sample, al, &addr_al,
+							0, crp);
+	}
+	thread_stack__print_indent(thread);
+}
+
 static void print_sample_bts(struct perf_sample *sample,
 			     struct perf_evsel *evsel,
 			     struct thread *thread,
@@ -570,6 +613,9 @@ static void print_sample_bts(struct perf_sample *sample,
 	struct perf_event_attr *attr = &evsel->attr;
 	bool print_srcline_last = false;
 
+	if (PRINT_FIELD(CALLINDENT))
+		print_sample_callindent(sample, evsel, thread, al);
+
 	/* print branch_from information */
 	if (PRINT_FIELD(IP)) {
 		unsigned int print_opts = output[attr->type].print_ip_opts;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 825086aa9a08..20e57f263c41 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -616,3 +616,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
 	return err;
 }
+
+void thread_stack__print_indent(struct thread *thread)
+{
+	if (!thread->ts)
+		return;
+	printf("%*s", (int)thread->ts->cnt * 4, "");
+}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index ad44c7944b8e..7fad96415f88 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -87,6 +87,7 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
 			  size_t sz, u64 ip);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
+void thread_stack__print_indent(struct thread *thread);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf pt: Mark PT return events as "return"
  2016-06-10 22:55 [PATCH 1/2] perf pt: Mark PT return events as "return" Andi Kleen
  2016-06-10 22:55 ` [PATCH 2/2] perf script: Support callindent Andi Kleen
@ 2016-06-16 12:38 ` Adrian Hunter
  1 sibling, 0 replies; 7+ messages in thread
From: Adrian Hunter @ 2016-06-16 12:38 UTC (permalink / raw)
  To: Andi Kleen, acme; +Cc: linux-kernel, jolsa, Andi Kleen

On 11/06/16 01:55, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> With perf script --itrace=cr we can synthesize calls and returns out of
> a PT log. However both calls and returns are marked with the same event,
> called branches. This makes it difficult to read and post process,
> because calls and returns are somewhat different.
> 
> Create a separate return event and mark the returns as return.

I tend to think that there should be a new itrace option for that.
Also I have an alternative.  Please let me know what you think.

	http://marc.info/?l=linux-kernel&m=146608073131297

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] perf script: Support callindent
  2016-06-10 22:55 ` [PATCH 2/2] perf script: Support callindent Andi Kleen
@ 2016-06-16 12:41   ` Adrian Hunter
  0 siblings, 0 replies; 7+ messages in thread
From: Adrian Hunter @ 2016-06-16 12:41 UTC (permalink / raw)
  To: Andi Kleen, acme; +Cc: linux-kernel, jolsa, Andi Kleen

On 11/06/16 01:55, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> When printing PT instruction traces with perf script
> it is rather useful to see some indentation for the call tree. This
> patch adds a new callindent field to perf script that prints
> spaces for the function call stack depth.

I think there are advantages to having the decoder feed the thread stack so
I have tried to do that here:

	http://marc.info/?l=linux-kernel&m=146608073131297

Please let me know what you think.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf pt: Mark PT return events as "return"
  2016-05-23 12:05 ` Adrian Hunter
@ 2016-05-24  0:50   ` Andi Kleen
  0 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2016-05-24  0:50 UTC (permalink / raw)
  To: Adrian Hunter; +Cc: Andi Kleen, acme, jolsa, linux-kernel

On Mon, May 23, 2016 at 03:05:08PM +0300, Adrian Hunter wrote:
> On 20/05/16 22:52, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > With perf script --itrace=cr we can synthesize calls and returns out of
> > a PT log. However both calls and returns are marked with the same event,
> > called branches. This makes it difficult to read and post process,
> > because calls and returns are somewhat different.
> 
> Did you consider the sample flags?
> e.g.

True. But reporting it in the event name looks nicer, so I think the patch
is still useful.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf pt: Mark PT return events as "return"
  2016-05-20 19:52 Andi Kleen
@ 2016-05-23 12:05 ` Adrian Hunter
  2016-05-24  0:50   ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Adrian Hunter @ 2016-05-23 12:05 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, jolsa, linux-kernel, Andi Kleen

On 20/05/16 22:52, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> With perf script --itrace=cr we can synthesize calls and returns out of
> a PT log. However both calls and returns are marked with the same event,
> called branches. This makes it difficult to read and post process,
> because calls and returns are somewhat different.

Did you consider the sample flags?
e.g.

perf script --itrace=cre -F pid,tid,comm,dso,event,ip,sym,symoff,addr,flags

...
           uname  1807/1807   branches:u:   bB                   0 [unknown] ([unknown]) =>     7fc78a81e18c brk+0xc (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   br        7fc78a81e1a5 brk+0x25 (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a81d508 _dl_sysdep_start+0x148 (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   bc        7fc78a81d630 _dl_sysdep_start+0x270 (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a81fca0 strlen+0x0 (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   br        7fc78a81fd88 strlen+0xe8 (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a81d635 _dl_sysdep_start+0x275 (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   bc        7fc78a81d52a _dl_sysdep_start+0x16a (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a81e1e0 sbrk+0x0 (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   br        7fc78a81e222 sbrk+0x42 (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a81d52f _dl_sysdep_start+0x16f (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   bc        7fc78a81d563 _dl_sysdep_start+0x1a3 (/lib/x86_64-linux-gnu/ld-2.19.so) =>     7fc78a807910 dl_main+0x0 (/lib/x86_64-linux-gnu/ld-2.19.so)
           uname  1807/1807   branches:u:   bE        7fc78a807929 dl_main+0x19 (/lib/x86_64-linux-gnu/ld-2.19.so) =>                0 [unknown] ([unknown])
...

You can see 'c' and 'r' flags for calls and returns.

> 
> Create a separate return event and mark the returns as return.
> 
> Cc: adrian.hunter@intel.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/util/intel-pt.c | 53 +++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 48 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index ddec87f6e616..25f839e765ef 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -82,9 +82,12 @@ struct intel_pt {
>  	u64 instructions_id;
>  
>  	bool sample_branches;
> +	bool sample_returns;
>  	u32 branches_filter;
>  	u64 branches_sample_type;
> +	u64 returns_sample_type;
>  	u64 branches_id;
> +	u64 returns_id;
>  
>  	bool sample_transactions;
>  	u64 transactions_sample_type;
> @@ -960,7 +963,8 @@ static int intel_pt_inject_event(union perf_event *event,
>  	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
>  }
>  
> -static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
> +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq,
> +					bool is_return)
>  {
>  	int ret;
>  	struct intel_pt *pt = ptq->pt;
> @@ -990,8 +994,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
>  	sample.pid = ptq->pid;
>  	sample.tid = ptq->tid;
>  	sample.addr = ptq->state->to_ip;
> -	sample.id = ptq->pt->branches_id;
> -	sample.stream_id = ptq->pt->branches_id;
> +	if (is_return) {
> +		sample.id = ptq->pt->returns_id;
> +		sample.stream_id = ptq->pt->returns_id;
> +	} else {
> +		sample.id = ptq->pt->branches_id;
> +		sample.stream_id = ptq->pt->branches_id;
> +	}
>  	sample.period = 1;
>  	sample.cpu = ptq->cpu;
>  	sample.flags = ptq->flags;
> @@ -1014,6 +1023,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
>  
>  	if (pt->synth_opts.inject) {
>  		ret = intel_pt_inject_event(event, &sample,
> +					    is_return ?
> +					    pt->returns_sample_type :
>  					    pt->branches_sample_type,
>  					    pt->synth_needs_swap);
>  		if (ret)
> @@ -1241,7 +1252,13 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
>  		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
>  
>  	if (pt->sample_branches) {
> -		err = intel_pt_synth_branch_sample(ptq);
> +		err = intel_pt_synth_branch_sample(ptq, false);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (pt->sample_returns) {
> +		err = intel_pt_synth_branch_sample(ptq, true);
>  		if (err)
>  			return err;
>  	}
> @@ -1956,7 +1973,33 @@ static int intel_pt_synth_events(struct intel_pt *pt,
>  		}
>  		pt->sample_branches = true;
>  		pt->branches_sample_type = attr.sample_type;
> -		pt->branches_id = id;
> +		pt->branches_id = id++;
> +	}
> +	if (pt->synth_opts.returns) {
> +		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
> +		attr.sample_period = 1;
> +		attr.sample_type |= PERF_SAMPLE_ADDR;
> +		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
> +		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
> +		pr_debug("Synthesizing 'return' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
> +			 id, (u64)attr.sample_type);
> +		err = intel_pt_synth_event(session, &attr, id);
> +		if (err) {
> +			pr_err("%s: failed to synthesize 'return' event type\n",
> +			       __func__);
> +			return err;
> +		}
> +		pt->sample_returns = true;
> +		pt->returns_sample_type = attr.sample_type;
> +		pt->returns_id = id;
> +		evlist__for_each(evlist, evsel) {
> +			if (evsel->id && evsel->id[0] == pt->returns_id) {
> +				if (evsel->name)
> +					zfree(&evsel->name);
> +				evsel->name = strdup("return");
> +				break;
> +			}
> +		}
>  	}
>  
>  	pt->synth_needs_swap = evsel->needs_swap;
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] perf pt: Mark PT return events as "return"
@ 2016-05-20 19:52 Andi Kleen
  2016-05-23 12:05 ` Adrian Hunter
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2016-05-20 19:52 UTC (permalink / raw)
  To: acme; +Cc: jolsa, linux-kernel, Andi Kleen, adrian.hunter

From: Andi Kleen <ak@linux.intel.com>

With perf script --itrace=cr we can synthesize calls and returns out of
a PT log. However both calls and returns are marked with the same event,
called branches. This makes it difficult to read and post process,
because calls and returns are somewhat different.

Create a separate return event and mark the returns as return.

Cc: adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/intel-pt.c | 53 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index ddec87f6e616..25f839e765ef 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -82,9 +82,12 @@ struct intel_pt {
 	u64 instructions_id;
 
 	bool sample_branches;
+	bool sample_returns;
 	u32 branches_filter;
 	u64 branches_sample_type;
+	u64 returns_sample_type;
 	u64 branches_id;
+	u64 returns_id;
 
 	bool sample_transactions;
 	u64 transactions_sample_type;
@@ -960,7 +963,8 @@ static int intel_pt_inject_event(union perf_event *event,
 	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
 }
 
-static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq,
+					bool is_return)
 {
 	int ret;
 	struct intel_pt *pt = ptq->pt;
@@ -990,8 +994,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.pid = ptq->pid;
 	sample.tid = ptq->tid;
 	sample.addr = ptq->state->to_ip;
-	sample.id = ptq->pt->branches_id;
-	sample.stream_id = ptq->pt->branches_id;
+	if (is_return) {
+		sample.id = ptq->pt->returns_id;
+		sample.stream_id = ptq->pt->returns_id;
+	} else {
+		sample.id = ptq->pt->branches_id;
+		sample.stream_id = ptq->pt->branches_id;
+	}
 	sample.period = 1;
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
@@ -1014,6 +1023,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
+					    is_return ?
+					    pt->returns_sample_type :
 					    pt->branches_sample_type,
 					    pt->synth_needs_swap);
 		if (ret)
@@ -1241,7 +1252,13 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
 
 	if (pt->sample_branches) {
-		err = intel_pt_synth_branch_sample(ptq);
+		err = intel_pt_synth_branch_sample(ptq, false);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_returns) {
+		err = intel_pt_synth_branch_sample(ptq, true);
 		if (err)
 			return err;
 	}
@@ -1956,7 +1973,33 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		}
 		pt->sample_branches = true;
 		pt->branches_sample_type = attr.sample_type;
-		pt->branches_id = id;
+		pt->branches_id = id++;
+	}
+	if (pt->synth_opts.returns) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
+		pr_debug("Synthesizing 'return' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+			 id, (u64)attr.sample_type);
+		err = intel_pt_synth_event(session, &attr, id);
+		if (err) {
+			pr_err("%s: failed to synthesize 'return' event type\n",
+			       __func__);
+			return err;
+		}
+		pt->sample_returns = true;
+		pt->returns_sample_type = attr.sample_type;
+		pt->returns_id = id;
+		evlist__for_each(evlist, evsel) {
+			if (evsel->id && evsel->id[0] == pt->returns_id) {
+				if (evsel->name)
+					zfree(&evsel->name);
+				evsel->name = strdup("return");
+				break;
+			}
+		}
 	}
 
 	pt->synth_needs_swap = evsel->needs_swap;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-06-16 12:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-10 22:55 [PATCH 1/2] perf pt: Mark PT return events as "return" Andi Kleen
2016-06-10 22:55 ` [PATCH 2/2] perf script: Support callindent Andi Kleen
2016-06-16 12:41   ` Adrian Hunter
2016-06-16 12:38 ` [PATCH 1/2] perf pt: Mark PT return events as "return" Adrian Hunter
  -- strict thread matches above, loose matches on Subject: below --
2016-05-20 19:52 Andi Kleen
2016-05-23 12:05 ` Adrian Hunter
2016-05-24  0:50   ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).