All of lore.kernel.org
 help / color / mirror / Atom feed
From: Adrian Hunter <adrian.hunter@intel.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>, Andi Kleen <ak@linux.intel.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 7/9] perf intel-pt: Add support for synthesizing branch stacks for regular events
Date: Wed, 29 Apr 2020 18:07:49 +0300	[thread overview]
Message-ID: <20200429150751.12570-8-adrian.hunter@intel.com> (raw)
In-Reply-To: <20200429150751.12570-1-adrian.hunter@intel.com>

Use the new thread_stack__br_sample_late() function to create a thread
stack for regular events.

Example:

 # perf record --kcore --aux-sample -e '{intel_pt//,cycles:ppp}' -c 10000 uname
 Linux
 [ perf record: Woken up 2 times to write data ]
 [ perf record: Captured and wrote 0.743 MB perf.data ]
 # perf report --itrace=Le --stdio | head -30 | tail -18

 # Samples: 11K of event 'cycles:ppp'
 # Event count (approx.): 11648
 #
 # Overhead  Command  Source Shared Object  Source Symbol                                   Target Symbol                                   Basic Block Cycles
 # ........  .......  ....................  ..............................................  ..............................................  ..................
 #
      5.49%  uname    libc-2.30.so          [.] _dl_addr                                    [.] _dl_addr                                    -
      2.41%  uname    ld-2.30.so            [.] _dl_relocate_object                         [.] _dl_relocate_object                         -
      2.31%  uname    ld-2.30.so            [.] do_lookup_x                                 [.] do_lookup_x                                 -
      2.17%  uname    [kernel.kallsyms]     [k] unmap_page_range                            [k] unmap_page_range                            -
      2.05%  uname    ld-2.30.so            [k] _dl_start                                   [k] _dl_start                                   -
      1.97%  uname    ld-2.30.so            [.] _dl_lookup_symbol_x                         [.] _dl_lookup_symbol_x                         -
      1.94%  uname    [kernel.kallsyms]     [k] filemap_map_pages                           [k] filemap_map_pages                           -
      1.60%  uname    [kernel.kallsyms]     [k] __handle_mm_fault                           [k] __handle_mm_fault                           -
      1.44%  uname    [kernel.kallsyms]     [k] page_add_file_rmap                          [k] page_add_file_rmap                          -
      1.12%  uname    [kernel.kallsyms]     [k] vma_interval_tree_insert                    [k] vma_interval_tree_insert                    -
      0.94%  uname    [kernel.kallsyms]     [k] perf_iterate_ctx                            [k] perf_iterate_ctx                            -

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/util/intel-pt.c | 73 ++++++++++++++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 248a39fd4d0e..7fb807b91f73 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -71,6 +71,7 @@ struct intel_pt {
 	bool use_thread_stack;
 	bool callstack;
 	unsigned int br_stack_sz;
+	unsigned int br_stack_sz_plus;
 	int have_sched_switch;
 	u32 pmu_type;
 	u64 kernel_start;
@@ -129,6 +130,7 @@ struct intel_pt {
 	unsigned int range_cnt;
 
 	struct ip_callchain *chain;
+	struct branch_stack *br_stack;
 };
 
 enum switch_state {
@@ -910,6 +912,44 @@ static void intel_pt_add_callchain(struct intel_pt *pt,
 	sample->callchain = pt->chain;
 }
 
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+	size_t sz = sizeof(struct branch_stack);
+
+	sz += pt->br_stack_sz * sizeof(struct branch_entry);
+	return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+	}
+
+	pt->br_stack = intel_pt_alloc_br_stack(pt);
+	if (!pt->br_stack)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+				  struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+				     pt->br_stack_sz, sample->ip,
+				     pt->kernel_start);
+
+	sample->branch_stack = pt->br_stack;
+}
+
 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
@@ -928,10 +968,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	}
 
 	if (pt->synth_opts.last_branch) {
-		size_t sz = sizeof(struct branch_stack);
-
-		sz += pt->br_stack_sz * sizeof(struct branch_entry);
-		ptq->last_branch = zalloc(sz);
+		ptq->last_branch = intel_pt_alloc_br_stack(pt);
 		if (!ptq->last_branch)
 			goto out_free;
 	}
@@ -1962,7 +1999,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
 				    state->from_ip, state->to_ip, ptq->insn_len,
 				    state->trace_nr, pt->callstack,
-				    pt->br_stack_sz,
+				    pt->br_stack_sz_plus,
 				    pt->mispred_all);
 	} else {
 		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
@@ -2608,6 +2645,8 @@ static int intel_pt_process_event(struct perf_session *session,
 	if (event->header.type == PERF_RECORD_SAMPLE) {
 		if (pt->synth_opts.add_callchain && !sample->callchain)
 			intel_pt_add_callchain(pt, sample);
+		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+			intel_pt_add_br_stack(pt, sample);
 	}
 
 	if (event->header.type == PERF_RECORD_AUX &&
@@ -3369,13 +3408,33 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 			goto err_delete_thread;
 	}
 
-	if (pt->synth_opts.last_branch)
+	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
 		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+		pt->br_stack_sz_plus = pt->br_stack_sz;
+	}
+
+	if (pt->synth_opts.add_last_branch) {
+		err = intel_pt_br_stack_init(pt);
+		if (err)
+			goto err_delete_thread;
+		/*
+		 * Additional branch stack size to cater for tracing from the
+		 * actual sample ip to where the sample time is recorded.
+		 * Measured at about 200 branches, but generously set to 1024.
+		 * If kernel space is not being traced, then add just 1 for the
+		 * branch to kernel space.
+		 */
+		if (intel_pt_tracing_kernel(pt))
+			pt->br_stack_sz_plus += 1024;
+		else
+			pt->br_stack_sz_plus += 1;
+	}
 
 	pt->use_thread_stack = pt->synth_opts.callchain ||
 			       pt->synth_opts.add_callchain ||
 			       pt->synth_opts.thread_stack ||
-			       pt->synth_opts.last_branch;
+			       pt->synth_opts.last_branch ||
+			       pt->synth_opts.add_last_branch;
 
 	pt->callstack = pt->synth_opts.callchain ||
 			pt->synth_opts.add_callchain ||
-- 
2.17.1


  parent reply	other threads:[~2020-04-29 15:07 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-29 15:07 [PATCH 0/9] perf intel-pt: Add support for synthesizing branch stacks for regular events Adrian Hunter
2020-04-29 15:07 ` [PATCH 1/9] perf thread-stack: Add branch stack support Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 2/9] perf intel-pt: Consolidate thread-stack use condition Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 3/9] perf intel-pt: Change branch stack support to use thread-stacks Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 4/9] perf auxtrace: Add option to synthesize branch stack for regular events Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 5/9] perf evsel: Add support for synthesized branch stack sample type Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 6/9] perf thread-stack: Add thread_stack__br_sample_late() Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` Adrian Hunter [this message]
2020-05-08 13:04   ` [tip: perf/core] perf intel-pt: Add support for synthesizing branch stacks for regular events tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 8/9] perf intel-pt: Update documentation about itrace G and L options Adrian Hunter
2020-04-29 23:03   ` Andi Kleen
2020-04-30  5:36     ` Adrian Hunter
2020-04-30 12:28       ` Andi Kleen
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 9/9] perf intel-pt: Update documentation about using /proc/kcore Adrian Hunter
2020-05-08 13:04   ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 18:29 ` [PATCH 0/9] perf intel-pt: Add support for synthesizing branch stacks for regular events Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200429150751.12570-8-adrian.hunter@intel.com \
    --to=adrian.hunter@intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.