All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Clark Williams <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Kan Liang <kan.liang@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>, Jiri Olsa <jolsa@redhat.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexey Budankov <alexey.budankov@linux.intel.com>,
	Mathieu Poirier <mathieu.poirier@linaro.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Pavel Gerasimov <pavel.gerasimov@intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ravi Bangoria <ravi.bangoria@linux.ibm.com>,
	Stephane Eranian <eranian@google.com>,
	Vitaly Slobodskoy <vitaly.slobodskoy@intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 54/60] perf callchain: Save previous cursor nodes for LBR stitching approach
Date: Mon, 20 Apr 2020 08:53:10 -0300	[thread overview]
Message-ID: <20200420115316.18781-55-acme@kernel.org> (raw)
In-Reply-To: <20200420115316.18781-1-acme@kernel.org>

From: Kan Liang <kan.liang@linux.intel.com>

The cursor nodes generated from a sample are eventually added to the
callchain. To avoid regenerating cursor nodes from previous samples,
the previous cursor nodes are also saved for the LBR stitching approach.

Some options, e.g. hide-unresolved, may hide some LBRs.  Add a 'valid'
field to struct callchain_cursor_node to indicate this case. The LBR
stitching approach will later append only the valid cursor nodes from
previous samples.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Pavel Gerasimov <pavel.gerasimov@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vitaly Slobodskoy <vitaly.slobodskoy@intel.com>
Link: http://lore.kernel.org/lkml/20200319202517.23423-12-kan.liang@linux.intel.com
[ Use zfree() instead of open coded equivalent, and use it when freeing members of structs ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.h |  3 ++
 tools/perf/util/machine.c   | 76 +++++++++++++++++++++++++++++++++++--
 tools/perf/util/thread.h    |  8 ++++
 3 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..cb33cd42ff43 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,6 +143,9 @@ struct callchain_cursor_node {
 	u64				ip;
 	struct map_symbol		ms;
 	const char			*srcline;
+	/* Indicate valid cursor node for LBR stitch */
+	bool				valid;
+
 	bool				branch;
 	struct branch_flags		branch_flags;
 	u64				branch_from;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a54ca09a1d00..737dee723a57 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2224,6 +2224,31 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
 	return 0;
 }
 
+static void save_lbr_cursor_node(struct thread *thread,
+				 struct callchain_cursor *cursor,
+				 int idx)
+{
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+	if (!lbr_stitch)
+		return;
+
+	if (cursor->pos == cursor->nr) {
+		lbr_stitch->prev_lbr_cursor[idx].valid = false;
+		return;
+	}
+
+	if (!cursor->curr)
+		cursor->curr = cursor->first;
+	else
+		cursor->curr = cursor->curr->next;
+	memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+	       sizeof(struct callchain_cursor_node));
+
+	lbr_stitch->prev_lbr_cursor[idx].valid = true;
+	cursor->pos++;
+}
+
 static int lbr_callchain_add_lbr_ip(struct thread *thread,
 				    struct callchain_cursor *cursor,
 				    struct perf_sample *sample,
@@ -2240,6 +2265,21 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
 	int err, i;
 	u64 ip;
 
+	/*
+	 * The curr and pos are not used in writing session. They are cleared
+	 * in callchain_cursor_commit() when the writing session is closed.
+	 * Using curr and pos to track the current cursor node.
+	 */
+	if (thread->lbr_stitch) {
+		cursor->curr = NULL;
+		cursor->pos = cursor->nr;
+		if (cursor->nr) {
+			cursor->curr = cursor->first;
+			for (i = 0; i < (int)(cursor->nr - 1); i++)
+				cursor->curr = cursor->curr->next;
+		}
+	}
+
 	if (callee) {
 		/* Add LBR ip from first entries.to */
 		ip = entries[0].to;
@@ -2252,6 +2292,20 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
 		if (err)
 			return err;
 
+		/*
+		 * The number of cursor node increases.
+		 * Move the current cursor node.
+		 * But does not need to save current cursor node for entry 0.
+		 * It's impossible to stitch the whole LBRs of previous sample.
+		 */
+		if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+			if (!cursor->curr)
+				cursor->curr = cursor->first;
+			else
+				cursor->curr = cursor->curr->next;
+			cursor->pos++;
+		}
+
 		/* Add LBR ip from entries.from one by one. */
 		for (i = 0; i < lbr_nr; i++) {
 			ip = entries[i].from;
@@ -2262,6 +2316,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
 					       *branch_from);
 			if (err)
 				return err;
+			save_lbr_cursor_node(thread, cursor, i);
 		}
 		return 0;
 	}
@@ -2276,6 +2331,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
 				       *branch_from);
 		if (err)
 			return err;
+		save_lbr_cursor_node(thread, cursor, i);
 	}
 
 	/* Add LBR ip from first entries.to */
@@ -2292,7 +2348,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
 	return 0;
 }
 
-static bool alloc_lbr_stitch(struct thread *thread)
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
 {
 	if (thread->lbr_stitch)
 		return true;
@@ -2301,6 +2357,14 @@ static bool alloc_lbr_stitch(struct thread *thread)
 	if (!thread->lbr_stitch)
 		goto err;
 
+	thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+	if (!thread->lbr_stitch->prev_lbr_cursor)
+		goto free_lbr_stitch;
+
+	return true;
+
+free_lbr_stitch:
+	zfree(&thread->lbr_stitch);
 err:
 	pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
 	thread->lbr_stitch_enable = false;
@@ -2319,7 +2383,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 					struct perf_sample *sample,
 					struct symbol **parent,
 					struct addr_location *root_al,
-					int max_stack)
+					int max_stack,
+					unsigned int max_lbr)
 {
 	struct ip_callchain *chain = sample->callchain;
 	int chain_nr = min(max_stack, (int)chain->nr), i;
@@ -2337,7 +2402,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 		return 0;
 
 	if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
-	    alloc_lbr_stitch(thread)) {
+	    (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
 		lbr_stitch = thread->lbr_stitch;
 
 		memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
@@ -2417,8 +2482,11 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 		chain_nr = chain->nr;
 
 	if (perf_evsel__has_branch_callstack(evsel)) {
+		struct perf_env *env = perf_evsel__env(evsel);
+
 		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
-						   root_al, max_stack);
+						   root_al, max_stack,
+						   !env ? 0 : env->max_branches);
 		if (err)
 			return (err < 0) ? err : 0;
 	}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 34eb61cee6a4..8456174a52c5 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -15,6 +15,7 @@
 #include <intlist.h>
 #include "rwsem.h"
 #include "event.h"
+#include "callchain.h"
 
 struct addr_location;
 struct map;
@@ -24,6 +25,7 @@ struct unwind_libunwind_ops;
 
 struct lbr_stitch {
 	struct perf_sample		prev_sample;
+	struct callchain_cursor_node	*prev_lbr_cursor;
 };
 
 struct thread {
@@ -154,6 +156,12 @@ static inline bool thread__is_filtered(struct thread *thread)
 
 static inline void thread__free_stitch_list(struct thread *thread)
 {
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+	if (!lbr_stitch)
+		return;
+
+	zfree(&lbr_stitch->prev_lbr_cursor);
 	zfree(&thread->lbr_stitch);
 }
 
-- 
2.21.1


  parent reply	other threads:[~2020-04-20 11:57 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-20 11:52 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 01/60] perf stat: Honour --timeout for forked workloads Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 02/60] perf tools: Synthesize bpf_trampoline/dispatcher ksymbol event Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 03/60] perf machine: Set ksymbol dso as loaded on arrival Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 04/60] perf annotate: Add basic support for bpf_image Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 05/60] capabilities: Introduce CAP_PERFMON to kernel and user space Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 06/60] perf/core: Open access to the core for CAP_PERFMON privileged process Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 07/60] perf/core: open access to probes " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 08/60] perf tools: Support CAP_PERFMON capability Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 09/60] drm/i915/perf: Open access for CAP_PERFMON privileged process Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 10/60] trace/bpf_trace: " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 11/60] powerpc/perf: open " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 12/60] parisc/perf: " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 13/60] drivers/perf: Open " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 14/60] drivers/oprofile: " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 15/60] doc/admin-guide: Update perf-security.rst with CAP_PERFMON information Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 16/60] doc/admin-guide: update kernel.rst " Arnaldo Carvalho de Melo
2020-04-20 11:52   ` [Intel-gfx] " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 17/60] perf script: Simplify auxiliary event printing functions Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 18/60] perf bench: Add event synthesis benchmark Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 19/60] tools api fs: Make xxx__mountpoint() more scalable Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 20/60] perf synthetic-events: save 4kb from 2 stack frames Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 21/60] perf expr: Add expr_ prefix for parse_ctx and parse_id Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 22/60] perf expr: Add expr_scanner_ctx object Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 23/60] perf metrictroup: Split the metricgroup__add_metric function Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52   ` Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 24/60] perf script: Add flamegraph.py script Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 25/60] perf auxtrace: Add ->evsel_is_auxtrace() callback Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 26/60] perf intel-pt: Implement " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 27/60] perf intel-bts: " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 28/60] perf arm-spe: " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 29/60] perf cs-etm: " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 30/60] perf s390-cpumsf: " Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 31/60] perf auxtrace: For reporting purposes, un-group AUX area event Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 32/60] perf auxtrace: Add an option to synthesize callchains for regular events Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 33/60] perf thread-stack: Add thread_stack__sample_late() Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 34/60] perf evsel: Be consistent when looking which evsel PERF_SAMPLE_ bits are set Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 35/60] perf evsel: Add support for synthesized sample type Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 36/60] perf intel-pt: Add support for synthesizing callchains for regular events Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 37/60] perf evsel: Move and globalize perf_evsel__find_pmu() and perf_evsel__is_aux_event() Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 38/60] perf evlist: Move leader-sampling configuration Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 39/60] perf evsel: Rearrange perf_evsel__config_leader_sampling() Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 40/60] perf evlist: Allow multiple read formats Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 41/60] perf tools: Add support for leader-sampling with AUX area events Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 42/60] perf stat: Force error in fallback on :k events Arnaldo Carvalho de Melo
2020-04-20 11:52 ` [PATCH 43/60] tools lib traceevent: Take care of return value of asprintf Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 44/60] perf pmu: Add support for PMU capabilities Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 45/60] perf doc: allow ASCIIDOC_EXTRA to be an argument Arnaldo Carvalho de Melo
2020-04-20 11:53   ` Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 46/60] perf parser: Add support to specify rXXX event with pmu Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 47/60] perf header: Support CPU PMU capabilities Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 48/60] perf machine: Remove the indent in resolve_lbr_callchain_sample Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 49/60] perf machine: Refine the function for LBR call stack reconstruction Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 50/60] perf machine: Factor out lbr_callchain_add_kernel_ip() Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 51/60] perf machine: Factor out lbr_callchain_add_lbr_ip() Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 52/60] perf thread: Add a knob for LBR stitch approach Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 53/60] perf thread: Save previous sample for LBR stitching approach Arnaldo Carvalho de Melo
2020-04-20 11:53 ` Arnaldo Carvalho de Melo [this message]
2020-04-20 11:53 ` [PATCH 55/60] perf callchain: Stitch LBR call stack Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 56/60] perf report: Add option to enable the LBR stitching approach Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 57/60] perf script: " Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 58/60] perf top: " Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 59/60] perf c2c: " Arnaldo Carvalho de Melo
2020-04-20 11:53 ` [PATCH 60/60] perf hist: Add fast path for duplicate entries check Arnaldo Carvalho de Melo
2020-04-22 12:09 ` [GIT PULL] perf/core improvements and fixes Ingo Molnar
2020-04-23 21:28   ` Daniel Díaz
2020-04-24 13:07     ` Arnaldo Carvalho de Melo
2020-04-24 14:10       ` Andreas Gerstmayr
2020-05-04 19:07         ` Daniel Díaz
2020-05-05 16:37           ` Arnaldo Carvalho de Melo
2020-05-05 16:57             ` Daniel Díaz
2020-05-05 17:03               ` Arnaldo Carvalho de Melo
2020-05-08 13:04     ` [tip: perf/core] perf flamegraph: Use /bin/bash for report and record scripts tip-bot2 for Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200420115316.18781-55-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexey.budankov@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mathieu.poirier@linaro.org \
    --cc=mingo@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=namhyung@kernel.org \
    --cc=pavel.gerasimov@intel.com \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vitaly.slobodskoy@intel.com \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.