linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kan Liang <kan.liang@intel.com>
To: a.p.zijlstra@chello.nl, eranian@google.com
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com, paulus@samba.org,
	acme@kernel.org, jolsa@redhat.com, ak@linux.intel.com,
	Kan Liang <kan.liang@intel.com>,
	"Yan, Zheng" <zheng.z.yan@intel.com>
Subject: [PATCH V6 11/17] perf, core: expose LBR call stack to user perf tool
Date: Sun, 19 Oct 2014 17:55:06 -0400	[thread overview]
Message-ID: <1413755712-8259-12-git-send-email-kan.liang@intel.com> (raw)
In-Reply-To: <1413755712-8259-1-git-send-email-kan.liang@intel.com>

With the LBR call stack feature enabled, there are two call chain data
sources: the traditional frame pointer and the LBR call stack.
This patch extends the perf_callchain_entry struct to mark the available
call chain sources.
The frame pointer call chain is still output in the PERF_SAMPLE_CALLCHAIN
data format.
The LBR call stack data is output in the PERF_SAMPLE_BRANCH_STACK data
format.

Note: The LBR call stack is only available for the user callchain. The
kernel callchain is always obtained from frame pointers.
The user space perf tool also needs to be changed to handle these two
sources.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c           |  7 +++++++
 arch/x86/kernel/cpu/perf_event_intel.c     |  2 +-
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |  2 +-
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |  2 ++
 include/linux/perf_event.h                 | 14 +++++++++++++-
 kernel/events/callchain.c                  |  1 +
 kernel/events/core.c                       | 22 +++++++++++++++++-----
 7 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f94a618e..8043526 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2049,6 +2049,10 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		perf_callchain_store(entry, cs_base + frame.return_address);
 		fp = compat_ptr(ss_base + frame.next_frame);
 	}
+
+	if (fp != compat_ptr(regs->bp))
+		entry->source |= PERF_FP_CALLCHAIN;
+
 	return 1;
 }
 #else
@@ -2101,6 +2105,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		perf_callchain_store(entry, frame.return_address);
 		fp = frame.next_frame;
 	}
+
+	if (fp != (void __user *)regs->bp)
+		entry->source |= PERF_FP_CALLCHAIN;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 50bb51d..2808267 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1533,7 +1533,7 @@ again:
 
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
-		if (has_branch_stack(event))
+		if (needs_branch_stack(event))
 			data.br_stack = &cpuc->lbr_stack;
 
 		if (perf_event_overflow(event, &data, regs))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b1553d0..5b2d2b3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -907,7 +907,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 			data.txn = intel_hsw_transaction(pebs);
 	}
 
-	if (has_branch_stack(event))
+	if (needs_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
 	if (perf_event_overflow(event, &data, &regs))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 1908875..6e473c9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -745,6 +745,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	int i, j, type;
 	bool compress = false;
 
+	cpuc->lbr_stack.user_callstack = branch_user_callstack(br_sel);
+
 	/* if sampling all branches, then nothing to filter */
 	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
 		return;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 285776a..fd1936f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -55,7 +55,16 @@ struct perf_guest_info_callbacks {
 #include <linux/workqueue.h>
 #include <asm/local.h>
 
+/*
+ * From Haswell, the existing Last Branch Record facility can
+ * also be used to record call chains.
+ * source: indicates the available call chains source.
+ */
+#define	PERF_FP_CALLCHAIN		0x01
+#define	PERF_LBR_CALLCHAIN		0x02
+
 struct perf_callchain_entry {
+	__u64				source;
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
 };
@@ -67,7 +76,9 @@ struct perf_raw_record {
 
 /*
  * branch stack layout:
- *  nr: number of taken branches stored in entries[]
+ * user_callstack: LBR is enhanced to support call stack profiling.
+ * user_callstack indicates if it's call stack info.
+ * nr: number of taken branches stored in entries[]
  *
  * Note that nr can vary from sample to sample
  * branches (to, from) are stored from most recent
@@ -75,6 +86,7 @@ struct perf_raw_record {
  * recent branch.
  */
 struct perf_branch_stack {
+	bool				user_callstack;
 	__u64				nr;
 	struct perf_branch_entry	entries[0];
 };
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index f2a88de..69fab7c 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -175,6 +175,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	if (!entry)
 		goto exit_put;
 
+	entry->source = 0;
 	entry->nr = 0;
 
 	if (kernel && !user_mode(regs)) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c8e367c..84f9885 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4793,7 +4793,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (data->callchain) {
-			int size = 1;
+			int size = 2;
 
 			if (data->callchain)
 				size += data->callchain->nr;
@@ -4824,7 +4824,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
-	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+	/* LBR can be used for call stack, so it may be enabled implicitly. */
+	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) ||
+			(data->br_stack && data->br_stack->user_callstack)) {
 		if (data->br_stack) {
 			size_t size;
 
@@ -4908,13 +4910,21 @@ void perf_prepare_sample(struct perf_event_header *header,
 		data->ip = perf_instruction_pointer(regs);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
+		int size = 2;
 
 		data->callchain = perf_callchain(event, regs);
 
-		if (data->callchain)
+		if (data->callchain) {
 			size += data->callchain->nr;
 
+			if (data->br_stack &&
+			    data->br_stack->user_callstack &&
+			    !(sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+			    !(sample_type & PERF_SAMPLE_STACK_USER))
+				data->callchain->source |=
+					PERF_LBR_CALLCHAIN;
+		}
+
 		header->size += size * sizeof(u64);
 	}
 
@@ -4930,7 +4940,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 		header->size += size;
 	}
 
-	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+	/* LBR can be used for call stack, so it may be enabled implicitly. */
+	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) ||
+		(data->br_stack && data->br_stack->user_callstack)) {
 		int size = sizeof(u64); /* nr */
 		if (data->br_stack) {
 			size += data->br_stack->nr
-- 
1.8.3.2


  parent reply	other threads:[~2014-10-19 22:07 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-19 21:54 [PATCH V6 00/17] perf, x86: Haswell LBR call stack support Kan Liang
2014-10-19 21:54 ` [PATCH V6 01/17] perf, x86: Reduce lbr_sel_map size Kan Liang
2014-10-24  9:39   ` Peter Zijlstra
2014-11-04  1:07     ` Liang, Kan
2014-11-04  7:14       ` Peter Zijlstra
2014-11-04  7:16         ` Peter Zijlstra
2014-10-19 21:54 ` [PATCH V6 02/17] perf, core: introduce pmu context switch callback Kan Liang
2014-10-19 21:54 ` [PATCH V6 03/17] perf, x86: use context switch callback to flush LBR stack Kan Liang
2014-10-19 21:54 ` [PATCH V6 04/17] perf, x86: Basic Haswell LBR call stack support Kan Liang
2014-10-19 21:55 ` [PATCH V6 05/17] perf, core: pmu specific data for perf task context Kan Liang
2014-10-19 21:55 ` [PATCH V6 06/17] perf, core: always switch pmu specific data during context switch Kan Liang
2014-10-19 21:55 ` [PATCH V6 07/17] perf, x86: allocate space for storing LBR stack Kan Liang
2014-10-19 21:55 ` [PATCH V6 08/17] perf, x86: track number of events that use LBR callstack Kan Liang
2014-10-19 21:55 ` [PATCH V6 09/17] perf, x86: Save/resotre LBR stack during context switch Kan Liang
2014-10-19 21:55 ` [PATCH V6 10/17] perf, core: simplify need branch stack check Kan Liang
2014-10-19 21:55 ` Kan Liang [this message]
2014-10-19 21:55 ` [PATCH V6 12/17] perf, x86: re-organize code that implicitly enables LBR/PEBS Kan Liang
2014-10-19 21:55 ` [PATCH V6 13/17] perf, x86: enable LBR callstack when recording callchain Kan Liang
2014-10-24 13:39   ` Jiri Olsa
2014-10-24 14:49     ` Liang, Kan
2014-10-19 21:55 ` [PATCH V6 14/17] perf, x86: disable FREEZE_LBRS_ON_PMI when LBR operates in callstack mode Kan Liang
2014-10-19 21:55 ` [PATCH V6 15/17] perf, x86: Discard zero length call entries in LBR call stack Kan Liang
2014-10-19 21:55 ` [PATCH V6 16/17] perf tools: handle LBR call stack data Kan Liang
2014-10-19 21:55 ` [PATCH V6 17/17] perf tools: choose to dump callchain from LBR and FP Kan Liang
2014-10-24 13:36   ` Jiri Olsa
2014-10-24 13:55     ` Jiri Olsa
2014-10-24 15:20       ` Liang, Kan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1413755712-8259-12-git-send-email-kan.liang@intel.com \
    --to=kan.liang@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulus@samba.org \
    --cc=zheng.z.yan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).