linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Yan, Zheng" <zheng.z.yan@intel.com>
To: linux-kernel@vger.kernel.org
Cc: a.p.zijlstra@chello.nl, mingo@kernel.org, acme@infradead.org,
	eranian@google.com, andi@firstfloor.org, "Yan,
	Zheng" <zheng.z.yan@intel.com>
Subject: [PATCH v3 12/14] perf, x86: use LBR call stack to get user callchain
Date: Tue, 18 Feb 2014 14:07:39 +0800	[thread overview]
Message-ID: <1392703661-15104-13-git-send-email-zheng.z.yan@intel.com> (raw)
In-Reply-To: <1392703661-15104-1-git-send-email-zheng.z.yan@intel.com>

Haswell has a new feature that utilizes the existing Last Branch Record
facility to record call chains. When the feature is enabled, function
call will be collected as normal, but as return instructions are executed
the last captured branch record is popped from the on-chip LBR registers.
The LBR call stack facility can help perf to get call chains of progam
without frame pointer.

This patch makes x86's perf_callchain_user() failback to LBR callstack
when there is no frame pointer in the user program.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c           | 33 ++++++++++++++++++++++++++----
 arch/x86/kernel/cpu/perf_event_intel.c     | 10 ++++++++-
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |  2 ++
 include/linux/perf_event.h                 |  1 +
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6094560..0d0fe2f3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1975,12 +1975,28 @@ static unsigned long get_segment_base(unsigned int segment)
 	return get_desc_base(desc + idx);
 }
 
+static inline void
+perf_callchain_lbr_callstack(struct perf_callchain_entry *entry,
+			     struct perf_sample_data *data)
+{
+	struct perf_branch_stack *br_stack = data->br_stack;
+
+	if (br_stack && br_stack->user_callstack) {
+		int i = 0;
+		while (i < br_stack->nr && entry->nr < PERF_MAX_STACK_DEPTH) {
+			perf_callchain_store(entry, br_stack->entries[i].from);
+			i++;
+		}
+	}
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
 
 static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+perf_callchain_user32(struct perf_callchain_entry *entry,
+		      struct pt_regs *regs, struct perf_sample_data *data)
 {
 	/* 32-bit process in 64-bit kernel. */
 	unsigned long ss_base, cs_base;
@@ -2009,11 +2025,16 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		perf_callchain_store(entry, cs_base + frame.return_address);
 		fp = compat_ptr(ss_base + frame.next_frame);
 	}
+
+	if (fp == compat_ptr(regs->bp))
+		perf_callchain_lbr_callstack(entry, data);
+
 	return 1;
 }
 #else
 static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+perf_callchain_user32(struct perf_callchain_entry *entry,
+		      struct pt_regs *regs, struct perf_sample_data *data)
 {
     return 0;
 }
@@ -2043,12 +2064,12 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 	if (!current->mm)
 		return;
 
-	if (perf_callchain_user32(regs, entry))
+	if (perf_callchain_user32(entry, regs, data))
 		return;
 
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		unsigned long bytes;
-		frame.next_frame	     = NULL;
+		frame.next_frame = NULL;
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
@@ -2061,6 +2082,10 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 		perf_callchain_store(entry, frame.return_address);
 		fp = frame.next_frame;
 	}
+
+	/* try LBR callstack if there is no frame pointer */
+	if (fp == (void __user *)regs->bp)
+		perf_callchain_lbr_callstack(entry, data);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 722171c..9057a20 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1030,6 +1030,14 @@ static __initconst const u64 slm_hw_cache_event_ids
  },
 };
 
+static inline bool intel_pmu_needs_lbr_callstack(struct perf_event *event)
+{
+	if ((event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+	    (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK))
+		return true;
+	return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1398,7 +1406,7 @@ again:
 
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
-		if (has_branch_stack(event))
+		if (needs_branch_stack(event))
 			data.br_stack = &cpuc->lbr_stack;
 
 		if (perf_event_overflow(event, &data, regs))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index aa726af..7e26367 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -717,6 +717,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	int i, j, type;
 	bool compress = false;
 
+	cpuc->lbr_stack.user_callstack = branch_user_callstack(br_sel);
+
 	/* if sampling all branches, then nothing to filter */
 	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
 		return;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b87974a..517c34a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -74,6 +74,7 @@ struct perf_raw_record {
  * recent branch.
  */
 struct perf_branch_stack {
+	bool				user_callstack;
 	__u64				nr;
 	struct perf_branch_entry	entries[0];
 };
-- 
1.8.5.3


  parent reply	other threads:[~2014-02-18  6:08 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-18  6:07 [PATCH v3 00/14] perf, x86: Haswell LBR call stack support Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 01/14] perf, x86: Reduce lbr_sel_map size Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 02/14] perf, core: introduce pmu context switch callback Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 03/14] perf, x86: use context switch callback to flush LBR stack Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 04/14] perf, x86: Basic Haswell LBR call stack support Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 05/14] perf, core: pmu specific data for perf task context Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 06/14] perf, core: always switch pmu specific data during context switch Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 07/14] perf, x86: track number of events that use LBR callstack Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 08/14] perf, x86: allocate space for storing LBR stack Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 09/14] perf, x86: Save/resotre LBR stack during context switch Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 10/14] perf, core: simplify need branch stack check Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 11/14] perf, core: Pass perf_sample_data to perf_callchain() Yan, Zheng
2014-02-18  6:07 ` Yan, Zheng [this message]
2014-02-18  6:07 ` [PATCH v3 13/14] perf, x86: enable LBR callstack when recording callchain Yan, Zheng
2014-02-18  6:07 ` [PATCH v3 14/14] perf, x86: Discard zero length call entries in LBR call stack Yan, Zheng
2014-02-23 19:47 ` [PATCH v3 00/14] perf, x86: Haswell LBR call stack support Stephane Eranian
2014-02-24  1:07   ` Yan, Zheng
2014-02-24  7:14 ` Peter Zijlstra
2014-02-26  2:39 ` Andy Lutomirski
2014-02-26  7:04   ` Stephane Eranian
2014-02-26  8:57     ` Yan, Zheng
2014-02-26 16:03     ` Andy Lutomirski
2014-02-26 18:55       ` Andi Kleen
2014-02-26 18:59         ` Andy Lutomirski
2014-02-26 19:19           ` David Ahern
2014-02-26 19:25             ` Andy Lutomirski
2014-02-26 20:14               ` David Ahern
2014-02-26 20:26                 ` Andy Lutomirski
2014-04-09 11:48                   ` Peter Zijlstra
2014-04-09 16:48                     ` Andi Kleen
2014-04-09 17:40                       ` Andi Kleen
2014-02-26 20:32                 ` Peter Zijlstra
2014-02-26 20:53                 ` Andi Kleen
2014-02-26 21:15                   ` Peter Zijlstra
2014-02-26 21:33                     ` Andi Kleen
2014-02-26 21:34                   ` David Ahern
2014-02-26 21:42                     ` Andi Kleen
2014-02-27  9:09                       ` Stephane Eranian
2014-02-27 12:35           ` Ingo Molnar
2014-02-27 16:08             ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1392703661-15104-13-git-send-email-zheng.z.yan@intel.com \
    --to=zheng.z.yan@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=andi@firstfloor.org \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).