From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@elte.hu, acme@redhat.com,
robert.richter@amd.com, ming.m.lin@intel.com,
andi@firstfloor.org, asharma@fb.com, ravitillo@lbl.gov,
vweaver1@eecs.utk.edu, khandual@linux.vnet.ibm.com,
dsahern@gmail.com
Subject: [PATCH v5 03/18] perf: add Intel X86 LBR sharing logic
Date: Thu, 2 Feb 2012 13:54:33 +0100 [thread overview]
Message-ID: <1328187288-24395-4-git-send-email-eranian@google.com> (raw)
In-Reply-To: <1328187288-24395-1-git-send-email-eranian@google.com>
The Intel LBR on some recent processor is capable
of filtering branches by type. The filter is configurable
via the LBR_SELECT MSR register.
There are limitation on how this register can be used.
On Nehalem/Westmere, the LBR_SELECT is shared by the two HT threads
when HT is on. It is private to each core when HT is off.
On SandyBridge, the LBR_SELECT register is private to each thread
when HT is on. It is private to each core when HT is off.
The kernel must manage the sharing of LBR_SELECT. It allows
multiple users on the same logical CPU to use LBR_SELECT as
long as they program it with the same value. Across sibling
CPUs (HT threads), the same restriction applies on NHM/WSM.
This patch implements this sharing logic by leveraging the
mechanism put in place for managing the offcore_response
shared MSR.
We modify __intel_shared_reg_get_constraints() to cause
x86_get_event_constraint() to be called because LBR may
be associated with events that may be counter constrained.
Signed-off-by: Stephane Eranian <eranian@google.com>
---
arch/x86/kernel/cpu/perf_event.c | 4 ++
arch/x86/kernel/cpu/perf_event.h | 4 ++
arch/x86/kernel/cpu/perf_event_intel.c | 70 ++++++++++++++++++++------------
3 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f8bddb5..3779313 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -426,6 +426,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ /* mark not used */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
return x86_pmu.hw_config(event);
}
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 513d617..4535ada 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,7 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ struct er_account *lbr_sel;
/*
* Intel host/guest exclude bits
@@ -340,6 +342,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+ u64 lbr_sel_mask; /* LBR_SELECT valid bits */
+ const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bd..97f7bb5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1123,17 +1123,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+ struct perf_event *event,
+ struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
- struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
- return &unconstrained;
+ return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1156,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
reg->alloc = 1;
/*
- * All events using extra_reg are unconstrained.
- * Avoids calling x86_get_event_constraints()
- *
- * Must revisit if extra_reg controlling events
- * ever have constraints. Worst case we go through
- * the regular event constraint table.
+ * need to call x86_get_event_constraint()
+ * to check if associated event has constraints
*/
- c = &unconstrained;
+ c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -1200,11 +1196,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = NULL;
-
- if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
- c = __intel_shared_reg_get_constraints(cpuc, event);
-
+ struct event_constraint *c = NULL, *d;
+ struct hw_perf_event_extra *xreg, *breg;
+
+ xreg = &event->hw.extra_reg;
+ if (xreg->idx != EXTRA_REG_NONE) {
+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+ if (c == &emptyconstraint)
+ return c;
+ }
+ breg = &event->hw.branch_reg;
+ if (breg->idx != EXTRA_REG_NONE) {
+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+ if (d == &emptyconstraint) {
+ __intel_shared_reg_put_constraints(cpuc, xreg);
+ c = d;
+ }
+ }
return c;
}
@@ -1252,6 +1260,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
+
+ reg = &event->hw.branch_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1431,7 +1443,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- if (!x86_pmu.extra_regs)
+ if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1465,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
- if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+ cpuc->lbr_sel = NULL;
+
+ if (!cpuc->shared_regs)
return;
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
- struct intel_shared_regs *pc;
+ if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_shared_regs *pc;
- pc = per_cpu(cpu_hw_events, i).shared_regs;
- if (pc && pc->core_id == core_id) {
- cpuc->kfree_on_online = cpuc->shared_regs;
- cpuc->shared_regs = pc;
- break;
+ pc = per_cpu(cpu_hw_events, i).shared_regs;
+ if (pc && pc->core_id == core_id) {
+ cpuc->kfree_on_online = cpuc->shared_regs;
+ cpuc->shared_regs = pc;
+ break;
+ }
}
+ cpuc->shared_regs->core_id = core_id;
+ cpuc->shared_regs->refcnt++;
}
- cpuc->shared_regs->core_id = core_id;
- cpuc->shared_regs->refcnt++;
+ if (x86_pmu.lbr_sel_map)
+ cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
--
1.7.4.1
next prev parent reply other threads:[~2012-02-02 12:55 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-02 12:54 [PATCH v5 00/18] perf: add support for sampling taken branches Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 01/18] perf: add generic taken branch sampling support Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 02/18] perf: add Intel LBR MSR definitions Stephane Eranian
2012-02-02 12:54 ` Stephane Eranian [this message]
2012-02-02 12:54 ` [PATCH v5 04/18] perf: sync branch stack sampling with X86 precise_sampling Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 05/18] perf: add Intel X86 LBR mappings for PERF_SAMPLE_BRANCH filters Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 06/18] perf: disable LBR support for older Intel Atom processors Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 07/18] perf: implement PERF_SAMPLE_BRANCH for Intel X86 Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 08/18] perf: add LBR software filter support " Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 09/18] perf: disable PERF_SAMPLE_BRANCH_* when not supported Stephane Eranian
2012-02-06 19:23 ` Peter Zijlstra
2012-02-06 19:59 ` Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 10/18] perf: add hook to flush branch_stack on context switch Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 11/18] perf: add code to support PERF_SAMPLE_BRANCH_STACK Stephane Eranian
2012-02-06 18:06 ` Arnaldo Carvalho de Melo
2012-02-07 14:11 ` Stephane Eranian
2012-02-07 15:21 ` Arnaldo Carvalho de Melo
2012-02-02 12:54 ` [PATCH v5 12/18] perf: add support for sampling taken branch to perf record Stephane Eranian
2012-02-06 18:08 ` Arnaldo Carvalho de Melo
2012-02-02 12:54 ` [PATCH v5 13/18] perf: add support for taken branch sampling to perf report Stephane Eranian
2012-02-06 18:14 ` Arnaldo Carvalho de Melo
2012-02-02 12:54 ` [PATCH v5 14/18] perf: fix endianness detection in perf.data Stephane Eranian
2012-02-06 18:17 ` Arnaldo Carvalho de Melo
2012-02-06 18:18 ` Stephane Eranian
2012-02-06 21:47 ` David Ahern
2012-02-06 22:06 ` Arnaldo Carvalho de Melo
2012-02-06 22:29 ` David Ahern
2012-02-07 14:13 ` Stephane Eranian
2012-02-07 14:38 ` Arnaldo Carvalho de Melo
2012-02-17 9:42 ` [tip:perf/core] perf tools: " tip-bot for Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 15/18] perf: add ABI reference sizes Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 16/18] perf: enable reading of perf.data files from different ABI rev Stephane Eranian
2012-02-06 18:19 ` Arnaldo Carvalho de Melo
2012-02-06 18:22 ` Arnaldo Carvalho de Melo
2012-02-07 7:03 ` Anshuman Khandual
2012-02-07 14:52 ` Arnaldo Carvalho de Melo
2012-02-06 22:19 ` David Ahern
2012-02-07 15:50 ` Stephane Eranian
2012-02-07 16:41 ` David Ahern
2012-02-07 17:42 ` Stephane Eranian
2012-02-07 17:57 ` David Ahern
2012-02-02 12:54 ` [PATCH v5 17/18] perf: fix bug print_event_desc() Stephane Eranian
2012-02-02 12:54 ` [PATCH v5 18/18] perf: make perf able to read file from older ABIs Stephane Eranian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1328187288-24395-4-git-send-email-eranian@google.com \
--to=eranian@google.com \
--cc=acme@redhat.com \
--cc=andi@firstfloor.org \
--cc=asharma@fb.com \
--cc=dsahern@gmail.com \
--cc=khandual@linux.vnet.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=ming.m.lin@intel.com \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=ravitillo@lbl.gov \
--cc=robert.richter@amd.com \
--cc=vweaver1@eecs.utk.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).