* [PATCH] perf, x86: Add Silvermont (22nm Atom) support
@ 2013-07-18 5:36 Yan, Zheng
2013-07-18 5:39 ` Yan, Zheng
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 5:36 UTC (permalink / raw)
To: linux-kernel; +Cc: a.p.zijlstra, mingo, eranian, ak, Yan, Zheng
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Compared to the old Atom, Silvermont has offcore response events and more events
that support PEBS.
Silvermont has two offcore response configuration MSRs, but the
event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
by x86_pmu.extra_regs[1].event.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
arch/x86/kernel/cpu/perf_event.h | 2 +
arch/x86/kernel/cpu/perf_event_intel.c | 178 ++++++++++++++++++++++++++++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 26 +++++
3 files changed, 198 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 97e557b..cc16faa 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -641,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index fbc9210..748fea5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -143,8 +144,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -162,16 +164,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ EVENT_CONSTRAINT_END
+};
+
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
@@ -882,6 +895,140 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE SNB_DMND_RFO
+#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS SNB_RESP_ANY
+#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+ },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
@@ -1305,7 +1452,7 @@ static void intel_fixup_er(struct perf_event *event, int idx)
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
+ event->hw.config |= x86_pmu.extra_regs[1].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}
@@ -2176,6 +2323,21 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
+ case 55: /* Atom 22nm "Silvermont" */
+ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_atom();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_slm_extra_regs;
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ pr_cont("Silvermont events, ");
+ break;
+
case 37: /* 32 nm nehalem, "Clarkdale" */
case 44: /* 32 nm nehalem, "Gulftown" */
case 47: /* 32 nm Xeon E7 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3065c57..442fcc2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -517,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
@ 2013-07-18 5:39 ` Yan, Zheng
2013-07-18 8:42 ` Peter Zijlstra
2013-07-18 8:23 ` Peter Zijlstra
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 5:39 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, a.p.zijlstra, mingo, eranian, ak
On 07/18/2013 01:36 PM, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> Compare to old atom, Silvermont has offcore and has more events
> that support PEBS.
>
> Silvermont has two offcore response configuration MSRs, but the
> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> by x86_pmu.extra_regs[1].event.
>
Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
2013-07-18 5:39 ` Yan, Zheng
@ 2013-07-18 8:23 ` Peter Zijlstra
2013-07-18 8:27 ` Yan, Zheng
2013-07-18 8:29 ` Peter Zijlstra
2013-07-18 8:41 ` Peter Zijlstra
3 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 8:23 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
> +{
> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
So the normal event 0x13c and the fixed counter 2 are normally _not_ the
same. Are they for slm? Are you sure?
> + EVENT_CONSTRAINT_END
> +};
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 8:23 ` Peter Zijlstra
@ 2013-07-18 8:27 ` Yan, Zheng
2013-07-18 9:02 ` Peter Zijlstra
2013-07-19 2:26 ` Yan, Zheng
0 siblings, 2 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 8:27 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak
On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>> +{
>> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>> + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>> + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>
> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
> same. Are they for slm? Are you sure?
>
yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
Regards
Yan, Zheng
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
2013-07-18 5:39 ` Yan, Zheng
2013-07-18 8:23 ` Peter Zijlstra
@ 2013-07-18 8:29 ` Peter Zijlstra
2013-07-18 8:41 ` Peter Zijlstra
3 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 8:29 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> Compare to old atom, Silvermont has offcore and has more events
> that support PEBS.
>
> Silvermont has two offcore response configuration MSRs, but the
> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> by x86_pmu.extra_regs[1].event.
Please split this in two patches; one reworking the OFFCORE_RSP stuff;
one adding slm support.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
` (2 preceding siblings ...)
2013-07-18 8:29 ` Peter Zijlstra
@ 2013-07-18 8:41 ` Peter Zijlstra
3 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 8:41 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> @@ -1305,7 +1452,7 @@ static void intel_fixup_er(struct perf_event *event, int idx)
> event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
> } else if (idx == EXTRA_REG_RSP_1) {
> event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> - event->hw.config |= 0x01bb;
> + event->hw.config |= x86_pmu.extra_regs[1].event;
> event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> }
> }
When you do the split up, please make this consistent and use
x86_pmu.extra_regs[0].event as well.
Also, ideally we'd use EXTRA_REG_RSP_[01] instead of the 0 and 1.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 5:39 ` Yan, Zheng
@ 2013-07-18 8:42 ` Peter Zijlstra
2013-07-18 11:00 ` Yan, Zheng
0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 8:42 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 01:39:28PM +0800, Yan, Zheng wrote:
> On 07/18/2013 01:36 PM, Yan, Zheng wrote:
> > From: "Yan, Zheng" <zheng.z.yan@intel.com>
> >
> > Compare to old atom, Silvermont has offcore and has more events
> > that support PEBS.
> >
> > Silvermont has two offcore response configuration MSRs, but the
> > event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> > intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> > MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> > by x86_pmu.extra_regs[1].event.
> >
>
> Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.
Why isn't this in the regular SDM like all the other stuff?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 8:27 ` Yan, Zheng
@ 2013-07-18 9:02 ` Peter Zijlstra
2013-07-18 10:48 ` Yan, Zheng
2013-07-19 2:26 ` Yan, Zheng
1 sibling, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 9:02 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 04:27:31PM +0800, Yan, Zheng wrote:
> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
> > On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> >> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
> >> +{
> >> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> >> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> >> + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> >> + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
> >
> > So the normal event 0x13c and the fixed counter 2 are normally _not_ the
> > same. Are they for slm? Are you sure?
> >
>
> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
OK, then put in a comment how slm is 'special' and you might want to fix
intel_pmu_init():
if (x86_pmu.event_constraints) {
/*
* event on fixed counter2 (REF_CYCLES) only works on this
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
if (c->cmask != FIXED_EVENT_FLAGS
|| c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
continue;
}
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
}
}
Since that explicitly skips the fixed counter 2 and doesn't extend its
constraint to include all other counters.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 9:02 ` Peter Zijlstra
@ 2013-07-18 10:48 ` Yan, Zheng
2013-07-18 11:46 ` Peter Zijlstra
0 siblings, 1 reply; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 10:48 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak
On 07/18/2013 05:02 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 04:27:31PM +0800, Yan, Zheng wrote:
>> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
>>> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>>>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>>>> +{
>>>> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>>>> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>>>> + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>>>> + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>>>
>>> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
>>> same. Are they for slm? Are you sure?
>>>
>>
>> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
>
> OK, then put in a comment how slm is 'special' and you might want to fix
> intel_pmu_init():
>
> if (x86_pmu.event_constraints) {
> /*
> * event on fixed counter2 (REF_CYCLES) only works on this
> * counter, so do not extend mask to generic counters
> */
> for_each_event_constraint(c, x86_pmu.event_constraints) {
> if (c->cmask != FIXED_EVENT_FLAGS
> || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
> continue;
> }
>
> c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
> c->weight += x86_pmu.num_counters;
> }
> }
>
> Since that explicitly skips the fixed counter 2 and doesn't extend its
> constraint to include all other counters.
>
how about below patch
Regards
Yan, Zheng
---
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8249df4..aa0d876 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -140,7 +140,6 @@ struct x86_pmu_capability {
/* CPU_CLK_Unhalted.Ref: */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
-#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
/*
* We model BTS tracing as another fixed-mode PMC.
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e4bb30a..47ffb48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
@@ -2331,6 +2330,9 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_atom();
+ /* both event 0x013c and fixed counter2 count REF_CYCLES */
+ intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
+
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
@@ -2486,12 +2488,12 @@ __init int intel_pmu_init(void)
if (x86_pmu.event_constraints) {
/*
- * event on fixed counter2 (REF_CYCLES) only works on this
- * counter, so do not extend mask to generic counters
+ * If only fixed counter2 can count event REF_CYCLES, we use
+ * pseudo-code 0x0300 for REF_CYCLES.
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != FIXED_EVENT_FLAGS
- || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+ if (c->cmask != FIXED_EVENT_FLAGS ||
+ c->code == 0x0300) {
continue;
}
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 8:42 ` Peter Zijlstra
@ 2013-07-18 11:00 ` Yan, Zheng
0 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 11:00 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak
On 07/18/2013 04:42 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 01:39:28PM +0800, Yan, Zheng wrote:
>> On 07/18/2013 01:36 PM, Yan, Zheng wrote:
>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>
>>> Compare to old atom, Silvermont has offcore and has more events
>>> that support PEBS.
>>>
>>> Silvermont has two offcore response configuration MSRs, but the
>>> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
>>> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
>>> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
>>> by x86_pmu.extra_regs[1].event.
>>>
>>
>> Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.
>
> Why isn't this in the regular SDM like all the other stuff?
>
I don't know, probably they still haven't finished the document yet. (above PDF has no PEBS event list).
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 10:48 ` Yan, Zheng
@ 2013-07-18 11:46 ` Peter Zijlstra
0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 11:46 UTC (permalink / raw)
To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak
On Thu, Jul 18, 2013 at 06:48:06PM +0800, Yan, Zheng wrote:
> ---
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8249df4..aa0d876 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -140,7 +140,6 @@ struct x86_pmu_capability {
> /* CPU_CLK_Unhalted.Ref: */
> #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
> #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
> -#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
>
> /*
> * We model BTS tracing as another fixed-mode PMC.
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index e4bb30a..47ffb48 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
> FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
> EVENT_CONSTRAINT_END
> };
Stephane, should we preserve the 0x0300 event as alias in this case? If
so the below gets a little more complicated.
>
> @@ -2331,6 +2330,9 @@ __init int intel_pmu_init(void)
>
> intel_pmu_lbr_init_atom();
>
> + /* both event 0x013c and fixed counter2 count REF_CYCLES */
> + intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
> +
> x86_pmu.event_constraints = intel_slm_event_constraints;
> x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
> x86_pmu.extra_regs = intel_slm_extra_regs;
> @@ -2486,12 +2488,12 @@ __init int intel_pmu_init(void)
>
> if (x86_pmu.event_constraints) {
> /*
> - * event on fixed counter2 (REF_CYCLES) only works on this
> - * counter, so do not extend mask to generic counters
> + * If only fixed counter2 can count event REF_CYCLES, we use
> + * pseudo-code 0x0300 for REF_CYCLES.
> */
> for_each_event_constraint(c, x86_pmu.event_constraints) {
> - if (c->cmask != FIXED_EVENT_FLAGS
> - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
> + if (c->cmask != FIXED_EVENT_FLAGS ||
> + c->code == 0x0300) {
> continue;
> }
Yeah, this looks basically OK, but now that I look at it we should
probably have a symbolic name for the fake events. ISTR us having more,
by virtue of this being umask=3 not 1. But I can't find them.
Stephane?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
2013-07-18 8:27 ` Yan, Zheng
2013-07-18 9:02 ` Peter Zijlstra
@ 2013-07-19 2:26 ` Yan, Zheng
1 sibling, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-19 2:26 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak
On 07/18/2013 04:27 PM, Yan, Zheng wrote:
> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
>> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>>> +{
>>> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>>> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>>> + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>>> + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>>
>> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
>> same. Are they for slm? Are you sure?
>>
>
> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
>
I'm sorry, I was wrong.
---
perf stat -e ref-cycles -e cpu/config=0x013c/ sleep 1
Performance counter stats for 'sleep 1':
1,181,220 ref-cycles
98,434 cpu/config=0x013c/
0.196653863 seconds time elapsed
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2013-07-19 2:26 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-18 5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
2013-07-18 5:39 ` Yan, Zheng
2013-07-18 8:42 ` Peter Zijlstra
2013-07-18 11:00 ` Yan, Zheng
2013-07-18 8:23 ` Peter Zijlstra
2013-07-18 8:27 ` Yan, Zheng
2013-07-18 9:02 ` Peter Zijlstra
2013-07-18 10:48 ` Yan, Zheng
2013-07-18 11:46 ` Peter Zijlstra
2013-07-19 2:26 ` Yan, Zheng
2013-07-18 8:29 ` Peter Zijlstra
2013-07-18 8:41 ` Peter Zijlstra
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.