All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf, x86: Add Silvermont (22nm Atom) support
@ 2013-07-18  5:36 Yan, Zheng
  2013-07-18  5:39 ` Yan, Zheng
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18  5:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: a.p.zijlstra, mingo, eranian, ak, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

Compared to the old Atom, Silvermont has offcore response events and
more events that support PEBS.

Silvermont has two offcore response configuration MSRs, but the
event code for OFFCORE_RSP_1 is 0x02b7 (not 0x01bb as on other
CPUs). To avoid complicating intel_fixup_er(), use
INTEL_UEVENT_EXTRA_REG to define the offcore MSRs, so that
intel_fixup_er() can find the event code for OFFCORE_RSP_1 via
x86_pmu.extra_regs[1].event.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 arch/x86/kernel/cpu/perf_event.h          |   2 +
 arch/x86/kernel/cpu/perf_event_intel.c    | 178 ++++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  26 +++++
 3 files changed, 198 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 97e557b..cc16faa 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -641,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];
 
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index fbc9210..748fea5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -143,8 +144,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -162,16 +164,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 	EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+	EVENT_CONSTRAINT_END
+};
+
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
@@ -882,6 +895,140 @@ static __initconst const u64 atom_hw_cache_event_ids
  },
 };
 
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+	EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ		SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE		SNB_DMND_RFO
+#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS		SNB_RESP_ANY
+#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+	},
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_WRITE) ] = {
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_PREFETCH) ] = {
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 {
 	/* user explicitly requested branch sampling */
@@ -1305,7 +1452,7 @@ static void intel_fixup_er(struct perf_event *event, int idx)
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
 	} else if (idx == EXTRA_REG_RSP_1) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
+		event->hw.config |= x86_pmu.extra_regs[1].event;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
 }
@@ -2176,6 +2323,21 @@ __init int intel_pmu_init(void)
 		pr_cont("Atom events, ");
 		break;
 
+	case 55: /* Atom 22nm "Silvermont" */
+		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+			sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_atom();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_slm_extra_regs;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		pr_cont("Silvermont events, ");
+		break;
+
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
 	case 47: /* 32 nm Xeon E7 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3065c57..442fcc2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -517,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
+	INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
+	INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
+	INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
@ 2013-07-18  5:39 ` Yan, Zheng
  2013-07-18  8:42   ` Peter Zijlstra
  2013-07-18  8:23 ` Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18  5:39 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, a.p.zijlstra, mingo, eranian, ak

On 07/18/2013 01:36 PM, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> Compare to old atom, Silvermont has offcore and has more events
> that support PEBS.
> 
> Silvermont has two offcore response configuration MSRs, but the
> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> by x86_pmu.extra_regs[1].event.
>

Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
  2013-07-18  5:39 ` Yan, Zheng
@ 2013-07-18  8:23 ` Peter Zijlstra
  2013-07-18  8:27   ` Yan, Zheng
  2013-07-18  8:29 ` Peter Zijlstra
  2013-07-18  8:41 ` Peter Zijlstra
  3 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18  8:23 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
> +{
> +	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> +	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> +	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> +	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */

So the normal event 0x13c and the fixed counter 2 are normally _not_ the
same. Are they for slm? Are you sure?

> +	EVENT_CONSTRAINT_END
> +};

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  8:23 ` Peter Zijlstra
@ 2013-07-18  8:27   ` Yan, Zheng
  2013-07-18  9:02     ` Peter Zijlstra
  2013-07-19  2:26     ` Yan, Zheng
  0 siblings, 2 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18  8:27 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak

On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>> +{
>> +	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>> +	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>> +	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>> +	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
> 
> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
> same. Are they for slm? Are you sure?
> 

yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf

Regards
Yan, Zheng


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
  2013-07-18  5:39 ` Yan, Zheng
  2013-07-18  8:23 ` Peter Zijlstra
@ 2013-07-18  8:29 ` Peter Zijlstra
  2013-07-18  8:41 ` Peter Zijlstra
  3 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18  8:29 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> Compare to old atom, Silvermont has offcore and has more events
> that support PEBS.
> 
> Silvermont has two offcore response configuration MSRs, but the
> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> by x86_pmu.extra_regs[1].event.

Please split this in two patches; one reworking the OFFCORE_RSP stuff;
one adding slm support.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
                   ` (2 preceding siblings ...)
  2013-07-18  8:29 ` Peter Zijlstra
@ 2013-07-18  8:41 ` Peter Zijlstra
  3 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18  8:41 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:

> @@ -1305,7 +1452,7 @@ static void intel_fixup_er(struct perf_event *event, int idx)
>  		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>  	} else if (idx == EXTRA_REG_RSP_1) {
>  		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> -		event->hw.config |= 0x01bb;
> +		event->hw.config |= x86_pmu.extra_regs[1].event;
>  		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
>  	}
>  }

When you do the split up, please make this consistent and use
x86_pmu.extra_regs[0].event as well.

Also, ideally we'd use EXTRA_REG_RSP_[01] instead of the 0 and 1.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  5:39 ` Yan, Zheng
@ 2013-07-18  8:42   ` Peter Zijlstra
  2013-07-18 11:00     ` Yan, Zheng
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18  8:42 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 01:39:28PM +0800, Yan, Zheng wrote:
> On 07/18/2013 01:36 PM, Yan, Zheng wrote:
> > From: "Yan, Zheng" <zheng.z.yan@intel.com>
> > 
> > Compare to old atom, Silvermont has offcore and has more events
> > that support PEBS.
> > 
> > Silvermont has two offcore response configuration MSRs, but the
> > event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
> > intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
> > MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
> > by x86_pmu.extra_regs[1].event.
> >
> 
> Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.

Why isn't this in the regular SDM like all the other stuff?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  8:27   ` Yan, Zheng
@ 2013-07-18  9:02     ` Peter Zijlstra
  2013-07-18 10:48       ` Yan, Zheng
  2013-07-19  2:26     ` Yan, Zheng
  1 sibling, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18  9:02 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 04:27:31PM +0800, Yan, Zheng wrote:
> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
> > On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
> >> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
> >> +{
> >> +	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> >> +	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> >> +	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> >> +	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
> > 
> > So the normal event 0x13c and the fixed counter 2 are normally _not_ the
> > same. Are they for slm? Are you sure?
> > 
> 
> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf

OK, then put in a comment how slm is 'special' and you might want to fix
intel_pmu_init():

	if (x86_pmu.event_constraints) {
		/*
		 * event on fixed counter2 (REF_CYCLES) only works on this
		 * counter, so do not extend mask to generic counters
		 */
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask != FIXED_EVENT_FLAGS
			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
				continue;
			}

			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

Since that explicitly skips the fixed counter 2 and doesn't extend its
constraint to include all other counters.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  9:02     ` Peter Zijlstra
@ 2013-07-18 10:48       ` Yan, Zheng
  2013-07-18 11:46         ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 10:48 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak

On 07/18/2013 05:02 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 04:27:31PM +0800, Yan, Zheng wrote:
>> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
>>> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>>>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>>>> +{
>>>> +	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>>>> +	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>>>> +	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>>>> +	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>>>
>>> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
>>> same. Are they for slm? Are you sure?
>>>
>>
>> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
> 
> OK, then put in a comment how slm is 'special' and you might want to fix
> intel_pmu_init():
> 
> 	if (x86_pmu.event_constraints) {
> 		/*
> 		 * event on fixed counter2 (REF_CYCLES) only works on this
> 		 * counter, so do not extend mask to generic counters
> 		 */
> 		for_each_event_constraint(c, x86_pmu.event_constraints) {
> 			if (c->cmask != FIXED_EVENT_FLAGS
> 			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
> 				continue;
> 			}
> 
> 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
> 			c->weight += x86_pmu.num_counters;
> 		}
> 	}
> 
> Since that explicitly skips the fixed counter 2 and doesn't extend its
> constraint to include all other counters.
> 

how about below patch

Regards
Yan, Zheng
---
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8249df4..aa0d876 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -140,7 +140,6 @@ struct x86_pmu_capability {
 /* CPU_CLK_Unhalted.Ref: */
 #define MSR_ARCH_PERFMON_FIXED_CTR2	0x30b
 #define INTEL_PMC_IDX_FIXED_REF_CYCLES	(INTEL_PMC_IDX_FIXED + 2)
-#define INTEL_PMC_MSK_FIXED_REF_CYCLES	(1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
  * We model BTS tracing as another fixed-mode PMC.
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e4bb30a..47ffb48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
-	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
 
@@ -2331,6 +2330,9 @@ __init int intel_pmu_init(void)
 
 		intel_pmu_lbr_init_atom();
 
+		/* both event 0x013c and fixed counter2 count REF_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
+
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_slm_extra_regs;
@@ -2486,12 +2488,12 @@ __init int intel_pmu_init(void)
 
 	if (x86_pmu.event_constraints) {
 		/*
-		 * event on fixed counter2 (REF_CYCLES) only works on this
-		 * counter, so do not extend mask to generic counters
+		 * If only fixed counter2 can count event REF_CYCLES, we use
+		 * pseudo-code 0x0300 for REF_CYCLES.
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != FIXED_EVENT_FLAGS
-			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+			if (c->cmask != FIXED_EVENT_FLAGS ||
+			    c->code == 0x0300) {
 				continue;
 			}
 


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  8:42   ` Peter Zijlstra
@ 2013-07-18 11:00     ` Yan, Zheng
  0 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-18 11:00 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak

On 07/18/2013 04:42 PM, Peter Zijlstra wrote:
> On Thu, Jul 18, 2013 at 01:39:28PM +0800, Yan, Zheng wrote:
>> On 07/18/2013 01:36 PM, Yan, Zheng wrote:
>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>
>>> Compare to old atom, Silvermont has offcore and has more events
>>> that support PEBS.
>>>
>>> Silvermont has two offcore response configuration MSRs, but the
>>> event code for OFFCORE_RSP_1 is 0x02b7. To avoid complicating
>>> intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to define offcore
>>> MSRs. So intel_fixup_er() can find the code for OFFCORE_RSP_1
>>> by x86_pmu.extra_regs[1].event.
>>>
>>
>> Document is at http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf, but it has no PEBS event list.
> 
> Why isn't this in the regular SDM like all the other stuff?
> 

I don't know; probably they haven't finished the document yet (the above PDF has no PEBS event list).


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18 10:48       ` Yan, Zheng
@ 2013-07-18 11:46         ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2013-07-18 11:46 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-kernel, mingo, eranian, ak

On Thu, Jul 18, 2013 at 06:48:06PM +0800, Yan, Zheng wrote:
> ---
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8249df4..aa0d876 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -140,7 +140,6 @@ struct x86_pmu_capability {
>  /* CPU_CLK_Unhalted.Ref: */
>  #define MSR_ARCH_PERFMON_FIXED_CTR2	0x30b
>  #define INTEL_PMC_IDX_FIXED_REF_CYCLES	(INTEL_PMC_IDX_FIXED + 2)
> -#define INTEL_PMC_MSK_FIXED_REF_CYCLES	(1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
>  
>  /*
>   * We model BTS tracing as another fixed-mode PMC.
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index e4bb30a..47ffb48 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>  	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>  	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>  	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
> -	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>  	EVENT_CONSTRAINT_END
>  };

Stephane, should we preserve the 0x0300 event as alias in this case? If
so the below gets a little more complicated.

>  
> @@ -2331,6 +2330,9 @@ __init int intel_pmu_init(void)
>  
>  		intel_pmu_lbr_init_atom();
>  
> +		/* both event 0x013c and fixed counter2 count REF_CYCLES */
> +		intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
> +
>  		x86_pmu.event_constraints = intel_slm_event_constraints;
>  		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
>  		x86_pmu.extra_regs = intel_slm_extra_regs;
> @@ -2486,12 +2488,12 @@ __init int intel_pmu_init(void)
>  
>  	if (x86_pmu.event_constraints) {
>  		/*
> -		 * event on fixed counter2 (REF_CYCLES) only works on this
> -		 * counter, so do not extend mask to generic counters
> +		 * If only fixed counter2 can count event REF_CYCLES, we use
> +		 * pseudo-code 0x0300 for REF_CYCLES.
>  		 */
>  		for_each_event_constraint(c, x86_pmu.event_constraints) {
> -			if (c->cmask != FIXED_EVENT_FLAGS
> -			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
> +			if (c->cmask != FIXED_EVENT_FLAGS ||
> +			    c->code == 0x0300) {
>  				continue;
>  			}

Yeah, this looks basically OK, but now that I look at it we should
probably have a symbolic name for the fake events. ISTR us having more,
by virtue of this being umask=3 not 1. But I can't find them.

Stephane?



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf, x86: Add Silvermont (22nm Atom) support
  2013-07-18  8:27   ` Yan, Zheng
  2013-07-18  9:02     ` Peter Zijlstra
@ 2013-07-19  2:26     ` Yan, Zheng
  1 sibling, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2013-07-19  2:26 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, eranian, ak

On 07/18/2013 04:27 PM, Yan, Zheng wrote:
> On 07/18/2013 04:23 PM, Peter Zijlstra wrote:
>> On Thu, Jul 18, 2013 at 01:36:07PM +0800, Yan, Zheng wrote:
>>> +static struct event_constraint intel_slm_event_constraints[] __read_mostly =
>>> +{
>>> +	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>>> +	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>>> +	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
>>> +	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
>>
>> So the normal event 0x13c and the fixed counter 2 are normally _not_ the
>> same. Are they for slm? Are you sure?
>>
> 
> yes, I'm sure. see page 15-15 of http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
> 

I'm sorry, I was wrong.

---
perf stat -e ref-cycles -e cpu/config=0x013c/ sleep 1

 Performance counter stats for 'sleep 1':

         1,181,220 ref-cycles                                                  
            98,434 cpu/config=0x013c/                                          

       0.196653863 seconds time elapsed




^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-07-19  2:26 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-18  5:36 [PATCH] perf, x86: Add Silvermont (22nm Atom) support Yan, Zheng
2013-07-18  5:39 ` Yan, Zheng
2013-07-18  8:42   ` Peter Zijlstra
2013-07-18 11:00     ` Yan, Zheng
2013-07-18  8:23 ` Peter Zijlstra
2013-07-18  8:27   ` Yan, Zheng
2013-07-18  9:02     ` Peter Zijlstra
2013-07-18 10:48       ` Yan, Zheng
2013-07-18 11:46         ` Peter Zijlstra
2013-07-19  2:26     ` Yan, Zheng
2013-07-18  8:29 ` Peter Zijlstra
2013-07-18  8:41 ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.