* [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Lukasz Odzioba @ 2016-06-08  4:02 UTC
  To: linux-kernel, x86, tglx, mingo, hpa, peterz, ak, kan.liang, akpm,
	eranian, acme, alexander.shishkin, bp
  Cc: lukasz.anaczkowski, lukasz.odzioba

On the Knights Landing processor we need to filter OFFCORE_RESPONSE
events by the config1 parameter to make sure that they end up on
an appropriate PMC, as the specification requires.

On Knights Landing:
MSR_OFFCORE_RSP_1 bits 8, 11, 14 can be used only on PMC1
MSR_OFFCORE_RSP_0 bit 38 can be used only on PMC0
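
For example (an illustrative command only, not part of this patch; it
assumes the standard offcore_rsp format attribute exposed by the core
PMU), a request that sets bit 38 must end up on PMC0:

	perf stat -e cpu/event=0xb7,umask=0x01,offcore_rsp=0x4000000000/ -a sleep 1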

This patch introduces INTEL_EEVENT_CONSTRAINT, whose third parameter
specifies the extended config bits that are allowed only on the given PMCs.

This patch depends on "Change offcore response masks for Knights Landing".

Reported-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lukasz Odzioba <lukasz.odzioba@intel.com>
---
 arch/x86/events/core.c         |  3 ++-
 arch/x86/events/intel/core.c   | 17 ++++++++++++++---
 arch/x86/events/intel/uncore.c |  2 +-
 arch/x86/events/perf_event.h   | 41 ++++++++++++++++++++++++-----------------
 4 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee..a4be71c 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -122,6 +122,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
+
 		/* Check if the extra msrs can be safely accessed*/
 		if (!er->extra_msr_access)
 			return -ENXIO;
@@ -1736,7 +1737,7 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0, 0);
+				   0, x86_pmu.num_counters, 0, 0, 0);
 
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c66695..794f5c8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -177,6 +177,17 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 	EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_knl_event_constraints[] __read_mostly = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+	/* MSR_OFFCORE_RSP_1 bits 8, 11, 14 can be used only on PMC1 */
+	INTEL_EEVENT_CONSTRAINT(0x02b7, 2, 0x4900),
+	/* MSR_OFFCORE_RSP_0 bit 38 can be used only on PMC0 */
+	INTEL_EEVENT_CONSTRAINT(0x01b7, 1, 1ull<<38),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_skl_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
@@ -2284,16 +2295,16 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
 {
 	struct event_constraint *c;
-
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code) {
+				if (c->emask && !(c->emask & event->attr.config1))
+					continue;
 				event->hw.flags |= c->flags;
 				return c;
 			}
 		}
 	}
-
 	return &unconstrained;
 }
 
@@ -3784,7 +3795,7 @@ __init int intel_pmu_init(void)
 		       knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 		intel_pmu_lbr_init_knl();
 
-		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.event_constraints = intel_knl_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_knl_extra_regs;
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce7406..fc5b866 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -839,7 +839,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 	type->pmus = pmus;
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
-				0, type->num_counters, 0, 0);
+				0, type->num_counters, 0, 0, 0);
 
 	if (type->event_descs) {
 		for (i = 0; type->event_descs[i].attr.attr.name; i++);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764d..47241ed5 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -52,6 +52,7 @@ struct event_constraint {
 	int	weight;
 	int	overlap;
 	int	flags;
+	u64	emask;
 };
 /*
  * struct hw_perf_event.flags flags
@@ -239,21 +240,22 @@ struct cpu_hw_events {
 	void				*kfree_on_online[X86_PERF_KFREE_MAX];
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f, e) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
 	.overlap = (o),			\
-	.flags = f,			\
+	.flags = (f),			\
+	.emask = (e),			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0, 0)
 
 #define INTEL_EXCLEVT_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
-			   0, PERF_X86_EVENT_EXCL)
+			   0, PERF_X86_EVENT_EXCL, 0)
 
 /*
  * The overlap flag marks event constraints with overlapping counter
@@ -277,7 +279,7 @@ struct cpu_hw_events {
  * and its counter masks must be kept at a minimum.
  */
 #define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0, 0)
 
 /*
  * Constraint on the Event code.
@@ -286,6 +288,12 @@ struct cpu_hw_events {
 	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
 /*
+ * Constraint on the Extended Event code
+ */
+#define INTEL_EEVENT_CONSTRAINT(c, n, e) \
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, HWEIGHT(n), 0, 0, e)
+
+/*
  * Constraint on the Event code + UMask + fixed-mask
  *
  * filter mask to validate fixed counter events.
@@ -318,15 +326,15 @@ struct cpu_hw_events {
 
 #define INTEL_EXCLUEVT_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
-			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL, 0)
 
 #define INTEL_PLD_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT, 0)
 
 #define INTEL_PST_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST, 0)
 
 /* Event constraint, but match on all event flags too. */
 #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
@@ -340,50 +348,49 @@ struct cpu_hw_events {
 #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW, 0)
 
 /* Check flags and event code, and set the HSW load flag */
 #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
 	__EVENT_CONSTRAINT(code, n,			\
 			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW, 0)
 
 #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
 	__EVENT_CONSTRAINT(code, n,			\
 			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, \
-			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL, 0)
 
 /* Check flags and event code/umask, and set the HSW store flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW, 0)
 
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
 	__EVENT_CONSTRAINT(code, n,			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, \
-			  PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+			  PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL, 0)
 
 /* Check flags and event code/umask, and set the HSW load flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW, 0)
 
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
 	__EVENT_CONSTRAINT(code, n,			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, \
-			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL, 0)
 
 /* Check flags and event code/umask, and set the HSW N/A flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
-
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW, 0)
 
 /*
  * We define the end marker as having a weight of -1
-- 
1.8.3.1

* Re: [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Peter Zijlstra @ 2016-06-08  6:48 UTC
  To: Lukasz Odzioba
  Cc: linux-kernel, x86, tglx, mingo, hpa, ak, kan.liang, akpm,
	eranian, acme, alexander.shishkin, bp, lukasz.anaczkowski

On Wed, Jun 08, 2016 at 06:02:16AM +0200, Lukasz Odzioba wrote:
> On the Knights Landing processor we need to filter OFFCORE_RESPONSE
> events by the config1 parameter to make sure that they end up on
> an appropriate PMC, as the specification requires.
> 
> On Knights Landing:
> MSR_OFFCORE_RSP_1 bits 8, 11, 14 can be used only on PMC1
> MSR_OFFCORE_RSP_0 bit 38 can be used only on PMC0
> 
> This patch introduces INTEL_EEVENT_CONSTRAINT, whose third parameter
> specifies the extended config bits that are allowed only on the given PMCs.
> 

How does this work in the light of intel_alt_er()?

* RE: [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Odzioba, Lukasz @ 2016-06-20 10:26 UTC
  To: Peter Zijlstra
  Cc: linux-kernel, x86, tglx, mingo, hpa, ak, Liang, Kan, akpm,
	eranian, acme, alexander.shishkin, bp, Anaczkowski, Lukasz

On 08.06.2016 Peter Zijlstra wrote:
> How does this work in the light of intel_alt_er()?

Hi Peter,

If the constrained bit is valid on only one of the OCR MSRs (as is the case
on KNL), then the OCR valid mask will forbid intel_alt_er() from moving it
to the other MSR.

If the constrained bit is valid on both OCR MSRs, it can safely be altered by
intel_alt_er(), but in that case the constraint should be defined twice, once
for each MSR, i.e.:

INTEL_EEVENT_CONSTRAINT(0x01b7, 1, 0x1); // bit 0 can be used only on PMC0
INTEL_EEVENT_CONSTRAINT(0x02b7, 1, 0x1); // bit 0 can be used only on PMC0
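
A minimal user-space model of that valid_mask behaviour (an illustrative
sketch, not the kernel code itself; the valid masks below are assumptions,
only the constrained bits match this patch):

#include <stdint.h>
#include <stdio.h>

#define KNL_RSP0_VALID	(0xffull | (1ull << 38))	/* assumed mask */
#define KNL_RSP1_VALID	(0xffull | 0x4900ull)		/* assumed mask */

/* mirrors the x86_pmu_extra_regs() test against er->valid_mask */
static int alt_er_ok(uint64_t config1, uint64_t alt_valid_mask)
{
	return !(config1 & ~alt_valid_mask);
}

int main(void)
{
	uint64_t config1 = 1ull << 38;	/* valid on MSR_OFFCORE_RSP_0 only */

	/* prints 0: intel_alt_er() may not move this event to RSP_1 */
	printf("%d\n", alt_er_ok(config1, KNL_RSP1_VALID));
	return 0;
}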

I hope that answers your question. 

Thanks,
Lukas

* Re: [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Peter Zijlstra @ 2016-06-21  9:37 UTC
  To: Odzioba, Lukasz
  Cc: linux-kernel, x86, tglx, mingo, hpa, ak, Liang, Kan, akpm,
	eranian, acme, alexander.shishkin, bp, Anaczkowski, Lukasz

On Mon, Jun 20, 2016 at 10:26:30AM +0000, Odzioba, Lukasz wrote:
> On 08.06.2016 Peter Zijlstra wrote:
> > How does this work in the light of intel_alt_er()?
> 
> Hi Peter,
> 
> If the constrained bit is valid on only one of the OCR MSRs (as is the case
> on KNL), then the OCR valid mask will forbid intel_alt_er() from moving it
> to the other MSR.

Yes, that is the intent, but how is this achieved? I'm not sure I see
how the patch ensures this.

Also, intel_get_event_constraints() has a path where it copies the
constraint, should it not also copy the new field?

* RE: [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Odzioba, Lukasz @ 2016-06-24 14:54 UTC
  To: Peter Zijlstra
  Cc: linux-kernel, x86, tglx, mingo, hpa, ak, Liang, Kan, akpm,
	eranian, acme, alexander.shishkin, bp, Anaczkowski, Lukasz

On Tuesday, June 21, 2016 11:38 AM, Peter Zijlstra wrote:
> Yes, that is the intent, but how is this achieved? I'm not sure I see
> how the patch ensures this.

If you are confused, then it is likely that I did something wrong here.
Let me explain myself.

We already have a mechanism to create static constraints which limit events
to given PMCs via event code filtering. Such constraints are later obeyed by
the event scheduler, which bases its decisions on idxmsk to place events on
the right PMCs.

We can think of the OFFCORE_RESPONSE/config1 value as an extension
of the event code, making it 128 bits long (code + extended code).

emask is the extended code logically ANDed with the extended code mask
(by analogy to c->code and c->cmask); we could add separate values here,
but I didn't see a real use for that right now.

The event code is used only in x86_get_event_constraints(), so we have to
extend the event code matching check there to test config1 against our new
emask. If the constraint code matches the event code and the constraint has
a non-empty extended code, we check it against config1; if config1 uses one
of the bits defined in emask, we return the constraint as if it were a
normal 64-bit-code constraint, and the scheduler will take care of the rest.
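
The same rule as a small stand-alone program (an illustrative sketch, not
the kernel code; knl_c mirrors INTEL_EEVENT_CONSTRAINT(0x01b7, 1, 1ull<<38)
from this patch, with idxmsk bit 0 standing for PMC0):

#include <stdint.h>
#include <stdio.h>

struct constraint {
	uint64_t code;		/* event code + umask to match */
	uint64_t cmask;		/* config bits taking part in the match */
	uint64_t emask;		/* extended (config1) bits owned by the entry */
	uint64_t idxmsk;	/* PMCs the constraint allows */
};

static int constraint_matches(const struct constraint *c,
			      uint64_t config, uint64_t config1)
{
	if ((config & c->cmask) != c->code)
		return 0;
	/* the new check: an emask entry applies only if config1 touches it */
	if (c->emask && !(c->emask & config1))
		return 0;
	return 1;
}

int main(void)
{
	const struct constraint knl_c = {
		.code = 0x01b7, .cmask = 0xffff,
		.emask = 1ull << 38, .idxmsk = 0x1,
	};

	/* 1: config1 uses bit 38, so the event is pinned to PMC0 */
	printf("%d\n", constraint_matches(&knl_c, 0x01b7, 1ull << 38));
	/* 0: bit 38 unused, the event falls through to unconstrained */
	printf("%d\n", constraint_matches(&knl_c, 0x01b7, 0x1));
	return 0;
}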

> Also, intel_get_event_constraints() has a path where it copies the
> constraint, should it not also copy the new field?

Since the event code is not used anywhere except x86_get_event_constraints(),
the extended code is not needed there either.

To verify that it works as I expected, I added printk()s to
x86_assign_hw_event() to see the selected PMC.
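
Something along these lines (debug-only, not part of the posted patch):

	/* in x86_assign_hw_event(), after hwc->idx has been picked */
	pr_info("perf: config %#llx config1 %#llx -> counter %d\n",
		(unsigned long long)event->hw.config,
		(unsigned long long)event->attr.config1, hwc->idx);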

Thanks,
Lukas

* Re: [PATCH 1/1] perf/x86/intel: Add extended event constraints for Knights Landing
From: Stephane Eranian @ 2016-06-24 17:04 UTC
  To: Odzioba, Lukasz
  Cc: Peter Zijlstra, linux-kernel, x86, tglx, mingo, hpa, ak, Liang,
	Kan, akpm, acme, alexander.shishkin, bp, Anaczkowski, Lukasz

Hi,

On Fri, Jun 24, 2016 at 7:54 AM, Odzioba, Lukasz
<lukasz.odzioba@intel.com> wrote:
> On Tuesday, June 21, 2016 11:38 AM, Peter Zijlstra wrote:
>> Yes, that is the intent, but how is this achieved? I'm not sure I see
>> how the patch ensures this.
>
> If you are confused, then it is likely that I did something wrong here.
> Let me explain myself.
>
> We already have a mechanism to create static constraints which limit events
> to given PMCs via event code filtering. Such constraints are later obeyed by
> the event scheduler, which bases its decisions on idxmsk to place events on
> the right PMCs.
>
> We can think of the OFFCORE_RESPONSE/config1 value as an extension
> of the event code, making it 128 bits long (code + extended code).
>
> emask is the extended code logically ANDed with the extended code mask
> (by analogy to c->code and c->cmask); we could add separate values here,
> but I didn't see a real use for that right now.
>
> The event code is used only in x86_get_event_constraints(), so we have to
> extend the event code matching check there to test config1 against our new
> emask. If the constraint code matches the event code and the constraint has
> a non-empty extended code, we check it against config1; if config1 uses one
> of the bits defined in emask, we return the constraint as if it were a
> normal 64-bit-code constraint, and the scheduler will take care of the rest.
>
>> Also, intel_get_event_constraints() has a path where it copies the
>> constraint, should it not also copy the new field?
>
> Since the event code is not used anywhere except x86_get_event_constraints(),
> the extended code is not needed there either.
>
> To verify that it works as I expected, I added printk()s to
> x86_assign_hw_event() to see the selected PMC.
>
What happens if I do in one run:
   2- cpu/event=0xb7,umask=0x1,offcore_rsp=0x100/u,cpu/event=0xb7,umask=0x2,offcore_rsp=0x100/k
   3- cpu/event=0xb7,umask=0x1,offcore_rsp=1ull<<38/,cpu/event=0xb7,umask=0x1,offcore_rsp=0x1/
   4- cpu/event=0xb7,umask=0x1,offcore_rsp=1ull<<38/,cpu/event=0xb7,umask=0x1,offcore_rsp=1ull<<38|1/

You get the shared constraints BEFORE the static constraints. So if
you've gone through intel_alt_er(), then you should still be fine. In
case of a counter conflict during scheduling, the shared regs are freed in
put_event_constraints(). So it should be fine there too.
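
Roughly, the order in __intel_get_event_constraints() looks like this
(simplified sketch, from memory of that code path):

	c = intel_bts_constraints(event);
	if (c)
		return c;

	c = intel_shared_regs_constraints(cpuc, event);	/* offcore_rsp, after intel_alt_er() */
	if (c)
		return c;

	c = intel_pebs_constraints(event);
	if (c)
		return c;

	return x86_get_event_constraints(cpuc, idx, event);	/* static table, emask check */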
