historical-speck.lore.kernel.org archive mirror
* [MODERATED] [PATCH 0/4] walnut 0
@ 2019-02-19 15:58 Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 1/4] walnut 1 Peter Zijlstra
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
  To: speck

nicely split


* [MODERATED] [PATCH 1/4] walnut 1
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 2/4] walnut 2 Peter Zijlstra
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
  To: speck

The cpuc data structure allocation is different between fake and real
cpuc's; use the same code to init/free both.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/core.c       |   13 +++++--------
 arch/x86/events/intel/core.c |   29 ++++++++++++++++++-----------
 arch/x86/events/perf_event.h |   11 ++++++++---
 3 files changed, 31 insertions(+), 22 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1995,7 +1995,7 @@ static int x86_pmu_commit_txn(struct pmu
  */
 static void free_fake_cpuc(struct cpu_hw_events *cpuc)
 {
-	kfree(cpuc->shared_regs);
+	intel_cpuc_finish(cpuc);
 	kfree(cpuc);
 }
 
@@ -2007,14 +2007,11 @@ static struct cpu_hw_events *allocate_fa
 	cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
 	if (!cpuc)
 		return ERR_PTR(-ENOMEM);
-
-	/* only needed, if we have extra_regs */
-	if (x86_pmu.extra_regs) {
-		cpuc->shared_regs = allocate_shared_regs(cpu);
-		if (!cpuc->shared_regs)
-			goto error;
-	}
 	cpuc->is_fake = 1;
+
+	if (intel_cpuc_prepare(cpuc, cpu))
+		goto error;
+
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3410,7 +3410,7 @@ ssize_t intel_event_sysfs_show(char *pag
 	return x86_event_sysfs_show(page, config, event);
 }
 
-struct intel_shared_regs *allocate_shared_regs(int cpu)
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
 	struct intel_shared_regs *regs;
 	int i;
@@ -3442,10 +3442,9 @@ static struct intel_excl_cntrs *allocate
 	return c;
 }
 
-static int intel_pmu_cpu_prepare(int cpu)
-{
-	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
+int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
+{
 	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
 		cpuc->shared_regs = allocate_shared_regs(cpu);
 		if (!cpuc->shared_regs)
@@ -3455,7 +3454,7 @@ static int intel_pmu_cpu_prepare(int cpu
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
-		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
 		if (!cpuc->constraint_list)
 			goto err_shared_regs;
 
@@ -3480,6 +3479,11 @@ static int intel_pmu_cpu_prepare(int cpu
 	return -ENOMEM;
 }
 
+static int intel_pmu_cpu_prepare(int cpu)
+{
+	return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+}
+
 static void flip_smm_bit(void *data)
 {
 	unsigned long set = *(unsigned long *)data;
@@ -3554,9 +3558,8 @@ static void intel_pmu_cpu_starting(int c
 	}
 }
 
-static void free_excl_cntrs(int cpu)
+static void free_excl_cntrs(struct cpu_hw_events *cpuc)
 {
-	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 	struct intel_excl_cntrs *c;
 
 	c = cpuc->excl_cntrs;
@@ -3577,9 +3580,8 @@ static void intel_pmu_cpu_dying(int cpu)
 		disable_counter_freeze();
 }
 
-static void intel_pmu_cpu_dead(int cpu)
+void intel_cpuc_finish(struct cpu_hw_events *cpuc)
 {
-	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 	struct intel_shared_regs *pc;
 
 	pc = cpuc->shared_regs;
@@ -3589,7 +3591,12 @@ static void intel_pmu_cpu_dead(int cpu)
 		cpuc->shared_regs = NULL;
 	}
 
-	free_excl_cntrs(cpu);
+	free_excl_cntrs(cpuc);
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
+	intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu));
 }
 
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -4758,7 +4765,7 @@ static __init int fixup_ht_bug(void)
 	hardlockup_detector_perf_restart();
 
 	for_each_online_cpu(c)
-		free_excl_cntrs(c);
+		free_excl_cntrs(&per_cpu(cpu_hw_events, c));
 
 	cpus_read_unlock();
 	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -890,7 +890,8 @@ struct event_constraint *
 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event);
 
-struct intel_shared_regs *allocate_shared_regs(int cpu);
+extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
+extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
 
 int intel_pmu_init(void);
 
@@ -1026,9 +1027,13 @@ static inline int intel_pmu_init(void)
 	return 0;
 }
 
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
+{
+	return 0;
+}
+
+static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc)
 {
-	return NULL;
 }
 
 static inline int is_ht_workaround_enabled(void)
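
For illustration only (not part of the patch), a minimal sketch of the
init/free pairing this creates, mirroring allocate_fake_cpuc() and
free_fake_cpuc(); intel_cpuc_finish() is safe on a partially prepared cpuc
because the failure paths of intel_cpuc_prepare() leave unallocated pointers
NULL:

	struct cpu_hw_events *cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);

	if (cpuc && intel_cpuc_prepare(cpuc, cpu)) {
		/* prepare failed; finish is a no-op for anything not allocated */
		intel_cpuc_finish(cpuc);
		kfree(cpuc);
		cpuc = NULL;
	}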


* [MODERATED] [PATCH 2/4] walnut 2
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 1/4] walnut 1 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 3/4] walnut 3 Peter Zijlstra
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
  To: speck

Move the dynamic constraint setup out of intel_get_excl_constraints() into
its own helper, such that we can re-use it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/intel/core.c |   51 +++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2770,6 +2770,35 @@ intel_stop_scheduling(struct cpu_hw_even
 }
 
 static struct event_constraint *
+dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
+{
+	WARN_ON_ONCE(!cpuc->constraint_list);
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+		struct event_constraint *cx;
+
+		/*
+		 * grab pre-allocated constraint entry
+		 */
+		cx = &cpuc->constraint_list[idx];
+
+		/*
+		 * initialize dynamic constraint
+		 * with static constraint
+		 */
+		*cx = *c;
+
+		/*
+		 * mark constraint as dynamic
+		 */
+		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+		c = cx;
+	}
+
+	return c;
+}
+
+static struct event_constraint *
 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 			   int idx, struct event_constraint *c)
 {
@@ -2799,27 +2828,7 @@ intel_get_excl_constraints(struct cpu_hw
 	 * only needed when constraint has not yet
 	 * been cloned (marked dynamic)
 	 */
-	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-		struct event_constraint *cx;
-
-		/*
-		 * grab pre-allocated constraint entry
-		 */
-		cx = &cpuc->constraint_list[idx];
-
-		/*
-		 * initialize dynamic constraint
-		 * with static constraint
-		 */
-		*cx = *c;
-
-		/*
-		 * mark constraint as dynamic, so we
-		 * can free it later on
-		 */
-		cx->flags |= PERF_X86_EVENT_DYNAMIC;
-		c = cx;
-	}
+	c = dyn_constraint(cpuc, c, idx);
 
 	/*
 	 * From here on, the constraint is dynamic.
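
For illustration only (not part of the patch), a sketch of how another call
site could reuse the new helper to knock a counter out of an event's
constraint; the helper name is made up here, and the concrete reuse (dropping
PMC3) appears later in this series:

	static struct event_constraint *
	drop_pmc3(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
	{
		c = dyn_constraint(cpuc, c, idx);	/* private, writable copy */
		c->idxmsk64 &= ~(1ULL << 3);		/* PMC3 no longer allowed */
		c->weight = hweight64(c->idxmsk64);	/* recount usable counters */
		return c;
	}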


* [MODERATED] [PATCH 3/4] walnut 3
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 1/4] walnut 1 Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 2/4] walnut 2 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
  2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
  To: speck

Skylake systems will receive a microcode update to address a TSX
erratum. This microcode will (by default) clobber PMC3 when TSX
instructions are (speculatively or not) executed.

It also provides an MSR to cause all TSX transactions to abort and
preserve PMC3.

Add the CPUID enumeration and MSR definition.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/cpufeatures.h |    1 +
 arch/x86/include/asm/msr-index.h   |    6 ++++++
 2 files changed, 7 insertions(+)

--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -666,6 +666,12 @@
 
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
 
+
+#define MSR_TSX_FORCE_ABORT		0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT	0
+#define MSR_TFA_RTM_FORCE_ABORT		BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX		0x00000180
 #define MSR_IA32_MCG_EBX		0x00000181
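
For illustration only, a minimal sketch of how the new definitions are meant
to be consumed; the next patch wraps this in a small helper that shadows the
MSR value to avoid redundant writes:

	if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
		/* Force all RTM transactions to abort; PMC3 stays usable for perf. */
		wrmsrl(MSR_TSX_FORCE_ABORT, MSR_TFA_RTM_FORCE_ABORT);
	}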


* [MODERATED] [PATCH 4/4] walnut 4
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
                   ` (2 preceding siblings ...)
  2019-02-19 15:58 ` [MODERATED] [PATCH 3/4] walnut 3 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
  2019-02-19 16:10   ` [MODERATED] " Peter Zijlstra
                     ` (2 more replies)
  2019-02-20 18:10 ` [MODERATED] Re: [PATCH 0/4] walnut 0 mark gross
  2019-03-01 18:59 ` mark gross
  5 siblings, 3 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
  To: speck

On Skylake systems that have TSX Force Abort (TFA), provide a system-wide
knob to allow (by default) or disallow the use of TFA.

When TFA is allowed (default), the MSR gets set when PMC3 gets
scheduled, and cleared when, after scheduling, PMC3 is unused.

When TFA is not allowed, clear PMC3 from all constraints such that it
will not get used.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/intel/core.c |   74 +++++++++++++++++++++++++++++++++++++++++--
 arch/x86/events/perf_event.h |    6 +++
 2 files changed, 77 insertions(+), 3 deletions(-)

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2000,6 +2000,39 @@ static void intel_pmu_nhm_enable_all(int
 	intel_pmu_enable_all(added);
 }
 
+static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
+{
+	u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
+
+	if (cpuc->tfa_shadow != val) {
+		cpuc->tfa_shadow = val;
+		wrmsrl(MSR_TSX_FORCE_ABORT, val);
+	}
+}
+
+static void intel_skl_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+	/*
+	 * We're going to use PMC3, make sure TFA is set before we touch it.
+	 */
+	if (cntr == 3 && !cpuc->is_fake)
+		intel_set_tfa(cpuc, true);
+}
+
+static void intel_skl_pmu_enable_all(int added)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * If we find PMC3 is no longer used when we enable the PMU, we can
+	 * clear TFA.
+	 */
+	if (!test_bit(3, cpuc->active_mask))
+		intel_set_tfa(cpuc, false);
+
+	intel_pmu_enable_all(added);
+}
+
 static void enable_counter_freeze(void)
 {
 	update_debugctlmsr(get_debugctlmsr() |
@@ -3366,6 +3399,26 @@ glp_get_event_constraints(struct cpu_hw_
 	return c;
 }
 
+static bool allow_tsx_force_abort = true;
+
+static struct event_constraint *
+skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
+
+	/*
+	 * Without TFA we must not use PMC3.
+	 */
+	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
+		c = dyn_constraint(cpuc, c, idx);
+		c->idxmsk64 &= ~(1ULL << 3);
+		c->weight = c->idxmsk64 - 1;
+	}
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3460,13 +3513,15 @@ int intel_cpuc_prepare(struct cpu_hw_eve
 			goto err;
 	}
 
-	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
 		if (!cpuc->constraint_list)
 			goto err_shared_regs;
+	}
 
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
 		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
 		if (!cpuc->excl_cntrs)
 			goto err_constraint_list;
@@ -3576,9 +3631,10 @@ static void free_excl_cntrs(struct cpu_h
 		if (c->core_id == -1 || --c->refcnt == 0)
 			kfree(c);
 		cpuc->excl_cntrs = NULL;
-		kfree(cpuc->constraint_list);
-		cpuc->constraint_list = NULL;
 	}
+
+	kfree(cpuc->constraint_list);
+	cpuc->constraint_list = NULL;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
        NULL
 };
 
+DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+
 static struct attribute *intel_pmu_attrs[] = {
 	&dev_attr_freeze_on_smi.attr,
+	NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
 	NULL,
 };
 
@@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
 		tsx_attr = hsw_tsx_events_attrs;
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+
+		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+			x86_pmu.flags |= PMU_FL_TFA;
+			x86_pmu.get_event_constraints = skl_get_event_constraints;
+			x86_pmu.enable_all = intel_skl_pmu_enable_all;
+			x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
+			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+		}
+
 		pr_cont("Skylake events, ");
 		name = "skylake";
 		break;
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -243,6 +243,11 @@ struct cpu_hw_events {
 	int excl_thread_id; /* 0 or 1 */
 
 	/*
+	 * SKL TSX_FORCE_ABORT shadow
+	 */
+	u64				tfa_shadow;
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb			*amd_nb;
@@ -682,6 +687,7 @@ do {									\
 #define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements  */
 #define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 #define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
+#define PMU_FL_TFA		0x20 /* deal with TSX force abort */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
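
For illustration only, a userspace sketch for flipping the new knob; the sysfs
path is the one the testers report further down the thread, and the helper
name is made up. Writing 0 gives up PMC3 so RTM transactions keep working; the
default of 1 lets perf use PMC3 at the cost of forced aborts while PMC3 is
scheduled:

	#include <fcntl.h>
	#include <unistd.h>

	/* Returns 0 on success, -1 on failure. */
	static int set_allow_tfa(int allow)
	{
		int fd = open("/sys/devices/cpu/allow_tsx_force_abort", O_WRONLY);
		char val = allow ? '1' : '0';

		if (fd < 0)
			return -1;
		if (write(fd, &val, 1) != 1) {
			close(fd);
			return -1;
		}
		return close(fd);
	}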


* [MODERATED] Re: [PATCH 4/4] walnut 4
  2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
@ 2019-02-19 16:10   ` Peter Zijlstra
  2019-02-20 22:31     ` Nelson D'Souza
  2019-02-19 18:49   ` [MODERATED] Linus Torvalds
  2019-03-07 13:56   ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
  2 siblings, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 16:10 UTC (permalink / raw)
  To: speck

On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> +	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
> +		c = dyn_constraint(cpuc, c, idx);
> +		c->idxmsk64 &= ~(1ULL << 3);
> +		c->weight = c->idxmsk64 - 1;

Damn; that should be:

		c->weight--;
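
For context: weight is the number of usable counters, i.e. the population
count of idxmsk64, not a value derived from the mask itself. An illustrative
way to keep the two in sync after clearing bit 3:

		c->idxmsk64 &= ~(1ULL << 3);
		c->weight = hweight64(c->idxmsk64);	/* recount usable counters */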


* [MODERATED] Re:
  2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
  2019-02-19 16:10   ` [MODERATED] " Peter Zijlstra
@ 2019-02-19 18:49   ` Linus Torvalds
  2019-02-20 14:37     ` Peter Zijlstra
  2019-03-07 13:56   ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
  2 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2019-02-19 18:49 UTC (permalink / raw)
  To: speck

On Tue, Feb 19, 2019 at 8:03 AM speck for Peter Zijlstra
<speck@linutronix.de> wrote:
>
> When TFA is allowed (default), the MSR gets set when PMC3 gets
> scheduled, and cleared when, after scheduling, PMC3 is unused.
>
> When TFA is not allowed, clear PMC3 from all constraints such that it
> will not get used.

I wonder if somebody wants an "abort always" mode for testing?

IOW, have a mode where even if PMC3 is not used, set the TSX_FORCE_ABORT bit.

That way people can actually verify that their code works even if it
sees TSX enabled and uses it (but it always aborts). Basically
verifying their fallback code.

            Linus


* [MODERATED] Re:
  2019-02-19 18:49   ` [MODERATED] Linus Torvalds
@ 2019-02-20 14:37     ` Peter Zijlstra
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-20 14:37 UTC (permalink / raw)
  To: speck

On Tue, Feb 19, 2019 at 10:49:41AM -0800, speck for Linus Torvalds wrote:
> On Tue, Feb 19, 2019 at 8:03 AM speck for Peter Zijlstra
> <speck@linutronix.de> wrote:
> >
> > When TFA is allowed (default), the MSR gets set when PMC3 gets
> > scheduled, and cleared when, after scheduling, PMC3 is unused.
> >
> > When TFA is not allowed, clear PMC3 from all constraints such that it
> > will not get used.
> 
> I wonder if somebody wants an "abort always" mode for testing?
> 
> IOW, have a mode where even if PMC3 is not used, set the TSX_FORCE_ABORT bit.
> 
> That way people can actually verify that their code works even if it
> sees TSX enabled and uses it (but it always aborts). Basically
> verifying their fallback code.

Something like the below would make that happen. Not sure it's worth it
to backport this to everything though.


--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2000,6 +2000,8 @@ static void intel_pmu_nhm_enable_all(int
 	intel_pmu_enable_all(added);
 }
 
+static int allow_tsx_force_abort = 1;
+
 static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
 {
 	u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
@@ -2027,7 +2029,7 @@ static void intel_skl_pmu_enable_all(int
 	 * If we find PMC3 is no longer used when we enable the PMU, we can
 	 * clear TFA.
 	 */
-	if (!test_bit(3, cpuc->active_mask))
+	if (!test_bit(3, cpuc->active_mask) && allow_tsx_force_abort < 2)
 		intel_set_tfa(cpuc, false);
 
 	intel_pmu_enable_all(added);
@@ -3399,8 +3401,6 @@ glp_get_event_constraints(struct cpu_hw_
 	return c;
 }
 
-static bool allow_tsx_force_abort = true;
-
 static struct event_constraint *
 skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -4115,7 +4115,7 @@ static ssize_t freeze_on_smi_show(struct
 	return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
 }
 
-static DEFINE_MUTEX(freeze_on_smi_mutex);
+static DEFINE_MUTEX(sysfs_mutex);
 
 static ssize_t freeze_on_smi_store(struct device *cdev,
 				   struct device_attribute *attr,
@@ -4131,7 +4131,7 @@ static ssize_t freeze_on_smi_store(struc
 	if (val > 1)
 		return -EINVAL;
 
-	mutex_lock(&freeze_on_smi_mutex);
+	mutex_lock(&sysfs_mutex);
 
 	if (x86_pmu.attr_freeze_on_smi == val)
 		goto done;
@@ -4142,7 +4142,7 @@ static ssize_t freeze_on_smi_store(struc
 	on_each_cpu(flip_smm_bit, &val, 1);
 	put_online_cpus();
 done:
-	mutex_unlock(&freeze_on_smi_mutex);
+	mutex_unlock(&sysfs_mutex);
 
 	return count;
 }
@@ -4179,11 +4179,59 @@ static struct attribute *intel_pmu_caps_
        NULL
 };
 
-DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+static ssize_t allow_tsx_force_abort_show(struct device *cdev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	return sprintf(buf, "%d\n", allow_tsx_force_abort);
+}
+
+static void flip_tfa_bit(void *data)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int val = *(int *)data;
+	bool set;
+
+	lockdep_assert_irqs_disabled();
+
+	if (val == 2)
+		set = true;
+	else
+		set = test_bit(3, cpuc->active_mask);
+
+	intel_set_tfa(cpuc, set);
+}
+
+static ssize_t allow_tsx_force_abort_store(struct device *cdev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	ssize_t ret;
+	int val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if ((unsigned int)val > 2)
+		return -EINVAL;
+
+	mutex_lock(&sysfs_mutex);
+	if (allow_tsx_force_abort != val) {
+		allow_tsx_force_abort = val;
+		cpus_read_lock();
+		on_each_cpu(flip_tfa_bit, &val, 1);
+		cpus_read_unlock();
+	}
+	mutex_unlock(&sysfs_mutex);
+	return count;
+}
+
+DEVICE_ATTR_RW(allow_tsx_force_abort);
 
 static struct attribute *intel_pmu_attrs[] = {
 	&dev_attr_freeze_on_smi.attr,
-	NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
+	NULL, /* &dev_attr_allow_tsx_force_abort.attr */
 	NULL,
 };
 
@@ -4688,7 +4736,7 @@ __init int intel_pmu_init(void)
 			x86_pmu.get_event_constraints = skl_get_event_constraints;
 			x86_pmu.enable_all = intel_skl_pmu_enable_all;
 			x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
-			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
 		}
 
 		pr_cont("Skylake events, ");


* [MODERATED] Re: [PATCH 0/4] walnut 0
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
                   ` (3 preceding siblings ...)
  2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
@ 2019-02-20 18:10 ` mark gross
  2019-02-22 19:20   ` Nelson D'Souza
  2019-03-01 18:59 ` mark gross
  5 siblings, 1 reply; 17+ messages in thread
From: mark gross @ 2019-02-20 18:10 UTC (permalink / raw)
  To: speck

On Tue, Feb 19, 2019 at 04:58:07PM +0100, speck for Peter Zijlstra wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> Subject: [PATCH 0/4] walnut
                       ^cute^
> 
> nicely split


* [MODERATED] Re: [PATCH 4/4] walnut 4
  2019-02-19 16:10   ` [MODERATED] " Peter Zijlstra
@ 2019-02-20 22:31     ` Nelson D'Souza
  2019-02-22 22:58       ` [MODERATED] Fwd: " Nelson D'Souza
  0 siblings, 1 reply; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-20 22:31 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 554 bytes --]

Hi Peter,

I have attached the initial test logs from your latest patchset. 

The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
 
Thanks,
Nelson

On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> +	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> +		c = dyn_constraint(cpuc, c, idx);
>> +		c->idxmsk64 &= ~(1ULL << 3);
>> +		c->weight = c->idxmsk64 - 1;
> 
> Damn; that should be:
> 
> 		c->weight--;
> 

[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 395 bytes --]

# started on Thu Feb 21 15:23:52 2019


 Performance counter stats for 'system wide':

            89,033      cache-misses              #   21.591 % of all cache refs    
           412,353      cache-references                                            
     4,630,472,209      branches                                                    

       1.005154418 seconds time elapsed


[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 481 bytes --]

# started on Thu Feb 21 15:24:10 2019


 Performance counter stats for 'system wide':

     <not counted>      cache-misses                                                
     <not counted>      cache-references                                            
     <not counted>      branches                                                    
   <not supported>      branch-misses                                               

       1.005054423 seconds time elapsed


[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 395 bytes --]

# started on Thu Feb 21 15:21:37 2019


 Performance counter stats for 'system wide':

            58,484      cache-misses              #   14.165 % of all cache refs    
           412,875      cache-references                                            
     4,627,163,916      branches                                                    

       1.005123883 seconds time elapsed


[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 481 bytes --]

# started on Thu Feb 21 15:21:04 2019


 Performance counter stats for 'system wide':

            71,146      cache-misses              #   11.203 % of all cache refs    
           635,039      cache-references                                            
     4,616,150,556      branches                                                    
            84,552      branch-misses             #    0.00% of all branches        

       1.005141562 seconds time elapsed



* [MODERATED] Re: [PATCH 0/4] walnut 0
  2019-02-20 18:10 ` [MODERATED] Re: [PATCH 0/4] walnut 0 mark gross
@ 2019-02-22 19:20   ` Nelson D'Souza
  0 siblings, 0 replies; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-22 19:20 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 305 bytes --]

Hope you are enjoying your new camera.

On 2/20/19 10:10 AM, speck for mark gross wrote:
> On Tue, Feb 19, 2019 at 04:58:07PM +0100, speck for Peter Zijlstra wrote:
>> From: Peter Zijlstra <peterz@infradead.org>
>> Subject: [PATCH 0/4] walnut
>                        ^cute^
>>
>> nicely split



* [MODERATED] Fwd: [PATCH 4/4] walnut 4
  2019-02-20 22:31     ` Nelson D'Souza
@ 2019-02-22 22:58       ` Nelson D'Souza
  2019-02-22 23:19         ` Nelson D'Souza
  0 siblings, 1 reply; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-22 22:58 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 558 bytes --]


Hi Peter,

I have attached the initial test logs from your latest patchset. 

The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
 
Thanks,
Nelson

On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> +	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> +		c = dyn_constraint(cpuc, c, idx);
>> +		c->idxmsk64 &= ~(1ULL << 3);
>> +		c->weight = c->idxmsk64 - 1;
> 
> Damn; that should be:
> 
> 		c->weight--;
> 


[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 397 bytes --]

# started on Thu Feb 21 15:23:52 2019


 Performance counter stats for 'system wide':

            89,033      cache-misses              #   21.591 % of all cache refs    
           412,353      cache-references                                            
     4,630,472,209      branches                                                    

       1.005154418 seconds time elapsed



[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 483 bytes --]

# started on Thu Feb 21 15:24:10 2019


 Performance counter stats for 'system wide':

     <not counted>      cache-misses                                                
     <not counted>      cache-references                                            
     <not counted>      branches                                                    
   <not supported>      branch-misses                                               

       1.005054423 seconds time elapsed



[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 397 bytes --]

# started on Thu Feb 21 15:21:37 2019


 Performance counter stats for 'system wide':

            58,484      cache-misses              #   14.165 % of all cache refs    
           412,875      cache-references                                            
     4,627,163,916      branches                                                    

       1.005123883 seconds time elapsed



[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 483 bytes --]

# started on Thu Feb 21 15:21:04 2019


 Performance counter stats for 'system wide':

            71,146      cache-misses              #   11.203 % of all cache refs    
           635,039      cache-references                                            
     4,616,150,556      branches                                                    
            84,552      branch-misses             #    0.00% of all branches        

       1.005141562 seconds time elapsed




* [MODERATED] Fwd: [PATCH 4/4] walnut 4
  2019-02-22 22:58       ` [MODERATED] Fwd: " Nelson D'Souza
@ 2019-02-22 23:19         ` Nelson D'Souza
  0 siblings, 0 replies; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-22 23:19 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 562 bytes --]



Hi Peter,

I have attached the initial test logs from your latest patchset. 

The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
 
Thanks,
Nelson

On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> +	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> +		c = dyn_constraint(cpuc, c, idx);
>> +		c->idxmsk64 &= ~(1ULL << 3);
>> +		c->weight = c->idxmsk64 - 1;
> 
> Damn; that should be:
> 
> 		c->weight--;
> 



[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 399 bytes --]

# started on Thu Feb 21 15:23:52 2019


 Performance counter stats for 'system wide':

            89,033      cache-misses              #   21.591 % of all cache refs    
           412,353      cache-references                                            
     4,630,472,209      branches                                                    

       1.005154418 seconds time elapsed




[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 485 bytes --]

# started on Thu Feb 21 15:24:10 2019


 Performance counter stats for 'system wide':

     <not counted>      cache-misses                                                
     <not counted>      cache-references                                            
     <not counted>      branches                                                    
   <not supported>      branch-misses                                               

       1.005054423 seconds time elapsed




[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 399 bytes --]

# started on Thu Feb 21 15:21:37 2019


 Performance counter stats for 'system wide':

            58,484      cache-misses              #   14.165 % of all cache refs    
           412,875      cache-references                                            
     4,627,163,916      branches                                                    

       1.005123883 seconds time elapsed




[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 485 bytes --]

# started on Thu Feb 21 15:21:04 2019


 Performance counter stats for 'system wide':

            71,146      cache-misses              #   11.203 % of all cache refs    
           635,039      cache-references                                            
     4,616,150,556      branches                                                    
            84,552      branch-misses             #    0.00% of all branches        

       1.005141562 seconds time elapsed





* [MODERATED] Re: [PATCH 0/4] walnut 0
  2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
                   ` (4 preceding siblings ...)
  2019-02-20 18:10 ` [MODERATED] Re: [PATCH 0/4] walnut 0 mark gross
@ 2019-03-01 18:59 ` mark gross
  5 siblings, 0 replies; 17+ messages in thread
From: mark gross @ 2019-03-01 18:59 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 812 bytes --]

On Tue, Feb 19, 2019 at 04:58:07PM +0100, speck for Peter Zijlstra wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> Subject: [PATCH 0/4] walnut
> 
> nicely split
Peter, we have been testing your patch set named after a nut, posted Feb 19.

We tested it on bare metal and it looks good.  There was some worry that a KVM
guest would have issues with multiplexing if the host had your patch and
allow_tsx_force_abort=0.

What we saw was that the 4th counter in the guest showed zero, which IMO seems
ok.

Attached are the tests that Nelson ran for both host and KVM guest.  The
difference in behavior between host and guest with 4 counters when
allow_tsx_force_abort=0 looks minor to me.

We are eager to see a more final version of this patchset so we can be ready
for the public disclosure coming up March 12. 

Thanks!  --mark

[-- Attachment #2: patchedhost.txt --]
[-- Type: text/plain, Size: 2859 bytes --]

/* ALLOW_TSX_FORCE_ABORT = 0 */


// Four Counters

root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 155

 Performance counter stats for './rtm-success-long':

               154      cs
     <not counted>      cache-misses
     <not counted>      cache-references
     <not counted>      branches
   <not supported>      branch-misses

       6.749371782 seconds time elapsed

       4.458202000 seconds user
       0.000000000 seconds sys


root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 193


// Three Counters
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 171

 Performance counter stats for './rtm-success-long':

               155      cs
             5,332      cache-misses              #   16.190 % of all cache refs
            32,934      cache-references
       201,012,996      branches

       6.808987753 seconds time elapsed

       4.466809000 seconds user
       0.000000000 seconds sys

root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 166


/* ALLOW_TSX_FORCE_ABORT = 1 */

//Four Counters

root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 0.00%, aborted 100000000
 
Performance counter stats for './rtm-success-long':
 
               145  	cs
            42,505      cache-misses              #   48.776 % of all cache refs
            87,144  	cache-references
       301,075,199  	branches
       100,040,329      branch-misses             #   33.23% of all branches
 
       6.496993416 seconds time elapsed
 
       4.347726000 seconds user
       0.000000000 seconds sys
	   
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 589

 
 
//Three Counters

root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 162

 Performance counter stats for './rtm-success-long':

               145      cs
             5,488      cache-misses              #   17.551 % of all cache refs
            31,268      cache-references
       201,016,636      branches

       6.593972485 seconds time elapsed

       4.462666000 seconds user


       0.000000000 seconds sys


root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 180




[-- Attachment #3: patchedkvm_guest.txt --]
[-- Type: text/plain, Size: 2992 bytes --]

/* ALLOW_TSX_FORCE_ABORT = 0 */


// Four Counters

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 919

 Performance counter stats for './rtm-success-long':

                 0      cs:u
             3,318      cache-misses:u            #   44.753 % of all cache refs
             7,414      cache-references:u
       200,038,222      branches:u
                 0      branch-misses:u           #    0.00% of all branches

      16.023427318 seconds time elapsed

       6.446820000 seconds user
       2.420087000 seconds sys


kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 701


// Three Counters

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 830

 Performance counter stats for './rtm-success-long':

                 0      cs:u
             3,311      cache-misses:u            #   47.354 % of all cache refs
             6,992      cache-references:u
       200,038,067      branches:u

      13.679651620 seconds time elapsed

       6.123687000 seconds user
       1.926116000 seconds sys
	   

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 704


/* ALLOW_TSX_FORCE_ABORT = 1 */

// Four Counters

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 0.00%, aborted 100000000

 Performance counter stats for './rtm-success-long':

                 0      cs:u
             2,742      cache-misses:u            #   29.685 % of all cache refs
             9,237      cache-references:u
       300,037,068      branches:u
       100,003,687      branch-misses:u           #   33.33% of all branches

      17.318943108 seconds time elapsed

       6.415269000 seconds user
       2.753311000 seconds sys

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 1501


//Three Counters

kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 772

 Performance counter stats for './rtm-success-long':

                 0      cs:u
             2,892      cache-misses:u            #   40.228 % of all cache refs
             7,189      cache-references:u
       200,038,116      branches:u

      17.638868052 seconds time elapsed

       6.710811000 seconds user
       2.761211000 seconds sys


kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 830










* [MODERATED] Re: [PATCH 4/4] walnut 4
  2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
  2019-02-19 16:10   ` [MODERATED] " Peter Zijlstra
  2019-02-19 18:49   ` [MODERATED] Linus Torvalds
@ 2019-03-07 13:56   ` Greg KH
  2019-03-07 14:34     ` Peter Zijlstra
  2019-03-07 15:09     ` mark gross
  2 siblings, 2 replies; 17+ messages in thread
From: Greg KH @ 2019-03-07 13:56 UTC (permalink / raw)
  To: speck

I was working on the backport to 4.9.y and ran across merge issues with
this patch.  I understand the constraints you are working under here,
but please, let me do a tiny rant about how you all are doing
"extra work" with regards to the creation of sysfs files:

On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> @@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
>         NULL
>  };
>  
> +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
> +
>  static struct attribute *intel_pmu_attrs[] = {
>  	&dev_attr_freeze_on_smi.attr,
> +	NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
>  	NULL,
>  };
>  
> @@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
>  		tsx_attr = hsw_tsx_events_attrs;
>  		intel_pmu_pebs_data_source_skl(
>  			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
> +
> +		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
> +			x86_pmu.flags |= PMU_FL_TFA;
> +			x86_pmu.get_event_constraints = skl_get_event_constraints;
> +			x86_pmu.enable_all = intel_skl_pmu_enable_all;
> +			x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
> +			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
> +		}
> +
>  		pr_cont("Skylake events, ");
>  		name = "skylake";
>  		break;

sysfs files have the ability to be "shown" or not, on their own.  There
is an "is_visible()" callback for every sysfs group.

This allows you to set a whole bunch of groups to a device, and then,
when the device is created, the kobject core will call back to you and
ask "should I show this file?"  At that point in time, you can do your
check for "boot_cpu_has(...)" and the like to see if you really should
be showing the file or not.

This saves you all the horrid mess of the merge_attr() function, trying
to overload NULL pointers here in this attribute list, and other such
hacks.

I guess no one stopped to think why _only_ the perf cpu code has a
function like merge_attr(), and how the rest of the kernel could get
away without needing a hack like that :(

Anyway, rant over.  I'll see if I can squeeze this into the 4.9.y tree as
it was before the major revamp of the perf cpu sysfs files.

In the future, someone should just switch all of this to attribute
groups, like the rest of the kernel uses, so no such craziness is
needed.

thanks,

greg k-h
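
For illustration only, a sketch of the attribute_group / .is_visible() approach
Greg describes, applied to the TFA file from patch 4/4; the group and callback
names are made up for the example:

	static umode_t tfa_attr_is_visible(struct kobject *kobj,
					   struct attribute *attr, int i)
	{
		/* Only expose the file when the CPU enumerates the TFA feature. */
		return boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) ? attr->mode : 0;
	}

	static struct attribute *intel_tfa_attrs[] = {
		&dev_attr_allow_tsx_force_abort.attr.attr,
		NULL,
	};

	static const struct attribute_group intel_tfa_attr_group = {
		.attrs		= intel_tfa_attrs,
		.is_visible	= tfa_attr_is_visible,
	};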


* [MODERATED] Re: [PATCH 4/4] walnut 4
  2019-03-07 13:56   ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
@ 2019-03-07 14:34     ` Peter Zijlstra
  2019-03-07 15:09     ` mark gross
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-03-07 14:34 UTC (permalink / raw)
  To: speck

On Thu, Mar 07, 2019 at 02:56:31PM +0100, speck for Greg KH wrote:
> sysfs files have the ability to be "shown" or not, on their own.  There
> is an "is_visible()" callback for every sysfs group.
> 
> This allows you to set a whole bunch of groups to a device, and then,
> when the device is created, the kobject core will call back to you and
> ask "should I show this file?"  At that point in time, you can do your
> check for "boot_cpu_has(...)" and the like to see if you really should
> be showing the file or not.
> 
> This saves you all the horrid mess of the merge_attr() function, trying
> to overload NULL pointers here in this attribute list, and other such
> hacks.
> 
> I guess no one stopped to think why _only_ the perf cpu code has a
> function like merge_attr(), and how the rest of the kernel could get
> away without needing a hack like that :(
> 
> Anyway, rant over.  I'll see if I can squeeze this into the 4.9.y tree as
> it was before the major revamp of the perf cpu sysfs files.
> 
> In the future, someone should just switch all of this to attribute
> groups, like the rest of the kernel uses, so no such craziness is
> needed.

Right; that would be a really nice cleanup. I suppose nobody who touched
all that (very much including me) knew about that is_visible() thing.

I'll try and not forget and make someone clean that up.


* [MODERATED] Re: [PATCH 4/4] walnut 4
  2019-03-07 13:56   ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
  2019-03-07 14:34     ` Peter Zijlstra
@ 2019-03-07 15:09     ` mark gross
  1 sibling, 0 replies; 17+ messages in thread
From: mark gross @ 2019-03-07 15:09 UTC (permalink / raw)
  To: speck

On Thu, Mar 07, 2019 at 02:56:31PM +0100, speck for Greg KH wrote:
> I was working on the backport to 4.9.y and ran across merge issues with
> this patch.  I understand the constraints on which you are working with
> here, but please, let me do a tiny rant about how you all are doing
> "extra work" with regards to the creation of sysfs files:
> 
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> > @@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
> >         NULL
> >  };
> >  
> > +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
> > +
> >  static struct attribute *intel_pmu_attrs[] = {
> >  	&dev_attr_freeze_on_smi.attr,
> > +	NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
> >  	NULL,
> >  };
> >  
> > @@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
> >  		tsx_attr = hsw_tsx_events_attrs;
> >  		intel_pmu_pebs_data_source_skl(
> >  			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
> > +
> > +		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
> > +			x86_pmu.flags |= PMU_FL_TFA;
> > +			x86_pmu.get_event_constraints = skl_get_event_constraints;
> > +			x86_pmu.enable_all = intel_skl_pmu_enable_all;
> > +			x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
> > +			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
> > +		}
> > +
> >  		pr_cont("Skylake events, ");
> >  		name = "skylake";
> >  		break;
> 
> sysfs files have the ability to be "shown" or not, on their own.  There
> is an "is_visible()" callback for every sysfs group.
> 
> This allows you to set a whole bunch of groups to a device, and then,
> when the device is created, the kobject core will call back to you and
> ask "should I show this file?"  At that point in time, you can do your
> check for "boot_cpu_has(...)" and the like to see if you really should
> be showing the file or not.
> 
> This saves you all the horrid mess of the merge_attr() function, trying
> to overload NULL pointers here in this attribute list, and other such
> hacks.
> 
> I guess no one stopped to think why _only_ the perf cpu code has a
> function like merge_attr(), and how the rest of the kernel could get
> away without needing a hack like that :(
> 
> Anyway, rant over.  I'll see if I can squeeze this into the 4.9.y tree as
> it was before the major revamp of the perf cpu sysfs files.
> 
> In the future, someone should just switch all of this to attribute
> groups, like the rest of the kernel uses, so no such craziness is
> needed.

If peterz doesn't beat me to it, I want to mentor one of the more junior
engineers I'm working with to do this.  (And if I can't convince one of them to
do this with my help, then I'll do it by the end of next week.)

But it's fine if someone beats me to it too.  There will be other opportunities
;)

--mark

