All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: speck@linutronix.de
Subject: [MODERATED] [RFC][PATCH] performance walnuts
Date: Fri, 8 Feb 2019 11:53:18 +0100	[thread overview]
Message-ID: <20190208105318.GE32534@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20190208093950.GD32534@hirez.programming.kicks-ass.net>

On Fri, Feb 08, 2019 at 10:39:50AM +0100, Peter Zijlstra wrote:
> Ah, I think I found a way to avoid having to rely on this. Let me try.

Something like so. Can someone with access to a relevant machine test
this?

If it works, I'll write a Changelog and this'll be it.

---
 arch/x86/events/intel/core.c       | 127 ++++++++++++++++++++++++++++++-------
 arch/x86/events/perf_event.h       |   6 ++
 arch/x86/include/asm/cpufeatures.h |   1 +
 arch/x86/include/asm/msr-index.h   |   6 ++
 4 files changed, 116 insertions(+), 24 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index e0232bdb7aff..8352c2647a1b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1999,6 +1999,39 @@ static void intel_pmu_nhm_enable_all(int added)
 	intel_pmu_enable_all(added);
 }
 
+static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
+{
+	u64 val = MSR_TFA_RTM_FORCE_ABORT * on;
+
+	if (cpuc->tfa_shadow != val) {
+		cpuc->tfa_shadow = val;
+		wrmsrl(MSR_TSX_FORCE_ABORT, val);
+	}
+}
+
+static void intel_skl_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+	/*
+	 * We're going to use PMC3, make sure TFA is set before we touch it.
+	 */
+	if (cntr == 3)
+		intel_set_tfa(cpuc, true);
+}
+
+static void intel_skl_pmu_enable_all(int added)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * If we find PMC3 is no longer used when we enable the PMU, we can
+	 * clear TFA.
+	 */
+	if (!test_bit(3, cpuc->active_mask))
+		intel_set_tfa(cpuc, false);
+
+	intel_pmu_enable_all(added);
+}
+
 static void enable_counter_freeze(void)
 {
 	update_debugctlmsr(get_debugctlmsr() |
@@ -2768,6 +2801,35 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 	raw_spin_unlock(&excl_cntrs->lock);
 }
 
+static struct event_constraint *
+dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
+{
+	WARN_ON_ONCE(!cpuc->constraint_list);
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+		struct event_constraint *cx;
+
+		/*
+		 * grab pre-allocated constraint entry
+		 */
+		cx = &cpuc->constraint_list[idx];
+
+		/*
+		 * initialize dynamic constraint
+		 * with static constraint
+		 */
+		*cx = *c;
+
+		/*
+		 * mark constraint as dynamic
+		 */
+		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+		c = cx;
+	}
+
+	return c;
+}
+
 static struct event_constraint *
 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 			   int idx, struct event_constraint *c)
@@ -2798,27 +2860,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * only needed when constraint has not yet
 	 * been cloned (marked dynamic)
 	 */
-	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-		struct event_constraint *cx;
-
-		/*
-		 * grab pre-allocated constraint entry
-		 */
-		cx = &cpuc->constraint_list[idx];
-
-		/*
-		 * initialize dynamic constraint
-		 * with static constraint
-		 */
-		*cx = *c;
-
-		/*
-		 * mark constraint as dynamic, so we
-		 * can free it later on
-		 */
-		cx->flags |= PERF_X86_EVENT_DYNAMIC;
-		c = cx;
-	}
+	c = dyn_constraint(cpuc, c, idx);
 
 	/*
 	 * From here on, the constraint is dynamic.
@@ -3345,6 +3387,26 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static bool allow_tsx_force_abort = true;
+
+static struct event_constraint *
+skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
+
+	/*
+	 * Without TFA we must not use PMC3.
+	 */
+	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
+		c = dyn_constraint(cpuc, c, idx);
+		c->idxmsk64 &= ~(1ULL << 3);
+		c->weight = hweight64(c->idxmsk64);
+	}
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3440,13 +3502,15 @@ static int intel_pmu_cpu_prepare(int cpu)
 			goto err;
 	}
 
-	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_WALNUT)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
 		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
 		if (!cpuc->constraint_list)
 			goto err_shared_regs;
+	}
 
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
 		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
 		if (!cpuc->excl_cntrs)
 			goto err_constraint_list;
@@ -3552,9 +3616,10 @@ static void free_excl_cntrs(int cpu)
 		if (c->core_id == -1 || --c->refcnt == 0)
 			kfree(c);
 		cpuc->excl_cntrs = NULL;
-		kfree(cpuc->constraint_list);
-		cpuc->constraint_list = NULL;
 	}
+
+	kfree(cpuc->constraint_list);
+	cpuc->constraint_list = NULL;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -4061,9 +4126,12 @@ static struct attribute *intel_pmu_caps_attrs[] = {
        NULL
 };
 
+DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+
 static struct attribute *intel_pmu_attrs[] = {
 	&dev_attr_freeze_on_smi.attr,
 	NULL,
+	NULL,
 };
 
 static __init struct attribute **
@@ -4546,6 +4614,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
+
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
@@ -4557,6 +4626,16 @@ __init int intel_pmu_init(void)
 		tsx_attr = hsw_tsx_events_attrs;
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+
+		/* If our CPU haz a walnut */
+		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+			x86_pmu.flags |= PMU_FL_WALNUT;
+			x86_pmu.get_event_constraints = skl_get_event_constraints;
+			x86_pmu.enable_all = intel_skl_pmu_enable_all;
+			x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
+			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+		}
+
 		pr_cont("Skylake events, ");
 		name = "skylake";
 		break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfc..44b3426c618e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -242,6 +242,11 @@ struct cpu_hw_events {
 	struct intel_excl_cntrs		*excl_cntrs;
 	int excl_thread_id; /* 0 or 1 */
 
+	/*
+	 * SKL TSX_FORCE_ABORT shadow
+	 */
+	int				tfa_shadow;
+
 	/*
 	 * AMD specific bits
 	 */
@@ -676,6 +681,7 @@ do {									\
 #define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements  */
 #define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 #define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
+#define PMU_FL_WALNUT		0x20 /* deal with the walnut errata */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6d6122524711..981ff9479648 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8e40c2446fd1..ca5bc0eacb95 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -666,6 +666,12 @@
 
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
 
+
+#define MSR_TSX_FORCE_ABORT		0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT	0
+#define MSR_TFA_RTM_FORCE_ABORT		BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX		0x00000180
 #define MSR_IA32_MCG_EBX		0x00000181

  reply	other threads:[~2019-02-08 10:53 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-07 23:41 [MODERATED] [PATCH v3 0/6] PERFv3 Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 1/6] PERFv3 Andi Kleen
2019-02-08  8:45   ` [MODERATED] " Peter Zijlstra
2019-02-07 23:41 ` [MODERATED] [PATCH v3 2/6] PERFv3 Andi Kleen
2019-02-08  0:51   ` [MODERATED] Re: [SUSPECTED SPAM][PATCH " Andrew Cooper
2019-02-08  9:01     ` Peter Zijlstra
2019-02-08  9:31       ` [MODERATED] Re: [PATCH " Andrew Cooper
2019-02-08  9:39       ` [MODERATED] Re: [SUSPECTED SPAM][PATCH " Peter Zijlstra
2019-02-08 10:53         ` Peter Zijlstra [this message]
2019-02-08 18:07           ` [MODERATED] Re: [RFC][PATCH] performance walnuts Andi Kleen
2019-02-11 10:40             ` Peter Zijlstra
2019-02-11 14:06               ` Thomas Gleixner
2019-02-11 20:17                 ` [MODERATED] " Konrad Rzeszutek Wilk
2019-02-11 23:39                   ` Thomas Gleixner
2019-02-09  0:28           ` [MODERATED] " Linus Torvalds
2019-02-09  4:34             ` Andi Kleen
2019-02-09  8:57             ` Peter Zijlstra
2019-02-13  2:56           ` mark gross
2019-02-15 17:32             ` mark gross
2019-02-15 17:44               ` Peter Zijlstra
2019-02-15 20:47                 ` mark gross
2019-02-15 21:33                   ` Thomas Gleixner
2019-02-19 13:35               ` [MODERATED] [RFC][PATCH v2] " Peter Zijlstra
2019-02-15 23:45           ` [MODERATED] Encrypted Message Jon Masters
2019-02-08  8:50   ` [MODERATED] Re: [PATCH v3 2/6] PERFv3 Peter Zijlstra
2019-02-08 17:26     ` Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 3/6] PERFv3 Andi Kleen
2019-02-08  9:02   ` [MODERATED] " Peter Zijlstra
2019-02-07 23:41 ` [MODERATED] [PATCH v3 4/6] PERFv3 Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 5/6] PERFv3 Andi Kleen
2019-02-08  0:54   ` [MODERATED] " Andrew Cooper
2019-02-07 23:41 ` [MODERATED] [PATCH v3 6/6] PERFv3 Andi Kleen
2019-02-08  9:07   ` [MODERATED] " Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190208105318.GE32534@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=speck@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.