linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/2] x86/cpufeature: Add facility to match microcode revisions
@ 2018-10-25 23:45 Andi Kleen
  2018-10-25 23:45 ` [PATCH v4 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering Andi Kleen
  0 siblings, 1 reply; 2+ messages in thread
From: Andi Kleen @ 2018-10-25 23:45 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

For bug workarounds or checks it is useful to check for specific
microcode revisions. Add a new table format to check for steppings
with min microcode revisions.

This does not change the existing x86_cpu_id because it is an ABI
shared with modutils, and because the new table has quite different
requirements: no wildcards, everything has to be matched exactly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
v2:
Remove all CPU match, only check boot cpu
Move INTEL_MIN_UCODE macro to header.
Minor cleanups.
Remove max ucode and driver data
v3:
Rename function
Update comments.
Document mixed stepping caveats.
Use u8 for model
Remove vendor 0 check.
Change return check
v4:
Rename to x86_min_microcode
Change return value to bool
---
 arch/x86/include/asm/cpu_device_id.h | 28 ++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/match.c          | 19 +++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index baeba0567126..28847d5ea1fa 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -11,4 +11,32 @@
 
 extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
 
+/*
+ * Match specific microcode revisions.
+ *
+ * vendor/family/model/stepping must be all set.
+ *
+ * only checks against the boot cpu.  When mixed-stepping configs are
+ * valid for a CPU model, add a quirk for every valid stepping and
+ * do the fine-tuning in the quirk handler.
+ */
+
+struct x86_ucode_id {
+	u8  vendor;
+	u8  family;
+	u8  model;
+	u8  stepping;
+	u32 min_ucode;
+};
+
+#define INTEL_MIN_UCODE(mod, step, rev) {			\
+	.vendor = X86_VENDOR_INTEL,				\
+	.family = 6,						\
+	.model = mod,						\
+	.stepping = step,					\
+	.min_ucode = rev,					\
+}
+
+extern bool x86_min_microcode(const struct x86_ucode_id *mt);
+
 #endif
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 3fed38812eea..12db14232d62 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -48,3 +48,22 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
 	return NULL;
 }
 EXPORT_SYMBOL(x86_match_cpu);
+
+bool x86_min_microcode(const struct x86_ucode_id *mt)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	const struct x86_ucode_id *m;
+
+	for (m = mt; m->family | m->model; m++) {
+		if (c->x86_vendor != m->vendor)
+			continue;
+		if (c->x86 != m->family)
+			continue;
+		if (c->x86_model != m->model)
+			continue;
+		if (c->x86_stepping != m->stepping)
+			continue;
+		return c->microcode >= m->min_ucode;
+	}
+	return false;
+}
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v4 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering
  2018-10-25 23:45 [PATCH v4 1/2] x86/cpufeature: Add facility to match microcode revisions Andi Kleen
@ 2018-10-25 23:45 ` Andi Kleen
  0 siblings, 0 replies; 2+ messages in thread
From: Andi Kleen @ 2018-10-25 23:45 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

KVM added a workaround for PEBS events leaking into guests with
commit 26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry.").
This uses the VT entry/exit list to add an extra disable of the PEBS_ENABLE MSR.

Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.

It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.

Check for the microcode updates that have the microcode
fix for leaking PEBS, and disable the extra entry/exit list
entry for PEBS_ENABLE. In addition we always clear the
GLOBAL_CTRL for the PEBS counter while running in the guest,
which is enough to make them never fire on the wrong
side of the host/guest transition.

With the patch, the overhead of VM exits with the filtering
active is significantly reduced, from 8% to 4%.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
v2:
Use match_ucode, not match_ucode_all
Remove cpu lock
Use INTEL_MIN_UCODE and move to header
Update Table to include skylake clients.
v3:
Use x86_min_microcode
---
 arch/x86/events/intel/core.c | 80 ++++++++++++++++++++++++++++++++----
 arch/x86/events/perf_event.h |  3 +-
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8..89ec85c3359c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
 #include <asm/hardirq.h>
 #include <asm/intel-family.h>
 #include <asm/apic.h>
+#include <asm/cpu_device_id.h>
 
 #include "../perf_event.h"
 
@@ -3170,16 +3171,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
 	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
 	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
 	arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-	/*
-	 * If PMU counter has PEBS enabled it is not enough to disable counter
-	 * on a guest entry since PEBS memory write can overshoot guest entry
-	 * and corrupt guest memory. Disabling PEBS solves the problem.
-	 */
-	arr[1].msr = MSR_IA32_PEBS_ENABLE;
-	arr[1].host = cpuc->pebs_enabled;
-	arr[1].guest = 0;
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+		arr[0].guest &= ~cpuc->pebs_enabled;
+	else
+		arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+	*nr = 1;
+
+	if (!x86_pmu.pebs_isolated) {
+		/*
+		 * If PMU counter has PEBS enabled it is not enough to
+		 * disable counter on a guest entry since PEBS memory
+		 * write can overshoot guest entry and corrupt guest
+		 * memory. Disabling PEBS solves the problem.
+		 *
+		 * Don't do this if the CPU already enforces it.
+		 */
+		arr[1].msr = MSR_IA32_PEBS_ENABLE;
+		arr[1].host = cpuc->pebs_enabled;
+		arr[1].guest = 0;
+		*nr = 2;
+	}
 
-	*nr = 2;
 	return arr;
 }
 
@@ -3697,6 +3709,45 @@ static __init void intel_clovertown_quirk(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static const struct x86_ucode_id isolation_ucodes[] = {
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_CORE,	 3, 0x0000001f),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_ULT,		 1, 0x0000001e),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_GT3E,	 1, 0x00000015),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,		 2, 0x00000037),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,		 4, 0x0000000a),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_CORE,	 4, 0x00000023),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_GT3E,	 1, 0x00000014),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 2, 0x00000010),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 3, 0x07000009),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 4, 0x0f000009),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 5, 0x0e000002),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_X,		 2, 0x0b000014),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,		 3, 0x00000021),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,		 4, 0x00000000),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_MOBILE,	 3, 0x0000007c),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP,	 3, 0x0000007c),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	 9, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	 9, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     10, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     11, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     12, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    10, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    11, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    12, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    13, 0x0000004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_CANNONLAKE_MOBILE,    3, 0x00000000),
+	{}
+};
+
+static void intel_check_isolation(void)
+{
+	if (!x86_min_microcode(isolation_ucodes)) {
+		x86_pmu.pebs_isolated = 0;
+		return;
+	}
+	x86_pmu.pebs_isolated = 1;
+}
+
 static int intel_snb_pebs_broken(int cpu)
 {
 	u32 rev = UINT_MAX; /* default to broken for unknown models */
@@ -3721,6 +3772,8 @@ static void intel_snb_check_microcode(void)
 	int pebs_broken = 0;
 	int cpu;
 
+	intel_check_isolation();
+
 	for_each_online_cpu(cpu) {
 		if ((pebs_broken = intel_snb_pebs_broken(cpu)))
 			break;
@@ -3802,6 +3855,12 @@ static __init void intel_sandybridge_quirk(void)
 	cpus_read_unlock();
 }
 
+static __init void intel_isolation_quirk(void)
+{
+	x86_pmu.check_microcode = intel_check_isolation;
+	intel_check_isolation();
+}
+
 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
 	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
 	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
@@ -4388,6 +4447,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_HASWELL_X:
 	case INTEL_FAM6_HASWELL_ULT:
 	case INTEL_FAM6_HASWELL_GT3E:
+		x86_add_quirk(intel_isolation_quirk);
 		x86_add_quirk(intel_ht_bug);
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -4420,6 +4480,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_BROADWELL_XEON_D:
 	case INTEL_FAM6_BROADWELL_GT3E:
 	case INTEL_FAM6_BROADWELL_X:
+		x86_add_quirk(intel_isolation_quirk);
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4482,6 +4543,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_SKYLAKE_X:
 	case INTEL_FAM6_KABYLAKE_MOBILE:
 	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		x86_add_quirk(intel_isolation_quirk);
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdd..d5745ed62622 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -607,7 +607,8 @@ struct x86_pmu {
 			pebs_active	:1,
 			pebs_broken	:1,
 			pebs_prec_dist	:1,
-			pebs_no_tlb	:1;
+			pebs_no_tlb	:1,
+			pebs_isolated   :1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-10-25 23:45 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-25 23:45 [PATCH v4 1/2] x86/cpufeature: Add facility to match microcode revisions Andi Kleen
2018-10-25 23:45 ` [PATCH v4 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).