From: Andi Kleen <andi@firstfloor.org>
To: peterz@infradead.org
Cc: x86@kernel.org, eranian@google.com, kan.liang@intel.com,
linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>
Subject: [PATCH v2 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering
Date: Wed, 10 Oct 2018 09:26:08 -0700 [thread overview]
Message-ID: <20181010162608.23899-2-andi@firstfloor.org> (raw)
In-Reply-To: <20181010162608.23899-1-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
KVM added a workaround for PEBS events leaking
into guests with 26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry.")
This uses the VT entry/exit list to add an extra disable of the PEBS_ENABLE MSR.
Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.
It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.
Check for the microcode updates that contain the fix
for leaking PEBS, and if present disable the extra entry/exit
list entry for PEBS_ENABLE. In addition, we always clear the
GLOBAL_CTRL bits for the PEBS counters while running in the guest,
which is enough to make them never fire on the wrong
side of the host/guest transition.
With the patch, the overhead for VM exits with the filtering
active is significantly reduced, from 8% to 4%.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
v2:
Use match_ucode, not match_ucode_all
Remove cpu lock
Use INTEL_MIN_UCODE and move to header
Update table to include Skylake clients.
---
arch/x86/events/intel/core.c | 80 ++++++++++++++++++++++++++++++++----
arch/x86/events/perf_event.h | 3 +-
2 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ab01ef9ddd77..5e8e76753eea 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/apic.h>
+#include <asm/cpu_device_id.h>
#include "../perf_event.h"
@@ -3166,16 +3167,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
- /*
- * If PMU counter has PEBS enabled it is not enough to disable counter
- * on a guest entry since PEBS memory write can overshoot guest entry
- * and corrupt guest memory. Disabling PEBS solves the problem.
- */
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ arr[0].guest &= ~cpuc->pebs_enabled;
+ else
+ arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ *nr = 1;
+
+ if (!x86_pmu.pebs_isolated) {
+ /*
+ * If PMU counter has PEBS enabled it is not enough to
+ * disable counter on a guest entry since PEBS memory
+ * write can overshoot guest entry and corrupt guest
+ * memory. Disabling PEBS solves the problem.
+ *
+ * Don't do this if the CPU already enforces it.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
+ *nr = 2;
+ }
- *nr = 2;
return arr;
}
@@ -3693,6 +3705,45 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
}
+static const struct x86_ucode_id isolation_ucodes[] = {
+ INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
+ INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
+ INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
+ INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
+ INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
+ INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
+ INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
+ INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
+ INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
+ INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
+ INTEL_MIN_UCODE(INTEL_FAM6_CANNONLAKE_MOBILE, 3, 0x00000000),
+ {}
+};
+
+static void intel_check_isolation(void)
+{
+ if (!x86_match_ucode(isolation_ucodes)) {
+ x86_pmu.pebs_isolated = 0;
+ return;
+ }
+ x86_pmu.pebs_isolated = 1;
+}
+
static int intel_snb_pebs_broken(int cpu)
{
u32 rev = UINT_MAX; /* default to broken for unknown models */
@@ -3717,6 +3768,8 @@ static void intel_snb_check_microcode(void)
int pebs_broken = 0;
int cpu;
+ intel_check_isolation();
+
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
@@ -3798,6 +3851,12 @@ static __init void intel_sandybridge_quirk(void)
cpus_read_unlock();
}
+static __init void intel_isolation_quirk(void)
+{
+ x86_pmu.check_microcode = intel_check_isolation;
+ intel_check_isolation();
+}
+
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
@@ -4362,6 +4421,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_HASWELL_X:
case INTEL_FAM6_HASWELL_ULT:
case INTEL_FAM6_HASWELL_GT3E:
+ x86_add_quirk(intel_isolation_quirk);
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -4392,6 +4452,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
+ x86_add_quirk(intel_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4452,6 +4513,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
+ x86_add_quirk(intel_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdd..d5745ed62622 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -607,7 +607,8 @@ struct x86_pmu {
pebs_active :1,
pebs_broken :1,
pebs_prec_dist :1,
- pebs_no_tlb :1;
+ pebs_no_tlb :1,
+ pebs_isolated :1;
int pebs_record_size;
int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);
--
2.17.1
next prev parent reply other threads:[~2018-10-10 16:26 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-10 16:26 [PATCH v2 1/2] x86/cpufeature: Add facility to match microcode revisions Andi Kleen
2018-10-10 16:26 ` Andi Kleen [this message]
2018-10-10 16:37 ` Borislav Petkov
2018-10-11 11:43 ` Henrique de Moraes Holschuh
2018-10-17 9:59 ` Thomas Gleixner
2018-10-19 23:47 ` Andi Kleen
2018-10-20 8:19 ` Thomas Gleixner
2018-10-20 14:38 ` Andi Kleen
2018-10-21 10:20 ` Thomas Gleixner
2018-10-21 15:13 ` Borislav Petkov
2018-10-25 23:23 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181010162608.23899-2-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=ak@linux.intel.com \
--cc=eranian@google.com \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).