Date: Wed, 31 Aug 2016 14:03:58 +0200
From: Jiri Olsa
To: Ingo Molnar, Peter Zijlstra, Andi Kleen
Cc: lkml, Alexander Shishkin
Subject: [RFC,PATCH] VMWARE faults on accessing disabled counters
Message-ID: <20160831120358.GB9001@krava>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
User-Agent: Mutt/1.7.0 (2016-08-17)
X-Mailing-List: linux-kernel@vger.kernel.org

hi,
when booting under VMWARE we get the following dmesg lines:

[    0.051567] perf_event_intel: CPUID marked event: 'cpu cycles' unavailable
[    0.051567] perf_event_intel: CPUID marked event: 'instructions' unavailable
[    0.051568] perf_event_intel: CPUID marked event: 'bus cycles' unavailable
[    0.051568] perf_event_intel: CPUID marked event: 'cache references' unavailable
[    0.051569] perf_event_intel: CPUID marked event: 'cache misses' unavailable
[    0.051570] perf_event_intel: CPUID marked event: 'branch instructions' unavailable
[    0.051570] perf_event_intel: CPUID marked event: 'branch misses' unavailable

which means all the architectural events are reported as unavailable by CPUID(0xa).

The kernel zeroes the corresponding intel_perfmon_event_map entries to prevent
those events from being configured through the PERF_TYPE_HARDWARE pmu type.
However, they can still be configured via the PERF_TYPE_RAW type.

We get a general protection fault on VMWARE when reading a cycles PMC that was
configured through the PERF_TYPE_RAW interface:

 #4 [ffff88007c603e10] do_general_protection at ffffffff8163da9e
 #5 [ffff88007c603e40] general_protection at ffffffff8163d3a8
    [exception RIP: native_read_pmc+6]
    RIP: ffffffff81058d66  RSP: ffff88007c603ef0  RFLAGS: 00010083
    RAX: ffffffff81957ee0  RBX: 0000000000000000  RCX: 0000000040000002
    RDX: 000000000ff8f719  RSI: ffff88007c617fa8  RDI: 0000000040000002
    RBP: ffff88007c603ef0   R8: 00007ffde5053150   R9: 0000000000000000
    R10: 00007ffde5052530  R11: 00007fbb22aedc70  R12: ffffffff80000001
    R13: ffff880079b74400  R14: ffff880079b74578  R15: 0000000000000010
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
 #6 [ffff88007c603ef8] x86_perf_event_update at ffffffff81029e03
 #7 [ffff88007c603f30] x86_pmu_read at ffffffff8102a079
 #8 [ffff88007c603f40] __perf_event_read at ffffffff811590de

I couldn't find out what rdpmc does in this situation on real hardware, so I'm
not sure we actually want to prevent this. The patch below tries to catch this
case.
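For reference, here is a minimal userspace sketch of the failing setup (a
hypothetical reproducer, not part of the patch or the original report): it
counts the architectural 'cpu cycles' encoding (event 0x3c) through
PERF_TYPE_RAW and reads the value back, which is the path that ends up in
the in-kernel PMC read. With the patch applied, perf_event_open() should
instead fail with ENOENT for such a config.

/* hypothetical reproducer sketch, not part of the patch */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = 0x003c;	/* raw encoding of the architectural cpu cycles event */

	/* measure the current thread on any cpu */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		/* with the patch applied this is expected to fail with ENOENT */
		perror("perf_event_open");
		return 1;
	}

	/* reading the counter is what triggers the PMC read in the kernel */
	if (read(fd, &count, sizeof(count)) != sizeof(count)) {
		perror("read");
		close(fd);
		return 1;
	}

	printf("raw 0x3c: %lld cycles\n", count);
	close(fd);
	return 0;
}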
thanks,
jirka

---
 arch/x86/events/core.c       |  8 ++++-
 arch/x86/events/intel/core.c | 72 ++++++++++++++++++++++++++++++++------------
 arch/x86/events/perf_event.h |  6 ++++
 3 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 473519100b11..d836c5922b12 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -534,8 +534,14 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (!event->attr.exclude_kernel)
 		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	if (event->attr.type == PERF_TYPE_RAW)
+	if (event->attr.type == PERF_TYPE_RAW) {
+		u64 arch_config = event->attr.config & INTEL_ARCH_EVENT_MASK;
+
+		if (x86_pmu_event_disabled(arch_config))
+			return -ENOENT;
+
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+	}
 
 	if (event->attr.sample_period && x86_pmu.limit_period) {
 		if (x86_pmu.limit_period(event, event->attr.sample_period) >
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9049d62f34ae..99a83529c7ff 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -23,16 +23,22 @@
 /*
  * Intel PerfMon, used on Core and later.
  */
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
-	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
-	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
-	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
+struct intel_perfmon_event {
+	u64	config;
+	bool	disabled;
+	u64	replacement;
+};
+
+static struct intel_perfmon_event intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+	[PERF_COUNT_HW_CPU_CYCLES]		= { .config = 0x003c },
+	[PERF_COUNT_HW_INSTRUCTIONS]		= { .config = 0x00c0 },
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= { .config = 0x4f2e },
+	[PERF_COUNT_HW_CACHE_MISSES]		= { .config = 0x412e },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= { .config = 0x00c4 },
+	[PERF_COUNT_HW_BRANCH_MISSES]		= { .config = 0x00c5 },
+	[PERF_COUNT_HW_BUS_CYCLES]		= { .config = 0x013c },
+	[PERF_COUNT_HW_REF_CPU_CYCLES]		= { .config = 0x0300 }, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -268,7 +274,31 @@ struct event_constraint intel_bdw_event_constraints[] = {
 
 static u64 intel_pmu_event_map(int hw_event)
 {
-	return intel_perfmon_event_map[hw_event];
+	struct intel_perfmon_event *event = &intel_perfmon_event_map[hw_event];
+
+	if (event->disabled)
+		return 0;
+	if (event->replacement)
+		return event->replacement;
+
+	return event->config;
+}
+
+static bool intel_pmu_event_disabled(int hw_event)
+{
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(intel_perfmon_event_map); i++) {
+		struct intel_perfmon_event *event = &intel_perfmon_event_map[i];
+
+		if (event->config != hw_event)
+			continue;
+
+		if (event->disabled)
+			return true;
+	}
+
+	return false;
 }
 
 /*
@@ -3165,6 +3195,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
+	.event_disabled		= intel_pmu_event_disabled,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
@@ -3205,6 +3236,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
+	.event_disabled		= intel_pmu_event_disabled,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
@@ -3357,7 +3389,7 @@ static __init void intel_arch_events_quirk(void)
 
 	/* disable event that reported as not presend by cpuid */
 	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
-		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+		intel_perfmon_event_map[intel_arch_events_map[bit].id].disabled = true;
 		pr_warn("CPUID marked event: \'%s\' unavailable\n",
 			intel_arch_events_map[bit].name);
 	}
@@ -3375,7 +3407,7 @@ static __init void intel_nehalem_quirk(void)
 	 * branch-misses, but it's still much better than the
 	 * architectural event which is often completely bogus:
 	 */
-	intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+	intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES].replacement = 0x7f89;
 	ebx.split.no_branch_misses_retired = 0;
 	x86_pmu.events_maskl = ebx.full;
 	pr_info("CPU erratum AAJ80 worked around\n");
@@ -3543,10 +3575,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.cpu_events = nhm_events_attrs;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
 		intel_pmu_pebs_data_source_nhm();
@@ -3630,10 +3662,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.cpu_events = nhm_events_attrs;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
 		intel_pmu_pebs_data_source_nhm();
@@ -3667,10 +3699,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.cpu_events = snb_events_attrs;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
 			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
 		pr_cont("SandyBridge events, ");
@@ -3704,7 +3736,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.cpu_events = snb_events_attrs;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 
 		pr_cont("IvyBridge events, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 01ddfeadaee6..69cca7dc8de4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -514,6 +514,7 @@ struct x86_pmu {
 	int		(*addr_offset)(int index, bool eventsel);
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
+	bool		(*event_disabled)(int);
 	int		max_events;
 	int		num_counters;
 	int		num_counters_fixed;
@@ -715,6 +716,11 @@ static inline int x86_pmu_rdpmc_index(int index)
 {
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }
 
+static inline bool x86_pmu_event_disabled(u64 config)
+{
+	return x86_pmu.event_disabled ? x86_pmu.event_disabled(config) : false;
+}
+
 int x86_add_exclusive(unsigned int what);
 void x86_del_exclusive(unsigned int what);
-- 
2.7.4