* [PATCH] perf/x86/intel/pt: Don't die on VMXON
@ 2016-04-01 16:24 Alexander Shishkin
2016-04-06 8:52 ` Peter Zijlstra
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-01 16:24 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo, Alexander Shishkin
Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,
$ perf record -e intel_pt// kvm -nographic
on such a machine will kill it.
To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
arch/x86/events/intel/pt.c | 71 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 2 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 70 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..a16f2ebc12 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+ /* Intel SDM, 36.5 "Tracing post-VMXON" */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ /* may be already stopped by a PMI*/
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
+
+ ctl ^= RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_vmxon(int entry)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMX entry will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, entry);
+
+ if (entry) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmxon);
+
/*
* PMU callbacks
*/
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -111,6 +112,7 @@ struct pt_buffer {
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..8d34c39982 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmxon(int entry);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..744ea43b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_vmxon(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_vmxon(0);
}
static void hardware_disable(void)
--
2.8.0.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-01 16:24 [PATCH] perf/x86/intel/pt: Don't die on VMXON Alexander Shishkin
@ 2016-04-06 8:52 ` Peter Zijlstra
2016-04-06 11:10 ` Alexander Shishkin
0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2016-04-06 8:52 UTC (permalink / raw)
To: Alexander Shishkin
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo
On Fri, Apr 01, 2016 at 07:24:14PM +0300, Alexander Shishkin wrote:
> +static void pt_config_stop(struct perf_event *event)
> {
> + u64 ctl = READ_ONCE(event->hw.config);
>
> + /* may be already stopped by a PMI*/
> + if (!(ctl & RTIT_CTL_TRACEEN))
> + return;
> +
> + ctl ^= RTIT_CTL_TRACEEN;
Would that not be much less confusing when written like |= ?
> wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>
> + WRITE_ONCE(event->hw.config, ctl);
> +
> /*
> * A wrmsr that disables trace generation serializes other PT
> * registers and causes all data packets to be written to memory,
> +void intel_pt_vmxon(int entry)
> +{
> + struct pt *pt = this_cpu_ptr(&pt_ctx);
> + struct perf_event *event;
> + unsigned long flags;
> +
> + /* PT plays nice with VMX, do nothing */
> + if (pt_pmu.vmx)
> + return;
> +
> + /*
> + * VMX entry will clear RTIT_CTL.TraceEn; we need to make
> + * sure to not try to set it while VMX is on. Disable
> + * interrupts to avoid racing with pmu callbacks;
> + * concurrent PMI should be handled fine.
> + */
> + local_irq_save(flags);
> + WRITE_ONCE(pt->vmx_on, entry);
So you mix: "VMX is on" and "VMX entry", please pick one.
Since the function is called vmxon, I find .entry a very confusing
argument name.
> +
> + if (entry) {
> + /* prevent pt_config_stop() from writing RTIT_CTL */
> + event = pt->handle.event;
> + if (event)
> + event->hw.config = 0;
> + }
> + local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmxon);
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 8:52 ` Peter Zijlstra
@ 2016-04-06 11:10 ` Alexander Shishkin
2016-04-06 11:52 ` Peter Zijlstra
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 11:10 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo
Peter Zijlstra <peterz@infradead.org> writes:
> On Fri, Apr 01, 2016 at 07:24:14PM +0300, Alexander Shishkin wrote:
>> +static void pt_config_stop(struct perf_event *event)
>> {
>> + u64 ctl = READ_ONCE(event->hw.config);
>>
>> + /* may be already stopped by a PMI*/
>> + if (!(ctl & RTIT_CTL_TRACEEN))
>> + return;
>> +
>> + ctl ^= RTIT_CTL_TRACEEN;
>
> Would that not be much less confusing when written like |= ?
This one's actually clearing TraceEn, see the if-not-set-leave in front
of it, but that just goes to prove your point I guess. :)
>> +void intel_pt_vmxon(int entry)
>> +{
>> + struct pt *pt = this_cpu_ptr(&pt_ctx);
>> + struct perf_event *event;
>> + unsigned long flags;
>> +
>> + /* PT plays nice with VMX, do nothing */
>> + if (pt_pmu.vmx)
>> + return;
>> +
>> + /*
>> + * VMX entry will clear RTIT_CTL.TraceEn; we need to make
>> + * sure to not try to set it while VMX is on. Disable
>> + * interrupts to avoid racing with pmu callbacks;
>> + * concurrent PMI should be handled fine.
>> + */
>> + local_irq_save(flags);
>> + WRITE_ONCE(pt->vmx_on, entry);
>
> So you mix: "VMX is on" and "VMX entry", please pick one.
>
> Since the function is called vmxon, I find .entry a very confusing
> argument name.
I did struggle with this one indeed.
How about this then:
>From 9faf95f173decda1f2e3101ced9d9370c14f2339 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON
Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,
$ perf record -e intel_pt// kvm -nographic
on such a machine will kill it.
To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
arch/x86/events/intel/pt.c | 71 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 2 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 70 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..02384cc6ff 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+ /* Intel SDM, 36.5 "Tracing post-VMXON" */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
+
+ ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmx);
+
/*
* PMU callbacks
*/
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -111,6 +112,7 @@ struct pt_buffer {
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..fcc5956f3f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmx(int on);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..fa8c98f5fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_vmx(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_vmx(0);
}
static void hardware_disable(void)
--
2.8.0.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 11:10 ` Alexander Shishkin
@ 2016-04-06 11:52 ` Peter Zijlstra
2016-04-06 11:55 ` Borislav Petkov
2016-04-06 14:06 ` Alexander Shishkin
0 siblings, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2016-04-06 11:52 UTC (permalink / raw)
To: Alexander Shishkin
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov
On Wed, Apr 06, 2016 at 02:10:49PM +0300, Alexander Shishkin wrote:
> >> + /* may be already stopped by a PMI*/
> >> + if (!(ctl & RTIT_CTL_TRACEEN))
> >> + return;
> >> +
> >> + ctl ^= RTIT_CTL_TRACEEN;
> >
> > Would that not be much less confusing when written like |= ?
>
> This one's actually clearing TraceEn, see the if-not-set-leave in front
> of it, but that just goes to prove your point I guess. :)
Hehe, indeed. So much for pretending to be awake :-)
> How about this then:
Looks good, however:
> + if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
> + /* Intel SDM, 36.5 "Tracing post-VMXON" */
> + rdmsrl(MSR_IA32_VMX_MISC, reg);
> + if (reg & BIT(14))
> + pt_pmu.vmx = true;
> + }
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 11:52 ` Peter Zijlstra
@ 2016-04-06 11:55 ` Borislav Petkov
2016-04-06 12:07 ` Alexander Shishkin
2016-04-06 14:06 ` Alexander Shishkin
1 sibling, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2016-04-06 11:55 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Alexander Shishkin, Gleb Natapov, Paolo Bonzini, x86, kvm,
Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo
On Wed, Apr 06, 2016 at 01:52:15PM +0200, Peter Zijlstra wrote:
> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
>
> > + /* Intel SDM, 36.5 "Tracing post-VMXON" */
> > + rdmsrl(MSR_IA32_VMX_MISC, reg);
> > + if (reg & BIT(14))
Also, I needz to consult my crystal ball about what bit 14 is...
:-)
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 11:55 ` Borislav Petkov
@ 2016-04-06 12:07 ` Alexander Shishkin
0 siblings, 0 replies; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 12:07 UTC (permalink / raw)
To: Borislav Petkov, Peter Zijlstra
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo
Borislav Petkov <bp@alien8.de> writes:
> On Wed, Apr 06, 2016 at 01:52:15PM +0200, Peter Zijlstra wrote:
>> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
>>
>> > + /* Intel SDM, 36.5 "Tracing post-VMXON" */
>> > + rdmsrl(MSR_IA32_VMX_MISC, reg);
>> > + if (reg & BIT(14))
>
> Also, I needz to consult my crystal ball about what bit 14 is...
>
> :-)
I know. It's right there in SDM chapter 36.5, referred to as "bit 14"
and it's not mentioned anywhere else that I could find, so I just left a
comment there.
Regards,
--
Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 11:52 ` Peter Zijlstra
2016-04-06 11:55 ` Borislav Petkov
@ 2016-04-06 14:06 ` Alexander Shishkin
2016-04-13 9:49 ` Ingo Molnar
1 sibling, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 14:06 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov
Peter Zijlstra <peterz@infradead.org> writes:
> Looks good, however:
>
>> + if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
>
> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
Right. I copied it from another place in the driver, which I'll then
patch as well.
3x a charm:
>From 1ac911d5096af88217b3a26b26d594b02f0db614 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON
Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,
$ perf record -e intel_pt// kvm -nographic
on such a machine will kill it.
To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
arch/x86/events/intel/pt.c | 75 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 2 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 74 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..32b613e863 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (boot_cpu_has(X86_FEATURE_VMX)) {
+ /*
+ * Intel SDM, 36.5 "Tracing post-VMXON" says that
+ * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+ * post-VMXON.
+ */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
+
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +936,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -963,6 +983,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmx);
+
/*
* PMU callbacks
*/
@@ -973,6 +1022,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (ACCESS_ONCE(pt->vmx_on))
+ return;
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1007,7 +1059,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -111,6 +112,7 @@ struct pt_buffer {
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..fcc5956f3f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmx(int on);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..fa8c98f5fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_vmx(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_vmx(0);
}
static void hardware_disable(void)
--
2.8.0.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-06 14:06 ` Alexander Shishkin
@ 2016-04-13 9:49 ` Ingo Molnar
2016-04-13 10:32 ` Alexander Shishkin
0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2016-04-13 9:49 UTC (permalink / raw)
To: Alexander Shishkin
Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
Borislav Petkov
* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:
> Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> ---
> arch/x86/events/intel/pt.c | 75 +++++++++++++++++++++++++++++++++------
> arch/x86/events/intel/pt.h | 2 ++
> arch/x86/include/asm/perf_event.h | 4 +++
> arch/x86/kvm/vmx.c | 4 +++
> 4 files changed, 74 insertions(+), 11 deletions(-)
> +void intel_pt_vmx(int on)
> +{
> + struct pt *pt = this_cpu_ptr(&pt_ctx);
> + struct perf_event *event;
> + unsigned long flags;
> +
> + /* PT plays nice with VMX, do nothing */
> + if (pt_pmu.vmx)
> + return;
> +
> + /*
> + * VMXON will clear RTIT_CTL.TraceEn; we need to make
> + * sure to not try to set it while VMX is on. Disable
> + * interrupts to avoid racing with pmu callbacks;
> + * concurrent PMI should be handled fine.
> + */
> + local_irq_save(flags);
> + WRITE_ONCE(pt->vmx_on, on);
> +
> + if (on) {
> + /* prevent pt_config_stop() from writing RTIT_CTL */
> + event = pt->handle.event;
> + if (event)
> + event->hw.config = 0;
> + }
> + local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmx);
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
>
> static void kvm_cpu_vmxon(u64 addr)
> {
> + intel_pt_vmx(1);
> +
> asm volatile (ASM_VMX_VMXON_RAX
> : : "a"(&addr), "m"(addr)
> : "memory", "cc");
> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
> static void kvm_cpu_vmxoff(void)
> {
> asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> +
> + intel_pt_vmx(0);
> }
Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb,
only nouns - please name new functions descriptively after what they do!
Something like intel_pt_set_vmx_state() or so?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-13 9:49 ` Ingo Molnar
@ 2016-04-13 10:32 ` Alexander Shishkin
2016-04-13 10:53 ` Ingo Molnar
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-13 10:32 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
Borislav Petkov
Ingo Molnar <mingo@kernel.org> writes:
>> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>> static void kvm_cpu_vmxoff(void)
>> {
>> asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
>> +
>> + intel_pt_vmx(0);
>> }
>
> Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb,
> only nouns - please name new functions descriptively after what they do!
I do agree that it can use a better name (and this is a second attempt
already).
> Something like intel_pt_set_vmx_state() or so?
Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF
insns, this could be two functions (intel_pt_handle_vmx{on,off}()) if
the global namespace can take it.
Regards,
--
Alex
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-13 10:32 ` Alexander Shishkin
@ 2016-04-13 10:53 ` Ingo Molnar
2016-04-13 13:48 ` Alexander Shishkin
0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2016-04-13 10:53 UTC (permalink / raw)
To: Alexander Shishkin
Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
Borislav Petkov
* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:
> Ingo Molnar <mingo@kernel.org> writes:
>
> >> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
> >> static void kvm_cpu_vmxoff(void)
> >> {
> >> asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> >> +
> >> + intel_pt_vmx(0);
> >> }
> >
> > Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb,
> > only nouns - please name new functions descriptively after what they do!
>
> I do agree that it can use a better name (and this is a second attempt
> already).
>
> > Something like intel_pt_set_vmx_state() or so?
>
> Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF insns,
> this could be two functions (intel_pt_handle_vmx{on,off}()) if the global
> namespace can take it.
Sure, intel_pt_handle_vmx(0/1) sounds good too. I wouldn't split it into two
functions ...
Thanks,
Ingo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
2016-04-13 10:53 ` Ingo Molnar
@ 2016-04-13 13:48 ` Alexander Shishkin
2016-04-28 10:25 ` [tip:perf/core] " tip-bot for Alexander Shishkin
0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-13 13:48 UTC (permalink / raw)
To: Ingo Molnar, Peter Zijlstra
Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov
Ingo Molnar <mingo@kernel.org> writes:
> * Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:
>
>> Ingo Molnar <mingo@kernel.org> writes:
>>
>> >> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>> >> static void kvm_cpu_vmxoff(void)
>> >> {
>> >> asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
>> >> +
>> >> + intel_pt_vmx(0);
>> >> }
>> >
>> > Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb,
>> > only nouns - please name new functions descriptively after what they do!
>>
>> I do agree that it can use a better name (and this is a second attempt
>> already).
>>
>> > Something like intel_pt_set_vmx_state() or so?
>>
>> Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF insns,
>> this could be two functions (intel_pt_handle_vmx{on,off}()) if the global
>> namespace can take it.
>
> Sure, intel_pt_handle_vmx(0/1) sounds good too. I wouldn't split it into two
> functions ...
Ok, so I'll use this one then. Thanks!
Peter, here's an updated patch against perf/urgent. It also doesn't
introduce new ACCESS_ONCE()s any more.
>From 89b85412c46270a675cf77918fc9fda520a7bdd2 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON
Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,
$ perf record -e intel_pt// kvm -nographic
on such a machine will kill it.
To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
arch/x86/events/intel/pt.c | 75 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 3 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 75 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf71d6..09a77dbc73 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (boot_cpu_has(X86_FEATURE_VMX)) {
+ /*
+ * Intel SDM, 36.5 "Tracing post-VMXON" says that
+ * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+ * post-VMXON.
+ */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
+
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (READ_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_handle_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
/*
* PMU callbacks
*/
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ if (READ_ONCE(pt->vmx_on))
+ return;
+
if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..3abb5f5ccc 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -107,10 +108,12 @@ struct pt_buffer {
* struct pt - per-cpu pt context
* @handle: perf output handle
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..f353061bba 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_handle_vmx(int on);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..143648c056 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_handle_vmx(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_handle_vmx(0);
}
static void hardware_disable(void)
--
2.8.0.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [tip:perf/core] perf/x86/intel/pt: Don't die on VMXON
2016-04-13 13:48 ` Alexander Shishkin
@ 2016-04-28 10:25 ` tip-bot for Alexander Shishkin
0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Alexander Shishkin @ 2016-04-28 10:25 UTC (permalink / raw)
To: linux-tip-commits
Cc: vincent.weaver, acme, tglx, acme, pbonzini, mingo, linux-kernel,
jolsa, bp, peterz, hpa, eranian, gleb, alexander.shishkin
Commit-ID: 1c5ac21a0e9bab7fc45d0ba9e11623e9ad99d02e
Gitweb: http://git.kernel.org/tip/1c5ac21a0e9bab7fc45d0ba9e11623e9ad99d02e
Author: Alexander Shishkin <alexander.shishkin@linux.intel.com>
AuthorDate: Tue, 29 Mar 2016 17:43:10 +0300
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 28 Apr 2016 10:32:42 +0200
perf/x86/intel/pt: Don't die on VMXON
Some versions of Intel PT do not support tracing across VMXON; more
specifically, VMXON will clear the TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely:
$ perf record -e intel_pt// kvm -nographic
on such a machine will kill it.
To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/87oa9dwrfk.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/events/intel/pt.c | 75 +++++++++++++++++++++++++++++++++------
arch/x86/events/intel/pt.h | 3 ++
arch/x86/include/asm/perf_event.h | 4 +++
arch/x86/kvm/vmx.c | 4 +++
4 files changed, 75 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf7..09a77db 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (boot_cpu_has(X86_FEATURE_VMX)) {
+ /*
+ * Intel SDM, 36.5 "Tracing post-VMXON" says that
+ * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+ * post-VMXON.
+ */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
+
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (READ_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_handle_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
/*
* PMU callbacks
*/
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ if (READ_ONCE(pt->vmx_on))
+ return;
+
if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a..3abb5f5 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -107,10 +108,12 @@ struct pt_buffer {
* struct pt - per-cpu pt context
* @handle: perf output handle
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3e..f353061 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_handle_vmx(int on);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a9..133679d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_handle_vmx(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_handle_vmx(0);
}
static void hardware_disable(void)
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2016-04-28 10:26 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-01 16:24 [PATCH] perf/x86/intel/pt: Don't die on VMXON Alexander Shishkin
2016-04-06 8:52 ` Peter Zijlstra
2016-04-06 11:10 ` Alexander Shishkin
2016-04-06 11:52 ` Peter Zijlstra
2016-04-06 11:55 ` Borislav Petkov
2016-04-06 12:07 ` Alexander Shishkin
2016-04-06 14:06 ` Alexander Shishkin
2016-04-13 9:49 ` Ingo Molnar
2016-04-13 10:32 ` Alexander Shishkin
2016-04-13 10:53 ` Ingo Molnar
2016-04-13 13:48 ` Alexander Shishkin
2016-04-28 10:25 ` [tip:perf/core] " tip-bot for Alexander Shishkin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).