linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf/x86/intel/pt: Don't die on VMXON
@ 2016-04-01 16:24 Alexander Shishkin
  2016-04-06  8:52 ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-01 16:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo, Alexander Shishkin

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 71 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..a16f2ebc12 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+		/* Intel SDM, 36.5 "Tracing post-VMXON" */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	/* may be already stopped by a PMI*/
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
+
+	ctl ^= RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_vmxon(int entry)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMX entry will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, entry);
+
+	if (entry) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmxon);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -111,6 +112,7 @@ struct pt_buffer {
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..8d34c39982 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmxon(int entry);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..744ea43b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_vmxon(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_vmxon(0);
 }
 
 static void hardware_disable(void)
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-01 16:24 [PATCH] perf/x86/intel/pt: Don't die on VMXON Alexander Shishkin
@ 2016-04-06  8:52 ` Peter Zijlstra
  2016-04-06 11:10   ` Alexander Shishkin
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2016-04-06  8:52 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo

On Fri, Apr 01, 2016 at 07:24:14PM +0300, Alexander Shishkin wrote:
> +static void pt_config_stop(struct perf_event *event)
>  {
> +	u64 ctl = READ_ONCE(event->hw.config);
>  
> +	/* may be already stopped by a PMI*/
> +	if (!(ctl & RTIT_CTL_TRACEEN))
> +		return;
> +
> +	ctl ^= RTIT_CTL_TRACEEN;

Would that not be much less confusing when written like |= ?

>  	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>  
> +	WRITE_ONCE(event->hw.config, ctl);
> +
>  	/*
>  	 * A wrmsr that disables trace generation serializes other PT
>  	 * registers and causes all data packets to be written to memory,


> +void intel_pt_vmxon(int entry)
> +{
> +	struct pt *pt = this_cpu_ptr(&pt_ctx);
> +	struct perf_event *event;
> +	unsigned long flags;
> +
> +	/* PT plays nice with VMX, do nothing */
> +	if (pt_pmu.vmx)
> +		return;
> +
> +	/*
> +	 * VMX entry will clear RTIT_CTL.TraceEn; we need to make
> +	 * sure to not try to set it while VMX is on. Disable
> +	 * interrupts to avoid racing with pmu callbacks;
> +	 * concurrent PMI should be handled fine.
> +	 */
> +	local_irq_save(flags);
> +	WRITE_ONCE(pt->vmx_on, entry);

So you mix: "VMX is on" and "VMX entry", please pick one.

Since the function is called vmxon, I find .entry a very confusing
argument name.

> +
> +	if (entry) {
> +		/* prevent pt_config_stop() from writing RTIT_CTL */
> +		event = pt->handle.event;
> +		if (event)
> +			event->hw.config = 0;
> +	}
> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmxon);

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06  8:52 ` Peter Zijlstra
@ 2016-04-06 11:10   ` Alexander Shishkin
  2016-04-06 11:52     ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 11:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo

Peter Zijlstra <peterz@infradead.org> writes:

> On Fri, Apr 01, 2016 at 07:24:14PM +0300, Alexander Shishkin wrote:
>> +static void pt_config_stop(struct perf_event *event)
>>  {
>> +	u64 ctl = READ_ONCE(event->hw.config);
>>  
>> +	/* may be already stopped by a PMI*/
>> +	if (!(ctl & RTIT_CTL_TRACEEN))
>> +		return;
>> +
>> +	ctl ^= RTIT_CTL_TRACEEN;
>
> Would that not be much less confusing when written like |= ?

This one's actually clearing TraceEn, see the if-not-set-leave in front
of it, but that just goes to prove your point I guess. :)

>> +void intel_pt_vmxon(int entry)
>> +{
>> +	struct pt *pt = this_cpu_ptr(&pt_ctx);
>> +	struct perf_event *event;
>> +	unsigned long flags;
>> +
>> +	/* PT plays nice with VMX, do nothing */
>> +	if (pt_pmu.vmx)
>> +		return;
>> +
>> +	/*
>> +	 * VMX entry will clear RTIT_CTL.TraceEn; we need to make
>> +	 * sure to not try to set it while VMX is on. Disable
>> +	 * interrupts to avoid racing with pmu callbacks;
>> +	 * concurrent PMI should be handled fine.
>> +	 */
>> +	local_irq_save(flags);
>> +	WRITE_ONCE(pt->vmx_on, entry);
>
> So you mix: "VMX is on" and "VMX entry", please pick one.
>
> Since the function is called vmxon, I find .entry a very confusing
> argument name.

I did struggle with this one indeed.

How about this then:

>From 9faf95f173decda1f2e3101ced9d9370c14f2339 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 71 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..02384cc6ff 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+		/* Intel SDM, 36.5 "Tracing post-VMXON" */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
+
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmx);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -111,6 +112,7 @@ struct pt_buffer {
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..fcc5956f3f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..fa8c98f5fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_vmx(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_vmx(0);
 }
 
 static void hardware_disable(void)
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06 11:10   ` Alexander Shishkin
@ 2016-04-06 11:52     ` Peter Zijlstra
  2016-04-06 11:55       ` Borislav Petkov
  2016-04-06 14:06       ` Alexander Shishkin
  0 siblings, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2016-04-06 11:52 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov

On Wed, Apr 06, 2016 at 02:10:49PM +0300, Alexander Shishkin wrote:
> >> +	/* may be already stopped by a PMI*/
> >> +	if (!(ctl & RTIT_CTL_TRACEEN))
> >> +		return;
> >> +
> >> +	ctl ^= RTIT_CTL_TRACEEN;
> >
> > Would that not be much less confusing when written like |= ?
> 
> This one's actually clearing TraceEn, see the if-not-set-leave in front
> of it, but that just goes to prove your point I guess. :)

Hehe, indeed. So much for pretending to be awake :-)


> How about this then:

Looks good, however:

> +	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {

Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)

> +		/* Intel SDM, 36.5 "Tracing post-VMXON" */
> +		rdmsrl(MSR_IA32_VMX_MISC, reg);
> +		if (reg & BIT(14))
> +			pt_pmu.vmx = true;
> +	}

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06 11:52     ` Peter Zijlstra
@ 2016-04-06 11:55       ` Borislav Petkov
  2016-04-06 12:07         ` Alexander Shishkin
  2016-04-06 14:06       ` Alexander Shishkin
  1 sibling, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2016-04-06 11:55 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexander Shishkin, Gleb Natapov, Paolo Bonzini, x86, kvm,
	Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo

On Wed, Apr 06, 2016 at 01:52:15PM +0200, Peter Zijlstra wrote:
> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
> 
> > +		/* Intel SDM, 36.5 "Tracing post-VMXON" */
> > +		rdmsrl(MSR_IA32_VMX_MISC, reg);
> > +		if (reg & BIT(14))

Also, I needz to consult my crystal ball about what bit 14 is...

:-)

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06 11:55       ` Borislav Petkov
@ 2016-04-06 12:07         ` Alexander Shishkin
  0 siblings, 0 replies; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 12:07 UTC (permalink / raw)
  To: Borislav Petkov, Peter Zijlstra
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo

Borislav Petkov <bp@alien8.de> writes:

> On Wed, Apr 06, 2016 at 01:52:15PM +0200, Peter Zijlstra wrote:
>> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)
>> 
>> > +		/* Intel SDM, 36.5 "Tracing post-VMXON" */
>> > +		rdmsrl(MSR_IA32_VMX_MISC, reg);
>> > +		if (reg & BIT(14))
>
> Also, I needz to consult my crystal ball about what bit 14 is...
>
> :-)

I know. It's right there in SDM chapter 36.5, referred to as "bit 14"
and it's not mentioned anywhere else that I could find, so I just left a
comment there.

Regards,
--
Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06 11:52     ` Peter Zijlstra
  2016-04-06 11:55       ` Borislav Petkov
@ 2016-04-06 14:06       ` Alexander Shishkin
  2016-04-13  9:49         ` Ingo Molnar
  1 sibling, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-06 14:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov

Peter Zijlstra <peterz@infradead.org> writes:

> Looks good, however:
>
>> +	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
>
> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)

Right. I copied it from another place in the driver, which I'll then
patch as well.

3x a charm:

>From 1ac911d5096af88217b3a26b26d594b02f0db614 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..32b613e863 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +936,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -963,6 +983,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmx);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1022,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1007,7 +1059,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -111,6 +112,7 @@ struct pt_buffer {
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..fcc5956f3f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..fa8c98f5fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_vmx(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_vmx(0);
 }
 
 static void hardware_disable(void)
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-06 14:06       ` Alexander Shishkin
@ 2016-04-13  9:49         ` Ingo Molnar
  2016-04-13 10:32           ` Alexander Shishkin
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2016-04-13  9:49 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
	Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
	Borislav Petkov


* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:

> Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> ---
>  arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
>  arch/x86/events/intel/pt.h        |  2 ++
>  arch/x86/include/asm/perf_event.h |  4 +++
>  arch/x86/kvm/vmx.c                |  4 +++
>  4 files changed, 74 insertions(+), 11 deletions(-)

> +void intel_pt_vmx(int on)
> +{
> +	struct pt *pt = this_cpu_ptr(&pt_ctx);
> +	struct perf_event *event;
> +	unsigned long flags;
> +
> +	/* PT plays nice with VMX, do nothing */
> +	if (pt_pmu.vmx)
> +		return;
> +
> +	/*
> +	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
> +	 * sure to not try to set it while VMX is on. Disable
> +	 * interrupts to avoid racing with pmu callbacks;
> +	 * concurrent PMI should be handled fine.
> +	 */
> +	local_irq_save(flags);
> +	WRITE_ONCE(pt->vmx_on, on);
> +
> +	if (on) {
> +		/* prevent pt_config_stop() from writing RTIT_CTL */
> +		event = pt->handle.event;
> +		if (event)
> +			event->hw.config = 0;
> +	}
> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmx);

> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
>  
>  static void kvm_cpu_vmxon(u64 addr)
>  {
> +	intel_pt_vmx(1);
> +
>  	asm volatile (ASM_VMX_VMXON_RAX
>  			: : "a"(&addr), "m"(addr)
>  			: "memory", "cc");
> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>  static void kvm_cpu_vmxoff(void)
>  {
>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> +
> +	intel_pt_vmx(0);
>  }

Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
only nouns - please name new functions descriptively to after what they do!

Something like intel_pt_set_vmx_state() or so?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-13  9:49         ` Ingo Molnar
@ 2016-04-13 10:32           ` Alexander Shishkin
  2016-04-13 10:53             ` Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-13 10:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
	Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
	Borislav Petkov

Ingo Molnar <mingo@kernel.org> writes:

>> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>>  static void kvm_cpu_vmxoff(void)
>>  {
>>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
>> +
>> +	intel_pt_vmx(0);
>>  }
>
> Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
> only nouns - please name new functions descriptively to after what they do!

I do agree that it can use a better name (and this is a second attempt
already).

> Something like intel_pt_set_vmx_state() or so?

Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF
insns, this could be two functions (intel_pt_handle_vmx{on,off}()) if
the global namespace can take it.

Regards,
--
Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-13 10:32           ` Alexander Shishkin
@ 2016-04-13 10:53             ` Ingo Molnar
  2016-04-13 13:48               ` Alexander Shishkin
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2016-04-13 10:53 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Peter Zijlstra, Gleb Natapov, Paolo Bonzini, x86, kvm,
	Ingo Molnar, linux-kernel, tglx, hpa, Arnaldo Carvalho de Melo,
	Borislav Petkov


* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:

> Ingo Molnar <mingo@kernel.org> writes:
> 
> >> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
> >>  static void kvm_cpu_vmxoff(void)
> >>  {
> >>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> >> +
> >> +	intel_pt_vmx(0);
> >>  }
> >
> > Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
> > only nouns - please name new functions descriptively to after what they do!
> 
> I do agree that it can use a better name (and this is a second attempt
> already).
> 
> > Something like intel_pt_set_vmx_state() or so?
> 
> Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF insns, 
> this could be two functions (intel_pt_handle_vmx{on,off}()) if the global 
> namespace can take it.

Sure, intel_pt_handle_vmx(0/1) sounds good too. I wouldn't split it into two 
functions ...

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/x86/intel/pt: Don't die on VMXON
  2016-04-13 10:53             ` Ingo Molnar
@ 2016-04-13 13:48               ` Alexander Shishkin
  2016-04-28 10:25                 ` [tip:perf/core] " tip-bot for Alexander Shishkin
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2016-04-13 13:48 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Gleb Natapov, Paolo Bonzini, x86, kvm, Ingo Molnar, linux-kernel,
	tglx, hpa, Arnaldo Carvalho de Melo, Borislav Petkov

Ingo Molnar <mingo@kernel.org> writes:

> * Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:
>
>> Ingo Molnar <mingo@kernel.org> writes:
>> 
>> >> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>> >>  static void kvm_cpu_vmxoff(void)
>> >>  {
>> >>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
>> >> +
>> >> +	intel_pt_vmx(0);
>> >>  }
>> >
>> > Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
>> > only nouns - please name new functions descriptively to after what they do!
>> 
>> I do agree that it can use a better name (and this is a second attempt
>> already).
>> 
>> > Something like intel_pt_set_vmx_state() or so?
>> 
>> Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF insns, 
>> this could be two functions (intel_pt_handle_vmx{on,off}()) if the global 
>> namespace can take it.
>
> Sure, intel_pt_handle_vmx(0/1) sounds good too. I wouldn't split it into two 
> functions ...

Ok, so I'll use this one then. Thanks!

Peter, here's an updated patch against perf/urgent. It also doesn't
introduce new ACCESS_ONCE()s any more.

>From 89b85412c46270a675cf77918fc9fda520a7bdd2 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  3 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf71d6..09a77dbc73 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (READ_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_handle_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
 /*
  * PMU callbacks
  */
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
+	if (READ_ONCE(pt->vmx_on))
+		return;
+
 	if (!buf || pt_buffer_is_full(buf, pt)) {
 		event->hw.state = PERF_HES_STOPPED;
 		return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..3abb5f5ccc 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -107,10 +108,12 @@ struct pt_buffer {
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..f353061bba 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_handle_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..143648c056 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_handle_vmx(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_handle_vmx(0);
 }
 
 static void hardware_disable(void)
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf/x86/intel/pt: Don't die on VMXON
  2016-04-13 13:48               ` Alexander Shishkin
@ 2016-04-28 10:25                 ` tip-bot for Alexander Shishkin
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Alexander Shishkin @ 2016-04-28 10:25 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: vincent.weaver, acme, tglx, acme, pbonzini, mingo, linux-kernel,
	jolsa, bp, peterz, hpa, eranian, gleb, alexander.shishkin

Commit-ID:  1c5ac21a0e9bab7fc45d0ba9e11623e9ad99d02e
Gitweb:     http://git.kernel.org/tip/1c5ac21a0e9bab7fc45d0ba9e11623e9ad99d02e
Author:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
AuthorDate: Tue, 29 Mar 2016 17:43:10 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 28 Apr 2016 10:32:42 +0200

perf/x86/intel/pt: Don't die on VMXON

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely:

  $ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/87oa9dwrfk.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  3 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf7..09a77db 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (READ_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_handle_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
 /*
  * PMU callbacks
  */
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
+	if (READ_ONCE(pt->vmx_on))
+		return;
+
 	if (!buf || pt_buffer_is_full(buf, pt)) {
 		event->hw.state = PERF_HES_STOPPED;
 		return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a..3abb5f5 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -107,10 +108,12 @@ struct pt_buffer {
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3e..f353061 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_handle_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a9..133679d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_handle_vmx(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_handle_vmx(0);
 }
 
 static void hardware_disable(void)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-04-28 10:26 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-01 16:24 [PATCH] perf/x86/intel/pt: Don't die on VMXON Alexander Shishkin
2016-04-06  8:52 ` Peter Zijlstra
2016-04-06 11:10   ` Alexander Shishkin
2016-04-06 11:52     ` Peter Zijlstra
2016-04-06 11:55       ` Borislav Petkov
2016-04-06 12:07         ` Alexander Shishkin
2016-04-06 14:06       ` Alexander Shishkin
2016-04-13  9:49         ` Ingo Molnar
2016-04-13 10:32           ` Alexander Shishkin
2016-04-13 10:53             ` Ingo Molnar
2016-04-13 13:48               ` Alexander Shishkin
2016-04-28 10:25                 ` [tip:perf/core] " tip-bot for Alexander Shishkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).