* [PATCH v2 0/8] KVM: x86: expose CVE-2017-5715 ("Spectre variant 2") mitigations to guest
@ 2018-01-09 12:03 Paolo Bonzini
  2018-01-09 12:03 ` [PATCH 1/8] KVM: x86: add SPEC_CTRL and IBPB_SUPPORT accessors Paolo Bonzini
                   ` (8 more replies)
  0 siblings, 9 replies; 70+ messages in thread
From: Paolo Bonzini @ 2018-01-09 12:03 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: rkrcmar, liran.alon, jmattson, aliguori, thomas.lendacky, dwmw, bp, x86

This series allows guests to use the MSR_IA32_SPEC_CTRL and
MSR_IA32_PRED_CMD model-specific registers that were added as mitigations
for CVE-2017-5715.

These are only the KVM-specific parts of the fix.  They do *not* yet
include any protection against the guest reading host memory, because
that would be done in the same way as in the rest of Linux.  So there is
no IBRS *usage* here, no retpolines, no stuffing of the return stack
buffer.  (KVM already includes a fix to clear all registers on vmexit,
which is enough to block Google Project Zero's PoC exploit.)

However, I am including the changes to use IBPB (the indirect branch
predictor barrier) if available.  The barrier is issued only when a
physical CPU switches to a different VCPU, so it has a small impact on
performance; a sketch of the logic follows.
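
Concretely, the barrier is keyed to the per-CPU tracking of the current
VMCB (the VMX side is analogous, keyed to the current VMCS); a minimal
sketch of the SVM logic, lifted from the svm_vcpu_load() hunk in the
interdiff below:

	/*
	 * sd is the per-CPU svm_cpu_data; the barrier fires only when a
	 * different VMCB, i.e. a different VCPU, is being loaded here.
	 */
	if (sd->current_vmcb != svm->vmcb) {
		sd->current_vmcb = svm->vmcb;
		if (have_ibpb_support)
			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
	}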

The patches are a bit hackish because the relevant cpufeatures have
not been included yet, and because I wanted to make the patches easier
to backport to distro kernels if desired, but I would still like to
have them in 4.16.
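
For reference, the temporary accessors probe CPUID directly.  A minimal
sketch of what cpu_has_spec_ctrl() could look like (the real helper is
in patch 1; the body below is only an illustration, assuming Intel's
enumeration of SPEC_CTRL support in CPUID.(EAX=7,ECX=0):EDX bit 26):

	/*
	 * Illustrative only: a raw CPUID probe pending a proper
	 * X86_FEATURE_SPEC_CTRL bit in cpufeatures.
	 */
	static inline bool cpu_has_spec_ctrl(void)
	{
		u32 eax, ebx, ecx, edx;

		cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
		return edx & (1U << 26);
	}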

Please review.  The interdiff from v1 is at the end of this cover letter.

Thanks,

Paolo

Paolo Bonzini (6):
  KVM: x86: add SPEC_CTRL and IBPB_SUPPORT accessors
  x86/msr: add definitions for indirect branch predictor MSRs
  kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
  KVM: SVM: fix comment
  kvm: svm: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to guest
  KVM: x86: add SPEC_CTRL and IBPB_SUPPORT to MSR and CPUID lists

Tim Chen (1):
  kvm: vmx: Set IBPB when running a different VCPU

Tom Lendacky (1):
  x86/svm: Set IBPB when running a different VCPU

 arch/x86/include/asm/msr-index.h |  9 ++++-
 arch/x86/kvm/cpuid.c             | 27 ++++++++++---
 arch/x86/kvm/cpuid.h             | 22 +++++++++++
 arch/x86/kvm/svm.c               | 83 +++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c               | 59 ++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c               |  1 +
 6 files changed, 193 insertions(+), 8 deletions(-)


diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec08f1d8d39b..828a03425571 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,11 +39,6 @@
 
 /* Intel MSRs. Some also available on other CPUs */
 
-#define MSR_IA32_SPEC_CTRL		0x00000048
-
-#define MSR_IA32_PRED_CMD		0x00000049
-#define FEATURE_SET_IBPB		(1UL << 0)
-
 #define MSR_PPIN_CTL			0x0000004e
 #define MSR_PPIN			0x0000004f
 
@@ -469,8 +464,15 @@
 #define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL        0x0000003a
 #define MSR_IA32_TSC_ADJUST             0x0000003b
+
+#define MSR_IA32_SPEC_CTRL		0x00000048
+#define SPEC_CTRL_FEATURE_DISABLE_IBRS	(0 << 0)
+#define SPEC_CTRL_FEATURE_ENABLE_IBRS	(1 << 0)
+
+#define MSR_IA32_PRED_CMD		0x00000049
+#define PRED_CMD_IBPB			(1UL << 0)
+
 #define MSR_IA32_BNDCFGS		0x00000d90
-
 #define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
 
 #define MSR_IA32_XSS			0x00000da0
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dd744d896cec..c249d5f599e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1738,7 +1738,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	 * svm_vcpu_load; block speculative execution.
 	 */
 	if (have_ibpb_support)
-		wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1776,7 +1776,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (sd->current_vmcb != svm->vmcb) {
 		sd->current_vmcb = svm->vmcb;
 		if (have_ibpb_support)
-			wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 	}
 
 	avic_vcpu_load(vcpu, cpu);
@@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = svm->nested.vm_cr_msr;
 		break;
 	case MSR_IA32_SPEC_CTRL:
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
 		msr_info->data = svm->spec_ctrl;
 		break;
 	case MSR_IA32_UCODE_REV:
@@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
 	case MSR_IA32_SPEC_CTRL:
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
 		svm->spec_ctrl = data;
 		break;
 	case MSR_IA32_APICBASE:
@@ -4996,6 +5004,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	local_irq_enable();
 
+	/*
+	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+	 * before vmentry.
+	 */
 	if (have_spec_ctrl && svm->spec_ctrl != 0)
 		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
@@ -5077,6 +5089,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		if (svm->spec_ctrl != 0)
 			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 	}
+	/*
+	 * Speculative execution past the above wrmsrl might encounter
+	 * an indirect branch and use guest-controlled contents of the
+	 * indirect branch predictor; block it.
+	 */
+	asm("lfence");
 
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf127c570675..ef2681fa568a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2376,7 +2376,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
 		if (have_spec_ctrl)
-			wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 	}
 
 	if (!already_loaded) {
@@ -3347,6 +3347,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
  */
 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
 
 	switch (msr_info->index) {
@@ -3358,8 +3359,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vmcs_readl(GUEST_GS_BASE);
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vmx_load_host_state(to_vmx(vcpu));
-		msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+		vmx_load_host_state(vmx);
+		msr_info->data = vmx->msr_guest_kernel_gs_base;
 		break;
 #endif
 	case MSR_EFER:
@@ -3368,7 +3369,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = guest_read_tsc(vcpu);
 		break;
 	case MSR_IA32_SPEC_CTRL:
-		msr_info->data = to_vmx(vcpu)->spec_ctrl;
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
+		msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
@@ -3388,13 +3393,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_MCG_EXT_CTL:
 		if (!msr_info->host_initiated &&
-		    !(to_vmx(vcpu)->msr_ia32_feature_control &
+		    !(vmx->msr_ia32_feature_control &
 		      FEATURE_CONTROL_LMCE))
 			return 1;
 		msr_info->data = vcpu->arch.mcg_ext_ctl;
 		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
@@ -3443,7 +3448,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
 			msr_info->data = msr->data;
 			break;
@@ -3510,7 +3515,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		kvm_write_tsc(vcpu, msr_info);
 		break;
 	case MSR_IA32_SPEC_CTRL:
-		to_vmx(vcpu)->spec_ctrl = msr_info->data;
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
+		to_vmx(vcpu)->spec_ctrl = data;
 		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -4046,7 +4055,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	 * vmx_vcpu_load; block speculative execution.
 	 */
 	if (have_spec_ctrl)
-		wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 }
 
 static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
@@ -9629,13 +9638,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pt_guest_enter(vmx);
 
-	if (have_spec_ctrl && vmx->spec_ctrl != 0)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-
 	atomic_switch_perf_msrs(vmx);
 
 	vmx_arm_hv_timer(vcpu);
 
+	/*
+	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+	 * before vmentry.
+	 */
+	if (have_spec_ctrl && vmx->spec_ctrl != 0)
+		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -9744,9 +9757,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	if (have_spec_ctrl) {
 		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-		if (vmx->spec_ctrl)
+		if (vmx->spec_ctrl != 0)
 			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 	}
+	/*
+	 * Speculative execution past the above wrmsrl might encounter
+	 * an indirect branch and use guest-controlled contents of the
+	 * indirect branch predictor; block it.
+	 */
+	asm("lfence");
 
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (vmx->host_debugctlmsr)
-- 
1.8.3.1

* Re: [PATCH 3/8] kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
@ 2018-01-09 16:06 Liran Alon
  0 siblings, 0 replies; 70+ messages in thread
From: Liran Alon @ 2018-01-09 16:06 UTC (permalink / raw)
  To: pbonzini
  Cc: jmattson, x86, dwmw, bp, thomas.lendacky, aliguori, rkrcmar,
	linux-kernel, kvm


----- pbonzini@redhat.com wrote:

> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
> for performance.  Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
> it yet).
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 669f5f74857d..ef603692aa98 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -120,6 +120,8 @@
>  module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
>  #endif
>  
> +static bool __read_mostly have_spec_ctrl;
> +
>  #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
>  #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
>  #define KVM_VM_CR0_ALWAYS_ON						\
> @@ -609,6 +611,8 @@ struct vcpu_vmx {
>  	u64 		      msr_host_kernel_gs_base;
>  	u64 		      msr_guest_kernel_gs_base;
>  #endif
> +	u64		      spec_ctrl;
> +
>  	u32 vm_entry_controls_shadow;
>  	u32 vm_exit_controls_shadow;
>  	u32 secondary_exec_control;
> @@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_TSC:
>  		msr_info->data = guest_read_tsc(vcpu);
>  		break;
> +	case MSR_IA32_SPEC_CTRL:
> +		msr_info->data = to_vmx(vcpu)->spec_ctrl;
> +		break;
>  	case MSR_IA32_SYSENTER_CS:
>  		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
>  		break;
> @@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_TSC:
>  		kvm_write_tsc(vcpu, msr_info);
>  		break;
> +	case MSR_IA32_SPEC_CTRL:
> +		to_vmx(vcpu)->spec_ctrl = data;
> +		break;
>  	case MSR_IA32_CR_PAT:
>  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
>  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
>  		goto out;
>  	}
>  
> +	/*
> +	 * FIXME: this is only needed until SPEC_CTRL is supported
> +	 * by upstream Linux in cpufeatures, then it can be replaced
> +	 * with static_cpu_has.
> +	 */
> +	have_spec_ctrl = cpu_has_spec_ctrl();
> +	if (have_spec_ctrl)
> +		pr_info("kvm: SPEC_CTRL available\n");
> +	else
> +		pr_info("kvm: SPEC_CTRL not available\n");
> +
>  	if (boot_cpu_has(X86_FEATURE_NX))
>  		kvm_enable_efer_bits(EFER_NX);
>  
> @@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> +	vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
> +	vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
>  
>  	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
>  			vmx_msr_bitmap_legacy, PAGE_SIZE);
> @@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	vmx_arm_hv_timer(vcpu);
>  
> +	/*
> +	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> +	 * before vmentry.
> +	 */
> +	if (have_spec_ctrl && vmx->spec_ctrl != 0)
> +		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> +
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  	asm(
>  		/* Store host registers */
> @@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  #endif
>  	      );
>  
> +	if (have_spec_ctrl) {
> +		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> +		if (vmx->spec_ctrl != 0)
> +			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> +	}
> +	/*
> +	 * Speculative execution past the above wrmsrl might encounter
> +	 * an indirect branch and use guest-controlled contents of the
> +	 * indirect branch predictor; block it.
> +	 */
> +	asm("lfence");
> +
>  	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
>  	if (vmx->host_debugctlmsr)
>  		update_debugctlmsr(vmx->host_debugctlmsr);
> -- 
> 1.8.3.1

Reviewed-by: Liran Alon <liran.alon@oracle.com>

* Re: [PATCH 3/8] kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
@ 2018-01-09 16:48 Liran Alon
  2018-01-09 16:57 ` Paolo Bonzini
  0 siblings, 1 reply; 70+ messages in thread
From: Liran Alon @ 2018-01-09 16:48 UTC (permalink / raw)
  To: pbonzini
  Cc: jmattson, x86, dwmw, bp, aliguori, thomas.lendacky, rkrcmar,
	linux-kernel, kvm


----- liran.alon@oracle.com wrote:

> ----- pbonzini@redhat.com wrote:
> 
> > Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
> > for performance.  Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
> > IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
> > it yet).
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 42 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 669f5f74857d..ef603692aa98 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -120,6 +120,8 @@
> >  module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
> >  #endif
> >  
> > +static bool __read_mostly have_spec_ctrl;
> > +
> >  #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
> >  #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
> >  #define KVM_VM_CR0_ALWAYS_ON						\
> > @@ -609,6 +611,8 @@ struct vcpu_vmx {
> >  	u64 		      msr_host_kernel_gs_base;
> >  	u64 		      msr_guest_kernel_gs_base;
> >  #endif
> > +	u64		      spec_ctrl;
> > +
> >  	u32 vm_entry_controls_shadow;
> >  	u32 vm_exit_controls_shadow;
> >  	u32 secondary_exec_control;
> > @@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_IA32_TSC:
> >  		msr_info->data = guest_read_tsc(vcpu);
> >  		break;
> > +	case MSR_IA32_SPEC_CTRL:
> > +		msr_info->data = to_vmx(vcpu)->spec_ctrl;
> > +		break;
> >  	case MSR_IA32_SYSENTER_CS:
> >  		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
> >  		break;
> > @@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_IA32_TSC:
> >  		kvm_write_tsc(vcpu, msr_info);
> >  		break;
> > +	case MSR_IA32_SPEC_CTRL:
> > +		to_vmx(vcpu)->spec_ctrl = data;
> > +		break;
> >  	case MSR_IA32_CR_PAT:
> >  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> >  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> > @@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
> >  		goto out;
> >  	}
> >  
> > +	/*
> > +	 * FIXME: this is only needed until SPEC_CTRL is supported
> > +	 * by upstream Linux in cpufeatures, then it can be replaced
> > +	 * with static_cpu_has.
> > +	 */
> > +	have_spec_ctrl = cpu_has_spec_ctrl();
> > +	if (have_spec_ctrl)
> > +		pr_info("kvm: SPEC_CTRL available\n");
> > +	else
> > +		pr_info("kvm: SPEC_CTRL not available\n");
> > +
> >  	if (boot_cpu_has(X86_FEATURE_NX))
> >  		kvm_enable_efer_bits(EFER_NX);
> >  
> > @@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
> >  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> >  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> >  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> > +	vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
> > +	vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
> >  
> >  	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
> >  			vmx_msr_bitmap_legacy, PAGE_SIZE);
> > @@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >  
> >  	vmx_arm_hv_timer(vcpu);
> >  
> > +	/*
> > +	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> > +	 * before vmentry.
> > +	 */
> > +	if (have_spec_ctrl && vmx->spec_ctrl != 0)
> > +		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> > +
> >  	vmx->__launched = vmx->loaded_vmcs->launched;
> >  	asm(
> >  		/* Store host registers */
> > @@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >  #endif
> >  	      );
> >  
> > +	if (have_spec_ctrl) {
> > +		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> > +		if (vmx->spec_ctrl != 0)

As I also said on the AMD patch, I think this is a bug.
Intel specifies that we should set the IBRS bit on every #VMExit, even if it was already set.

-Liran

> > +			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> > +	}
> > +	/*
> > +	 * Speculative execution past the above wrmsrl might encounter
> > +	 * an indirect branch and use guest-controlled contents of the
> > +	 * indirect branch predictor; block it.
> > +	 */
> > +	asm("lfence");
> > +
> >  	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
> >  	if (vmx->host_debugctlmsr)
> >  		update_debugctlmsr(vmx->host_debugctlmsr);
> > -- 
> > 1.8.3.1
> 
> Reviewed-by: Liran Alon <liran.alon@oracle.com>

* Re: [PATCH 3/8] kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
@ 2018-01-10  0:33 Liran Alon
  0 siblings, 0 replies; 70+ messages in thread
From: Liran Alon @ 2018-01-10  0:33 UTC (permalink / raw)
  To: pbonzini
  Cc: jmattson, x86, dwmw, bp, thomas.lendacky, aliguori, rkrcmar,
	linux-kernel, kvm


----- pbonzini@redhat.com wrote:

> On 09/01/2018 17:48, Liran Alon wrote:
> >>>  
> >>> +	if (have_spec_ctrl) {
> >>> +		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> >>> +		if (vmx->spec_ctrl != 0)
> >>> +			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> >
> > As I also said on the AMD patch, I think this is a bug.
> > Intel specifies that we should set the IBRS bit on every #VMExit,
> > even if it was already set.
> 
> That's correct (though I'd like to understand _why_---I'm not inclined
> to blindly trust a spec), but for now it's saving a wrmsr of 0.  That
> is quite obviously okay, and will be also okay after the bare-metal
> IBRS patches.
> 
> Of course the code will become something like
> 
> 	if (using_ibrs || vmx->spec_ctrl != 0)
> 		wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
> 
> optimizing the case where the host is using retpolines.
> 
> Paolo

I agree with all the above.

-Liran

* Re: [PATCH 3/8] kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
@ 2018-01-10 16:19 Liran Alon
  2018-01-10 16:27 ` Paolo Bonzini
  2018-01-10 16:47 ` David Woodhouse
  0 siblings, 2 replies; 70+ messages in thread
From: Liran Alon @ 2018-01-10 16:19 UTC (permalink / raw)
  To: dwmw
  Cc: konrad.wilk, jmattson, x86, bp, nadav.amit, thomas.lendacky,
	aliguori, arjan, rkrcmar, pbonzini, linux-kernel, kvm


----- dwmw@amazon.co.uk wrote:

> On Wed, 2018-01-10 at 10:41 -0500, Konrad Rzeszutek Wilk wrote:
> > On Wed, Jan 10, 2018 at 03:28:43PM +0100, Paolo Bonzini wrote:
> > > On 10/01/2018 15:06, Arjan van de Ven wrote:
> > > > On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
> > > >> * a simple specification that does "IBRS=1 blocks indirect
> > > >> branch prediction altogether" would actually satisfy the
> > > >> specification just as well, and it would be nice to know if
> > > >> that's what the processor actually does.
> > > > 
> > > > it doesn't exactly, not for all.
> > > > 
> > > > so you really do need to write ibrs again.
> > > 
> > > Okay, so "always set IBRS=1" does *not* protect against variant 2.
> > Thanks,
> > 
> > And what is the point of this "always set IBRS=1" then? Are there
> > some other things lurking in the shadows?
> 
> Yes. *FUTURE* CPUs will have a mode where you can just set IBRS and
> leave it set for ever and not worry about any of this, and the
> performance won't even suck.
> 
> Quite why it's still an option you have to set in an MSR, and not just
> a feature bit that they advertise and do it unconditionally, I have no
> idea. But apparently that's the plan.
> 
> But no current hardware will do this; they've done the best they can do
> with microcode already.

I'm still a bit confused here (maybe I'm not the only one), but I have the following pending questions:
(I really hope someone from Intel will clarify these)

(1) On VMEntry, Intel recommends just restoring SPEC_CTRL to the guest value (using WRMSR or the MSR save/load list) and that's it. As I previously said to Jim, I am missing here a mechanism responsible for hiding the host's BHB & RSB from the guest. Without one, the guest can still info-leak the host's kernel module addresses (kvm-intel.ko / kvm.ko / vmlinux).
* a) As I currently understand things, the only available solution is to issue an IBPB and stuff the RSB on every VMEntry before resuming the guest: the IBPB clears the host's BTB/BHB entries, and the RSB stuffing overwrites all of the host's RSB entries (see the sketch at the end of this message).
* b) Does "IBRS ALL THE TIME" usage change this in any respect? What is Intel's recommendation for handling this info-leak in that mode?

(2) On VMExit, Intel recommends always saving the guest SPEC_CTRL value, setting IBRS to 1 (even if it is already set by the guest) and stuffing the RSB. What exactly does this write of 1 to IBRS do?
* a) Does it keep all existing BTB/BHB entries created in the less-privileged prediction mode, but mark them as such so that they won't be used in the current prediction mode?
* b) Or does it, as Paolo has mentioned multiple times, disable the branch predictor so that it never consults the BTB/BHB at all as long as IBRS=1?
If (b) is true, why is setting IBRS=1 better than just issuing an IBPB that clears all entries? At least in that case the host kernel could still benefit from the branch-prediction performance boost.
If (a) is true, is "IBRS ALL THE TIME" usage basically a CPU change that tags every BTB/BHB entry with the prediction mode at creation time, and compares that tag against the current prediction mode whenever the CPU attempts to use the BTB/BHB?

Regards,
-Liran
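
(For readers following the RSB discussion above: below is a rough,
hypothetical sketch of the kind of RSB-stuffing sequence being referred
to. It is not code from this series; it assumes x86-64, 32 RSB entries,
and a kernel built without a red zone.)

	/*
	 * Hypothetical sketch only: execute 32 CALLs so that every RSB
	 * entry points at a benign speculation trap, then reclaim the
	 * 32 return addresses that were pushed onto the stack.
	 */
	static inline void stuff_rsb(void)
	{
		unsigned long loops;

		asm volatile("	mov	$32, %0\n"
			     "1:	call	2f\n"	/* push benign target */
			     "3:	pause\n"	/* trap: reached only  */
			     "	lfence\n"		/* speculatively via a */
			     "	jmp	3b\n"		/* stale RSB entry     */
			     "2:	dec	%0\n"
			     "	jnz	1b\n"
			     "	add	$(32 * 8), %%rsp\n"
			     : "=&r" (loops) : : "memory");
	}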

* Re: [PATCH 3/8] kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
@ 2018-01-10 16:51 Liran Alon
  2018-01-10 17:07 ` David Woodhouse
  0 siblings, 1 reply; 70+ messages in thread
From: Liran Alon @ 2018-01-10 16:51 UTC (permalink / raw)
  To: dwmw2
  Cc: jmattson, konrad.wilk, x86, nadav.amit, bp, arjan, aliguori,
	thomas.lendacky, rkrcmar, pbonzini, linux-kernel, kvm


----- dwmw2@infradead.org wrote:

> On Wed, 2018-01-10 at 08:19 -0800, Liran Alon wrote:
> > 
> > (1) On VMEntry, Intel recommends just restoring SPEC_CTRL to the
> > guest value (using WRMSR or the MSR save/load list) and that's it.
> > As I previously said to Jim, I am missing here a mechanism
> > responsible for hiding the host's BHB & RSB from the guest. Without
> > one, the guest can still info-leak the host's kernel module
> > addresses (kvm-intel.ko / kvm.ko / vmlinux).
> 
> How so?
> 
> The host has the capability to attack the guest... but that's not an
> interesting observation.

That's not the issue I am talking about here.
I'm talking about the guest's ability to info-leak host addresses.

> 
> I'm not sure why you consider it an information leak to have host
> addresses in the BTB/RSB when the guest is running; it's not like they
> can be *read* from there. Perhaps you could mount a really contrived
> attack where you might attempt to provide your own spec-leak code at
> various candidate addresses that you think might be host BTB targets,
> and validate your assumptions... but I suspect basic cache-based
> observations were easier than that anyway.
> 
> I don't think this is a consideration.

Hmm... This is exactly how the Google Project Zero PoC leaks kvm-intel.ko, kvm.ko & vmlinux.
See the section "Locating the host kernel" here:
https://googleprojectzero.blogspot.co.il/2018/01/reading-privileged-memory-with-side.html

This was an important primitive for actually launching the attack of reading host memory pages: they needed hypervisor addresses so that they could later poison the BTB/BHB with gadgets residing at known host addresses.

-Liran

