* [PATCH] KVM: nVMX: Fix memory corruption when using VMCS shadowing
@ 2016-06-30 22:24 Jim Mattson
  2016-07-01  5:24 ` Paolo Bonzini
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Mattson @ 2016-06-30 22:24 UTC (permalink / raw)
  To: kvm

In copy_shadow_to_vmcs12, a vCPU's shadow VMCS is temporarily loaded
on a logical processor as the current VMCS.  When the copy is
complete, the logical processor's previous current VMCS must be
restored.  However, the logical processor in question may not be the
one on which the vCPU is loaded (for instance, during kvm_vm_release,
when copy_shadow_to_vmcs12 is invoked on the same logical processor
for every vCPU in a VM).  The new functions __vmptrst and __vmptrld
are introduced to save the logical processor's current VMCS before the
copy and to restore it afterwards.

Note that copy_vmcs12_to_shadow does not suffer from this problem,
since it is only called from a context where the vCPU is loaded on the
logical processor.

Signed-off-by: Jim Mattson <jmattson@google.com>

---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c         | 27 +++++++++++++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 14c63c7..2d0548a 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -443,6 +443,7 @@ enum vmcs_field {
 #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
 #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
 #define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMPTRST_RAX       ".byte 0x0f, 0xc7, 0x38"
 #define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
 #define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
 #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e..c79868a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1338,15 +1338,30 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 	loaded_vmcs->launched = 0;
 }
 
-static void vmcs_load(struct vmcs *vmcs)
+static inline u64 __vmptrst(void)
+{
+	u64 phys_addr;
+
+	asm volatile (__ex(ASM_VMX_VMPTRST_RAX)
+			: : "a"(&phys_addr) : "cc", "memory");
+	return phys_addr;
+}
+
+static inline u8 __vmptrld(u64 phys_addr)
 {
-	u64 phys_addr = __pa(vmcs);
 	u8 error;
 
 	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 			: "cc", "memory");
-	if (error)
+	return error;
+}
+
+static void vmcs_load(struct vmcs *vmcs)
+{
+	u64 phys_addr = __pa(vmcs);
+
+	if (__vmptrld(phys_addr))
 		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
 		       vmcs, phys_addr);
 }
@@ -7136,12 +7151,14 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	int i;
 	unsigned long field;
 	u64 field_value;
+	u64 current_vmcs_pa;
 	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
 	const unsigned long *fields = shadow_read_write_fields;
 	const int num_fields = max_shadow_read_write_fields;
 
 	preempt_disable();
 
+	current_vmcs_pa = __vmptrst();
 	vmcs_load(shadow_vmcs);
 
 	for (i = 0; i < num_fields; i++) {
@@ -7167,7 +7184,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	}
 
 	vmcs_clear(shadow_vmcs);
-	vmcs_load(vmx->loaded_vmcs->vmcs);
+	if (current_vmcs_pa != -1ull)
+		__vmptrld(current_vmcs_pa);
+
 
 	preempt_enable();
 }


* Re: [PATCH] KVM: nVMX: Fix memory corruption when using VMCS shadowing
  2016-06-30 22:24 [PATCH] KVM: nVMX: Fix memory corruption when using VMCS shadowing Jim Mattson
@ 2016-07-01  5:24 ` Paolo Bonzini
  2016-07-01 12:33   ` Paolo Bonzini
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2016-07-01  5:24 UTC (permalink / raw)
  To: Jim Mattson, kvm



On 01/07/2016 00:24, Jim Mattson wrote:
> In copy_shadow_to_vmcs12, a vCPU's shadow VMCS is temporarily loaded
> on a logical processor as the current VMCS.  When the copy is
> complete, the logical processor's previous current VMCS must be
> restored.  However, the logical processor in question may not be the
> one on which the vCPU is loaded (for instance, during kvm_vm_release,
> when copy_shadow_to_vmcs12 is invoked on the same logical processor
> for every vCPU in a VM).  The new functions __vmptrst and __vmptrld
> are introduced to save the logical processor's current VMCS before the
> copy and to restore it afterwards.
> 
> Note that copy_vmcs12_to_shadow does not suffer from this problem,
> since it is only called from a context where the vCPU is loaded on the
> logical processor.

Are you sure this is enough?  This code in nested_release_vmcs12 assumes
that the L1 VMCS is the current VMCS after copy_shadow_to_vmcs12 returns:

        if (enable_shadow_vmcs) {
                /* copy to memory all shadowed fields in case
                   they were modified */
                copy_shadow_to_vmcs12(vmx);
                vmx->nested.sync_shadow_vmcs = false;
                vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_SHADOW_VMCS);
                vmcs_write64(VMCS_LINK_POINTER, -1ull);
        }

So perhaps it's the callers of nested_release_vmcs12 that have to ensure
vcpu_load/vcpu_put are called if necessary around the calls to
nested_release_vmcs12.  Only free_nested should need this.  The other
callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
handlers and do have the problem.
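
Roughly, I suppose the caller-side fix would look something like this
(an untested sketch based on the above; the helper name is just a
placeholder):

	/*
	 * Called from vmx_free_vcpu(); the vcpu may have last run on a
	 * different physical CPU, so load it here before touching vmcs01.
	 */
	static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);
		int r;

		r = vcpu_load(vcpu);    /* take the vcpu mutex, load it on this CPU */
		BUG_ON(r);
		vmx_load_vmcs01(vcpu);  /* make vmcs01 current on this CPU */
		free_nested(vmx);
		vcpu_put(vcpu);
	}

(vcpu_load/vcpu_put would presumably also have to be exported for the
vmx module to use them.)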

Paolo

> Signed-off-by: Jim Mattson <jmattson@google.com>
> 
> ---
>  arch/x86/include/asm/vmx.h |  1 +
>  arch/x86/kvm/vmx.c         | 27 +++++++++++++++++++++++----
>  2 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 14c63c7..2d0548a 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -443,6 +443,7 @@ enum vmcs_field {
>  #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
>  #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
>  #define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
> +#define ASM_VMX_VMPTRST_RAX       ".byte 0x0f, 0xc7, 0x38"
>  #define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
>  #define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
>  #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 003618e..c79868a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1338,15 +1338,30 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
>  	loaded_vmcs->launched = 0;
>  }
>  
> -static void vmcs_load(struct vmcs *vmcs)
> +static inline u64 __vmptrst(void)
> +{
> +	u64 phys_addr;
> +
> +	asm volatile (__ex(ASM_VMX_VMPTRST_RAX)
> +			: : "a"(&phys_addr) : "cc", "memory");
> +	return phys_addr;
> +}
> +
> +static inline u8 __vmptrld(u64 phys_addr)
>  {
> -	u64 phys_addr = __pa(vmcs);
>  	u8 error;
>  
>  	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
>  			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
>  			: "cc", "memory");
> -	if (error)
> +	return error;
> +}
> +
> +static void vmcs_load(struct vmcs *vmcs)
> +{
> +	u64 phys_addr = __pa(vmcs);
> +
> +	if (__vmptrld(phys_addr))
>  		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
>  		       vmcs, phys_addr);
>  }
> @@ -7136,12 +7151,14 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
>  	int i;
>  	unsigned long field;
>  	u64 field_value;
> +	u64 current_vmcs_pa;
>  	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
>  	const unsigned long *fields = shadow_read_write_fields;
>  	const int num_fields = max_shadow_read_write_fields;
>  
>  	preempt_disable();
>  
> +	current_vmcs_pa = __vmptrst();
>  	vmcs_load(shadow_vmcs);
>  
>  	for (i = 0; i < num_fields; i++) {
> @@ -7167,7 +7184,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
>  	}
>  
>  	vmcs_clear(shadow_vmcs);
> -	vmcs_load(vmx->loaded_vmcs->vmcs);
> +	if (current_vmcs_pa != -1ull)
> +		__vmptrld(current_vmcs_pa);
> +
>  
>  	preempt_enable();
>  }


* Re: [PATCH] KVM: nVMX: Fix memory corruption when using VMCS shadowing
  2016-07-01  5:24 ` Paolo Bonzini
@ 2016-07-01 12:33   ` Paolo Bonzini
  2016-07-01 16:17     ` Jim Mattson
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2016-07-01 12:33 UTC (permalink / raw)
  To: Jim Mattson, kvm



On 01/07/2016 07:24, Paolo Bonzini wrote:
> So perhaps it's the callers of nested_release_vmcs12 that have to ensure
> vcpu_load/vcpu_put are called if necessary around the calls to
> nested_release_vmcs12.  Only free_nested should need this.  The other
> callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
> handlers and do have the problem.

... do not have.

Paolo


* Re: [PATCH] KVM: nVMX: Fix memory corruption when using VMCS shadowing
  2016-07-01 12:33   ` Paolo Bonzini
@ 2016-07-01 16:17     ` Jim Mattson
  2016-07-08 22:36       ` [PATCH v2] " Jim Mattson
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Mattson @ 2016-07-01 16:17 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm

I agree. I'll have a revised proposal next week.

On Fri, Jul 1, 2016 at 5:33 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 01/07/2016 07:24, Paolo Bonzini wrote:
>> So perhaps it's the callers of nested_release_vmcs12 that have to ensure
>> vcpu_load/vcpu_put are called if necessary around the calls to
>> nested_release_vmcs12.  Only free_nested should need this.  The other
>> callers of nested_release_vmcs12 and copy_shadow_to_vmcs12 are vmexit
>> handlers and do have the problem.
>
> ... do not have.
>
> Paolo


* [PATCH v2] KVM: nVMX: Fix memory corruption when using VMCS shadowing
  2016-07-01 16:17     ` Jim Mattson
@ 2016-07-08 22:36       ` Jim Mattson
  2016-07-09  5:59         ` Paolo Bonzini
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Mattson @ 2016-07-08 22:36 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm

When freeing the nested resources of a vcpu, there is an assumption that
the vcpu's vmcs01 is the current VMCS on the CPU that executes
nested_release_vmcs12(). If this assumption is violated, the vcpu's
vmcs01 may be made active on multiple CPUs at the same time, in
violation of Intel's specification. Moreover, since the vcpu's vmcs01 is
not VMCLEARed on every CPU on which it is active, it can linger in a
CPU's VMCS cache after it has been freed and potentially
repurposed. Subsequent eviction from the CPU's VMCS cache on a capacity
miss can result in memory corruption.

It is not sufficient for vmx_free_vcpu() to call vmx_load_vmcs01(). If
the vcpu in question was last loaded on a different CPU, it must be
migrated to the current CPU before calling vmx_load_vmcs01().

Signed-off-by: Jim Mattson <jmattson@google.com>

---
 arch/x86/kvm/vmx.c  | 19 +++++++++++++++++--
 virt/kvm/kvm_main.c |  2 ++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e..afb1ab3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8855,6 +8855,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int r;
+
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8863,8 +8879,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 		vmx_destroy_pml_buffer(vmx);
 	free_vpid(vmx->vpid);
 	leave_guest_mode(vcpu);
-	vmx_load_vmcs01(vcpu);
-	free_nested(vmx);
+	vmx_free_vcpu_nested(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 48bd520..dd25346 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -148,6 +148,7 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 	put_cpu();
 	return 0;
 }
+EXPORT_SYMBOL_GPL(vcpu_load);
 
 void vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -157,6 +158,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
 	preempt_enable();
 	mutex_unlock(&vcpu->mutex);
 }
+EXPORT_SYMBOL_GPL(vcpu_put);
 
 static void ack_flush(void *_completed)
 {


* Re: [PATCH v2] KVM: nVMX: Fix memory corruption when using VMCS shadowing
  2016-07-08 22:36       ` [PATCH v2] " Jim Mattson
@ 2016-07-09  5:59         ` Paolo Bonzini
  0 siblings, 0 replies; 6+ messages in thread
From: Paolo Bonzini @ 2016-07-09  5:59 UTC (permalink / raw)
  To: Jim Mattson; +Cc: kvm, stable



On 09/07/2016 00:36, Jim Mattson wrote:
> When freeing the nested resources of a vcpu, there is an assumption that
> the vcpu's vmcs01 is the current VMCS on the CPU that executes
> nested_release_vmcs12(). If this assumption is violated, the vcpu's
> vmcs01 may be made active on multiple CPUs at the same time, in
> violation of Intel's specification. Moreover, since the vcpu's vmcs01 is
> not VMCLEARed on every CPU on which it is active, it can linger in a
> CPU's VMCS cache after it has been freed and potentially
> repurposed. Subsequent eviction from the CPU's VMCS cache on a capacity
> miss can result in memory corruption.
> 
> It is not sufficient for vmx_free_vcpu() to call vmx_load_vmcs01(). If
> the vcpu in question was last loaded on a different CPU, it must be
> migrated to the current CPU before calling vmx_load_vmcs01().
> 
> Signed-off-by: Jim Mattson <jmattson@google.com>
> 
> ---
>  arch/x86/kvm/vmx.c  | 19 +++++++++++++++++--
>  virt/kvm/kvm_main.c |  2 ++
>  2 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 003618e..afb1ab3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8855,6 +8855,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
>  	put_cpu();
>  }
>  
> +/*
> + * Ensure that the current vmcs of the logical processor is the
> + * vmcs01 of the vcpu before calling free_nested().
> + */
> +static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
> +{
> +       struct vcpu_vmx *vmx = to_vmx(vcpu);
> +       int r;
> +
> +       r = vcpu_load(vcpu);
> +       BUG_ON(r);
> +       vmx_load_vmcs01(vcpu);
> +       free_nested(vmx);
> +       vcpu_put(vcpu);
> +}
> +
>  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -8863,8 +8879,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>  		vmx_destroy_pml_buffer(vmx);
>  	free_vpid(vmx->vpid);
>  	leave_guest_mode(vcpu);
> -	vmx_load_vmcs01(vcpu);
> -	free_nested(vmx);
> +	vmx_free_vcpu_nested(vcpu);
>  	free_loaded_vmcs(vmx->loaded_vmcs);
>  	kfree(vmx->guest_msrs);
>  	kvm_vcpu_uninit(vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 48bd520..dd25346 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -148,6 +148,7 @@ int vcpu_load(struct kvm_vcpu *vcpu)
>  	put_cpu();
>  	return 0;
>  }
> +EXPORT_SYMBOL_GPL(vcpu_load);
>  
>  void vcpu_put(struct kvm_vcpu *vcpu)
>  {
> @@ -157,6 +158,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
>  	preempt_enable();
>  	mutex_unlock(&vcpu->mutex);
>  }
> +EXPORT_SYMBOL_GPL(vcpu_put);
>  
>  static void ack_flush(void *_completed)
>  {

Looks good, thanks!  I haven't yet pushed it out, but I plan to send it
to Linus for 4.7.  So also

Cc: stable <stable@vger.kernel.org>

Thanks,

Paolo


