* [PATCH] KVM: nVMX: do not pin the VMCS12
@ 2017-07-27 13:54 Paolo Bonzini
  2017-07-27 17:20 ` David Matlack
  2017-07-27 17:54 ` David Hildenbrand
  0 siblings, 2 replies; 7+ messages in thread
From: Paolo Bonzini @ 2017-07-27 13:54 UTC
  To: linux-kernel, kvm; +Cc: dmatlack

Since the current implementation of VMCS12 does a memcpy in and out
of guest memory, we do not need current_vmcs12 and current_vmcs12_page
anymore.  current_vmptr is enough to read and write the VMCS12.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b37161808352..142f16ebdca2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -416,9 +416,6 @@ struct nested_vmx {
 
 	/* The guest-physical address of the current VMCS L1 keeps for L2 */
 	gpa_t current_vmptr;
-	/* The host-usable pointer to the above */
-	struct page *current_vmcs12_page;
-	struct vmcs12 *current_vmcs12;
 	/*
 	 * Cache of the guest's VMCS, existing outside of guest memory.
 	 * Loaded from guest memory during VMPTRLD. Flushed to guest
@@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 	if (vmx->nested.current_vmptr == -1ull)
 		return;
 
-	/* current_vmptr and current_vmcs12 are always set/reset together */
-	if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
-		return;
-
 	if (enable_shadow_vmcs) {
 		/* copy to memory all shadowed fields in case
 		   they were modified */
@@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 	vmx->nested.posted_intr_nv = -1;
 
 	/* Flush VMCS12 to guest memory */
-	memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
-	       VMCS12_SIZE);
+	kvm_vcpu_write_guest_page(&vmx->vcpu,
+				  vmx->nested.current_vmptr >> PAGE_SHIFT,
+				  vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
 
-	kunmap(vmx->nested.current_vmcs12_page);
-	nested_release_page(vmx->nested.current_vmcs12_page);
 	vmx->nested.current_vmptr = -1ull;
-	vmx->nested.current_vmcs12 = NULL;
 }
 
 /*
@@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		}
 
 		nested_release_vmcs12(vmx);
-		vmx->nested.current_vmcs12 = new_vmcs12;
-		vmx->nested.current_vmcs12_page = page;
 		/*
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
 		 */
-		memcpy(vmx->nested.cached_vmcs12,
-		       vmx->nested.current_vmcs12, VMCS12_SIZE);
+		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
+		kunmap(page);
+
 		set_current_vmptr(vmx, vmptr);
 	}
 
@@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
-	vmx->nested.current_vmcs12 = NULL;
 
 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
-- 
1.8.3.1


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-27 13:54 [PATCH] KVM: nVMX: do not pin the VMCS12 Paolo Bonzini
@ 2017-07-27 17:20 ` David Matlack
  2017-07-28  1:28   ` Wanpeng Li
                     ` (2 more replies)
  2017-07-27 17:54 ` David Hildenbrand
  1 sibling, 3 replies; 7+ messages in thread
From: David Matlack @ 2017-07-27 17:20 UTC
  To: Paolo Bonzini; +Cc: linux-kernel, kvm list, Jim Mattson

On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Since the current implementation of VMCS12 does a memcpy in and out
> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> anymore.  current_vmptr is enough to read and write the VMCS12.

This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
VMCS12 page by using kvm_write_guest. nested_release_page() only marks
the struct page dirty.
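
For context, the write path is what feeds the dirty log:
kvm_vcpu_write_guest_page() bottoms out in mark_page_dirty_in_slot(),
which sets the memslot->dirty_bitmap bit that KVM_GET_DIRTY_LOG
reports. A sketch of that helper, condensed from the kvm_main.c of
this era (details may vary by version):

	static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
					    gfn_t gfn)
	{
		/* Only slots with dirty logging enabled have a bitmap. */
		if (memslot && memslot->dirty_bitmap) {
			unsigned long rel_gfn = gfn - memslot->base_gfn;

			set_bit_le(rel_gfn, memslot->dirty_bitmap);
		}
	}

nested_release_page() only gets as far as SetPageDirty() on the
struct page, which the dirty log never sees.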

>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 23 ++++++-----------------
>  1 file changed, 6 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b37161808352..142f16ebdca2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -416,9 +416,6 @@ struct nested_vmx {
>
>         /* The guest-physical address of the current VMCS L1 keeps for L2 */
>         gpa_t current_vmptr;
> -       /* The host-usable pointer to the above */
> -       struct page *current_vmcs12_page;
> -       struct vmcs12 *current_vmcs12;
>         /*
>          * Cache of the guest's VMCS, existing outside of guest memory.
>          * Loaded from guest memory during VMPTRLD. Flushed to guest
> @@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>         if (vmx->nested.current_vmptr == -1ull)
>                 return;
>
> -       /* current_vmptr and current_vmcs12 are always set/reset together */
> -       if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
> -               return;
> -
>         if (enable_shadow_vmcs) {
>                 /* copy to memory all shadowed fields in case
>                    they were modified */
> @@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>         vmx->nested.posted_intr_nv = -1;
>
>         /* Flush VMCS12 to guest memory */
> -       memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
> -              VMCS12_SIZE);
> +       kvm_vcpu_write_guest_page(&vmx->vcpu,
> +                                 vmx->nested.current_vmptr >> PAGE_SHIFT,
> +                                 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);

Have you hit any "suspicious RCU usage" error messages during VM
teardown with this patch? We did when we replaced memcpy with
kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
held in one of the teardown paths. kvm_write_guest() expects it to be
held in order to access memslots.

We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
that patch along with a few other nVMX dirty tracking related patches
I've been meaning to get upstreamed.
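
For reference, the pattern kvm_write_guest() and friends assume looks
like this -- a minimal hypothetical wrapper, not a quote of any real
call site (the function name and parameters are placeholders):

	static int vmcs12_flush_sketch(struct kvm_vcpu *vcpu, gpa_t vmptr,
				       void *cached, unsigned int size)
	{
		struct kvm *kvm = vcpu->kvm;
		int idx, r;

		/* The memslot lookup inside the write is SRCU-protected. */
		idx = srcu_read_lock(&kvm->srcu);
		r = kvm_vcpu_write_guest_page(vcpu, vmptr >> PAGE_SHIFT,
					      cached, 0, size);
		srcu_read_unlock(&kvm->srcu, idx);
		return r;
	}

The vcpu-ioctl paths take kvm->srcu like this; the VM teardown path
does not, hence the warning.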

>
> -       kunmap(vmx->nested.current_vmcs12_page);
> -       nested_release_page(vmx->nested.current_vmcs12_page);
>         vmx->nested.current_vmptr = -1ull;
> -       vmx->nested.current_vmcs12 = NULL;
>  }
>
>  /*
> @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>                 }
>
>                 nested_release_vmcs12(vmx);
> -               vmx->nested.current_vmcs12 = new_vmcs12;
> -               vmx->nested.current_vmcs12_page = page;
>                 /*
>                  * Load VMCS12 from guest memory since it is not already
>                  * cached.
>                  */
> -               memcpy(vmx->nested.cached_vmcs12,
> -                      vmx->nested.current_vmcs12, VMCS12_SIZE);
> +               memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
> +               kunmap(page);

+ nested_release_page_clean(page);

> +
>                 set_current_vmptr(vmx, vmptr);
>         }
>
> @@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>
>         vmx->nested.posted_intr_nv = -1;
>         vmx->nested.current_vmptr = -1ull;
> -       vmx->nested.current_vmcs12 = NULL;
>
>         vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
>
> --
> 1.8.3.1
>


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-27 13:54 [PATCH] KVM: nVMX: do not pin the VMCS12 Paolo Bonzini
  2017-07-27 17:20 ` David Matlack
@ 2017-07-27 17:54 ` David Hildenbrand
  1 sibling, 0 replies; 7+ messages in thread
From: David Hildenbrand @ 2017-07-27 17:54 UTC
  To: Paolo Bonzini, linux-kernel, kvm; +Cc: dmatlack

On 27.07.2017 15:54, Paolo Bonzini wrote:
> Since the current implementation of VMCS12 does a memcpy in and out
> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> anymore.  current_vmptr is enough to read and write the VMCS12.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

This looks like the right thing to do!

(and as mentioned, also properly marks the page as dirty)

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 

Thanks,

David


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-27 17:20 ` David Matlack
@ 2017-07-28  1:28   ` Wanpeng Li
  2017-07-28  6:57   ` Paolo Bonzini
  2017-08-02 20:36   ` Radim Krčmář
  2 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2017-07-28  1:28 UTC
  To: David Matlack; +Cc: Paolo Bonzini, linux-kernel, kvm list, Jim Mattson

2017-07-28 1:20 GMT+08:00 David Matlack <dmatlack@google.com>:
> On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Since the current implementation of VMCS12 does a memcpy in and out
>> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
>> anymore.  current_vmptr is enough to read and write the VMCS12.
>
> This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
> VMCS12 page by using kvm_write_guest. nested_release_page() only marks
> the struct page dirty.
>
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  arch/x86/kvm/vmx.c | 23 ++++++-----------------
>>  1 file changed, 6 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index b37161808352..142f16ebdca2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -416,9 +416,6 @@ struct nested_vmx {
>>
>>         /* The guest-physical address of the current VMCS L1 keeps for L2 */
>>         gpa_t current_vmptr;
>> -       /* The host-usable pointer to the above */
>> -       struct page *current_vmcs12_page;
>> -       struct vmcs12 *current_vmcs12;
>>         /*
>>          * Cache of the guest's VMCS, existing outside of guest memory.
>>          * Loaded from guest memory during VMPTRLD. Flushed to guest
>> @@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>>         if (vmx->nested.current_vmptr == -1ull)
>>                 return;
>>
>> -       /* current_vmptr and current_vmcs12 are always set/reset together */
>> -       if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
>> -               return;
>> -
>>         if (enable_shadow_vmcs) {
>>                 /* copy to memory all shadowed fields in case
>>                    they were modified */
>> @@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>>         vmx->nested.posted_intr_nv = -1;
>>
>>         /* Flush VMCS12 to guest memory */
>> -       memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
>> -              VMCS12_SIZE);
>> +       kvm_vcpu_write_guest_page(&vmx->vcpu,
>> +                                 vmx->nested.current_vmptr >> PAGE_SHIFT,
>> +                                 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
>
> Have you hit any "suspicious RCU usage" error messages during VM

Yeah, I observed this splat when testing Paolo's patch today.

[87214.855344] =============================
[87214.855346] WARNING: suspicious RCU usage
[87214.855348] 4.13.0-rc2+ #2 Tainted: G           OE
[87214.855350] -----------------------------
[87214.855352] ./include/linux/kvm_host.h:573 suspicious rcu_dereference_check() usage!
[87214.855353]
other info that might help us debug this:

[87214.855355]
rcu_scheduler_active = 2, debug_locks = 1
[87214.855357] 1 lock held by qemu-system-x86/17059:
[87214.855359]  #0:  (&vcpu->mutex){+.+.+.}, at: [<ffffffffc051bb12>] vcpu_load+0x22/0x80 [kvm]
[87214.855396]
stack backtrace:
[87214.855399] CPU: 3 PID: 17059 Comm: qemu-system-x86 Tainted: G      OE   4.13.0-rc2+ #2
[87214.855401] Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016
[87214.855403] Call Trace:
[87214.855408]  dump_stack+0x99/0xce
[87214.855413]  lockdep_rcu_suspicious+0xc5/0x100
[87214.855423]  kvm_vcpu_gfn_to_memslot+0x166/0x180 [kvm]
[87214.855432]  kvm_vcpu_write_guest_page+0x24/0x50 [kvm]
[87214.855438]  free_nested.part.76+0x76/0x270 [kvm_intel]
[87214.855443]  vmx_free_vcpu+0x7a/0xc0 [kvm_intel]
[87214.855454]  kvm_arch_destroy_vm+0x104/0x1d0 [kvm]
[87214.855463]  kvm_put_kvm+0x17a/0x2b0 [kvm]
[87214.855473]  kvm_vm_release+0x21/0x30 [kvm]
[87214.855477]  __fput+0xfb/0x240
[87214.855482]  ____fput+0xe/0x10
[87214.855485]  task_work_run+0x7e/0xb0
[87214.855490]  do_exit+0x323/0xcf0
[87214.855494]  ? get_signal+0x318/0x930
[87214.855498]  ? _raw_spin_unlock_irq+0x2c/0x60
[87214.855503]  do_group_exit+0x50/0xd0
[87214.855507]  get_signal+0x24f/0x930
[87214.855514]  do_signal+0x37/0x750
[87214.855518]  ? __might_fault+0x3e/0x90
[87214.855523]  ? __might_fault+0x85/0x90
[87214.855527]  ? exit_to_usermode_loop+0x2b/0x100
[87214.855531]  ? __this_cpu_preempt_check+0x13/0x20
[87214.855535]  exit_to_usermode_loop+0xab/0x100
[87214.855539]  syscall_return_slowpath+0x153/0x160
[87214.855542]  entry_SYSCALL_64_fastpath+0xc0/0xc2
[87214.855545] RIP: 0033:0x7ff40d24a26d


Regards,
Wanpeng Li

> teardown with this patch? We did when we replaced memcpy with
> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
> held in one of the teardown paths. kvm_write_guest() expects it to be
> held in order to access memslots.
>
> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
> that patch along with a few other nVMX dirty tracking related patches
> I've been meaning to get upstreamed.
>
>>
>> -       kunmap(vmx->nested.current_vmcs12_page);
>> -       nested_release_page(vmx->nested.current_vmcs12_page);
>>         vmx->nested.current_vmptr = -1ull;
>> -       vmx->nested.current_vmcs12 = NULL;
>>  }
>>
>>  /*
>> @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>>                 }
>>
>>                 nested_release_vmcs12(vmx);
>> -               vmx->nested.current_vmcs12 = new_vmcs12;
>> -               vmx->nested.current_vmcs12_page = page;
>>                 /*
>>                  * Load VMCS12 from guest memory since it is not already
>>                  * cached.
>>                  */
>> -               memcpy(vmx->nested.cached_vmcs12,
>> -                      vmx->nested.current_vmcs12, VMCS12_SIZE);
>> +               memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
>> +               kunmap(page);
>
> + nested_release_page_clean(page);
>
>> +
>>                 set_current_vmptr(vmx, vmptr);
>>         }
>>
>> @@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>>         vmx->nested.posted_intr_nv = -1;
>>         vmx->nested.current_vmptr = -1ull;
>> -       vmx->nested.current_vmcs12 = NULL;
>>
>>         vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
>>
>> --
>> 1.8.3.1
>>


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-27 17:20 ` David Matlack
  2017-07-28  1:28   ` Wanpeng Li
@ 2017-07-28  6:57   ` Paolo Bonzini
  2017-07-28  7:29     ` Christian Borntraeger
  2017-08-02 20:36   ` Radim Krčmář
  2 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2017-07-28  6:57 UTC
  To: David Matlack; +Cc: linux-kernel, kvm list, Jim Mattson, Christian Borntraeger

On 27/07/2017 19:20, David Matlack wrote:
>> +       kvm_vcpu_write_guest_page(&vmx->vcpu,
>> +                                 vmx->nested.current_vmptr >> PAGE_SHIFT,
>> +                                 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
> Have you hit any "suspicious RCU usage" error messages during VM
> teardown with this patch? We did when we replaced memcpy with
> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
> held in one of the teardown paths. kvm_write_guest() expects it to be
> held in order to access memslots.
> 
> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
> that patch along with a few other nVMX dirty tracking related patches
> I've been meaning to get upstreamed.

Oh, right.  I had this other (untested) patch in the queue after
Christian recently annotated everything with RCU checks:

Paolo

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 890b706d1943..07e3b02a1be3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -477,7 +477,8 @@ struct kvm {
 static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
 {
 	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
-				      lockdep_is_held(&kvm->slots_lock));
+				      lockdep_is_held(&kvm->slots_lock) ||
+				      !refcount_read(&kvm->users_count));
 }
 
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
@@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
 	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
-			lockdep_is_held(&kvm->slots_lock));
+			lockdep_is_held(&kvm->slots_lock) ||
+			!refcount_read(&kvm->users_count));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f3f74271f1a9..6a21c98b22bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -655,7 +655,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
-	refcount_set(&kvm->users_count, 1);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	r = kvm_arch_init_vm(kvm, type);
@@ -701,6 +700,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
+	refcount_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -717,10 +717,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	hardware_disable_all();
 out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
-		kfree(rcu_access_pointer(kvm->buses[i]));
+		kfree(kvm_get_bus(kvm, i));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm,
-			rcu_dereference_protected(kvm->memslots[i], 1));
+		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
 	return ERR_PTR(r);
@@ -754,9 +754,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		struct kvm_io_bus *bus;
+		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
 
-		bus = rcu_dereference_protected(kvm->buses[i], 1);
 		if (bus)
 			kvm_io_bus_destroy(bus);
 		kvm->buses[i] = NULL;
@@ -770,8 +769,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm,
-			rcu_dereference_protected(kvm->memslots[i], 1));
+		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-28  6:57   ` Paolo Bonzini
@ 2017-07-28  7:29     ` Christian Borntraeger
  0 siblings, 0 replies; 7+ messages in thread
From: Christian Borntraeger @ 2017-07-28  7:29 UTC
  To: Paolo Bonzini, David Matlack; +Cc: linux-kernel, kvm list, Jim Mattson



On 07/28/2017 08:57 AM, Paolo Bonzini wrote:
> On 27/07/2017 19:20, David Matlack wrote:
>>> +       kvm_vcpu_write_guest_page(&vmx->vcpu,
>>> +                                 vmx->nested.current_vmptr >> PAGE_SHIFT,
>>> +                                 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
>> Have you hit any "suspicious RCU usage" error messages during VM
>> teardown with this patch? We did when we replaced memcpy with
>> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
>> held in one of the teardown paths. kvm_write_guest() expects it to be
>> held in order to access memslots.
>>
>> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
>> that patch along with a few other nVMX dirty tracking related patches
>> I've been meaning to get upstreamed.
> 
> Oh, right.  I had this other (untested) patch in the queue after
> Christian recently annotated everything with RCU checks:
> 

So you make the checks not trigger for users_count == 0 to cope with
the teardown paths?
Since for users_count == 0 all file descriptors are gone, no
memslot/bus can be changed by userspace, so this makes sense.
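
In other words (a hypothetical helper for illustration, not code from
the patch), the dereference is now considered safe when any of these
holds:

	static bool kvm_memslots_deref_safe(struct kvm *kvm)
	{
		return srcu_read_lock_held(&kvm->srcu) ||	/* reader side */
		       lockdep_is_held(&kvm->slots_lock) ||	/* updater side */
		       !refcount_read(&kvm->users_count);	/* teardown: fds gone */
	}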


> Paolo
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 890b706d1943..07e3b02a1be3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -477,7 +477,8 @@ struct kvm {
>  static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
>  {
>  	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
> -				      lockdep_is_held(&kvm->slots_lock));
> +				      lockdep_is_held(&kvm->slots_lock) ||
> +				      !refcount_read(&kvm->users_count));
>  }
> 
>  static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
> @@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
>  static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
>  {
>  	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
> -			lockdep_is_held(&kvm->slots_lock));
> +			lockdep_is_held(&kvm->slots_lock) ||
> +			!refcount_read(&kvm->users_count));
>  }
> 
>  static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index f3f74271f1a9..6a21c98b22bf 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -655,7 +655,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  	mutex_init(&kvm->lock);
>  	mutex_init(&kvm->irq_lock);
>  	mutex_init(&kvm->slots_lock);
> -	refcount_set(&kvm->users_count, 1);
>  	INIT_LIST_HEAD(&kvm->devices);
> 
>  	r = kvm_arch_init_vm(kvm, type);
> @@ -701,6 +700,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  	if (r)
>  		goto out_err;
> 
> +	refcount_set(&kvm->users_count, 1);
>  	spin_lock(&kvm_lock);
>  	list_add(&kvm->vm_list, &vm_list);
>  	spin_unlock(&kvm_lock);
> @@ -717,10 +717,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  	hardware_disable_all();
>  out_err_no_disable:
>  	for (i = 0; i < KVM_NR_BUSES; i++)
> -		kfree(rcu_access_pointer(kvm->buses[i]));
> +		kfree(kvm_get_bus(kvm, i));
>  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -		kvm_free_memslots(kvm,
> -			rcu_dereference_protected(kvm->memslots[i], 1));
> +		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
>  	kvm_arch_free_vm(kvm);
>  	mmdrop(current->mm);
>  	return ERR_PTR(r);
> @@ -754,9 +754,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
>  	spin_unlock(&kvm_lock);
>  	kvm_free_irq_routing(kvm);
>  	for (i = 0; i < KVM_NR_BUSES; i++) {
> -		struct kvm_io_bus *bus;
> +		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
> 
> -		bus = rcu_dereference_protected(kvm->buses[i], 1);
>  		if (bus)
>  			kvm_io_bus_destroy(bus);
>  		kvm->buses[i] = NULL;
> @@ -770,8 +769,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>  	kvm_arch_destroy_vm(kvm);
>  	kvm_destroy_devices(kvm);
>  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -		kvm_free_memslots(kvm,
> -			rcu_dereference_protected(kvm->memslots[i], 1));
> +		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
>  	cleanup_srcu_struct(&kvm->irq_srcu);
>  	cleanup_srcu_struct(&kvm->srcu);
>  	kvm_arch_free_vm(kvm);
> 


* Re: [PATCH] KVM: nVMX: do not pin the VMCS12
  2017-07-27 17:20 ` David Matlack
  2017-07-28  1:28   ` Wanpeng Li
  2017-07-28  6:57   ` Paolo Bonzini
@ 2017-08-02 20:36   ` Radim Krčmář
  2 siblings, 0 replies; 7+ messages in thread
From: Radim Krčmář @ 2017-08-02 20:36 UTC
  To: David Matlack; +Cc: Paolo Bonzini, linux-kernel, kvm list, Jim Mattson

2017-07-27 10:20-0700, David Matlack:
> On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> > Since the current implementation of VMCS12 does a memcpy in and out
> > of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> > anymore.  current_vmptr is enough to read and write the VMCS12.
> 
> This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
> VMCS12 page by using kvm_write_guest. nested_release_page() only marks
> the struct page dirty.
> 
> >
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> >                 }
> >
> >                 nested_release_vmcs12(vmx);
> > -               vmx->nested.current_vmcs12 = new_vmcs12;
> > -               vmx->nested.current_vmcs12_page = page;
> >                 /*
> >                  * Load VMCS12 from guest memory since it is not already
> >                  * cached.
> >                  */
> > -               memcpy(vmx->nested.cached_vmcs12,
> > -                      vmx->nested.current_vmcs12, VMCS12_SIZE);
> > +               memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
> > +               kunmap(page);
> 
> + nested_release_page_clean(page);

Added this and your note about the dirty bit when applying,

thanks.

