[v2,25/28] KVM: x86/mmu: Allow zapping collapsible SPTEs to use MMU read lock
diff mbox series

Message ID 20210202185734.1680553-26-bgardon@google.com
State New, archived
Headers show
Series
  • Allow parallel MMU operations with TDP MMU
Related show

Commit Message

Ben Gardon Feb. 2, 2021, 6:57 p.m. UTC
To speed up the process of disabling dirty logging, change the TDP MMU
function that zaps collapsible SPTEs to run under the MMU read lock.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/mmu.c     |  5 ++---
 arch/x86/kvm/mmu/tdp_mmu.c | 22 +++++++++++++++-------
 2 files changed, 17 insertions(+), 10 deletions(-)

Comments

Paolo Bonzini Feb. 3, 2021, 11:34 a.m. UTC | #1
On 02/02/21 19:57, Ben Gardon wrote:
> @@ -1485,7 +1489,9 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
>  	struct kvm_mmu_page *root;
>  	int root_as_id;
>  
> -	for_each_tdp_mmu_root_yield_safe(kvm, root, false) {
> +	read_lock(&kvm->mmu_lock);
> +
> +	for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
>  		root_as_id = kvm_mmu_page_as_id(root);
>  		if (root_as_id != slot->as_id)
>  			continue;
> @@ -1493,6 +1499,8 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
>  		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
>  					   slot->base_gfn + slot->npages);
>  	}
> +
> +	read_unlock(&kvm->mmu_lock);
>  }


I'd prefer the functions to be consistent about who takes the lock, 
either mmu.c or tdp_mmu.c.  Since everywhere else you're doing it in 
mmu.c, that would be:

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0554d9c5c5d4..386ee4b703d9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5567,10 +5567,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
  	write_lock(&kvm->mmu_lock);
  	slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
  			 kvm_mmu_zap_collapsible_spte, true);
+	write_unlock(&kvm->mmu_lock);

-	if (kvm->arch.tdp_mmu_enabled)
+	if (kvm->arch.tdp_mmu_enabled) {
+		read_lock(&kvm->mmu_lock);
  		kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
-	write_unlock(&kvm->mmu_lock);
+		read_unlock(&kvm->mmu_lock);
+	}
  }

  void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,

and just lockdep_assert_held_read here.

> -		tdp_mmu_set_spte(kvm, &iter, 0);
> -
> -		spte_set = true;

Is it correct to remove this assignment?

Paolo
Ben Gardon Feb. 3, 2021, 6:51 p.m. UTC | #2
On Wed, Feb 3, 2021 at 3:34 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 02/02/21 19:57, Ben Gardon wrote:
> > @@ -1485,7 +1489,9 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
> >       struct kvm_mmu_page *root;
> >       int root_as_id;
> >
> > -     for_each_tdp_mmu_root_yield_safe(kvm, root, false) {
> > +     read_lock(&kvm->mmu_lock);
> > +
> > +     for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
> >               root_as_id = kvm_mmu_page_as_id(root);
> >               if (root_as_id != slot->as_id)
> >                       continue;
> > @@ -1493,6 +1499,8 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
> >               zap_collapsible_spte_range(kvm, root, slot->base_gfn,
> >                                          slot->base_gfn + slot->npages);
> >       }
> > +
> > +     read_unlock(&kvm->mmu_lock);
> >  }
>
>
> I'd prefer the functions to be consistent about who takes the lock,
> either mmu.c or tdp_mmu.c.  Since everywhere else you're doing it in
> mmu.c, that would be:
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 0554d9c5c5d4..386ee4b703d9 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5567,10 +5567,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
>         write_lock(&kvm->mmu_lock);
>         slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
>                          kvm_mmu_zap_collapsible_spte, true);
> +       write_unlock(&kvm->mmu_lock);
>
> -       if (kvm->arch.tdp_mmu_enabled)
> +       if (kvm->arch.tdp_mmu_enabled) {
> +               read_lock(&kvm->mmu_lock);
>                 kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
> -       write_unlock(&kvm->mmu_lock);
> +               read_unlock(&kvm->mmu_lock);
> +       }
>   }
>
>   void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
>
> and just lockdep_assert_held_read here.

That makes sense to me; I agree that keeping it consistent is probably a good idea.

>
> > -             tdp_mmu_set_spte(kvm, &iter, 0);
> > -
> > -             spte_set = true;
>
> Is it correct to remove this assignment?

No, it was not correct to remove it. Thank you for catching that.

>
> Paolo
>

Patch
diff mbox series

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 254ff87d2a61..e3cf868be6bd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5517,8 +5517,7 @@  void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 						start, end - 1, true);
 		}
 	}
-
-	kvm_mmu_unlock(kvm);
+	write_unlock(&kvm->mmu_lock);
 
 	if (kvm->arch.tdp_mmu_enabled) {
 		read_lock(&kvm->mmu_lock);
@@ -5611,10 +5610,10 @@  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	write_lock(&kvm->mmu_lock);
 	slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
 			 kvm_mmu_zap_collapsible_spte, true);
+	write_unlock(&kvm->mmu_lock);
 
 	if (kvm->arch.tdp_mmu_enabled)
 		kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
-	write_unlock(&kvm->mmu_lock);
 }
 
 void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index de26762433ea..cfe66b8d39fa 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1451,10 +1451,9 @@  static void zap_collapsible_spte_range(struct kvm *kvm,
 	rcu_read_lock();
 
 	tdp_root_for_each_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false)) {
-			spte_set = false;
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
-		}
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
 		    !is_last_spte(iter.old_spte, iter.level))
@@ -1465,9 +1464,14 @@  static void zap_collapsible_spte_range(struct kvm *kvm,
 		    !PageTransCompoundMap(pfn_to_page(pfn)))
 			continue;
 
-		tdp_mmu_set_spte(kvm, &iter, 0);
-
-		spte_set = true;
+		if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 	}
 
 	rcu_read_unlock();
@@ -1485,7 +1489,9 @@  void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	struct kvm_mmu_page *root;
 	int root_as_id;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, false) {
+	read_lock(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
 		root_as_id = kvm_mmu_page_as_id(root);
 		if (root_as_id != slot->as_id)
 			continue;
@@ -1493,6 +1499,8 @@  void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
 					   slot->base_gfn + slot->npages);
 	}
+
+	read_unlock(&kvm->mmu_lock);
 }
 
 /*