From: Ben Gardon <bgardon@google.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Peter Shier <pshier@google.com>,
	Peter Feiner <pfeiner@google.com>,
	Junaid Shahid <junaids@google.com>,
	Jim Mattson <jmattson@google.com>,
	Yulei Zhang <yulei.kernel@gmail.com>,
	Wanpeng Li <kernellwp@gmail.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Xiao Guangrong <xiaoguangrong.eric@gmail.com>,
	Ben Gardon <bgardon@google.com>
Subject: [PATCH 05/24] kvm: x86/mmu: Fix yielding in TDP MMU
Date: Tue, 12 Jan 2021 10:10:22 -0800	[thread overview]
Message-ID: <20210112181041.356734-6-bgardon@google.com> (raw)
In-Reply-To: <20210112181041.356734-1-bgardon@google.com>

There are two problems with the way the TDP MMU yields in long-running
functions. 1.) Under certain conditions, a function may not yield
reliably or frequently enough. 2.) In some functions the TDP iter risks
not making forward progress if two threads livelock yielding to
one another.

Case 1 is possible if, for example, a paging structure is very large
but has few, if any, writable entries. wrprot_gfn_range could traverse
many entries before finding a writable entry and yielding.

Case 2 is possible if two threads are executing wrprot_gfn_range
concurrently. Each could write-protect an entry and then yield. This
would reset the tdp_iter's walk over the paging structure and the loop
would end up repeating the same entry over and over, preventing either
thread from making forward progress.

Fix these issues by moving the yield to the beginning of the loop,
before the other checks, and by only yielding if the loop has made
forward progress since the last yield.
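
As an illustration, the resulting loop shape is roughly the following
sketch (a simplified, hypothetical helper; the names follow the existing
TDP MMU iterator helpers in arch/x86/kvm/mmu/tdp_mmu.c, and the exact
changes are in the diff below):

static bool example_wrprot_like_range(struct kvm *kvm,
				      struct kvm_mmu_page *root,
				      gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	gfn_t last_goal_gfn = start;	/* GFN at the time of the last yield */
	bool spte_set = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		/*
		 * Yield at the top of the loop, but only if the iterator
		 * has advanced past the point of the previous yield. This
		 * bounds the work done between reschedule checks (case 1)
		 * and prevents two threads from livelocking by repeatedly
		 * resetting each other's walk over the same entry (case 2).
		 */
		if (iter.goal_gfn != last_goal_gfn &&
		    tdp_mmu_iter_cond_resched(kvm, &iter)) {
			last_goal_gfn = iter.goal_gfn;
			/* The yield reset the walk; restart from the root. */
			continue;
		}

		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/* ... modify the SPTE for this entry ... */
		spte_set = true;
	}

	return spte_set;
}

Functions that also track a pending TLB flush (zap_gfn_range,
zap_collapsible_spte_range) additionally clear their flush/spte_set
tracking after a successful yield, since the flushing variant of the
yield helper performs the flush before rescheduling.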

Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 83 +++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index b2784514ca2d..1987da0da66e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -470,9 +470,23 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			  gfn_t start, gfn_t end, bool can_yield)
 {
 	struct tdp_iter iter;
+	gfn_t last_goal_gfn = start;
 	bool flush_needed = false;
 
 	tdp_root_for_each_pte(iter, root, start, end) {
+		/* Ensure forward progress has been made before yielding. */
+		if (can_yield && iter.goal_gfn != last_goal_gfn &&
+		    tdp_mmu_iter_flush_cond_resched(kvm, &iter)) {
+			last_goal_gfn = iter.goal_gfn;
+			flush_needed = false;
+			/*
+			 * Yielding caused the paging structure walk to be
+			 * reset so skip to the next iteration to continue the
+			 * walk from the root.
+			 */
+			continue;
+		}
+
 		if (!is_shadow_present_pte(iter.old_spte))
 			continue;
 
@@ -487,12 +501,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			continue;
 
 		tdp_mmu_set_spte(kvm, &iter, 0);
-
-		if (can_yield)
-			flush_needed = !tdp_mmu_iter_flush_cond_resched(kvm,
-									&iter);
-		else
-			flush_needed = true;
+		flush_needed = true;
 	}
 	return flush_needed;
 }
@@ -850,12 +859,25 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 {
 	struct tdp_iter iter;
 	u64 new_spte;
+	gfn_t last_goal_gfn = start;
 	bool spte_set = false;
 
 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
 
 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
 				   min_level, start, end) {
+		/* Ensure forward progress has been made before yielding. */
+		if (iter.goal_gfn != last_goal_gfn &&
+		    tdp_mmu_iter_cond_resched(kvm, &iter)) {
+			last_goal_gfn = iter.goal_gfn;
+			/*
+			 * Yielding caused the paging structure walk to be
+			 * reset so skip to the next iteration to continue the
+			 * walk from the root.
+			 */
+			continue;
+		}
+
 		if (!is_shadow_present_pte(iter.old_spte) ||
 		    !is_last_spte(iter.old_spte, iter.level))
 			continue;
@@ -864,8 +886,6 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
 		spte_set = true;
-
-		tdp_mmu_iter_cond_resched(kvm, &iter);
 	}
 	return spte_set;
 }
@@ -906,9 +926,22 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 {
 	struct tdp_iter iter;
 	u64 new_spte;
+	gfn_t last_goal_gfn = start;
 	bool spte_set = false;
 
 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
+		/* Ensure forward progress has been made before yielding. */
+		if (iter.goal_gfn != last_goal_gfn &&
+		    tdp_mmu_iter_cond_resched(kvm, &iter)) {
+			last_goal_gfn = iter.goal_gfn;
+			/*
+			 * Yielding caused the paging structure walk to be
+			 * reset so skip to the next iteration to continue the
+			 * walk from the root.
+			 */
+			continue;
+		}
+
 		if (spte_ad_need_write_protect(iter.old_spte)) {
 			if (is_writable_pte(iter.old_spte))
 				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
@@ -923,8 +956,6 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
 		spte_set = true;
-
-		tdp_mmu_iter_cond_resched(kvm, &iter);
 	}
 	return spte_set;
 }
@@ -1029,9 +1060,22 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 {
 	struct tdp_iter iter;
 	u64 new_spte;
+	gfn_t last_goal_gfn = start;
 	bool spte_set = false;
 
 	tdp_root_for_each_pte(iter, root, start, end) {
+		/* Ensure forward progress has been made before yielding. */
+		if (iter.goal_gfn != last_goal_gfn &&
+		    tdp_mmu_iter_cond_resched(kvm, &iter)) {
+			last_goal_gfn = iter.goal_gfn;
+			/*
+			 * Yielding caused the paging structure walk to be
+			 * reset so skip to the next iteration to continue the
+			 * walk from the root.
+			 */
+			continue;
+		}
+
 		if (!is_shadow_present_pte(iter.old_spte))
 			continue;
 
@@ -1039,8 +1083,6 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		tdp_mmu_set_spte(kvm, &iter, new_spte);
 		spte_set = true;
-
-		tdp_mmu_iter_cond_resched(kvm, &iter);
 	}
 
 	return spte_set;
@@ -1078,9 +1120,23 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 {
 	struct tdp_iter iter;
 	kvm_pfn_t pfn;
+	gfn_t last_goal_gfn = start;
 	bool spte_set = false;
 
 	tdp_root_for_each_pte(iter, root, start, end) {
+		/* Ensure forward progress has been made before yielding. */
+		if (iter.goal_gfn != last_goal_gfn &&
+		    tdp_mmu_iter_flush_cond_resched(kvm, &iter)) {
+			last_goal_gfn = iter.goal_gfn;
+			spte_set = false;
+			/*
+			 * Yielding caused the paging structure walk to be
+			 * reset so skip to the next iteration to continue the
+			 * walk from the root.
+			 */
+			continue;
+		}
+
 		if (!is_shadow_present_pte(iter.old_spte) ||
 		    is_last_spte(iter.old_spte, iter.level))
 			continue;
@@ -1091,8 +1147,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 			continue;
 
 		tdp_mmu_set_spte(kvm, &iter, 0);
-
-		spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+		spte_set = true;
 	}
 
 	if (spte_set)
-- 
2.30.0.284.gd98b1dd5eaa7-goog

