From: David Matlack <dmatlack@google.com>
To: kvm@vger.kernel.org
Cc: Ben Gardon <bgardon@google.com>, Joerg Roedel <joro@8bytes.org>,
	Jim Mattson <jmattson@google.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Junaid Shahid <junaids@google.com>,
	Andrew Jones <drjones@redhat.com>,
	Matthew Wilcox <willy@infradead.org>, Yu Zhao <yuzhao@google.com>,
	David Hildenbrand <david@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Matlack <dmatlack@google.com>
Subject: [PATCH v2 4/6] KVM: x86/mmu: fast_page_fault support for the TDP MMU
Date: Wed, 30 Jun 2021 21:48:00 +0000	[thread overview]
Message-ID: <20210630214802.1902448-5-dmatlack@google.com> (raw)
In-Reply-To: <20210630214802.1902448-1-dmatlack@google.com>

Make fast_page_fault interoperate with the TDP MMU by leveraging
walk_shadow_page_lockless_{begin,end} to acquire the RCU read lock, and by
introducing a new helper, kvm_tdp_mmu_get_last_sptep_lockless, to grab the
lowest-level sptep.

Suggested-by: Ben Gardon <bgardon@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
---
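Note for reviewers: a simplified sketch of the resulting lockless call
pattern (names match the diff below; the spte-fixing and retry details are
elided):

	walk_shadow_page_lockless_begin(vcpu);	/* acquires the RCU read lock for the TDP MMU */
	do {
		/* Works for both the legacy MMU and the TDP MMU. */
		sptep = get_last_sptep_lockless(vcpu, gpa, &spte);
		if (!is_shadow_present_pte(spte))
			break;
		/* ... lockless cmpxchg-based fix, retry on failure ... */
	} while (true);
	walk_shadow_page_lockless_end(vcpu);	/* sptep must not be used past this point */
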
 arch/x86/kvm/mmu/mmu.c     | 55 +++++++++++++++++++++++++++-----------
 arch/x86/kvm/mmu/tdp_mmu.c | 36 +++++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.h |  2 ++
 3 files changed, 78 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 88c71a8a55f1..1d410278a4cc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3105,15 +3105,45 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
 	return spte & PT_PRESENT_MASK;
 }
 
+/*
+ * Returns the last-level spte pointer of the shadow page walk for the given
+ * gpa, and sets *spte to the spte value. This spte may be non-present.
+ *
+ * If no walk could be performed, returns NULL and *spte does not contain valid
+ * data.
+ *
+ * Constraints:
+ *  - Must be called between walk_shadow_page_lockless_{begin,end}.
+ *  - The returned sptep must not be used after walk_shadow_page_lockless_end.
+ */
+u64 *get_last_sptep_lockless(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
+{
+	struct kvm_shadow_walk_iterator iterator;
+	u64 old_spte;
+	u64 *sptep = NULL;
+
+	if (is_tdp_mmu(vcpu->arch.mmu))
+		return kvm_tdp_mmu_get_last_sptep_lockless(vcpu, gpa, spte);
+
+	for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
+		sptep = iterator.sptep;
+		*spte = old_spte;
+
+		if (!is_shadow_present_pte(old_spte))
+			break;
+	}
+
+	return sptep;
+}
+
 /*
  * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
  */
 static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
 {
-	struct kvm_shadow_walk_iterator iterator;
-	struct kvm_mmu_page *sp;
 	int ret = RET_PF_INVALID;
 	u64 spte = 0ull;
+	u64 *sptep = NULL;
 	uint retry_count = 0;
 
 	if (!page_fault_can_be_fast(error_code))
@@ -3122,16 +3152,14 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
 	walk_shadow_page_lockless_begin(vcpu);
 
 	do {
+		struct kvm_mmu_page *sp;
 		u64 new_spte;
 
-		for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte)
-			if (!is_shadow_present_pte(spte))
-				break;
-
+		sptep = get_last_sptep_lockless(vcpu, gpa, &spte);
 		if (!is_shadow_present_pte(spte))
 			break;
 
-		sp = sptep_to_sp(iterator.sptep);
+		sp = sptep_to_sp(sptep);
 		if (!is_last_spte(spte, sp->role.level))
 			break;
 
@@ -3189,8 +3217,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
 		 * since the gfn is not stable for indirect shadow page. See
 		 * Documentation/virt/kvm/locking.rst to get more detail.
 		 */
-		if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
-					    new_spte)) {
+		if (fast_pf_fix_direct_spte(vcpu, sp, sptep, spte, new_spte)) {
 			ret = RET_PF_FIXED;
 			break;
 		}
@@ -3203,7 +3230,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
 
 	} while (true);
 
-	trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret);
+	trace_fast_page_fault(vcpu, gpa, error_code, sptep, spte, ret);
 	walk_shadow_page_lockless_end(vcpu);
 
 	return ret;
@@ -3838,11 +3865,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
 
-	if (!is_tdp_mmu_fault) {
-		r = fast_page_fault(vcpu, gpa, error_code);
-		if (r != RET_PF_INVALID)
-			return r;
-	}
+	r = fast_page_fault(vcpu, gpa, error_code);
+	if (r != RET_PF_INVALID)
+		return r;
 
 	r = mmu_topup_memory_caches(vcpu, false);
 	if (r)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c6fa8d00bf9f..2c9e0ed71fa0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -527,6 +527,10 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
 	if (is_removed_spte(iter->old_spte))
 		return false;
 
+	/*
+	 * TDP MMU sptes can also be concurrently cmpxchg'd in
+	 * fast_pf_fix_direct_spte as part of fast_page_fault.
+	 */
 	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
 		      new_spte) != iter->old_spte)
 		return false;
@@ -1546,3 +1550,35 @@ int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 
 	return leaf;
 }
+
+/*
+ * Must be called between kvm_tdp_mmu_walk_shadow_page_lockless_{begin,end}.
+ *
+ * The returned sptep must not be used after
+ * kvm_tdp_mmu_walk_shadow_page_lockless_end.
+ */
+u64 *kvm_tdp_mmu_get_last_sptep_lockless(struct kvm_vcpu *vcpu, u64 addr,
+					 u64 *spte)
+{
+	struct tdp_iter iter;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	gfn_t gfn = addr >> PAGE_SHIFT;
+	tdp_ptep_t sptep = NULL;
+
+	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+		*spte = iter.old_spte;
+		sptep = iter.sptep;
+	}
+
+	if (sptep)
+		/*
+		 * Perform the rcu dereference here since we are passing the
+		 * sptep up to the generic MMU code which does not know the
+		 * synchronization details of the TDP MMU. This is safe as long
+		 * as the caller obeys the contract that the sptep is not used
+		 * after kvm_tdp_mmu_walk_shadow_page_lockless_end.
+		 */
+		return rcu_dereference(sptep);
+
+	return NULL;
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index e9dde5f9c0ef..508a23bdf7da 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -81,6 +81,8 @@ void kvm_tdp_mmu_walk_lockless_begin(void);
 void kvm_tdp_mmu_walk_lockless_end(void);
 int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 				  int *root_level);
+u64 *kvm_tdp_mmu_get_last_sptep_lockless(struct kvm_vcpu *vcpu, u64 addr,
+					 u64 *spte);
 
 #ifdef CONFIG_X86_64
 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
-- 
2.32.0.93.g670b81a890-goog

