From: David Matlack <dmatlack@google.com>
To: kvm@vger.kernel.org
Cc: Ben Gardon <bgardon@google.com>, Joerg Roedel <joro@8bytes.org>,
	Jim Mattson <jmattson@google.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Junaid Shahid <junaids@google.com>,
	Andrew Jones <drjones@redhat.com>,
	David Matlack <dmatlack@google.com>
Subject: [PATCH 4/8] KVM: x86/mmu: Common API for lockless shadow page walks
Date: Fri, 11 Jun 2021 23:56:57 +0000	[thread overview]
Message-ID: <20210611235701.3941724-5-dmatlack@google.com> (raw)
In-Reply-To: <20210611235701.3941724-1-dmatlack@google.com>

Introduce a common API for walking the shadow page tables locklessly,
abstracting away whether the TDP MMU is enabled or not. This will be
used in a follow-up patch to support the TDP MMU in fast_page_fault().

The API can be used as follows:

  struct shadow_page_walk walk;

  walk_shadow_page_lockless_begin(vcpu);
  if (!walk_shadow_page_lockless(vcpu, addr, &walk))
    goto out;

  ... use `walk` ...

out:
  walk_shadow_page_lockless_end(vcpu);
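
On success, `walk` contains the level of the root spte (root_level),
the level of the last spte visited (last_level), and the spte value
observed at each visited level (sptes[]).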

Note: Separating walk_shadow_page_lockless_begin() from
walk_shadow_page_lockless() seems superfluous at first glance but is
needed to support fast_page_fault() since it performs multiple walks
under the same begin/end block.
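
For illustration only (this is a hypothetical sketch, not code added by
this patch; need_retry() is a placeholder for whatever retry condition
the caller applies), such a caller could look roughly like:

  struct shadow_page_walk walk;
  u64 spte;

  walk_shadow_page_lockless_begin(vcpu);
  do {
    if (!walk_shadow_page_lockless(vcpu, addr, &walk))
      break;

    /* Inspect the last spte reached by the walk. */
    spte = walk.sptes[walk.last_level];

    /* ... try to handle the fault based on `spte` ... */
  } while (need_retry(spte));
  walk_shadow_page_lockless_end(vcpu);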

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/mmu.c          | 96 ++++++++++++++++++++-------------
 arch/x86/kvm/mmu/mmu_internal.h | 15 ++++++
 arch/x86/kvm/mmu/tdp_mmu.c      | 34 ++++++------
 arch/x86/kvm/mmu/tdp_mmu.h      |  6 ++-
 4 files changed, 96 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1d0fe1445e04..8140c262f4d3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -623,6 +623,11 @@ static bool mmu_spte_age(u64 *sptep)
 
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
+	if (is_vcpu_using_tdp_mmu(vcpu)) {
+		kvm_tdp_mmu_walk_lockless_begin();
+		return;
+	}
+
 	/*
 	 * Prevent page table teardown by making any free-er wait during
 	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
@@ -638,6 +643,11 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
+	if (is_vcpu_using_tdp_mmu(vcpu)) {
+		kvm_tdp_mmu_walk_lockless_end();
+		return;
+	}
+
 	/*
 	 * Make sure the write to vcpu->mode is not reordered in front of
 	 * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
@@ -3501,59 +3511,61 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 }
 
 /*
- * Return the level of the lowest level SPTE added to sptes.
- * That SPTE may be non-present.
+ * Walks the shadow page table for the given address until a leaf or non-present
+ * spte is encountered.
+ *
+ * Returns false if no walk could be performed, in which case `walk` does not
+ * contain any valid data.
+ *
+ * Must be called between walk_shadow_page_lockless_{begin,end}.
  */
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
+static bool walk_shadow_page_lockless(struct kvm_vcpu *vcpu, u64 addr,
+				      struct shadow_page_walk *walk)
 {
-	struct kvm_shadow_walk_iterator iterator;
-	int leaf = -1;
+	struct kvm_shadow_walk_iterator it;
+	bool walk_ok = false;
 	u64 spte;
 
-	walk_shadow_page_lockless_begin(vcpu);
+	if (is_vcpu_using_tdp_mmu(vcpu))
+		return kvm_tdp_mmu_walk_lockless(vcpu, addr, walk);
 
-	for (shadow_walk_init(&iterator, vcpu, addr),
-	     *root_level = iterator.level;
-	     shadow_walk_okay(&iterator);
-	     __shadow_walk_next(&iterator, spte)) {
-		leaf = iterator.level;
-		spte = mmu_spte_get_lockless(iterator.sptep);
+	shadow_walk_init(&it, vcpu, addr);
+	walk->root_level = it.level;
 
-		sptes[leaf] = spte;
+	for (; shadow_walk_okay(&it); __shadow_walk_next(&it, spte)) {
+		walk_ok = true;
+
+		spte = mmu_spte_get_lockless(it.sptep);
+		walk->last_level = it.level;
+		walk->sptes[it.level] = spte;
 
 		if (!is_shadow_present_pte(spte))
 			break;
 	}
 
-	walk_shadow_page_lockless_end(vcpu);
-
-	return leaf;
+	return walk_ok;
 }
 
 /* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
-	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+	struct shadow_page_walk walk;
 	struct rsvd_bits_validate *rsvd_check;
-	int root, leaf, level;
+	int last_level, level;
 	bool reserved = false;
 
-	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
-		*sptep = 0ull;
+	*sptep = 0ull;
+
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return reserved;
-	}
 
-	if (is_vcpu_using_tdp_mmu(vcpu))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
-	else
-		leaf = get_walk(vcpu, addr, sptes, &root);
+	walk_shadow_page_lockless_begin(vcpu);
 
-	if (unlikely(leaf < 0)) {
-		*sptep = 0ull;
-		return reserved;
-	}
+	if (!walk_shadow_page_lockless(vcpu, addr, &walk))
+		goto out;
 
-	*sptep = sptes[leaf];
+	last_level = walk.last_level;
+	*sptep = walk.sptes[last_level];
 
 	/*
 	 * Skip reserved bits checks on the terminal leaf if it's not a valid
@@ -3561,29 +3573,37 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	 * design, always have reserved bits set.  The purpose of the checks is
 	 * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
 	 */
-	if (!is_shadow_present_pte(sptes[leaf]))
-		leaf++;
+	if (!is_shadow_present_pte(walk.sptes[last_level]))
+		last_level++;
 
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
-	for (level = root; level >= leaf; level--)
+	for (level = walk.root_level; level >= last_level; level--) {
+		u64 spte = walk.sptes[level];
+
 		/*
 		 * Use a bitwise-OR instead of a logical-OR to aggregate the
 		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
 		 * adding a Jcc in the loop.
 		 */
-		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
-			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
+		reserved |= __is_bad_mt_xwr(rsvd_check, spte) |
+			    __is_rsvd_bits_set(rsvd_check, spte, level);
+	}
 
 	if (reserved) {
 		pr_err("%s: reserved bits set on MMU-present spte, addr 0x%llx, hierarchy:\n",
 		       __func__, addr);
-		for (level = root; level >= leaf; level--)
+		for (level = walk.root_level; level >= last_level; level--) {
+			u64 spte = walk.sptes[level];
+
 			pr_err("------ spte = 0x%llx level = %d, rsvd bits = 0x%llx",
-			       sptes[level], level,
-			       rsvd_check->rsvd_bits_mask[(sptes[level] >> 7) & 1][level-1]);
+			       spte, level,
+			       rsvd_check->rsvd_bits_mask[(spte >> 7) & 1][level-1]);
+		}
 	}
 
+out:
+	walk_shadow_page_lockless_end(vcpu);
 	return reserved;
 }
 
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d64ccb417c60..26da6ca30fbf 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,4 +165,19 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
+struct shadow_page_walk {
+	/* The level of the root spte in the walk. */
+	int root_level;
+
+	/*
+	 * The level of the last spte in the walk. The last spte is either the
+	 * leaf of the walk (which may or may not be present) or the first
+	 * non-present spte encountered during the walk.
+	 */
+	int last_level;
+
+	/* The spte value at each level. */
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+};
+
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index f4cc79dabeae..36f4844a5f95 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1504,28 +1504,32 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 	return spte_set;
 }
 
-/*
- * Return the level of the lowest level SPTE added to sptes.
- * That SPTE may be non-present.
- */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level)
+void kvm_tdp_mmu_walk_lockless_begin(void)
+{
+	rcu_read_lock();
+}
+
+void kvm_tdp_mmu_walk_lockless_end(void)
+{
+	rcu_read_unlock();
+}
+
+bool kvm_tdp_mmu_walk_lockless(struct kvm_vcpu *vcpu, u64 addr,
+			       struct shadow_page_walk *walk)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	gfn_t gfn = addr >> PAGE_SHIFT;
-	int leaf = -1;
+	bool walk_ok = false;
 
-	*root_level = vcpu->arch.mmu->shadow_root_level;
-
-	rcu_read_lock();
+	walk->root_level = vcpu->arch.mmu->shadow_root_level;
 
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
-		leaf = iter.level;
-		sptes[leaf] = iter.old_spte;
-	}
+		walk_ok = true;
 
-	rcu_read_unlock();
+		walk->last_level = iter.level;
+		walk->sptes[iter.level] = iter.old_spte;
+	}
 
-	return leaf;
+	return walk_ok;
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index c8cf12809fcf..772d11bbb92a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -76,8 +76,10 @@ bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level);
+void kvm_tdp_mmu_walk_lockless_begin(void);
+void kvm_tdp_mmu_walk_lockless_end(void);
+bool kvm_tdp_mmu_walk_lockless(struct kvm_vcpu *vcpu, u64 addr,
+			       struct shadow_page_walk *walk);
 
 #ifdef CONFIG_X86_64
 void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
-- 
2.32.0.272.g935e593368-goog

