kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Ben Gardon <bgardon@google.com>,
	Brijesh Singh <brijesh.singh@amd.com>,
	Tom Lendacky <thomas.lendacky@amd.com>
Subject: [PATCH v2 06/17] KVM: x86/mmu: Ensure MMU pages are available when allocating roots
Date: Thu,  4 Mar 2021 17:10:50 -0800	[thread overview]
Message-ID: <20210305011101.3597423-7-seanjc@google.com> (raw)
In-Reply-To: <20210305011101.3597423-1-seanjc@google.com>

Hold the mmu_lock for write for the entire duration of allocating and
initializing an MMU's roots.  This ensures there are MMU pages available
and thus prevents root allocations from failing.  That in turn fixes a
bug where KVM would fail to free valid PAE roots if a one of the later
roots failed to allocate.

Add a comment to make_mmu_pages_available() to call out that the limit
is a soft limit, e.g. KVM will temporarily exceed the threshold if a
page fault allocates multiple shadow pages and there was only one page
"available".

Note, KVM _still_ leaks the PAE roots if the guest PDPTR checks fail.
This will be addressed in a future commit.

Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/mmu.c     | 50 +++++++++++++++-----------------------
 arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++--------------
 2 files changed, 25 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index dd9d5cc13a46..7ebfbc77b050 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2403,6 +2403,15 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 
 	kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail);
 
+	/*
+	 * Note, this check is intentionally soft, it only guarantees that one
+	 * page is available, while the caller may end up allocating as many as
+	 * four pages, e.g. for PAE roots or for 5-level paging.  Temporarily
+	 * exceeding the (arbitrary by default) limit will not harm the host,
+	 * being too agressive may unnecessarily kill the guest, and getting an
+	 * exact count is far more trouble than it's worth, especially in the
+	 * page fault paths.
+	 */
 	if (!kvm_mmu_available_pages(vcpu->kvm))
 		return -ENOSPC;
 	return 0;
@@ -3220,16 +3229,9 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
 {
 	struct kvm_mmu_page *sp;
 
-	write_lock(&vcpu->kvm->mmu_lock);
-
-	if (make_mmu_pages_available(vcpu)) {
-		write_unlock(&vcpu->kvm->mmu_lock);
-		return INVALID_PAGE;
-	}
 	sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
 	++sp->root_count;
 
-	write_unlock(&vcpu->kvm->mmu_lock);
 	return __pa(sp->spt);
 }
 
@@ -3242,16 +3244,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
 	if (is_tdp_mmu_enabled(vcpu->kvm)) {
 		root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
-
-		if (!VALID_PAGE(root))
-			return -ENOSPC;
 		mmu->root_hpa = root;
 	} else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
-		root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level,
-				      true);
-
-		if (!VALID_PAGE(root))
-			return -ENOSPC;
+		root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true);
 		mmu->root_hpa = root;
 	} else if (shadow_root_level == PT32E_ROOT_LEVEL) {
 		for (i = 0; i < 4; ++i) {
@@ -3259,8 +3254,6 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
 			root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
 					      i << 30, PT32_ROOT_LEVEL, true);
-			if (!VALID_PAGE(root))
-				return -ENOSPC;
 			mmu->pae_root[i] = root | PT_PRESENT_MASK;
 		}
 		mmu->root_hpa = __pa(mmu->pae_root);
@@ -3296,8 +3289,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 		root = mmu_alloc_root(vcpu, root_gfn, 0,
 				      mmu->shadow_root_level, false);
-		if (!VALID_PAGE(root))
-			return -ENOSPC;
 		mmu->root_hpa = root;
 		goto set_root_pgd;
 	}
@@ -3316,6 +3307,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < 4; ++i) {
 		MMU_WARN_ON(VALID_PAGE(mmu->pae_root[i]));
+
 		if (mmu->root_level == PT32E_ROOT_LEVEL) {
 			pdptr = mmu->get_pdptr(vcpu, i);
 			if (!(pdptr & PT_PRESENT_MASK)) {
@@ -3329,8 +3321,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 		root = mmu_alloc_root(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, false);
-		if (!VALID_PAGE(root))
-			return -ENOSPC;
 		mmu->pae_root[i] = root | pm_mask;
 	}
 
@@ -3394,14 +3384,6 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.mmu->direct_map)
-		return mmu_alloc_direct_roots(vcpu);
-	else
-		return mmu_alloc_shadow_roots(vcpu);
-}
-
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -4846,7 +4828,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	r = mmu_alloc_special_roots(vcpu);
 	if (r)
 		goto out;
-	r = mmu_alloc_roots(vcpu);
+	write_lock(&vcpu->kvm->mmu_lock);
+	if (make_mmu_pages_available(vcpu))
+		r = -ENOSPC;
+	else if (vcpu->arch.mmu->direct_map)
+		r = mmu_alloc_direct_roots(vcpu);
+	else
+		r = mmu_alloc_shadow_roots(vcpu);
+	write_unlock(&vcpu->kvm->mmu_lock);
+
 	kvm_mmu_sync_roots(vcpu);
 	if (r)
 		goto out;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 70226e0875fe..50ef757c5586 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -137,22 +137,21 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return sp;
 }
 
-static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 {
 	union kvm_mmu_page_role role;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_page *root;
 
+	lockdep_assert_held_write(&kvm->mmu_lock);
+
 	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
 
-	write_lock(&kvm->mmu_lock);
-
 	/* Check for an existing root before allocating a new one. */
 	for_each_tdp_mmu_root(kvm, root) {
 		if (root->role.word == role.word) {
 			kvm_mmu_get_root(kvm, root);
-			write_unlock(&kvm->mmu_lock);
-			return root;
+			goto out;
 		}
 	}
 
@@ -161,19 +160,7 @@ static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
 
 	list_add(&root->link, &kvm->arch.tdp_mmu_roots);
 
-	write_unlock(&kvm->mmu_lock);
-
-	return root;
-}
-
-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu_page *root;
-
-	root = get_tdp_mmu_vcpu_root(vcpu);
-	if (!root)
-		return INVALID_PAGE;
-
+out:
 	return __pa(root->spt);
 }
 
-- 
2.30.1.766.gb4fecdf3b7-goog


  parent reply	other threads:[~2021-03-05  1:11 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05  1:10 [PATCH v2 00/17] KVM: x86/mmu: Lots of bug fixes Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 01/17] KVM: nSVM: Set the shadow root level to the TDP level for nested NPT Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 02/17] KVM: x86/mmu: Alloc page for PDPTEs when shadowing 32-bit NPT with 64-bit Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 03/17] KVM: x86/mmu: Capture 'mmu' in a local variable when allocating roots Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 04/17] KVM: x86/mmu: Allocate the lm_root before allocating PAE roots Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 05/17] KVM: x86/mmu: Allocate pae_root and lm_root pages in dedicated helper Sean Christopherson
2021-03-05 17:34   ` Paolo Bonzini
2021-03-05  1:10 ` Sean Christopherson [this message]
2021-03-05  1:10 ` [PATCH v2 07/17] KVM: x86/mmu: Check PDPTRs before allocating PAE roots Sean Christopherson
2021-04-08 11:15   ` Wanpeng Li
2021-04-08 12:09     ` Paolo Bonzini
2021-04-08 15:48       ` Sean Christopherson
2021-04-08 15:57         ` Paolo Bonzini
2021-04-08 16:27           ` Sean Christopherson
2021-04-08 16:30             ` Paolo Bonzini
2021-03-05  1:10 ` [PATCH v2 08/17] KVM: x86/mmu: Fix and unconditionally enable WARNs to detect PAE leaks Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 09/17] KVM: x86/mmu: Use '0' as the one and only value for an invalid PAE root Sean Christopherson
2021-03-05 17:52   ` Paolo Bonzini
2021-03-05 18:22     ` Sean Christopherson
2021-03-05 18:23       ` Paolo Bonzini
2021-03-05  1:10 ` [PATCH v2 10/17] KVM: x86/mmu: Set the C-bit in the PDPTRs and LM pseudo-PDPTRs Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 11/17] KVM: x86/mmu: Mark the PAE roots as decrypted for shadow paging Sean Christopherson
2021-03-05 17:44   ` Paolo Bonzini
2021-03-05 18:02     ` Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 12/17] KVM: SVM: Don't strip the C-bit from CR2 on #PF interception Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 13/17] KVM: nVMX: Defer the MMU reload to the normal path on an EPTP switch Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 14/17] KVM: x86: Defer the MMU unload to the normal path on an global INVPCID Sean Christopherson
2021-03-05  1:10 ` [PATCH v2 15/17] KVM: x86/mmu: Unexport MMU load/unload functions Sean Christopherson
2021-03-05  1:11 ` [PATCH v2 16/17] KVM: x86/mmu: Sync roots after MMU load iff load as successful Sean Christopherson
2021-03-05  1:11 ` [PATCH v2 17/17] KVM: x86/mmu: WARN on NULL pae_root or lm_root, or bad shadow root level Sean Christopherson
2021-03-05 17:53 ` [PATCH v2 00/17] KVM: x86/mmu: Lots of bug fixes Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210305011101.3597423-7-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=bgardon@google.com \
    --cc=brijesh.singh@amd.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=thomas.lendacky@amd.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).