stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook
@ 2021-05-14 11:38 Jack Wang
  2021-05-14 14:21 ` Greg KH
  0 siblings, 1 reply; 4+ messages in thread
From: Jack Wang @ 2021-05-14 11:38 UTC (permalink / raw)
  To: gregkh, sashal, stable; +Cc: Sean Christopherson, Yu Zhang, Paolo Bonzini

From: Sean Christopherson <seanjc@google.com>

commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream

Remove the update_pte() shadow paging logic, which was obsoleted by
commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
removed.  As pointed out by Yu, KVM never write protects leaf page
tables for the purposes of shadow paging, and instead marks their
associated shadow page as unsync so that the guest can write PTEs at
will.

The update_pte() path, which predates the unsync logic, optimizes COW
scenarios by refreshing leaf SPTEs when they are written, as opposed to
zapping the SPTE, restarting the guest, and installing the new SPTE on
the subsequent fault.  Since KVM no longer write-protects leaf page
tables, update_pte() is unreachable and can be dropped.

Reported-by: Yu Zhang <yu.c.zhang@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210115004051.4099250-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(jwang: backport to 5.4 to fix a warning on AMD nested Virtualization)
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
---
We hit a warning in  WARNING: CPU: 62 PID: 29302 at arch/x86/kvm/mmu.c:2250 nonpaging_update_pte+0x5/0x10 [kvm]
on AMD Opteron(tm) Processor 6386 SE with kernel 5.4.113, it seems nested L2 is running, I notice a similar bug
report on https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1884058.

I did test with kvm-unit-tests on both Intel Broadwell/Skylake, AMD Opteron, no
regression, basic VM tests work fine too on 5.4 kernel.
the commit c5e2184d1544f9e56140791eff1a351bea2e63b9 can be cherry-picked cleanly
to kernel 5.10+.

 arch/x86/include/asm/kvm_host.h |  3 ---
 arch/x86/kvm/mmu.c              | 33 ++-------------------------------
 arch/x86/kvm/x86.c              |  1 -
 3 files changed, 2 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c52b7073a5ab..4bc476d7fa6c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -391,8 +391,6 @@ struct kvm_mmu {
 	int (*sync_page)(struct kvm_vcpu *vcpu,
 			 struct kvm_mmu_page *sp);
 	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
-	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
 	gpa_t root_cr3;
 	union kvm_mmu_role mmu_role;
@@ -944,7 +942,6 @@ struct kvm_arch {
 struct kvm_vm_stat {
 	ulong mmu_shadow_zapped;
 	ulong mmu_pte_write;
-	ulong mmu_pte_updated;
 	ulong mmu_pde_zapped;
 	ulong mmu_flooded;
 	ulong mmu_recycled;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47c27c6e3842..b9400087141d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2243,13 +2243,6 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
 {
 }
 
-static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
-				 struct kvm_mmu_page *sp, u64 *spte,
-				 const void *pte)
-{
-	WARN_ON(1);
-}
-
 #define KVM_PAGE_ARRAY_NR 16
 
 struct kvm_mmu_pages {
@@ -4356,7 +4349,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
-	context->update_pte = nonpaging_update_pte;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->direct_map = true;
@@ -4935,7 +4927,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
 	context->invlpg = paging64_invlpg;
-	context->update_pte = paging64_update_pte;
 	context->shadow_root_level = level;
 	context->direct_map = false;
 }
@@ -4964,7 +4955,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->sync_page = paging32_sync_page;
 	context->invlpg = paging32_invlpg;
-	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->direct_map = false;
 }
@@ -5039,7 +5029,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->page_fault = tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
-	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
@@ -5172,7 +5161,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->gva_to_gpa = ept_gva_to_gpa;
 	context->sync_page = ept_sync_page;
 	context->invlpg = ept_invlpg;
-	context->update_pte = ept_update_pte;
 	context->root_level = PT64_ROOT_4LEVEL;
 	context->direct_map = false;
 	context->mmu_role.as_u64 = new_role.as_u64;
@@ -5312,19 +5300,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
-static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu_page *sp, u64 *spte,
-				  const void *new)
-{
-	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
-		++vcpu->kvm->stat.mmu_pde_zapped;
-		return;
-        }
-
-	++vcpu->kvm->stat.mmu_pte_updated;
-	vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
-}
-
 static bool need_remote_flush(u64 old, u64 new)
 {
 	if (!is_shadow_present_pte(old))
@@ -5490,14 +5465,10 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 		local_flush = true;
 		while (npte--) {
-			u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
-
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
-			if (gentry &&
-			      !((sp->role.word ^ base_role)
-			      & mmu_base_role_mask.word) && rmap_can_add(vcpu))
-				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
+			if (gentry && sp->role.level != PG_LEVEL_4K)
+				++vcpu->kvm->stat.mmu_pde_zapped;
 			if (need_remote_flush(entry, *spte))
 				remote_flush = true;
 			++spte;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 153659e8f403..b19de7d42b46 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -208,7 +208,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "l1d_flush", VCPU_STAT(l1d_flush) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
-	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
 	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
 	{ "mmu_flooded", VM_STAT(mmu_flooded) },
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook
  2021-05-14 11:38 [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook Jack Wang
@ 2021-05-14 14:21 ` Greg KH
  2021-05-17 17:13   ` Sean Christopherson
  0 siblings, 1 reply; 4+ messages in thread
From: Greg KH @ 2021-05-14 14:21 UTC (permalink / raw)
  To: Jack Wang; +Cc: sashal, stable, Sean Christopherson, Yu Zhang, Paolo Bonzini

On Fri, May 14, 2021 at 01:38:53PM +0200, Jack Wang wrote:
> From: Sean Christopherson <seanjc@google.com>
> 
> commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream
> 
> Remove the update_pte() shadow paging logic, which was obsoleted by
> commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
> removed.  As pointed out by Yu, KVM never write protects leaf page
> tables for the purposes of shadow paging, and instead marks their
> associated shadow page as unsync so that the guest can write PTEs at
> will.
> 
> The update_pte() path, which predates the unsync logic, optimizes COW
> scenarios by refreshing leaf SPTEs when they are written, as opposed to
> zapping the SPTE, restarting the guest, and installing the new SPTE on
> the subsequent fault.  Since KVM no longer write-protects leaf page
> tables, update_pte() is unreachable and can be dropped.
> 
> Reported-by: Yu Zhang <yu.c.zhang@intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Message-Id: <20210115004051.4099250-1-seanjc@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> (jwang: backport to 5.4 to fix a warning on AMD nested Virtualization)
> Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> ---
> We hit a warning in  WARNING: CPU: 62 PID: 29302 at arch/x86/kvm/mmu.c:2250 nonpaging_update_pte+0x5/0x10 [kvm]
> on AMD Opteron(tm) Processor 6386 SE with kernel 5.4.113, it seems nested L2 is running, I notice a similar bug
> report on https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1884058.
> 
> I did test with kvm-unit-tests on both Intel Broadwell/Skylake, AMD Opteron, no
> regression, basic VM tests work fine too on 5.4 kernel.
> the commit c5e2184d1544f9e56140791eff1a351bea2e63b9 can be cherry-picked cleanly
> to kernel 5.10+.

Now queued up, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook
  2021-05-14 14:21 ` Greg KH
@ 2021-05-17 17:13   ` Sean Christopherson
  2021-05-17 17:17     ` Jinpu Wang
  0 siblings, 1 reply; 4+ messages in thread
From: Sean Christopherson @ 2021-05-17 17:13 UTC (permalink / raw)
  To: Greg KH; +Cc: Jack Wang, sashal, stable, Yu Zhang, Paolo Bonzini

On Fri, May 14, 2021, Greg KH wrote:
> On Fri, May 14, 2021 at 01:38:53PM +0200, Jack Wang wrote:
> > From: Sean Christopherson <seanjc@google.com>
> > 
> > commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream
> > 
> > Remove the update_pte() shadow paging logic, which was obsoleted by
> > commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
> > removed.  As pointed out by Yu, KVM never write protects leaf page
> > tables for the purposes of shadow paging, and instead marks their
> > associated shadow page as unsync so that the guest can write PTEs at
> > will.
> > 
> > The update_pte() path, which predates the unsync logic, optimizes COW
> > scenarios by refreshing leaf SPTEs when they are written, as opposed to
> > zapping the SPTE, restarting the guest, and installing the new SPTE on
> > the subsequent fault.  Since KVM no longer write-protects leaf page
> > tables, update_pte() is unreachable and can be dropped.
> > 
> > Reported-by: Yu Zhang <yu.c.zhang@intel.com>
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > Message-Id: <20210115004051.4099250-1-seanjc@google.com>
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > (jwang: backport to 5.4 to fix a warning on AMD nested Virtualization)
> > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > ---
> > We hit a warning in  WARNING: CPU: 62 PID: 29302 at arch/x86/kvm/mmu.c:2250 nonpaging_update_pte+0x5/0x10 [kvm]
> > on AMD Opteron(tm) Processor 6386 SE with kernel 5.4.113, it seems nested L2 is running, I notice a similar bug
> > report on https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1884058.
> > 
> > I did test with kvm-unit-tests on both Intel Broadwell/Skylake, AMD Opteron, no
> > regression, basic VM tests work fine too on 5.4 kernel.
> > the commit c5e2184d1544f9e56140791eff1a351bea2e63b9 can be cherry-picked cleanly
> > to kernel 5.10+.

Ah, drat.  The WARN fires because update_pte() comes from the current MMU context,
and older kernels are missing a check to ensure the current MMU context is
"compatible" with the target shadow page's context.

That bug was inadvertantly fixed by commit a102a674e423 ("KVM: x86/mmu: Don't drop
level/direct from MMU role calculation"), but I didn't tag that for stable because
I incorrectly thought that adding a check on the "direct" role was a nop.  From
that changelog:

    While doing so, stop ignoring "direct".  The field is effectively ignored
    anyways because kvm_mmu_pte_write() is only reached with an indirect mmu
    and the loop only walks indirect shadow pages, but double checking "direct"
    literally costs nothing.

Specifically, the "kvm_mmu_pte_write() is only reached with an indirect mmu" part
fails to account for the case where where the VM has one or more indirect MMUs
_and_ a direct MMU (the current MMU context).

All that said, ripping out this code works too, and is preferable since the code
is dead for its intended purpose, i.e. reaching the update_pte() code can only
be a random, unwanted collision.

> Now queued up, thanks.

Belated thumbs up, thanks!

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook
  2021-05-17 17:13   ` Sean Christopherson
@ 2021-05-17 17:17     ` Jinpu Wang
  0 siblings, 0 replies; 4+ messages in thread
From: Jinpu Wang @ 2021-05-17 17:17 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: Greg KH, Sasha Levin, stable, Yu Zhang, Paolo Bonzini

On Mon, May 17, 2021 at 7:13 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Fri, May 14, 2021, Greg KH wrote:
> > On Fri, May 14, 2021 at 01:38:53PM +0200, Jack Wang wrote:
> > > From: Sean Christopherson <seanjc@google.com>
> > >
> > > commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream
> > >
> > > Remove the update_pte() shadow paging logic, which was obsoleted by
> > > commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
> > > removed.  As pointed out by Yu, KVM never write protects leaf page
> > > tables for the purposes of shadow paging, and instead marks their
> > > associated shadow page as unsync so that the guest can write PTEs at
> > > will.
> > >
> > > The update_pte() path, which predates the unsync logic, optimizes COW
> > > scenarios by refreshing leaf SPTEs when they are written, as opposed to
> > > zapping the SPTE, restarting the guest, and installing the new SPTE on
> > > the subsequent fault.  Since KVM no longer write-protects leaf page
> > > tables, update_pte() is unreachable and can be dropped.
> > >
> > > Reported-by: Yu Zhang <yu.c.zhang@intel.com>
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > Message-Id: <20210115004051.4099250-1-seanjc@google.com>
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > (jwang: backport to 5.4 to fix a warning on AMD nested Virtualization)
> > > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > > ---
> > > We hit a warning in  WARNING: CPU: 62 PID: 29302 at arch/x86/kvm/mmu.c:2250 nonpaging_update_pte+0x5/0x10 [kvm]
> > > on AMD Opteron(tm) Processor 6386 SE with kernel 5.4.113, it seems nested L2 is running, I notice a similar bug
> > > report on https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1884058.
> > >
> > > I did test with kvm-unit-tests on both Intel Broadwell/Skylake, AMD Opteron, no
> > > regression, basic VM tests work fine too on 5.4 kernel.
> > > the commit c5e2184d1544f9e56140791eff1a351bea2e63b9 can be cherry-picked cleanly
> > > to kernel 5.10+.
>
> Ah, drat.  The WARN fires because update_pte() comes from the current MMU context,
> and older kernels are missing a check to ensure the current MMU context is
> "compatible" with the target shadow page's context.
>
> That bug was inadvertantly fixed by commit a102a674e423 ("KVM: x86/mmu: Don't drop
> level/direct from MMU role calculation"), but I didn't tag that for stable because
> I incorrectly thought that adding a check on the "direct" role was a nop.  From
> that changelog:
>
>     While doing so, stop ignoring "direct".  The field is effectively ignored
>     anyways because kvm_mmu_pte_write() is only reached with an indirect mmu
>     and the loop only walks indirect shadow pages, but double checking "direct"
>     literally costs nothing.
>
> Specifically, the "kvm_mmu_pte_write() is only reached with an indirect mmu" part
> fails to account for the case where where the VM has one or more indirect MMUs
> _and_ a direct MMU (the current MMU context).
>
> All that said, ripping out this code works too, and is preferable since the code
> is dead for its intended purpose, i.e. reaching the update_pte() code can only
> be a random, unwanted collision.
>
> > Now queued up, thanks.
>
> Belated thumbs up, thanks!
Hi Sean!

Thanks for checking and detailed implaination.

Regards!
Jack Wang

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-17 17:18 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 11:38 [PATCH] KVM: x86/mmu: Remove the defunct update_pte() paging hook Jack Wang
2021-05-14 14:21 ` Greg KH
2021-05-17 17:13   ` Sean Christopherson
2021-05-17 17:17     ` Jinpu Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).