From: Sean Christopherson <sean.j.christopherson@intel.com>
To: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 13/16] KVM: x86/mmu: Move calls to thp_adjust() down a level
Date: Fri,  6 Dec 2019 15:57:26 -0800	[thread overview]
Message-ID: <20191206235729.29263-14-sean.j.christopherson@intel.com> (raw)
In-Reply-To: <20191206235729.29263-1-sean.j.christopherson@intel.com>

Move the calls to thp_adjust() down a level from the page fault handlers
to the map/fetch helpers and remove the page count shuffling done in
thp_adjust().

Despite holding a reference to the underlying page while processing a
page fault, the page fault flows don't actually rely on holding a
reference to the page when thp_adjust() is called.  At that point, the
fault handlers hold mmu_lock, which prevents mmu_notifier from completing
any invalidations, and have verified no invalidations from mmu_notifier
have occurred since the page reference was acquired (which is done prior
to taking mmu_lock).
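
For reference, the ordering in the direct page fault path is roughly as
follows (a simplified sketch; exact signatures and arguments elided):

	try_async_pf(vcpu, ..., &pfn, ...);		/* acquires a reference on the page */
	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))	/* no invalidations since the pfn was grabbed */
		goto out_unlock;
	...
	r = __direct_map(...);				/* thp_adjust() now runs in here, under mmu_lock */
out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);			/* releases the original, unadjusted pfn */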

The kvm_release_pfn_clean()/kvm_get_pfn() dance in thp_adjust() is a
quirk necessitated by the fact that thp_adjust() modifies the pfn that is
consumed by its caller.  Because the page fault handlers call
kvm_release_pfn_clean() on that pfn, thp_adjust() must transfer the
reference to the adjusted pfn purely so that the correct pfn is released.

Calling thp_adjust() from __direct_map() and FNAME(fetch) means the pfn
adjustment doesn't change the pfn as seen by the page fault handlers,
i.e. the pfn released by the page fault handlers is the same pfn that
was returned by gfn_to_pfn().
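
For example, assuming a 2MB huge page (512 4k pages) and a hypothetical
pfn value:

	mask = KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1;	/* 511 */
	/* pfn 0x12345 is mapped as 0x12345 & ~mask = 0x12200 inside the */
	/* map helper, while the fault handler still releases 0x12345.   */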

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/mmu/mmu.c         | 31 ++++++++++++-------------------
 arch/x86/kvm/mmu/paging_tmpl.h | 11 ++++++-----
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 49e5d48e7327..904fb466dd24 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3341,24 +3341,15 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	    !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompoundMap(pfn_to_page(pfn))) {
 		unsigned long mask;
+
 		/*
-		 * mmu_notifier_retry was successful and we hold the
-		 * mmu_lock here, so the pmd can't become splitting
-		 * from under us, and in turn
-		 * __split_huge_page_refcount() can't run from under
-		 * us and we can safely transfer the refcount from
-		 * PG_tail to PG_head as we switch the pfn to tail to
-		 * head.
+		 * mmu_notifier_retry() was successful and mmu_lock is held, so
+		 * the pmd can't be split from under us.
 		 */
 		*levelp = level = PT_DIRECTORY_LEVEL;
 		mask = KVM_PAGES_PER_HPAGE(level) - 1;
 		VM_BUG_ON((gfn & mask) != (pfn & mask));
-		if (pfn & mask) {
-			kvm_release_pfn_clean(pfn);
-			pfn &= ~mask;
-			kvm_get_pfn(pfn);
-			*pfnp = pfn;
-		}
+		*pfnp = pfn & ~mask;
 	}
 }
 
@@ -3386,8 +3377,9 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
-			int map_writable, int level, kvm_pfn_t pfn,
-			bool prefault, bool account_disallowed_nx_lpage)
+			int map_writable, int level, int max_level,
+			kvm_pfn_t pfn, bool prefault,
+			bool account_disallowed_nx_lpage)
 {
 	struct kvm_shadow_walk_iterator it;
 	struct kvm_mmu_page *sp;
@@ -3398,6 +3390,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
 	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return RET_PF_RETRY;
 
+	if (likely(max_level > PT_PAGE_TABLE_LEVEL))
+		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+
 	trace_kvm_mmu_spte_requested(gpa, level, pfn);
 	for_each_shadow_entry(vcpu, gpa, it) {
 		/*
@@ -4216,10 +4211,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 		goto out_unlock;
 	if (make_mmu_pages_available(vcpu) < 0)
 		goto out_unlock;
-	if (likely(max_level > PT_PAGE_TABLE_LEVEL))
-		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
-	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault,
-			 is_tdp && lpage_disallowed);
+	r = __direct_map(vcpu, gpa, write, map_writable, level, max_level, pfn,
+			 prefault, is_tdp && lpage_disallowed);
 
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 7d57ec576df0..3b0ba2a77e28 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -613,7 +613,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
  */
 static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 			 struct guest_walker *gw,
-			 int write_fault, int hlevel,
+			 int write_fault, int hlevel, int max_level,
 			 kvm_pfn_t pfn, bool map_writable, bool prefault,
 			 bool lpage_disallowed)
 {
@@ -673,6 +673,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 	gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
 	base_gfn = gfn;
 
+	if (max_level > PT_PAGE_TABLE_LEVEL)
+		transparent_hugepage_adjust(vcpu, gw->gfn, &pfn, &hlevel);
+
 	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
 
 	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
@@ -865,10 +868,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	if (make_mmu_pages_available(vcpu) < 0)
 		goto out_unlock;
-	if (max_level > PT_PAGE_TABLE_LEVEL)
-		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
-	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
-			 level, pfn, map_writable, prefault, lpage_disallowed);
+	r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, max_level,
+			 pfn, map_writable, prefault, lpage_disallowed);
 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 
 out_unlock:
-- 
2.24.0



Thread overview: 18+ messages
2019-12-06 23:57 [PATCH 00/16] KVM: x86: MMU page fault clean-up Sean Christopherson
2019-12-06 23:57 ` [PATCH 01/16] KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM Sean Christopherson
2019-12-06 23:57 ` [PATCH 02/16] KVM: x86/mmu: Move definition of make_mmu_pages_available() up Sean Christopherson
2019-12-06 23:57 ` [PATCH 03/16] KVM: x86/mmu: Fold nonpaging_map() into nonpaging_page_fault() Sean Christopherson
2019-12-06 23:57 ` [PATCH 04/16] KVM: x86/mmu: Move nonpaging_page_fault() below try_async_pf() Sean Christopherson
2019-12-06 23:57 ` [PATCH 05/16] KVM: x86/mmu: Refactor handling of cache consistency with TDP Sean Christopherson
2019-12-06 23:57 ` [PATCH 06/16] KVM: x86/mmu: Refactor the per-slot level calculation in mapping_level() Sean Christopherson
2019-12-06 23:57 ` [PATCH 07/16] KVM: x86/mmu: Refactor handling of forced 4k pages in page faults Sean Christopherson
2019-12-06 23:57 ` [PATCH 08/16] KVM: x86/mmu: Incorporate guest's page level into max level for shadow MMU Sean Christopherson
2019-12-06 23:57 ` [PATCH 09/16] KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level Sean Christopherson
2019-12-06 23:57 ` [PATCH 10/16] KVM: x86/mmu: Rename lpage_disallowed to account_disallowed_nx_lpage Sean Christopherson
2019-12-06 23:57 ` [PATCH 11/16] KVM: x86/mmu: Consolidate tdp_page_fault() and nonpaging_page_fault() Sean Christopherson
2019-12-06 23:57 ` [PATCH 12/16] KVM: x86/mmu: Move transparent_hugepage_adjust() above __direct_map() Sean Christopherson
2019-12-06 23:57 ` [PATCH 13/16] KVM: x86/mmu: Move calls to thp_adjust() down a level Sean Christopherson [this message]
2019-12-06 23:57 ` [PATCH 14/16] KVM: x86/mmu: Move root_hpa validity checks to top of page fault handler Sean Christopherson
2019-12-06 23:57 ` [PATCH 15/16] KVM: x86/mmu: WARN on an invalid root_hpa Sean Christopherson
2019-12-06 23:57 ` [PATCH 16/16] KVM: x86/mmu: WARN if root_hpa is invalid when handling a page fault Sean Christopherson
2019-12-09 15:31 ` [PATCH 00/16] KVM: x86: MMU page fault clean-up Paolo Bonzini
