From: Junaid Shahid <junaids@google.com>
Subject: [PATCH v2 3/5] kvm: x86: mmu: Fast Page Fault path retries
Date: Tue, 8 Nov 2016 15:00:28 -0800
Message-ID: <1478646030-101103-4-git-send-email-junaids@google.com>
In-Reply-To: <1478646030-101103-1-git-send-email-junaids@google.com>
References: <1478646030-101103-1-git-send-email-junaids@google.com>
To: kvm@vger.kernel.org
Cc: andreslc@google.com, pfeiner@google.com, pbonzini@redhat.com,
	guangrong.xiao@linux.intel.com

This change adds retries into the Fast Page Fault path. Without the
retries, the code still works, but if a retry does end up being needed,
it results in a second page fault for the same memory access, which
causes much more overhead than just retrying within the original fault.

This would be especially useful with the upcoming fast access tracking
change, as that would make it more likely for retries to be needed
(e.g. due to read and write faults happening on different CPUs at the
same time).

Signed-off-by: Junaid Shahid <junaids@google.com>
---
 arch/x86/kvm/mmu.c | 117 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 69 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e580134..a22a8a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2889,6 +2889,10 @@ static bool page_fault_can_be_fast(u32 error_code)
 	return true;
 }
 
+/*
+ * Returns true if the SPTE was fixed successfully. Otherwise,
+ * someone else modified the SPTE from its original value.
+ */
 static bool
 fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			u64 *sptep, u64 spte)
@@ -2915,8 +2919,10 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 *
 	 * Compare with set_spte where instead shadow_dirty_mask is set.
 	 */
-	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
-		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) != spte)
+		return false;
+
+	kvm_vcpu_mark_page_dirty(vcpu, gfn);
 
 	return true;
 }
@@ -2933,6 +2939,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	struct kvm_mmu_page *sp;
 	bool ret = false;
 	u64 spte = 0ull;
+	uint retry_count = 0;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return false;
@@ -2945,57 +2952,71 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		if (!is_shadow_present_pte(spte) || iterator.level < level)
 			break;
 
-	/*
-	 * If the mapping has been changed, let the vcpu fault on the
-	 * same address again.
-	 */
-	if (!is_shadow_present_pte(spte)) {
-		ret = true;
-		goto exit;
-	}
+	do {
+		/*
+		 * If the mapping has been changed, let the vcpu fault on the
+		 * same address again.
+		 */
+		if (!is_shadow_present_pte(spte)) {
+			ret = true;
+			break;
+		}
 
-	sp = page_header(__pa(iterator.sptep));
-	if (!is_last_spte(spte, sp->role.level))
-		goto exit;
+		sp = page_header(__pa(iterator.sptep));
+		if (!is_last_spte(spte, sp->role.level))
+			break;
 
-	/*
-	 * Check if it is a spurious fault caused by TLB lazily flushed.
-	 *
-	 * Need not check the access of upper level table entries since
-	 * they are always ACC_ALL.
-	 */
-	if (is_writable_pte(spte)) {
-		ret = true;
-		goto exit;
-	}
+		/*
+		 * Check if it is a spurious fault caused by TLB lazily flushed.
+		 *
+		 * Need not check the access of upper level table entries since
+		 * they are always ACC_ALL.
+		 */
+		if (is_writable_pte(spte)) {
+			ret = true;
+			break;
+		}
 
-	/*
-	 * Currently, to simplify the code, only the spte write-protected
-	 * by dirty-log can be fast fixed.
-	 */
-	if (!spte_can_locklessly_be_made_writable(spte))
-		goto exit;
+		/*
+		 * Currently, to simplify the code, only the spte
+		 * write-protected by dirty-log can be fast fixed.
+		 */
+		if (!spte_can_locklessly_be_made_writable(spte))
+			break;
 
-	/*
-	 * Do not fix write-permission on the large spte since we only dirty
-	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
-	 * that means other pages are missed if its slot is dirty-logged.
-	 *
-	 * Instead, we let the slow page fault path create a normal spte to
-	 * fix the access.
-	 *
-	 * See the comments in kvm_arch_commit_memory_region().
-	 */
-	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
-		goto exit;
+		/*
+		 * Do not fix write-permission on the large spte since we only
+		 * dirty the first page into the dirty-bitmap in
+		 * fast_pf_fix_direct_spte() that means other pages are missed
+		 * if its slot is dirty-logged.
+		 *
+		 * Instead, we let the slow page fault path create a normal spte
+		 * to fix the access.
+		 *
+		 * See the comments in kvm_arch_commit_memory_region().
+		 */
+		if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+			break;
+
+		/*
+		 * Currently, fast page fault only works for direct mapping
+		 * since the gfn is not stable for indirect shadow page. See
+		 * Documentation/virtual/kvm/locking.txt to get more detail.
+		 */
+		ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
+		if (ret)
+			break;
+
+		if (++retry_count > 4) {
+			printk_once(KERN_WARNING
+				"Fast #PF retrying more than 4 times.\n");
+			break;
+		}
+
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+	} while (true);
 
-	/*
-	 * Currently, fast page fault only works for direct mapping since
-	 * the gfn is not stable for indirect shadow page.
-	 * See Documentation/virtual/kvm/locking.txt to get more detail.
-	 */
-	ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
-exit:
 	trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
 			      spte, ret);
 	walk_shadow_page_lockless_end(vcpu);
-- 
2.8.0.rc3.226.g39d4020