All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic()
@ 2011-06-09 13:59 Takuya Yoshikawa
  2011-06-09 14:01 ` [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic() Takuya Yoshikawa
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Takuya Yoshikawa @ 2011-06-09 13:59 UTC (permalink / raw)
  To: avi, mtosatti; +Cc: kvm, yoshikawa.takuya, mingo

This patch set tries to do some of the cleanups suggested by Ingo.

  http://www.spinics.net/lists/kvm/msg55458.html

The end result may include a bit of my taste.  So any comments will be
appreciated.

Thanks,
  Takuya

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic()
  2011-06-09 13:59 [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic() Takuya Yoshikawa
@ 2011-06-09 14:01 ` Takuya Yoshikawa
  2011-06-12 15:45   ` Avi Kivity
  2011-06-09 14:02 ` [PATCH 2/4] KVM: MMU: Move some variables into the walk loop Takuya Yoshikawa
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Takuya Yoshikawa @ 2011-06-09 14:01 UTC (permalink / raw)
  To: avi, mtosatti; +Cc: kvm, yoshikawa.takuya, mingo

From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>

Avoid two step jumps to the error handling part.  This eliminates the
use of the variables present and rsvd_fault.

We also mark the variables write/user/fetch_fault with const to show
these do not change in the function.

These were suggested by Ingo Molnar.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/x86/kvm/paging_tmpl.h |   64 +++++++++++++++++++------------------------
 1 files changed, 28 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c4dc01..51e5990 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -125,18 +125,17 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, uninitialized_var(pte_access);
 	gpa_t pte_gpa;
-	bool eperm, present, rsvd_fault;
-	int offset, write_fault, user_fault, fetch_fault;
-
-	write_fault = access & PFERR_WRITE_MASK;
-	user_fault = access & PFERR_USER_MASK;
-	fetch_fault = access & PFERR_FETCH_MASK;
+	bool eperm;
+	int offset;
+	const int write_fault = access & PFERR_WRITE_MASK;
+	const int user_fault  = access & PFERR_USER_MASK;
+	const int fetch_fault = access & PFERR_FETCH_MASK;
+	u16 errcode = 0;
 
 	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 				     fetch_fault);
 walk:
-	present = true;
-	eperm = rsvd_fault = false;
+	eperm = false;
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
 
@@ -145,7 +144,7 @@ walk:
 		pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
 		if (!is_present_gpte(pte)) {
-			present = false;
+			errcode |= PFERR_PRESENT_MASK;
 			goto error;
 		}
 		--walker->level;
@@ -171,34 +170,34 @@ walk:
 		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
 					      PFERR_USER_MASK|PFERR_WRITE_MASK);
 		if (unlikely(real_gfn == UNMAPPED_GVA)) {
-			present = false;
-			break;
+			errcode |= PFERR_PRESENT_MASK;
+			goto error;
 		}
 		real_gfn = gpa_to_gfn(real_gfn);
 
 		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
 		if (unlikely(kvm_is_error_hva(host_addr))) {
-			present = false;
-			break;
+			errcode |= PFERR_PRESENT_MASK;
+			goto error;
 		}
 
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
-			present = false;
-			break;
+			errcode |= PFERR_PRESENT_MASK;
+			goto error;
 		}
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
 		if (unlikely(!is_present_gpte(pte))) {
-			present = false;
-			break;
+			errcode |= PFERR_PRESENT_MASK;
+			goto error;
 		}
 
 		if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
 					      walker->level))) {
-			rsvd_fault = true;
-			break;
+			errcode |= PFERR_RSVD_MASK;
+			goto error;
 		}
 
 		if (unlikely(write_fault && !is_writable_pte(pte)
@@ -213,16 +212,15 @@ walk:
 			eperm = true;
 #endif
 
-		if (!eperm && !rsvd_fault
-		    && unlikely(!(pte & PT_ACCESSED_MASK))) {
+		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
 			int ret;
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
 			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
 						  pte, pte|PT_ACCESSED_MASK);
 			if (unlikely(ret < 0)) {
-				present = false;
-				break;
+				errcode |= PFERR_PRESENT_MASK;
+				goto error;
 			} else if (ret)
 				goto walk;
 
@@ -270,7 +268,7 @@ walk:
 		--walker->level;
 	}
 
-	if (unlikely(!present || eperm || rsvd_fault))
+	if (unlikely(eperm))
 		goto error;
 
 	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
@@ -280,7 +278,7 @@ walk:
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
 					  pte, pte|PT_DIRTY_MASK);
 		if (unlikely(ret < 0)) {
-			present = false;
+			errcode |= PFERR_PRESENT_MASK;
 			goto error;
 		} else if (ret)
 			goto walk;
@@ -297,19 +295,13 @@ walk:
 	return 1;
 
 error:
-	walker->fault.vector = PF_VECTOR;
-	walker->fault.error_code_valid = true;
-	walker->fault.error_code = 0;
-	if (present)
-		walker->fault.error_code |= PFERR_PRESENT_MASK;
-
-	walker->fault.error_code |= write_fault | user_fault;
-
+	errcode |= write_fault | user_fault;
 	if (fetch_fault && mmu->nx)
-		walker->fault.error_code |= PFERR_FETCH_MASK;
-	if (rsvd_fault)
-		walker->fault.error_code |= PFERR_RSVD_MASK;
+		errcode |= PFERR_FETCH_MASK;
 
+	walker->fault.vector = PF_VECTOR;
+	walker->fault.error_code_valid = true;
+	walker->fault.error_code = errcode;
 	walker->fault.address = addr;
 	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] KVM: MMU: Move some variables into the walk loop
  2011-06-09 13:59 [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic() Takuya Yoshikawa
  2011-06-09 14:01 ` [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic() Takuya Yoshikawa
@ 2011-06-09 14:02 ` Takuya Yoshikawa
  2011-06-09 14:03 ` [PATCH 3/4] KVM: MMU: Update walker->pt/pte_access directly Takuya Yoshikawa
  2011-06-09 14:05 ` [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic() Takuya Yoshikawa
  3 siblings, 0 replies; 8+ messages in thread
From: Takuya Yoshikawa @ 2011-06-09 14:02 UTC (permalink / raw)
  To: avi, mtosatti; +Cc: kvm, yoshikawa.takuya, mingo

From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>

This will help us split out the big body of the walk loop later.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/x86/kvm/paging_tmpl.h |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 51e5990..3270789 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -122,11 +122,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 {
 	pt_element_t pte;
 	pt_element_t __user *ptep_user;
-	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
-	gpa_t pte_gpa;
+	unsigned pt_access, uninitialized_var(pte_access);
 	bool eperm;
-	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -158,12 +155,11 @@ walk:
 	for (;;) {
 		gfn_t real_gfn;
 		unsigned long host_addr;
+		unsigned index  = PT_INDEX(addr, walker->level);
+		int offset      = index * sizeof(pt_element_t);
+		gfn_t table_gfn = gpte_to_gfn(pte);
+		gpa_t pte_gpa   = gfn_to_gpa(table_gfn) + offset;
 
-		index = PT_INDEX(addr, walker->level);
-
-		table_gfn = gpte_to_gfn(pte);
-		offset    = index * sizeof(pt_element_t);
-		pte_gpa   = gfn_to_gpa(table_gfn) + offset;
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
@@ -273,6 +269,8 @@ walk:
 
 	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
 		int ret;
+		unsigned index  = PT_INDEX(addr, walker->level);
+		gfn_t table_gfn = walker->table_gfn[walker->level - 1];
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] KVM: MMU: Update walker->pt/pte_access directly
  2011-06-09 13:59 [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic() Takuya Yoshikawa
  2011-06-09 14:01 ` [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic() Takuya Yoshikawa
  2011-06-09 14:02 ` [PATCH 2/4] KVM: MMU: Move some variables into the walk loop Takuya Yoshikawa
@ 2011-06-09 14:03 ` Takuya Yoshikawa
  2011-06-09 14:05 ` [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic() Takuya Yoshikawa
  3 siblings, 0 replies; 8+ messages in thread
From: Takuya Yoshikawa @ 2011-06-09 14:03 UTC (permalink / raw)
  To: avi, mtosatti; +Cc: kvm, yoshikawa.takuya, mingo

From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>

This will help us split out the big body of the walk loop later.

The only functional change is when we return from walk_addr_generic()
with an error.  The original code did not change these but the new
one may change.  But this should be safe unless callers use these in
that case.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/x86/kvm/paging_tmpl.h |   12 +++++-------
 1 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3270789..711336b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -122,7 +122,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 {
 	pt_element_t pte;
 	pt_element_t __user *ptep_user;
-	unsigned pt_access, uninitialized_var(pte_access);
 	bool eperm;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
@@ -150,7 +149,7 @@ walk:
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
-	pt_access = ACC_ALL;
+	walker->pt_access = ACC_ALL;
 
 	for (;;) {
 		gfn_t real_gfn;
@@ -224,7 +223,8 @@ walk:
 			pte |= PT_ACCESSED_MASK;
 		}
 
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+		walker->pte_access = walker->pt_access &
+				     FNAME(gpte_access)(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
 
@@ -260,7 +260,7 @@ walk:
 			break;
 		}
 
-		pt_access = pte_access;
+		walker->pt_access = walker->pte_access;
 		--walker->level;
 	}
 
@@ -286,10 +286,8 @@ walk:
 		walker->ptes[walker->level - 1] = pte;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic()
  2011-06-09 13:59 [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic() Takuya Yoshikawa
                   ` (2 preceding siblings ...)
  2011-06-09 14:03 ` [PATCH 3/4] KVM: MMU: Update walker->pt/pte_access directly Takuya Yoshikawa
@ 2011-06-09 14:05 ` Takuya Yoshikawa
  2011-06-09 14:18   ` Ingo Molnar
  3 siblings, 1 reply; 8+ messages in thread
From: Takuya Yoshikawa @ 2011-06-09 14:05 UTC (permalink / raw)
  To: avi, mtosatti; +Cc: kvm, yoshikawa.takuya, mingo

From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>

The code has clearly suffered from over inlining.  So make the body of
the walk loop a separate function: do_walk().

This will make it easy to do more cleanups and optimizations later.

This was suggested by Ingo Molnar.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/x86/kvm/mmu.c         |   21 ++++
 arch/x86/kvm/paging_tmpl.h |  227 ++++++++++++++++++++++++--------------------
 2 files changed, 145 insertions(+), 103 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d14434..16ccf4b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -69,6 +69,27 @@ char *audit_point_name[] = {
 	"post sync"
 };
 
+/*
+ * do_walk() returns one of these.
+ *
+ * WALK_NEXT:		Continue the walk loop.
+ * WALK_DONE:		Break from the walk loop.
+ * WALK_RETRY:		Retry walk.
+ * WALK_NOT_PRESENT:	Set PFERR_PRESENT_MASK and goto error.
+ * WALK_RSVD_FAULT:	Set PFERR_RSVD_MASK and goto error.
+ * WALK_ERROR:		Goto error.
+ * WALK_ABORT:		Return immediately.
+ */
+enum {
+	WALK_NEXT,
+	WALK_DONE,
+	WALK_RETRY,
+	WALK_NOT_PRESENT,
+	WALK_RSVD_FAULT,
+	WALK_ERROR,
+	WALK_ABORT
+};
+
 #undef MMU_DEBUG
 
 #ifdef MMU_DEBUG
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 711336b..4913aa5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -114,6 +114,111 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 }
 
 /*
+ * Walk one level.
+ * Guest pte and its user address will be put in *pte and *ptep_user.
+ */
+static inline int
+FNAME(do_walk)(struct guest_walker *walker, struct kvm_vcpu *vcpu,
+	       struct kvm_mmu *mmu, gva_t addr, u32 access, bool *eperm,
+	       pt_element_t *pte, pt_element_t __user **ptep_user)
+{
+	gfn_t real_gfn;
+	unsigned long host_addr;
+	unsigned index  = PT_INDEX(addr, walker->level);
+	int offset      = index * sizeof(pt_element_t);
+	gfn_t table_gfn = gpte_to_gfn(*pte);
+	gpa_t pte_gpa   = gfn_to_gpa(table_gfn) + offset;
+	const int write_fault = access & PFERR_WRITE_MASK;
+	const int user_fault  = access & PFERR_USER_MASK;
+	const int fetch_fault = access & PFERR_FETCH_MASK;
+
+	walker->table_gfn[walker->level - 1] = table_gfn;
+	walker->pte_gpa[walker->level - 1] = pte_gpa;
+
+	real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+				      PFERR_USER_MASK|PFERR_WRITE_MASK);
+	if (unlikely(real_gfn == UNMAPPED_GVA))
+		return WALK_NOT_PRESENT;
+	real_gfn = gpa_to_gfn(real_gfn);
+
+	host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+	if (unlikely(kvm_is_error_hva(host_addr)))
+		return WALK_NOT_PRESENT;
+
+	*ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+	if (unlikely(__copy_from_user(pte, *ptep_user, sizeof(*pte))))
+		return WALK_NOT_PRESENT;
+
+	trace_kvm_mmu_paging_element(*pte, walker->level);
+
+	if (unlikely(!is_present_gpte(*pte)))
+		return WALK_NOT_PRESENT;
+
+	if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, *pte, walker->level)))
+		return WALK_RSVD_FAULT;
+
+	if (unlikely(write_fault && !is_writable_pte(*pte)
+		     && (user_fault || is_write_protection(vcpu))))
+		*eperm = true;
+
+	if (unlikely(user_fault && !(*pte & PT_USER_MASK)))
+		*eperm = true;
+
+#if PTTYPE == 64
+	if (unlikely(fetch_fault && (*pte & PT64_NX_MASK)))
+		*eperm = true;
+#endif
+
+	if (!*eperm && unlikely(!(*pte & PT_ACCESSED_MASK))) {
+		int ret;
+
+		trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(*pte));
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, *ptep_user, index,
+					  *pte, *pte|PT_ACCESSED_MASK);
+		if (unlikely(ret < 0))
+			return WALK_NOT_PRESENT;
+		else if (ret)
+			return WALK_RETRY;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		*pte |= PT_ACCESSED_MASK;
+	}
+
+	walker->pte_access = walker->pt_access & FNAME(gpte_access)(vcpu, *pte);
+
+	walker->ptes[walker->level - 1] = *pte;
+
+	if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
+	    ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(*pte) &&
+	     (PTTYPE == 64 || is_pse(vcpu))) ||
+	    ((walker->level == PT_PDPE_LEVEL) && is_large_pte(*pte) &&
+	     (mmu->root_level == PT64_ROOT_LEVEL))) {
+		gpa_t real_gpa;
+		gfn_t gfn;
+		u32 ac;
+
+		gfn = gpte_to_gfn_lvl(*pte, walker->level);
+		gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+		if (PTTYPE == 32 && (walker->level == PT_DIRECTORY_LEVEL) &&
+		    is_cpuid_PSE36())
+			gfn += pse36_gfn_delta(*pte);
+
+		ac = write_fault | fetch_fault | user_fault;
+
+		real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+		if (real_gpa == UNMAPPED_GVA)
+			return WALK_ABORT;
+
+		walker->gfn = real_gpa >> PAGE_SHIFT;
+
+		return WALK_DONE;
+	}
+
+	return WALK_NEXT;
+}
+
+/*
  * Fetch a guest pte for a guest virtual address
  */
 static int FNAME(walk_addr_generic)(struct guest_walker *walker,
@@ -130,7 +235,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
 	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 				     fetch_fault);
-walk:
+walk_retry:
 	eperm = false;
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
@@ -152,118 +257,34 @@ walk:
 	walker->pt_access = ACC_ALL;
 
 	for (;;) {
-		gfn_t real_gfn;
-		unsigned long host_addr;
-		unsigned index  = PT_INDEX(addr, walker->level);
-		int offset      = index * sizeof(pt_element_t);
-		gfn_t table_gfn = gpte_to_gfn(pte);
-		gpa_t pte_gpa   = gfn_to_gpa(table_gfn) + offset;
-
-		walker->table_gfn[walker->level - 1] = table_gfn;
-		walker->pte_gpa[walker->level - 1] = pte_gpa;
-
-		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
-					      PFERR_USER_MASK|PFERR_WRITE_MASK);
-		if (unlikely(real_gfn == UNMAPPED_GVA)) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-		real_gfn = gpa_to_gfn(real_gfn);
-
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
-		if (unlikely(kvm_is_error_hva(host_addr))) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-
-		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
-		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-
-		trace_kvm_mmu_paging_element(pte, walker->level);
+		int ret;
 
-		if (unlikely(!is_present_gpte(pte))) {
+		ret = FNAME(do_walk)(walker, vcpu, mmu, addr, access,
+				     &eperm, &pte, &ptep_user);
+		switch (ret) {
+		case WALK_NEXT:
+			break;
+		case WALK_DONE:
+			goto walk_done;
+		case WALK_RETRY:
+			goto walk_retry;
+		case WALK_NOT_PRESENT:
 			errcode |= PFERR_PRESENT_MASK;
 			goto error;
-		}
-
-		if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
-					      walker->level))) {
+		case WALK_RSVD_FAULT:
 			errcode |= PFERR_RSVD_MASK;
 			goto error;
-		}
-
-		if (unlikely(write_fault && !is_writable_pte(pte)
-			     && (user_fault || is_write_protection(vcpu))))
-			eperm = true;
-
-		if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
-
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0)) {
-				errcode |= PFERR_PRESENT_MASK;
-				goto error;
-			} else if (ret)
-				goto walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
-		walker->pte_access = walker->pt_access &
-				     FNAME(gpte_access)(vcpu, pte);
-
-		walker->ptes[walker->level - 1] = pte;
-
-		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
-		    ((walker->level == PT_DIRECTORY_LEVEL) &&
-				is_large_pte(pte) &&
-				(PTTYPE == 64 || is_pse(vcpu))) ||
-		    ((walker->level == PT_PDPE_LEVEL) &&
-				is_large_pte(pte) &&
-				mmu->root_level == PT64_ROOT_LEVEL)) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
+		case WALK_ERROR:
+			goto error;
+		case WALK_ABORT:
+			return 0;
 		}
 
 		walker->pt_access = walker->pte_access;
 		--walker->level;
 	}
 
+walk_done:
 	if (unlikely(eperm))
 		goto error;
 
@@ -279,7 +300,7 @@ walk:
 			errcode |= PFERR_PRESENT_MASK;
 			goto error;
 		} else if (ret)
-			goto walk;
+			goto walk_retry;
 
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		pte |= PT_DIRTY_MASK;
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic()
  2011-06-09 14:05 ` [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic() Takuya Yoshikawa
@ 2011-06-09 14:18   ` Ingo Molnar
  2011-06-12 15:47     ` Avi Kivity
  0 siblings, 1 reply; 8+ messages in thread
From: Ingo Molnar @ 2011-06-09 14:18 UTC (permalink / raw)
  To: Takuya Yoshikawa; +Cc: avi, mtosatti, kvm, yoshikawa.takuya


* Takuya Yoshikawa <takuya.yoshikawa@gmail.com> wrote:

> +/*
> + * do_walk() returns one of these.
> + *
> + * WALK_NEXT:		Continue the walk loop.
> + * WALK_DONE:		Break from the walk loop.
> + * WALK_RETRY:		Retry walk.
> + * WALK_NOT_PRESENT:		Set PFERR_PRESENT_MASK and goto error.
> + * WALK_RSVD_FAULT:		Set PFERR_RSVD_MASK and goto error.
> + * WALK_ERROR:		Goto error.
> + * WALK_ABORT:		Return immediately.

hm, this iterator turned out to be more complex than i thought it 
would become. Avi, are you still happy with that?

> +	if (!*eperm && unlikely(!(*pte & PT_ACCESSED_MASK))) {
> +		int ret;
> +
> +		trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(*pte));
> +		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, *ptep_user, index,
> +					  *pte, *pte|PT_ACCESSED_MASK);
> +		if (unlikely(ret < 0))
> +			return WALK_NOT_PRESENT;
> +		else if (ret)
> +			return WALK_RETRY;
> +
> +		mark_page_dirty(vcpu->kvm, table_gfn);
> +		*pte |= PT_ACCESSED_MASK;
> +	}

This wants to move into a set-accessed-bit helper inline.

> +	if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
> +	    ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(*pte) &&
> +	     (PTTYPE == 64 || is_pse(vcpu))) ||
> +	    ((walker->level == PT_PDPE_LEVEL) && is_large_pte(*pte) &&
> +	     (mmu->root_level == PT64_ROOT_LEVEL))) {

This condition wants to move into a is-pte-large inline function.

> +		gpa_t real_gpa;
> +		gfn_t gfn;
> +		u32 ac;
> +
> +		gfn = gpte_to_gfn_lvl(*pte, walker->level);
> +		gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
> +
> +		if (PTTYPE == 32 && (walker->level == PT_DIRECTORY_LEVEL) &&
> +		    is_cpuid_PSE36())
> +			gfn += pse36_gfn_delta(*pte);
> +
> +		ac = write_fault | fetch_fault | user_fault;
> +
> +		real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
> +		if (real_gpa == UNMAPPED_GVA)
> +			return WALK_ABORT;
> +
> +		walker->gfn = real_gpa >> PAGE_SHIFT;
> +
> +		return WALK_DONE;

And this would look cleaner if it was in a handle-large-pte inline function?

> +		ret = FNAME(do_walk)(walker, vcpu, mmu, addr, access,
> +				     &eperm, &pte, &ptep_user);
> +		switch (ret) {
> +		case WALK_NEXT:
> +			break;
> +		case WALK_DONE:
> +			goto walk_done;
> +		case WALK_RETRY:
> +			goto walk_retry;
> +		case WALK_NOT_PRESENT:
>  			errcode |= PFERR_PRESENT_MASK;
>  			goto error;
> +		case WALK_RSVD_FAULT:
>  			errcode |= PFERR_RSVD_MASK;
>  			goto error;
> +		case WALK_ERROR:
> +			goto error;
> +		case WALK_ABORT:
> +			return 0;

Btw., there's a stylistic trick you could use here to make the 
iteration logic even clearer:

		switch (ret) {
		case WALK_NEXT:						break;
		case WALK_DONE:						goto walk_done;
		case WALK_RETRY:					goto walk_retry;
		case WALK_NOT_PRESENT:	errcode |= PFERR_PRESENT_MASK;	goto error;
		case WALK_RSVD_FAULT:	errcode |= PFERR_RSVD_MASK;	goto error;
		case WALK_ERROR:					goto error;
		case WALK_ABORT:					return 0;
		}

But it's a pure matter of taste - it might not really fit into KVM 
code. Avi's call :-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic()
  2011-06-09 14:01 ` [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic() Takuya Yoshikawa
@ 2011-06-12 15:45   ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2011-06-12 15:45 UTC (permalink / raw)
  To: Takuya Yoshikawa; +Cc: mtosatti, kvm, yoshikawa.takuya, mingo

On 06/09/2011 05:01 PM, Takuya Yoshikawa wrote:
> From: Takuya Yoshikawa<yoshikawa.takuya@oss.ntt.co.jp>
>
> Avoid two step jumps to the error handling part.  This eliminates the
> use of the variables present and rsvd_fault.
>
> We also mark the variables write/user/fetch_fault with const to show
> these do not change in the function.
>
> These were suggested by Ingo Molnar.
>   error:
> -	walker->fault.vector = PF_VECTOR;
> -	walker->fault.error_code_valid = true;
> -	walker->fault.error_code = 0;
> -	if (present)
> -		walker->fault.error_code |= PFERR_PRESENT_MASK;
> -
> -	walker->fault.error_code |= write_fault | user_fault;
> -
> +	errcode |= write_fault | user_fault;
>   	if (fetch_fault&&  mmu->nx)
> -		walker->fault.error_code |= PFERR_FETCH_MASK;
> -	if (rsvd_fault)
> -		walker->fault.error_code |= PFERR_RSVD_MASK;
> +		errcode |= PFERR_FETCH_MASK;
>

This area has changed, please regenerate against 'next'.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic()
  2011-06-09 14:18   ` Ingo Molnar
@ 2011-06-12 15:47     ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2011-06-12 15:47 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Takuya Yoshikawa, mtosatti, kvm, yoshikawa.takuya

On 06/09/2011 05:18 PM, Ingo Molnar wrote:
> * Takuya Yoshikawa<takuya.yoshikawa@gmail.com>  wrote:
>
> >  +/*
> >  + * do_walk() returns one of these.
> >  + *
> >  + * WALK_NEXT:		Continue the walk loop.
> >  + * WALK_DONE:		Break from the walk loop.
> >  + * WALK_RETRY:		Retry walk.
> >  + * WALK_NOT_PRESENT:		Set PFERR_PRESENT_MASK and goto error.
> >  + * WALK_RSVD_FAULT:		Set PFERR_RSVD_MASK and goto error.
> >  + * WALK_ERROR:		Goto error.
> >  + * WALK_ABORT:		Return immediately.
>
> hm, this iterator turned out to be more complex than i thought it
> would become. Avi, are you still happy with that?

No - a lot of code is spent just communicating between the two 
functions.  Best to leave it in a single function.

> >  +	if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
> >  +	    ((walker->level == PT_DIRECTORY_LEVEL)&&  is_large_pte(*pte)&&
> >  +	     (PTTYPE == 64 || is_pse(vcpu))) ||
> >  +	    ((walker->level == PT_PDPE_LEVEL)&&  is_large_pte(*pte)&&
> >  +	     (mmu->root_level == PT64_ROOT_LEVEL))) {
>
> This condition wants to move into a is-pte-large inline function.

Better, is_last_gpte().  We already have an is_last_spte().

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-06-12 15:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-09 13:59 [PATCH 0/4] KVM: MMU: Clean up walk_addr_generic() Takuya Yoshikawa
2011-06-09 14:01 ` [PATCH 1/4] KVM: MMU: Clean up the error handling of walk_addr_generic() Takuya Yoshikawa
2011-06-12 15:45   ` Avi Kivity
2011-06-09 14:02 ` [PATCH 2/4] KVM: MMU: Move some variables into the walk loop Takuya Yoshikawa
2011-06-09 14:03 ` [PATCH 3/4] KVM: MMU: Update walker->pt/pte_access directly Takuya Yoshikawa
2011-06-09 14:05 ` [PATCH 4/4] KVM: MMU: Split out the main body of walk_addr_generic() Takuya Yoshikawa
2011-06-09 14:18   ` Ingo Molnar
2011-06-12 15:47     ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.