linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] KVM: MMU: fix permission_fault()
@ 2016-03-25 13:19 Xiao Guangrong
  2016-03-25 13:19 ` [PATCH 2/4] KVM: MMU: simplify the logic of __mmu_unsync_walk() Xiao Guangrong
                   ` (5 more replies)
  0 siblings, 6 replies; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:19 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

kvm-unit-tests complained that the PFEC is not set properly, e.g.:
test pte.rw pte.d pte.nx pde.p pde.rw pde.pse user fetch: FAIL: error code 15 
expected 5
Dump mapping: address: 0x123400000000
------L4: 3e95007
------L3: 3e96007
------L2: 2000083

It is caused by the fact that the PFEC returned to the guest is copied from
the PFEC triggered by the shadow page table

This patch fixes it and makes the logic of updating errcode cleaner

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.h         | 8 ++++----
 arch/x86/kvm/paging_tmpl.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b70df72..81bffd1 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -154,7 +154,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				  unsigned pfec)
 {
 	int cpl = kvm_x86_ops->get_cpl(vcpu);
-	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+	unsigned long errcode, rflags = kvm_x86_ops->get_rflags(vcpu);
 
 	/*
 	 * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
@@ -175,7 +175,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	bool fault = (mmu->permissions[index] >> pte_access) & 1;
 
 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
-	pfec |= PFERR_PRESENT_MASK;
+	errcode = PFERR_PRESENT_MASK;
 
 	if (unlikely(mmu->pkru_mask)) {
 		u32 pkru_bits, offset;
@@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
 
 		pkru_bits &= mmu->pkru_mask >> offset;
-		pfec |= -pkru_bits & PFERR_PK_MASK;
+		errcode |= -pkru_bits & PFERR_PK_MASK;
 		fault |= (pkru_bits != 0);
 	}
 
-	return -(uint32_t)fault & pfec;
+	return -(uint32_t)fault & errcode;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1d971c7..bc019f7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -360,7 +360,7 @@ retry_walk:
 			goto error;
 
 		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
-			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
+			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
 			goto error;
 		}
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/4] KVM: MMU: simplify the logic of __mmu_unsync_walk()
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
@ 2016-03-25 13:19 ` Xiao Guangrong
  2016-03-25 13:19 ` [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path Xiao Guangrong
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:19 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Each time I looked into the logic of walking unsync shadow pages, it cost
lots of time to understand what it is doing. The trick of this logic is
that the item, sp and idx, saved to kvm_mmu_pages is the sp and the index
in the _parent_ level and it lacks any comment to explain this fact

This patch simplifies it by saving the sp and its index to kvm_mmu_pages,
then it is much easier to understand the operations on its index

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6bdfbc2..e273144 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1830,6 +1830,8 @@ static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
 	__clear_bit(idx, sp->unsync_child_bitmap);
 }
 
+#define INVALID_INDEX (-1)
+
 static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 			   struct kvm_mmu_pages *pvec)
 {
@@ -1846,10 +1848,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 		child = page_header(ent & PT64_BASE_ADDR_MASK);
 
-		if (child->unsync_children) {
-			if (mmu_pages_add(pvec, child, i))
-				return -ENOSPC;
+		if (mmu_pages_add(pvec, sp, i))
+			return -ENOSPC;
 
+		if (child->unsync_children) {
 			ret = __mmu_unsync_walk(child, pvec);
 			if (!ret) {
 				clear_unsync_child_bit(sp, i);
@@ -1860,7 +1862,13 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 				return ret;
 		} else if (child->unsync) {
 			nr_unsync_leaf++;
-			if (mmu_pages_add(pvec, child, i))
+
+			/*
+			 * the unsync is on the last level so its 'idx' is
+			 * useless, we set it to INVALID_INDEX to catch
+			 * potential bugs.
+			 */
+			if (mmu_pages_add(pvec, child, INVALID_INDEX))
 				return -ENOSPC;
 		} else
 			clear_unsync_child_bit(sp, i);
@@ -1869,8 +1877,6 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 	return nr_unsync_leaf;
 }
 
-#define INVALID_INDEX (-1)
-
 static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 			   struct kvm_mmu_pages *pvec)
 {
@@ -1878,7 +1884,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 	if (!sp->unsync_children)
 		return 0;
 
-	mmu_pages_add(pvec, sp, INVALID_INDEX);
 	return __mmu_unsync_walk(sp, pvec);
 }
 
@@ -1994,16 +1999,18 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
 {
 	int n;
 
-	for (n = i+1; n < pvec->nr; n++) {
+	for (n = i + 1; n < pvec->nr; n++) {
 		struct kvm_mmu_page *sp = pvec->page[n].sp;
 		unsigned idx = pvec->page[n].idx;
 		int level = sp->role.level;
 
-		parents->idx[level-1] = idx;
-		if (level == PT_PAGE_TABLE_LEVEL)
+		if (level == PT_PAGE_TABLE_LEVEL) {
+			WARN_ON(idx != INVALID_INDEX);
 			break;
+		}
 
-		parents->parent[level-2] = sp;
+		parents->idx[level - 2] = idx;
+		parents->parent[level - 2] = sp;
 	}
 
 	return n;
@@ -2018,19 +2025,16 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
 	if (pvec->nr == 0)
 		return 0;
 
-	WARN_ON(pvec->page[0].idx != INVALID_INDEX);
-
 	sp = pvec->page[0].sp;
 	level = sp->role.level;
 	WARN_ON(level == PT_PAGE_TABLE_LEVEL);
 
-	parents->parent[level-2] = sp;
-
-	/* Also set up a sentinel.  Further entries in pvec are all
+	/*
+	 * Also set up a sentinel. Further entries in pvec are all
 	 * children of sp, so this element is never overwritten.
 	 */
-	parents->parent[level-1] = NULL;
-	return mmu_pages_next(pvec, parents, 0);
+	parents->parent[level - 1] = NULL;
+	return mmu_pages_next(pvec, parents, -1);
 }
 
 static void mmu_pages_clear_parents(struct mmu_page_path *parents)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
  2016-03-25 13:19 ` [PATCH 2/4] KVM: MMU: simplify the logic of __mmu_unsync_walk() Xiao Guangrong
@ 2016-03-25 13:19 ` Xiao Guangrong
  2016-03-25 13:45   ` Paolo Bonzini
  2016-03-25 13:19 ` [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*() Xiao Guangrong
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:19 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Currently only PT64_ROOT_LEVEL - 1 levels are used, one additional entry
in .parent[] is used as a sentinel, the additional entry in .idx[] is
purely wasted

This patch reduces its size and sets the sentinel on the upper level of
the place where we start from

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e273144..c396e8b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1984,12 +1984,12 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
 }
 
 struct mmu_page_path {
-	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
-	unsigned int idx[PT64_ROOT_LEVEL];
+	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL - 1];
+	unsigned int idx[PT64_ROOT_LEVEL - 1];
 };
 
 #define for_each_sp(pvec, sp, parents, i)			\
-		for (i = mmu_pages_first(&pvec, &parents);	\
+		for (i =  mmu_pages_next(&pvec, &parents, -1);	\
 			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
 			i = mmu_pages_next(&pvec, &parents, i))
 
@@ -2016,25 +2016,15 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
 	return n;
 }
 
-static int mmu_pages_first(struct kvm_mmu_pages *pvec,
-			   struct mmu_page_path *parents)
+static void
+mmu_pages_init(struct mmu_page_path *parents, struct kvm_mmu_page *parent)
 {
-	struct kvm_mmu_page *sp;
-	int level;
-
-	if (pvec->nr == 0)
-		return 0;
-
-	sp = pvec->page[0].sp;
-	level = sp->role.level;
-	WARN_ON(level == PT_PAGE_TABLE_LEVEL);
-
 	/*
-	 * Also set up a sentinel. Further entries in pvec are all
-	 * children of sp, so this element is never overwritten.
+	 * set up a sentinel. Further entries in pvec are all children of
+	 * sp, so this element is never overwritten.
 	 */
-	parents->parent[level - 1] = NULL;
-	return mmu_pages_next(pvec, parents, -1);
+	if (parent->role.level < PT64_ROOT_LEVEL)
+		parents->parent[parent->role.level - 1] = NULL;
 }
 
 static void mmu_pages_clear_parents(struct mmu_page_path *parents)
@@ -2051,7 +2041,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 		WARN_ON(idx == INVALID_INDEX);
 		clear_unsync_child_bit(sp, idx);
 		level++;
-	} while (!sp->unsync_children);
+	} while (!sp->unsync_children && (level < PT64_ROOT_LEVEL - 1));
 }
 
 static void mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2064,6 +2054,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 	LIST_HEAD(invalid_list);
 	bool flush = false;
 
+	mmu_pages_init(&parents, parent);
 	while (mmu_unsync_walk(parent, &pages)) {
 		bool protected = false;
 
@@ -2335,6 +2326,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
 		return 0;
 
+	mmu_pages_init(&parents, parent);
 	while (mmu_unsync_walk(parent, &pages)) {
 		struct kvm_mmu_page *sp;
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*()
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
  2016-03-25 13:19 ` [PATCH 2/4] KVM: MMU: simplify the logic of __mmu_unsync_walk() Xiao Guangrong
  2016-03-25 13:19 ` [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path Xiao Guangrong
@ 2016-03-25 13:19 ` Xiao Guangrong
  2016-03-29  9:44   ` Paolo Bonzini
  2016-03-25 13:35 ` [PATCH 1/4] KVM: MMU: fix permission_fault() Paolo Bonzini
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:19 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

The obsolete sp should not be used on current vCPUs and should not hurt
vCPU's running, so skip it from for_each_gfn_sp() and
for_each_gfn_indirect_valid_sp()

The side effect is we will double check role.invalid in kvm_mmu_get_page()
but I think it is okay as role is well cached

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c396e8b..4d66a9e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1906,18 +1906,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
  * since it has been deleted from active_mmu_pages but still can be found
  * at hast list.
  *
- * for_each_gfn_indirect_valid_sp has skipped that kind of page and
- * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
- * all the obsolete pages.
+ * for_each_gfn_valid_sp() has skipped that kind of pages.
  */
-#define for_each_gfn_sp(_kvm, _sp, _gfn)				\
+#define for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-		if ((_sp)->gfn != (_gfn)) {} else
+		if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
+			|| (_sp)->role.invalid) {} else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
-	for_each_gfn_sp(_kvm, _sp, _gfn)				\
-		if ((_sp)->role.direct || (_sp)->role.invalid) {} else
+	for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
+		if ((_sp)->role.direct) {} else
 
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1958,6 +1957,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
 static void mmu_audit_disable(void) { }
 #endif
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			 struct list_head *invalid_list)
 {
@@ -2092,11 +2096,6 @@ static void clear_sp_write_flooding_count(u64 *spte)
 	__clear_sp_write_flooding_count(sp);
 }
 
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -2123,10 +2122,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	for_each_gfn_sp(vcpu->kvm, sp, gfn) {
-		if (is_obsolete_sp(vcpu->kvm, sp))
-			continue;
-
+	for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
 		if (!need_sync && sp->unsync)
 			need_sync = true;
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
                   ` (2 preceding siblings ...)
  2016-03-25 13:19 ` [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*() Xiao Guangrong
@ 2016-03-25 13:35 ` Paolo Bonzini
  2016-03-25 13:41   ` Xiao Guangrong
  2016-03-25 14:21 ` Paolo Bonzini
  2016-04-06  8:56 ` Paolo Bonzini
  5 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 13:35 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:19, Xiao Guangrong wrote:
> kvm-unit-tests complained about the PFEC is not set properly, e.g,:
> test pte.rw pte.d pte.nx pde.p pde.rw pde.pse user fetch: FAIL: error code 15 
> expected 5
> Dump mapping: address: 0x123400000000
> ------L4: 3e95007
> ------L3: 3e96007
> ------L2: 2000083

What's the command line for the reproducer?

> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index b70df72..81bffd1 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -154,7 +154,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  				  unsigned pfec)
>  {
>  	int cpl = kvm_x86_ops->get_cpl(vcpu);
> -	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> +	unsigned long errcode, rflags = kvm_x86_ops->get_rflags(vcpu);
>  
>  	/*
>  	 * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
> @@ -175,7 +175,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  	bool fault = (mmu->permissions[index] >> pte_access) & 1;
>  
>  	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
> -	pfec |= PFERR_PRESENT_MASK;
> +	errcode = PFERR_PRESENT_MASK;

So is this patch doing the same as "KVM: MMU: precompute page fault
error code"?  It was necessary after all. :)

Paolo

>  
>  	if (unlikely(mmu->pkru_mask)) {
>  		u32 pkru_bits, offset;
> @@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
>  
>  		pkru_bits &= mmu->pkru_mask >> offset;
> -		pfec |= -pkru_bits & PFERR_PK_MASK;
> +		errcode |= -pkru_bits & PFERR_PK_MASK;
>  		fault |= (pkru_bits != 0);
>  	}
>  
> -	return -(uint32_t)fault & pfec;
> +	return -(uint32_t)fault & errcode;
>  }
>  
>  void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 1d971c7..bc019f7 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -360,7 +360,7 @@ retry_walk:
>  			goto error;
>  
>  		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
> -			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
> +			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
>  			goto error;
>  		}
>  

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 13:35 ` [PATCH 1/4] KVM: MMU: fix permission_fault() Paolo Bonzini
@ 2016-03-25 13:41   ` Xiao Guangrong
  2016-03-25 13:50     ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:41 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/25/2016 09:35 PM, Paolo Bonzini wrote:
>
>
> On 25/03/2016 14:19, Xiao Guangrong wrote:
>> kvm-unit-tests complained about the PFEC is not set properly, e.g,:
>> test pte.rw pte.d pte.nx pde.p pde.rw pde.pse user fetch: FAIL: error code 15
>> expected 5
>> Dump mapping: address: 0x123400000000
>> ------L4: 3e95007
>> ------L3: 3e96007
>> ------L2: 2000083
>
> What's the command line for the reproducer?

QEMU=/home/eric/qemu/x86_64-softmmu/qemu-system-x86_64 ./x86-run x86/access.flat

>
>> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
>> index b70df72..81bffd1 100644
>> --- a/arch/x86/kvm/mmu.h
>> +++ b/arch/x86/kvm/mmu.h
>> @@ -154,7 +154,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>>   				  unsigned pfec)
>>   {
>>   	int cpl = kvm_x86_ops->get_cpl(vcpu);
>> -	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
>> +	unsigned long errcode, rflags = kvm_x86_ops->get_rflags(vcpu);
>>
>>   	/*
>>   	 * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
>> @@ -175,7 +175,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>>   	bool fault = (mmu->permissions[index] >> pte_access) & 1;
>>
>>   	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
>> -	pfec |= PFERR_PRESENT_MASK;
>> +	errcode = PFERR_PRESENT_MASK;
>
> So is this patch doing the same as "KVM: MMU: precompute page fault
> error code"?  It was necessary after all. :)

Sorry for my mistake... I missed the logic you changed :(

I still prefer calculating the error code on the fault path, which is rare, or
thinking of a way to encapsulate it in permission_fault()...

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 13:19 ` [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path Xiao Guangrong
@ 2016-03-25 13:45   ` Paolo Bonzini
  2016-03-25 13:48     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 13:45 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:19, Xiao Guangrong wrote:
> Currently only PT64_ROOT_LEVEL - 1 levels are used, one additional entry
> in .parent[] is used as a sentinel, the additional entry in .idx[] is
> purely wasted
> 
> This patch reduces its size and sets the sentinel on the upper level of
> the place where we start from

This patch and the previous one are basically redoing commit
0a47cd85833e ("KVM: MMU: Fix ubsan warnings", 2016-03-04).  While you
find your version easier to understand, I of course find mine easier.

Rather than getting stuck in a ko fight, the solution is to stick with
the code in KVM and add comments.  I'll give it a try...

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 13:45   ` Paolo Bonzini
@ 2016-03-25 13:48     ` Xiao Guangrong
  2016-03-25 13:56       ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 13:48 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/25/2016 09:45 PM, Paolo Bonzini wrote:
>
>
> On 25/03/2016 14:19, Xiao Guangrong wrote:
>> Currently only PT64_ROOT_LEVEL - 1 levels are used, one additional entry
>> in .parent[] is used as a sentinel, the additional entry in .idx[] is
>> purely wasted
>>
>> This patch reduces its size and sets the sentinel on the upper level of
>> the place where we start from
>
> This patch and the previous one are basically redoing commit
> 0a47cd85833e ("KVM: MMU: Fix ubsan warnings", 2016-03-04).  While you
> find your version easier to understand, I of course find mine easier.
>
> Rather than getting stuck in a ko fight, the solution is to stick with
> the code in KVM and add comments.  I'll give it a try...

If you do not like this one, we can just make .idx be
[PT64_ROOT_LEVEL - 1] and keep the sentinel in .parent[] — a small
change with a nice code shape.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 13:41   ` Xiao Guangrong
@ 2016-03-25 13:50     ` Paolo Bonzini
  0 siblings, 0 replies; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 13:50 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:41, Xiao Guangrong wrote:
>>>
>>
>> So is this patch doing the same as "KVM: MMU: precompute page fault
>> error code"?  It was necessary after all. :)
> 
> Sorry for my mistake... I missed the logic you changed :(
> 
> I still prefer to calculating the error code on the fault path which is
> rare, or think a way to encapsulate it to permission_fault()...

Yes, I will apply your patch.

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 13:48     ` Xiao Guangrong
@ 2016-03-25 13:56       ` Paolo Bonzini
  2016-03-25 14:07         ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 13:56 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:48, Xiao Guangrong wrote:
>>>
>>
>> This patch and the previous one are basically redoing commit
>> 0a47cd85833e ("KVM: MMU: Fix ubsan warnings", 2016-03-04).  While you
>> find your version easier to understand, I of course find mine easier.
>>
>> Rather than getting stuck in a ko fight, the solution is to stick with
>> the code in KVM and add comments.  I'll give it a try...
> 
> If you do not like this one, we can just make the .index is
> [PT64_ROOT_LEVEL - 1] and keep the sentinel in .parents[], that little
> change and nice code shape.

I suppose you'd have something like this then:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 70e95d097ef1..15e1735a2e3a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1980,7 +1980,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
-	unsigned int idx[PT64_ROOT_LEVEL];
+	unsigned int idx[PT64_ROOT_LEVEL-1];
 };
 
 #define for_each_sp(pvec, sp, parents, i)			\
@@ -2037,13 +2037,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
 	struct kvm_mmu_page *sp;
 	unsigned int level = 0;
+	unsigned int idx;
 
 	do {
-		unsigned int idx = parents->idx[level];
 		sp = parents->parent[level];
-		if (!sp)
+		if (!sp || WARN_ON(level == PT64_ROOT_LEVEL-1))
 			return;
 
+		idx = parents->idx[level];
 		WARN_ON(idx == INVALID_INDEX);
 		clear_unsync_child_bit(sp, idx);
 		level++;

By making the arrays the same size, the effect of the sentinel seems
clearer to me.  It doesn't seem worth 4 bytes (and strictly speaking
those 4 bytes would be there anyway due to padding)...

Paolo

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 13:56       ` Paolo Bonzini
@ 2016-03-25 14:07         ` Xiao Guangrong
  2016-03-25 14:22           ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-25 14:07 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/25/2016 09:56 PM, Paolo Bonzini wrote:
>
>
> On 25/03/2016 14:48, Xiao Guangrong wrote:
>>>>
>>>
>>> This patch and the previous one are basically redoing commit
>>> 0a47cd85833e ("KVM: MMU: Fix ubsan warnings", 2016-03-04).  While you
>>> find your version easier to understand, I of course find mine easier.
>>>
>>> Rather than getting stuck in a ko fight, the solution is to stick with
>>> the code in KVM and add comments.  I'll give it a try...
>>
>> If you do not like this one, we can just make the .index is
>> [PT64_ROOT_LEVEL - 1] and keep the sentinel in .parents[], that little
>> change and nice code shape.
>
> I suppose you'd have something like this then:
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 70e95d097ef1..15e1735a2e3a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1980,7 +1980,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
>
>   struct mmu_page_path {
>   	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
> -	unsigned int idx[PT64_ROOT_LEVEL];
> +	unsigned int idx[PT64_ROOT_LEVEL-1];
>   };
>
>   #define for_each_sp(pvec, sp, parents, i)			\
> @@ -2037,13 +2037,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>   {
>   	struct kvm_mmu_page *sp;
>   	unsigned int level = 0;
> +	unsigned int idx;
>
>   	do {
> -		unsigned int idx = parents->idx[level];
>   		sp = parents->parent[level];
> -		if (!sp)
> +		if (!sp || WARN_ON(level == PT64_ROOT_LEVEL-1))
>   			return;
>
> +		idx = parents->idx[level];
>   		WARN_ON(idx == INVALID_INDEX);
>   		clear_unsync_child_bit(sp, idx);
>   		level++;
>

Yes, exactly.

[ actually, we can keep mmu_pages_clear_parents() unchanged ]

> By making the arrays the same size, the effect of the sentinel seems
> clearer to me.  It doesn't seem worth 4 bytes (and strictly speaking
> those 4 bytes would be there anyway due to padding)...

The sentinel is NULL forever so it can not go to the inner loop anyway...

Okay, i am not strong opinion on it, it is not a big deal. Let's
happily drop it if you really dislike it. :)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
                   ` (3 preceding siblings ...)
  2016-03-25 13:35 ` [PATCH 1/4] KVM: MMU: fix permission_fault() Paolo Bonzini
@ 2016-03-25 14:21 ` Paolo Bonzini
  2016-03-29 17:43   ` Xiao Guangrong
  2016-04-06  8:56 ` Paolo Bonzini
  5 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 14:21 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:19, Xiao Guangrong wrote:
>  	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
> -	pfec |= PFERR_PRESENT_MASK;
> +	errcode = PFERR_PRESENT_MASK;
>  
>  	if (unlikely(mmu->pkru_mask)) {
>  		u32 pkru_bits, offset;
> @@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
>  
>  		pkru_bits &= mmu->pkru_mask >> offset;
> -		pfec |= -pkru_bits & PFERR_PK_MASK;
> +		errcode |= -pkru_bits & PFERR_PK_MASK;
>  		fault |= (pkru_bits != 0);
>  	}
>  
> -	return -(uint32_t)fault & pfec;
> +	return -(uint32_t)fault & errcode;
>  }

I have another doubt here.

If you get a fault due to U=0, you would not get PFERR_PK_MASK.  This
is checked implicitly through the pte_user bit which we moved to
PFERR_RSVD_BIT.  However, if you get a fault due to W=0 _and_
PKRU.AD=1 or PKRU.WD=1 for the page's protection key, would the PK
bit be set in the error code?  If not, we would need something like
this:

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 81bffd1524c4..6835a551a5c4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -172,12 +172,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
 	int index = (pfec >> 1) +
 		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
-	bool fault = (mmu->permissions[index] >> pte_access) & 1;
 
 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
-	errcode = PFERR_PRESENT_MASK;
+	errcode = (mmu->permissions[index] >> pte_access) & PFERR_PRESENT_MASK;
 
-	if (unlikely(mmu->pkru_mask)) {
+	if (unlikely(-errcode & mmu->pkru_mask)) {
 		u32 pkru_bits, offset;
 
 		/*
@@ -188,11 +187,10 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
 
 		pkru_bits &= mmu->pkru_mask >> offset;
-		errcode |= -pkru_bits & PFERR_PK_MASK;
-		fault |= (pkru_bits != 0);
+		errcode |= pkru_bits ? PFERR_PK_MASK | PFERR_PRESENT_MASK : 0;
 	}
 
-	return -(uint32_t)fault & errcode;
+	return errcode;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);


Thanks,

Paolo

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path
  2016-03-25 14:07         ` Xiao Guangrong
@ 2016-03-25 14:22           ` Paolo Bonzini
  0 siblings, 0 replies; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-25 14:22 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 15:07, Xiao Guangrong wrote:
>>
>> @@ -2037,13 +2037,14 @@ static void mmu_pages_clear_parents(struct
>> mmu_page_path *parents)
>>   {
>>       struct kvm_mmu_page *sp;
>>       unsigned int level = 0;
>> +    unsigned int idx;
>>
>>       do {
>> -        unsigned int idx = parents->idx[level];
>>           sp = parents->parent[level];
>> -        if (!sp)
>> +        if (!sp || WARN_ON(level == PT64_ROOT_LEVEL-1))
>>               return;
>>
>> +        idx = parents->idx[level];
>>           WARN_ON(idx == INVALID_INDEX);
>>           clear_unsync_child_bit(sp, idx);
>>           level++;
>>
> 
> Yes, exactly.
> 
> [ actually, we can keep mmu_pages_clear_parents() unchanged ]

You cannot because ubsan would complain. :)

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*()
  2016-03-25 13:19 ` [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*() Xiao Guangrong
@ 2016-03-29  9:44   ` Paolo Bonzini
  0 siblings, 0 replies; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-29  9:44 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel

On 25/03/2016 14:19, Xiao Guangrong wrote:
> The obsolete sp should not be used on current vCPUs and should not hurt
> vCPU's running, so skip it from for_each_gfn_sp() and
> for_each_gfn_indirect_valid_sp()
> 
> The side effect is we will double check role.invalid in kvm_mmu_get_page()
> but I think it is okay as role is well cached
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Queued for 4.7.

Paolo

> ---
>  arch/x86/kvm/mmu.c | 28 ++++++++++++----------------
>  1 file changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index c396e8b..4d66a9e 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1906,18 +1906,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
>   * since it has been deleted from active_mmu_pages but still can be found
>   * at hast list.
>   *
> - * for_each_gfn_indirect_valid_sp has skipped that kind of page and
> - * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
> - * all the obsolete pages.
> + * for_each_gfn_valid_sp() has skipped that kind of pages.
>   */
> -#define for_each_gfn_sp(_kvm, _sp, _gfn)				\
> +#define for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
>  	hlist_for_each_entry(_sp,					\
>  	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
> -		if ((_sp)->gfn != (_gfn)) {} else
> +		if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
> +			|| (_sp)->role.invalid) {} else
>  
>  #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
> -	for_each_gfn_sp(_kvm, _sp, _gfn)				\
> -		if ((_sp)->role.direct || (_sp)->role.invalid) {} else
> +	for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
> +		if ((_sp)->role.direct) {} else
>  
>  /* @sp->gfn should be write-protected at the call site */
>  static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> @@ -1958,6 +1957,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
>  static void mmu_audit_disable(void) { }
>  #endif
>  
> +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
> +{
> +	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
> +}
> +
>  static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  			 struct list_head *invalid_list)
>  {
> @@ -2092,11 +2096,6 @@ static void clear_sp_write_flooding_count(u64 *spte)
>  	__clear_sp_write_flooding_count(sp);
>  }
>  
> -static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
> -{
> -	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
> -}
> -
>  static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  					     gfn_t gfn,
>  					     gva_t gaddr,
> @@ -2123,10 +2122,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
>  		role.quadrant = quadrant;
>  	}
> -	for_each_gfn_sp(vcpu->kvm, sp, gfn) {
> -		if (is_obsolete_sp(vcpu->kvm, sp))
> -			continue;
> -
> +	for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
>  		if (!need_sync && sp->unsync)
>  			need_sync = true;
>  
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 14:21 ` Paolo Bonzini
@ 2016-03-29 17:43   ` Xiao Guangrong
  2016-03-29 20:09     ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-29 17:43 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/25/2016 10:21 PM, Paolo Bonzini wrote:
>
>
> On 25/03/2016 14:19, Xiao Guangrong wrote:
>>   	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
>> -	pfec |= PFERR_PRESENT_MASK;
>> +	errcode = PFERR_PRESENT_MASK;
>>
>>   	if (unlikely(mmu->pkru_mask)) {
>>   		u32 pkru_bits, offset;
>> @@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>>   			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
>>
>>   		pkru_bits &= mmu->pkru_mask >> offset;
>> -		pfec |= -pkru_bits & PFERR_PK_MASK;
>> +		errcode |= -pkru_bits & PFERR_PK_MASK;
>>   		fault |= (pkru_bits != 0);
>>   	}
>>
>> -	return -(uint32_t)fault & pfec;
>> +	return -(uint32_t)fault & errcode;
>>   }
>
> I have another doubt here.
>
> If you get a fault due to U=0, you would not get PFERR_PK_MASK.  This
> is checked implicitly through the pte_user bit which we moved to
> PFERR_RSVD_BIT.  However, if you get a fault due to W=0 _and_
> PKRU.AD=1 or PKRU.WD=1 for the page's protection key, would the PK
> bit be set in the error code?  If not, we would need something like
> this:

Based on the SDM:
PK flag (bit 5).
This flag is 1 if (1) IA32_EFER.LMA = CR4.PKE = 1; (2) the access causing the page-fault exception 
was a data access; (3) the linear address was a user-mode address with protection key i; and (4) the 
PKRU register (see Section 4.6.2) is such that either (a) ADi = 1; or (b) the following all hold: 
(i) WDi = 1; (ii) the access is a write access; and (iii) either CR0.WP = 1 or the access causing 
the page-fault exception was a user-mode access.

So I think the PKEY check and the ordinary check are independent, i.e., PFEC.PKEY may be
set even if the permission on the page table is not adequate.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-29 17:43   ` Xiao Guangrong
@ 2016-03-29 20:09     ` Paolo Bonzini
  2016-03-30  1:56       ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-29 20:09 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 29/03/2016 19:43, Xiao Guangrong wrote:
> Based on the SDM:
> PK flag (bit 5).
> This flag is 1 if (1) IA32_EFER.LMA = CR4.PKE = 1; (2) the access
> causing the page-fault exception was a data access; (3) the linear
> address was a user-mode address with protection key i; and (5) the PKRU
> register (see Section 4.6.2) is such that either (a) ADi = 1; or (b) the
> following all hold: (i) WDi = 1; (ii) the access is a write access; and
> (iii) either CR0.WP = 1 or the access causing the page-fault exception
> was a user-mode access.
> 
> So I think PKEY check and ordinary check are independent, i.e, PFEC.PKEY
> may be set even if the on permission on the page table is not adequate.

x86/access.flat is currently using the "other" definition, i.e., PFEC.PK
is only set if W=1 or CR0.WP=0 && PFEC.U=0 or PFEC.W=0.  Can you use it
(with ept=1 of course) to check what the processor is doing?

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-29 20:09     ` Paolo Bonzini
@ 2016-03-30  1:56       ` Xiao Guangrong
  2016-03-30  6:36         ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-30  1:56 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/30/2016 04:09 AM, Paolo Bonzini wrote:
>
>
> On 29/03/2016 19:43, Xiao Guangrong wrote:
>> Based on the SDM:
>> PK flag (bit 5).
>> This flag is 1 if (1) IA32_EFER.LMA = CR4.PKE = 1; (2) the access
>> causing the page-fault exception was a data access; (3) the linear
>> address was a user-mode address with protection key i; and (5) the PKRU
>> register (see Section 4.6.2) is such that either (a) ADi = 1; or (b) the
>> following all hold: (i) WDi = 1; (ii) the access is a write access; and
>> (iii) either CR0.WP = 1 or the access causing the page-fault exception
>> was a user-mode access.
>>
>> So I think PKEY check and ordinary check are independent, i.e, PFEC.PKEY
>> may be set even if the on permission on the page table is not adequate.
>
> x86/access.flat is currently using the "other" definition, i.e., PFEC.PK
> is only set if W=1 or CR0.WP=0 && PFEC.U=0 or PFEC.W=0.  Can you use it
> (with ept=1 of course) to check what the processor is doing?
>

Sure.

And ept=1 is hard to trigger MMU issue, i am enabling PKEY on shadow
MMU, let's see what will happen. ;)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-30  1:56       ` Xiao Guangrong
@ 2016-03-30  6:36         ` Paolo Bonzini
  2016-03-30  6:39           ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-03-30  6:36 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/03/2016 03:56, Xiao Guangrong wrote:
>> x86/access.flat is currently using the "other" definition, i.e., PFEC.PK
>> is only set if W=1 or CR0.WP=0 && PFEC.U=0 or PFEC.W=0.  Can you use it
>> (with ept=1 of course) to check what the processor is doing?
> 
> Sure.
> 
> And ept=1 is hard to trigger MMU issue, i am enabling PKEY on shadow
> MMU, let's see what will happen. ;)

No, don't do that!

ept=1 lets you test what the processor does.  It means you cannot test
permission_fault(), but what we want here is just reverse engineering
the microcode.  ept=1 lets you do exactly that.

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-30  6:36         ` Paolo Bonzini
@ 2016-03-30  6:39           ` Xiao Guangrong
  2016-04-06  3:27             ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-03-30  6:39 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Han, Huaitong



On 03/30/2016 02:36 PM, Paolo Bonzini wrote:
>
>
> On 30/03/2016 03:56, Xiao Guangrong wrote:
>>> x86/access.flat is currently using the "other" definition, i.e., PFEC.PK
>>> is only set if W=1 or CR0.WP=0 && PFEC.U=0 or PFEC.W=0.  Can you use it
>>> (with ept=1 of course) to check what the processor is doing?
>>
>> Sure.
>>
>> And ept=1 is hard to trigger MMU issue, i am enabling PKEY on shadow
>> MMU, let's see what will happen. ;)
>
> No, don't do that!
>
> ept=1 lets you test what the processor does.  It means you cannot test
> permission_fault(), but what we want here is just reverse engineering
> the microcode.  ept=1 lets you do exactly that.

Yes, i got this point. Huaitong will do the test once the machine gets
free.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-30  6:39           ` Xiao Guangrong
@ 2016-04-06  3:27             ` Xiao Guangrong
  2016-04-06  8:17               ` Paolo Bonzini
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2016-04-06  3:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Han, Huaitong



On 03/30/2016 02:39 PM, Xiao Guangrong wrote:
>
>
> On 03/30/2016 02:36 PM, Paolo Bonzini wrote:
>>
>>
>> On 30/03/2016 03:56, Xiao Guangrong wrote:
>>>> x86/access.flat is currently using the "other" definition, i.e., PFEC.PK
>>>> is only set if W=1 or CR0.WP=0 && PFEC.U=0 or PFEC.W=0.  Can you use it
>>>> (with ept=1 of course) to check what the processor is doing?
>>>
>>> Sure.
>>>
>>> And ept=1 is hard to trigger MMU issue, i am enabling PKEY on shadow
>>> MMU, let's see what will happen. ;)
>>
>> No, don't do that!
>>
>> ept=1 lets you test what the processor does.  It means you cannot test
>> permission_fault(), but what we want here is just reverse engineering
>> the microcode.  ept=1 lets you do exactly that.
>
> Yes, i got this point. Huaitong will do the test once the machine gets
> free.

I tested it and it is failed:

test pte.p pte.user pde.p pde.user pde.a pde.pse pkru.wd pkey=1 user write efer.nx cr4.pke: FAIL: 
error code 27 expected 7
Dump mapping: address: 0x123400000000
------L4: 2ebe007
------L3: 2ebf007
------L2: 8000000020000a5

So PFEC.PKEY is set even if the ordinary check failed (caused by pde.rw = 0), the kvm code is
right. :)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-04-06  3:27             ` Xiao Guangrong
@ 2016-04-06  8:17               ` Paolo Bonzini
  0 siblings, 0 replies; 23+ messages in thread
From: Paolo Bonzini @ 2016-04-06  8:17 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel, Han, Huaitong



On 06/04/2016 05:27, Xiao Guangrong wrote:
> 
> I tested it and it is failed:
> 
> test pte.p pte.user pde.p pde.user pde.a pde.pse pkru.wd pkey=1 user
> write efer.nx cr4.pke: FAIL: error code 27 expected 7
> Dump mapping: address: 0x123400000000
> ------L4: 2ebe007
> ------L3: 2ebf007
> ------L2: 8000000020000a5
> 
> So PFEC.PKEY is set even if the ordinary check failed (caused by pde.rw
> = 0), the kvm code is
> right. :)

Cool, thanks very much.  I'll fix both QEMU and kvm-unit-tests.

Paolo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
                   ` (4 preceding siblings ...)
  2016-03-25 14:21 ` Paolo Bonzini
@ 2016-04-06  8:56 ` Paolo Bonzini
  2016-04-06 15:09   ` Xiao Guangrong
  5 siblings, 1 reply; 23+ messages in thread
From: Paolo Bonzini @ 2016-04-06  8:56 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 25/03/2016 14:19, Xiao Guangrong wrote:
> @@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));

One more tweak is needed in the line above; pfec - 1 must become pfec &
~1, because you've removed the

	pfec |= PFERR_PRESENT_MASK;

line.  Applied with this change.

Paolo

>  
>  		pkru_bits &= mmu->pkru_mask >> offset;
> -		pfec |= -pkru_bits & PFERR_PK_MASK;
> +		errcode |= -pkru_bits & PFERR_PK_MASK;
>  		fault |= (pkru_bits != 0);
>  	}
>  
> -	return -(uint32_t)fault & pfec;
> +	return -(uint32_t)fault & errcode;
>  }
>  
>  void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] KVM: MMU: fix permission_fault()
  2016-04-06  8:56 ` Paolo Bonzini
@ 2016-04-06 15:09   ` Xiao Guangrong
  0 siblings, 0 replies; 23+ messages in thread
From: Xiao Guangrong @ 2016-04-06 15:09 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 04/06/2016 04:56 PM, Paolo Bonzini wrote:
>
>
> On 25/03/2016 14:19, Xiao Guangrong wrote:
>> @@ -193,11 +193,11 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>>   			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
>
> One more tweak is needed in the line above; pfec - 1 must become pfec &
> ~1, because you've removed the
>
> 	pfec |= PFERR_PRESENT_MASK;
>
> line.  Applied with this change.

Yes, indeed. Thanks for your fix.

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2016-04-06 15:11 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-25 13:19 [PATCH 1/4] KVM: MMU: fix permission_fault() Xiao Guangrong
2016-03-25 13:19 ` [PATCH 2/4] KVM: MMU: simplify the logic of __mmu_unsync_walk() Xiao Guangrong
2016-03-25 13:19 ` [PATCH 3/4] KVM: MMU: reduce the size of mmu_page_path Xiao Guangrong
2016-03-25 13:45   ` Paolo Bonzini
2016-03-25 13:48     ` Xiao Guangrong
2016-03-25 13:56       ` Paolo Bonzini
2016-03-25 14:07         ` Xiao Guangrong
2016-03-25 14:22           ` Paolo Bonzini
2016-03-25 13:19 ` [PATCH 4/4] KVM: MMU: skip obsolete sp in for_each_gfn_*() Xiao Guangrong
2016-03-29  9:44   ` Paolo Bonzini
2016-03-25 13:35 ` [PATCH 1/4] KVM: MMU: fix permission_fault() Paolo Bonzini
2016-03-25 13:41   ` Xiao Guangrong
2016-03-25 13:50     ` Paolo Bonzini
2016-03-25 14:21 ` Paolo Bonzini
2016-03-29 17:43   ` Xiao Guangrong
2016-03-29 20:09     ` Paolo Bonzini
2016-03-30  1:56       ` Xiao Guangrong
2016-03-30  6:36         ` Paolo Bonzini
2016-03-30  6:39           ` Xiao Guangrong
2016-04-06  3:27             ` Xiao Guangrong
2016-04-06  8:17               ` Paolo Bonzini
2016-04-06  8:56 ` Paolo Bonzini
2016-04-06 15:09   ` Xiao Guangrong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).