[PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection
@ 2020-03-26  9:35 Paolo Bonzini
  2020-03-26  9:35 ` [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-26  9:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Junaid Shahid, Sean Christopherson, Vitaly Kuznetsov

This is my take on Junaid and Sean's patch, from the TLB cleanup series.
It passes initial tests, including my usual guest installation and
kvm-unit-tests battery (with both ept=0 and ept=1), but I'm not sure if
there's anything that isn't covered by kvm-unit-tests, especially for
nested.  I have not yet run guest installation tests under nested
virt but I will before merging the whole TLB cleanup series.

Please review!

Junaid Shahid (1):
  KVM: x86: Sync SPTEs when injecting page/EPT fault into L1

Paolo Bonzini (2):
  KVM: x86: introduce kvm_mmu_invalidate_gva
  KVM: x86: cleanup kvm_inject_emulated_page_fault

 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/mmu/mmu.c          | 77 +++++++++++++++++++--------------
 arch/x86/kvm/mmu/paging_tmpl.h  |  2 +-
 arch/x86/kvm/vmx/nested.c       | 12 ++---
 arch/x86/kvm/vmx/vmx.c          |  2 +-
 arch/x86/kvm/x86.c              | 16 +++++--
 6 files changed, 67 insertions(+), 44 deletions(-)

-- 
2.18.2


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-26  9:35 [PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection Paolo Bonzini
@ 2020-03-26  9:35 ` Paolo Bonzini
  2020-03-28 18:26   ` Sean Christopherson
  2020-03-26  9:35 ` [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault Paolo Bonzini
  2020-03-26  9:35 ` [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1 Paolo Bonzini
  2 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-26  9:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Junaid Shahid, Sean Christopherson, Vitaly Kuznetsov

Wrap the combination of mmu->invlpg and kvm_x86_ops->tlb_flush_gva
into a new function.  This function also lets us specify the host PGD to
invalidate and also the MMU, both of which will be useful in fixing and
simplifying kvm_inject_emulated_page_fault.

A nested guest's MMU however has g_context->invlpg == NULL.  Instead of
setting it to nonpaging_invlpg, make kvm_mmu_invalidate_gva the only
entry point to mmu->invlpg and make a NULL invlpg pointer equivalent
to nonpaging_invlpg, saving a retpoline.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/mmu/mmu.c          | 71 +++++++++++++++++++++------------
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 328b1765ff76..f6a1ece1bb4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1506,6 +1506,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		            gva_t gva, unsigned long root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 560e85ebdf22..e26c9a583e75 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
-{
-}
-
 static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
 				 struct kvm_mmu_page *sp, u64 *spte,
 				 const void *pte)
@@ -4237,7 +4233,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->sync_page = nonpaging_sync_page;
-	context->invlpg = nonpaging_invlpg;
+	context->invlpg = NULL;
 	context->update_pte = nonpaging_update_pte;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
@@ -4928,7 +4924,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->mmu_role.as_u64 = new_role.as_u64;
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
-	context->invlpg = nonpaging_invlpg;
+	context->invlpg = NULL;
 	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->direct_map = true;
@@ -5096,6 +5092,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
+	/*
+	 * L2 page tables are never shadowed, so there is no need to sync
+	 * SPTEs.
+	 */
+	g_context->invlpg            = NULL;
+
 	/*
 	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
 	 * L1's nested page tables (e.g. EPT12). The nested translation
@@ -5497,37 +5499,54 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
-void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		            gva_t gva, unsigned long root_hpa)
 {
-	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	int i;
 
-	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
-	if (is_noncanonical_address(gva, vcpu))
+	/* It's actually a GPA for vcpu->arch.guest_mmu.  */
+	if (mmu != &vcpu->arch.guest_mmu) {
+		/* INVLPG on a non-canonical address is a NOP according to the SDM.  */
+		if (is_noncanonical_address(gva, vcpu))
+			return;
+
+		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+	}
+
+	if (!mmu->invlpg)
 		return;
 
-	mmu->invlpg(vcpu, gva, mmu->root_hpa);
+	if (root_hpa == INVALID_PAGE) {
+		mmu->invlpg(vcpu, gva, mmu->root_hpa);
 
-	/*
-	 * INVLPG is required to invalidate any global mappings for the VA,
-	 * irrespective of PCID. Since it would take us roughly similar amount
-	 * of work to determine whether any of the prev_root mappings of the VA
-	 * is marked global, or to just sync it blindly, so we might as well
-	 * just always sync it.
-	 *
-	 * Mappings not reachable via the current cr3 or the prev_roots will be
-	 * synced when switching to that cr3, so nothing needs to be done here
-	 * for them.
-	 */
-	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		if (VALID_PAGE(mmu->prev_roots[i].hpa))
-			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+		/*
+		 * INVLPG is required to invalidate any global mappings for the VA,
+		 * irrespective of PCID. Since it would take us roughly similar amount
+		 * of work to determine whether any of the prev_root mappings of the VA
+		 * is marked global, or to just sync it blindly, so we might as well
+		 * just always sync it.
+		 *
+		 * Mappings not reachable via the current cr3 or the prev_roots will be
+		 * synced when switching to that cr3, so nothing needs to be done here
+		 * for them.
+		 */
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if (VALID_PAGE(mmu->prev_roots[i].hpa))
+				mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+	} else {
+		mmu->invlpg(vcpu, gva, root_hpa);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
 
-	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
 	++vcpu->stat.invlpg;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
 
+
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-- 
2.18.2



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-26  9:35 ` [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva Paolo Bonzini
@ 2020-03-28 18:26   ` Sean Christopherson
  2020-03-30 10:45     ` Paolo Bonzini
  0 siblings, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2020-03-28 18:26 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, Junaid Shahid, Vitaly Kuznetsov

On Thu, Mar 26, 2020 at 05:35:14AM -0400, Paolo Bonzini wrote:
> Wrap the combination of mmu->invlpg and kvm_x86_ops->tlb_flush_gva
> into a new function.  This function also lets us specify the host PGD to
> invalidate and also the MMU, both of which will be useful in fixing and
> simplifying kvm_inject_emulated_page_fault.
> 
> A nested guest's MMU however has g_context->invlpg == NULL.  Instead of
> setting it to nonpaging_invlpg, make kvm_mmu_invalidate_gva the only
> entry point to mmu->invlpg and make a NULL invlpg pointer equivalent
> to nonpaging_invlpg, saving a retpoline.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 +
>  arch/x86/kvm/mmu/mmu.c          | 71 +++++++++++++++++++++------------
>  2 files changed, 47 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 328b1765ff76..f6a1ece1bb4a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1506,6 +1506,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
>  		       void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
> +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> +		            gva_t gva, unsigned long root_hpa);
>  void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
>  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
>  
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 560e85ebdf22..e26c9a583e75 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
> -{
> -}
> -
>  static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
>  				 struct kvm_mmu_page *sp, u64 *spte,
>  				 const void *pte)
> @@ -4237,7 +4233,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
>  	context->page_fault = nonpaging_page_fault;
>  	context->gva_to_gpa = nonpaging_gva_to_gpa;
>  	context->sync_page = nonpaging_sync_page;
> -	context->invlpg = nonpaging_invlpg;
> +	context->invlpg = NULL;
>  	context->update_pte = nonpaging_update_pte;
>  	context->root_level = 0;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
> @@ -4928,7 +4924,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  	context->mmu_role.as_u64 = new_role.as_u64;
>  	context->page_fault = kvm_tdp_page_fault;
>  	context->sync_page = nonpaging_sync_page;
> -	context->invlpg = nonpaging_invlpg;
> +	context->invlpg = NULL;
>  	context->update_pte = nonpaging_update_pte;
>  	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
>  	context->direct_map = true;
> @@ -5096,6 +5092,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
>  	g_context->get_pdptr         = kvm_pdptr_read;
>  	g_context->inject_page_fault = kvm_inject_page_fault;
>  
> +	/*
> +	 * L2 page tables are never shadowed, so there is no need to sync
> +	 * SPTEs.
> +	 */
> +	g_context->invlpg            = NULL;
> +
>  	/*
>  	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
>  	 * L1's nested page tables (e.g. EPT12). The nested translation
> @@ -5497,37 +5499,54 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
>  
> -void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
> +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> +		            gva_t gva, unsigned long root_hpa)

As pointed out by the build bot, @root_hpa needs to be hpa_t.

>  {
> -	struct kvm_mmu *mmu = vcpu->arch.mmu;
>  	int i;
>  
> -	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
> -	if (is_noncanonical_address(gva, vcpu))
> +	/* It's actually a GPA for vcpu->arch.guest_mmu.  */
> +	if (mmu != &vcpu->arch.guest_mmu) {

Doesn't need to be addressed here, but this is not the first time in this
series (the large TLB flushing series) that I've struggled to parse
"guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
or l2_tdp_mmu?

A bit ugly, but it'd be nice to avoid the mental challenge of remembering
that guest_mmu is in play if and only if nested TDP is enabled.

> +		/* INVLPG on a non-canonical address is a NOP according to the SDM.  */
> +		if (is_noncanonical_address(gva, vcpu))
> +			return;
> +
> +		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
> +	}
> +
> +	if (!mmu->invlpg)
>  		return;
>  
> -	mmu->invlpg(vcpu, gva, mmu->root_hpa);
> +	if (root_hpa == INVALID_PAGE) {
> +		mmu->invlpg(vcpu, gva, mmu->root_hpa);
>  
> -	/*
> -	 * INVLPG is required to invalidate any global mappings for the VA,
> -	 * irrespective of PCID. Since it would take us roughly similar amount
> -	 * of work to determine whether any of the prev_root mappings of the VA
> -	 * is marked global, or to just sync it blindly, so we might as well
> -	 * just always sync it.
> -	 *
> -	 * Mappings not reachable via the current cr3 or the prev_roots will be
> -	 * synced when switching to that cr3, so nothing needs to be done here
> -	 * for them.
> -	 */
> -	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> -		if (VALID_PAGE(mmu->prev_roots[i].hpa))
> -			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
> +		/*
> +		 * INVLPG is required to invalidate any global mappings for the VA,
> +		 * irrespective of PCID. Since it would take us roughly similar amount
> +		 * of work to determine whether any of the prev_root mappings of the VA
> +		 * is marked global, or to just sync it blindly, so we might as well
> +		 * just always sync it.
> +		 *
> +		 * Mappings not reachable via the current cr3 or the prev_roots will be
> +		 * synced when switching to that cr3, so nothing needs to be done here
> +		 * for them.
> +		 */
> +		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> +			if (VALID_PAGE(mmu->prev_roots[i].hpa))
> +				mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
> +	} else {
> +		mmu->invlpg(vcpu, gva, root_hpa);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
>  
> -	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
> +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
> +{
> +	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
>  	++vcpu->stat.invlpg;
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
>  
> +
>  void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
>  {
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
> -- 
> 2.18.2
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-28 18:26   ` Sean Christopherson
@ 2020-03-30 10:45     ` Paolo Bonzini
  2020-03-30 18:47       ` Sean Christopherson
  0 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-30 10:45 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: linux-kernel, kvm, Junaid Shahid, Vitaly Kuznetsov

On 28/03/20 19:26, Sean Christopherson wrote:
>> +	if (mmu != &vcpu->arch.guest_mmu) {
> Doesn't need to be addressed here, but this is not the first time in this
> series (the large TLB flushing series) that I've struggled to parse
> "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
> or l2_tdp_mmu?
> 
> A bit ugly, but it'd be nice to avoid the mental challenge of remembering
> that guest_mmu is in play if and only if nested TDP is enabled.

No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.

Paolo


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-30 10:45     ` Paolo Bonzini
@ 2020-03-30 18:47       ` Sean Christopherson
  2020-03-31 10:33         ` Vitaly Kuznetsov
  0 siblings, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2020-03-30 18:47 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, Junaid Shahid, Vitaly Kuznetsov

On Mon, Mar 30, 2020 at 12:45:34PM +0200, Paolo Bonzini wrote:
> On 28/03/20 19:26, Sean Christopherson wrote:
> >> +	if (mmu != &vcpu->arch.guest_mmu) {
> > Doesn't need to be addressed here, but this is not the first time in this
> > series (the large TLB flushing series) that I've struggled to parse
> > "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
> > or l2_tdp_mmu?
> > 
> > A bit ugly, but it'd be nice to avoid the mental challenge of remembering
> > that guest_mmu is in play if and only if nested TDP is enabled.
> 
> No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.

Works for me.  My vote is for anything other than guest_mmu :-)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-30 18:47       ` Sean Christopherson
@ 2020-03-31 10:33         ` Vitaly Kuznetsov
  2020-03-31 12:16           ` Paolo Bonzini
  0 siblings, 1 reply; 14+ messages in thread
From: Vitaly Kuznetsov @ 2020-03-31 10:33 UTC (permalink / raw)
  To: Sean Christopherson, Paolo Bonzini; +Cc: linux-kernel, kvm, Junaid Shahid

Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Mon, Mar 30, 2020 at 12:45:34PM +0200, Paolo Bonzini wrote:
>> On 28/03/20 19:26, Sean Christopherson wrote:
>> >> +	if (mmu != &vcpu->arch.guest_mmu) {
>> > Doesn't need to be addressed here, but this is not the first time in this
>> > series (the large TLB flushing series) that I've struggled to parse
>> > "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
>> > or l2_tdp_mmu?
>> > 
>> > A bit ugly, but it'd be nice to avoid the mental challenge of remembering
>> > that guest_mmu is in play if and only if nested TDP is enabled.
>> 
>> No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.
>
> Works for me.  My vote is for anything other than guest_mmu :-)
>

Oh come on guys, nobody protested when I called it this way :-)

Peronally, I don't quite like 'shadow_tdp_mmu' because it doesn't have
any particular reference to the fact that it is a nested/L2 related
thing (maybe it's just a shadow MMU?) Also, we already have a thing
called 'nested_mmu'... Maybe let's be bold and rename all three things,
like

root_mmu -> l1_mmu
guest_mmu -> l1_nested_mmu
nested_mmu -> l2_mmu (l2_walk_mmu)

or something like that?

-- 
Vitaly


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva
  2020-03-31 10:33         ` Vitaly Kuznetsov
@ 2020-03-31 12:16           ` Paolo Bonzini
  0 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-31 12:16 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Sean Christopherson; +Cc: linux-kernel, kvm, Junaid Shahid

On 31/03/20 12:33, Vitaly Kuznetsov wrote:
>> Works for me.  My vote is for anything other than guest_mmu :-)
>
> Oh come on guys, nobody protested when I called it this way :-)

Sure I take full responsibility for that. :)

> Peronally, I don't quite like 'shadow_tdp_mmu' because it doesn't have
> any particular reference to the fact that it is a nested/L2 related
> thing (maybe it's just a shadow MMU?)

Well, nested virt is the only case in which you shadow TDP.  Both
interpretations work:

* "shadow tdp_mmu": an MMU for two-dimensional page tables that employs
shadowing

* "shadow_tdp MMU": the MMU for two-dimensional page tables.

> Also, we already have a thing
> called 'nested_mmu'... Maybe let's be bold and rename all three things,
> like
>
> root_mmu -> l1_mmu
> guest_mmu -> l1_nested_mmu
> nested_mmu -> l2_mmu (l2_walk_mmu)

I am not particularly fond of using l1/l2 outside code that specifically
deals with nested virt.  Also, l1_nested_mmu is too confusing with
respect to the current nested_mmu (likewise for root_mmu I would rename
it to guest_mmu but it would be an awful source of mental confusion as
well as semantic source code conflicts).

That said, I wouldn't mind replacing nested_mmu to something else, for
example nested_walk_mmu.

Paolo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault
  2020-03-26  9:35 [PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection Paolo Bonzini
  2020-03-26  9:35 ` [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva Paolo Bonzini
@ 2020-03-26  9:35 ` Paolo Bonzini
  2020-03-26 13:41   ` Vitaly Kuznetsov
  2020-03-28 18:41   ` Sean Christopherson
  2020-03-26  9:35 ` [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1 Paolo Bonzini
  2 siblings, 2 replies; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-26  9:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Junaid Shahid, Sean Christopherson, Vitaly Kuznetsov

To reconstruct the kvm_mmu to be used for page fault injection, we
can simply use fault->nested_page_fault.  This matches how
fault->nested_page_fault is assigned in the first place by
FNAME(walk_addr_generic).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c         | 6 ------
 arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
 arch/x86/kvm/x86.c             | 7 +++----
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e26c9a583e75..6250e31ac617 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4353,12 +4353,6 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 	return kvm_read_cr3(vcpu);
 }
 
-static void inject_page_fault(struct kvm_vcpu *vcpu,
-			      struct x86_exception *fault)
-{
-	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
-}
-
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 			   unsigned int access, int *nr_present)
 {
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 1ddbfff64ccc..ae646acf6703 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
 		if (!prefault)
-			inject_page_fault(vcpu, &walker.fault);
+			kvm_inject_emulated_page_fault(vcpu, &walker.fault);
 
 		return RET_PF_RETRY;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 64ed6e6e2b56..522905523bf0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -614,12 +614,11 @@ EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
 				    struct x86_exception *fault)
 {
+	struct kvm_mmu *fault_mmu;
 	WARN_ON_ONCE(fault->vector != PF_VECTOR);
 
-	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
-		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
-	else
-		vcpu->arch.mmu->inject_page_fault(vcpu, fault);
+	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : vcpu->arch.walk_mmu;
+	fault_mmu->inject_page_fault(vcpu, fault);
 
 	return fault->nested_page_fault;
 }
-- 
2.18.2



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault
  2020-03-26  9:35 ` [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault Paolo Bonzini
@ 2020-03-26 13:41   ` Vitaly Kuznetsov
  2020-03-26 19:45     ` Paolo Bonzini
  2020-03-28 18:41   ` Sean Christopherson
  1 sibling, 1 reply; 14+ messages in thread
From: Vitaly Kuznetsov @ 2020-03-26 13:41 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Junaid Shahid, Sean Christopherson, linux-kernel, kvm

Paolo Bonzini <pbonzini@redhat.com> writes:

> To reconstruct the kvm_mmu to be used for page fault injection, we
> can simply use fault->nested_page_fault.  This matches how
> fault->nested_page_fault is assigned in the first place by
> FNAME(walk_addr_generic).
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/mmu/mmu.c         | 6 ------
>  arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
>  arch/x86/kvm/x86.c             | 7 +++----
>  3 files changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e26c9a583e75..6250e31ac617 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4353,12 +4353,6 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr3(vcpu);
>  }
>  
> -static void inject_page_fault(struct kvm_vcpu *vcpu,
> -			      struct x86_exception *fault)
> -{
> -	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
> -}
> -

This is already gone with Sean's "KVM: x86: Consolidate logic for
injecting page faults to L1".

It would probably make sense to have a combined series (or a branch on
kvm.git) to simplify testing efforts.

>  static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
>  			   unsigned int access, int *nr_present)
>  {
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 1ddbfff64ccc..ae646acf6703 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
>  	if (!r) {
>  		pgprintk("%s: guest page fault\n", __func__);
>  		if (!prefault)
> -			inject_page_fault(vcpu, &walker.fault);
> +			kvm_inject_emulated_page_fault(vcpu, &walker.fault);
>  
>  		return RET_PF_RETRY;
>  	}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 64ed6e6e2b56..522905523bf0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -614,12 +614,11 @@ EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
>  bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
>  				    struct x86_exception *fault)
>  {
> +	struct kvm_mmu *fault_mmu;
>  	WARN_ON_ONCE(fault->vector != PF_VECTOR);
>  
> -	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
> -		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
> -	else
> -		vcpu->arch.mmu->inject_page_fault(vcpu, fault);
> +	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : vcpu->arch.walk_mmu;
> +	fault_mmu->inject_page_fault(vcpu, fault);
>  
>  	return fault->nested_page_fault;
>  }

-- 
Vitaly


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault
  2020-03-26 13:41   ` Vitaly Kuznetsov
@ 2020-03-26 19:45     ` Paolo Bonzini
  2020-03-27 12:48       ` Vitaly Kuznetsov
  0 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-26 19:45 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: Junaid Shahid, Sean Christopherson, linux-kernel, kvm

On 26/03/20 14:41, Vitaly Kuznetsov wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> To reconstruct the kvm_mmu to be used for page fault injection, we
>> can simply use fault->nested_page_fault.  This matches how
>> fault->nested_page_fault is assigned in the first place by
>> FNAME(walk_addr_generic).
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  arch/x86/kvm/mmu/mmu.c         | 6 ------
>>  arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
>>  arch/x86/kvm/x86.c             | 7 +++----
>>  3 files changed, 4 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>> index e26c9a583e75..6250e31ac617 100644
>> --- a/arch/x86/kvm/mmu/mmu.c
>> +++ b/arch/x86/kvm/mmu/mmu.c
>> @@ -4353,12 +4353,6 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
>>  	return kvm_read_cr3(vcpu);
>>  }
>>  
>> -static void inject_page_fault(struct kvm_vcpu *vcpu,
>> -			      struct x86_exception *fault)
>> -{
>> -	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
>> -}
>> -
> 
> This is already gone with Sean's "KVM: x86: Consolidate logic for
> injecting page faults to L1".
> 
> It would probably make sense to have a combined series (or a branch on
> kvm.git) to simplify testing efforts.

Yes, these three patches replace part of Sean's (the patch you mention
and the next one, "KVM: x86: Sync SPTEs when injecting page/EPT fault
into L1").

I pushed the result to a branch named kvm-tlb-cleanup on kvm.git.

Paolo


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault
  2020-03-26 19:45     ` Paolo Bonzini
@ 2020-03-27 12:48       ` Vitaly Kuznetsov
  0 siblings, 0 replies; 14+ messages in thread
From: Vitaly Kuznetsov @ 2020-03-27 12:48 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Junaid Shahid, Sean Christopherson, linux-kernel, kvm

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 26/03/20 14:41, Vitaly Kuznetsov wrote:
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>> 
>>> To reconstruct the kvm_mmu to be used for page fault injection, we
>>> can simply use fault->nested_page_fault.  This matches how
>>> fault->nested_page_fault is assigned in the first place by
>>> FNAME(walk_addr_generic).
>>>
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>  arch/x86/kvm/mmu/mmu.c         | 6 ------
>>>  arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
>>>  arch/x86/kvm/x86.c             | 7 +++----
>>>  3 files changed, 4 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>>> index e26c9a583e75..6250e31ac617 100644
>>> --- a/arch/x86/kvm/mmu/mmu.c
>>> +++ b/arch/x86/kvm/mmu/mmu.c
>>> @@ -4353,12 +4353,6 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
>>>  	return kvm_read_cr3(vcpu);
>>>  }
>>>  
>>> -static void inject_page_fault(struct kvm_vcpu *vcpu,
>>> -			      struct x86_exception *fault)
>>> -{
>>> -	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
>>> -}
>>> -
>> 
>> This is already gone with Sean's "KVM: x86: Consolidate logic for
>> injecting page faults to L1".
>> 
>> It would probably make sense to have a combined series (or a branch on
>> kvm.git) to simplify testing efforts.
>
> Yes, these three patches replace part of Sean's (the patch you mention
> and the next one, "KVM: x86: Sync SPTEs when injecting page/EPT fault
> into L1").
>
> I pushed the result to a branch named kvm-tlb-cleanup on kvm.git.
>

Thank you,

I've tested it with Hyper-V on both VMX and SVM with and without PV TLB
flush and nothing immediately blew up. I'm also observing a very nice
19000 -> 14000 cycles improvement on tight cpuid loop test (with EVMCS
enabled).

-- 
Vitaly


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault
  2020-03-26  9:35 ` [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault Paolo Bonzini
  2020-03-26 13:41   ` Vitaly Kuznetsov
@ 2020-03-28 18:41   ` Sean Christopherson
  1 sibling, 0 replies; 14+ messages in thread
From: Sean Christopherson @ 2020-03-28 18:41 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, Junaid Shahid, Vitaly Kuznetsov

On Thu, Mar 26, 2020 at 05:35:15AM -0400, Paolo Bonzini wrote:
> To reconstruct the kvm_mmu to be used for page fault injection, we
> can simply use fault->nested_page_fault.  This matches how
> fault->nested_page_fault is assigned in the first place by
> FNAME(walk_addr_generic).
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/mmu/mmu.c         | 6 ------
>  arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
>  arch/x86/kvm/x86.c             | 7 +++----
>  3 files changed, 4 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e26c9a583e75..6250e31ac617 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4353,12 +4353,6 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
>  	return kvm_read_cr3(vcpu);
>  }
>  
> -static void inject_page_fault(struct kvm_vcpu *vcpu,
> -			      struct x86_exception *fault)
> -{
> -	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
> -}
> -
>  static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
>  			   unsigned int access, int *nr_present)
>  {
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 1ddbfff64ccc..ae646acf6703 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
>  	if (!r) {
>  		pgprintk("%s: guest page fault\n", __func__);
>  		if (!prefault)
> -			inject_page_fault(vcpu, &walker.fault);
> +			kvm_inject_emulated_page_fault(vcpu, &walker.fault);
>  
>  		return RET_PF_RETRY;
>  	}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 64ed6e6e2b56..522905523bf0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -614,12 +614,11 @@ EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
>  bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
>  				    struct x86_exception *fault)
>  {
> +	struct kvm_mmu *fault_mmu;
>  	WARN_ON_ONCE(fault->vector != PF_VECTOR);
>  
> -	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
> -		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
> -	else
> -		vcpu->arch.mmu->inject_page_fault(vcpu, fault);
> +	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : vcpu->arch.walk_mmu;

Apparently I'm in a nitpicky mood.  IMO, a newline after the colon is
easier to parse

	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
					       vcpu->arch.walk_mmu;

FWIW, I really like that "inject into the nested_mmu if it's not a nested
page fault" logic goes away.  That trips me up every time I look at it.

> +	fault_mmu->inject_page_fault(vcpu, fault);
>  
>  	return fault->nested_page_fault;
>  }
> -- 
> 2.18.2
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1
  2020-03-26  9:35 [PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection Paolo Bonzini
  2020-03-26  9:35 ` [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva Paolo Bonzini
  2020-03-26  9:35 ` [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault Paolo Bonzini
@ 2020-03-26  9:35 ` Paolo Bonzini
  2020-03-28 18:29   ` Sean Christopherson
  2 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2020-03-26  9:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Junaid Shahid, Sean Christopherson, Vitaly Kuznetsov

From: Junaid Shahid <junaids@google.com>

When injecting a page fault or EPT violation/misconfiguration, KVM is
not syncing any shadow PTEs associated with the faulting address,
including those in previous MMUs that are associated with L1's current
EPTP (in a nested EPT scenario), nor is it flushing any hardware TLB
entries.  All this is done by kvm_mmu_invalidate_gva.

Page faults that are either !PRESENT or RSVD are exempt from the flushing,
as the CPU is not allowed to cache such translations.

Signed-off-by: Junaid Shahid <junaids@google.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200320212833.3507-8-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 12 ++++++------
 arch/x86/kvm/vmx/vmx.c    |  2 +-
 arch/x86/kvm/x86.c        | 11 ++++++++++-
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2c450b0ba592..1586b1b5ba93 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4559,7 +4559,7 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 		return 1;
 
 	if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
+		kvm_inject_emulated_page_fault(vcpu, &e);
 		return 1;
 	}
 
@@ -4868,7 +4868,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 			return 1;
 		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
 		if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) {
-			kvm_inject_page_fault(vcpu, &e);
+			kvm_inject_emulated_page_fault(vcpu, &e);
 			return 1;
 		}
 	}
@@ -4942,7 +4942,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 					instr_info, false, len, &gva))
 			return 1;
 		if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) {
-			kvm_inject_page_fault(vcpu, &e);
+			kvm_inject_emulated_page_fault(vcpu, &e);
 			return 1;
 		}
 	}
@@ -5107,7 +5107,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
 	if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
 					sizeof(gpa_t), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
+		kvm_inject_emulated_page_fault(vcpu, &e);
 		return 1;
 	}
 	return nested_vmx_succeed(vcpu);
@@ -5151,7 +5151,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 			vmx_instruction_info, false, sizeof(operand), &gva))
 		return 1;
 	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
+		kvm_inject_emulated_page_fault(vcpu, &e);
 		return 1;
 	}
 
@@ -5219,7 +5219,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 			vmx_instruction_info, false, sizeof(operand), &gva))
 		return 1;
 	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
+		kvm_inject_emulated_page_fault(vcpu, &e);
 		return 1;
 	}
 	if (operand.vpid >> 16)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 07299a957d4a..c944726b3c0c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5404,7 +5404,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
+		kvm_inject_emulated_page_fault(vcpu, &e);
 		return 1;
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 522905523bf0..dbca6c3bd0db 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -618,8 +618,17 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
 	WARN_ON_ONCE(fault->vector != PF_VECTOR);
 
 	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : vcpu->arch.walk_mmu;
-	fault_mmu->inject_page_fault(vcpu, fault);
 
+	/*
+	 * Invalidate the TLB entry for the faulting address, if it exists,
+	 * else the access will fault indefinitely (and to emulate hardware).
+	 */
+	if ((fault->error_code & PFERR_PRESENT_MASK)
+	    && !(fault->error_code & PFERR_RSVD_MASK))
+		kvm_mmu_invalidate_gva(vcpu, fault_mmu,
+				       fault->address, fault_mmu->root_hpa);
+
+	fault_mmu->inject_page_fault(vcpu, fault);
 	return fault->nested_page_fault;
 }
 EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
-- 
2.18.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1
  2020-03-26  9:35 ` [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1 Paolo Bonzini
@ 2020-03-28 18:29   ` Sean Christopherson
  0 siblings, 0 replies; 14+ messages in thread
From: Sean Christopherson @ 2020-03-28 18:29 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, Junaid Shahid, Vitaly Kuznetsov

On Thu, Mar 26, 2020 at 05:35:16AM -0400, Paolo Bonzini wrote:
> From: Junaid Shahid <junaids@google.com>
> 
> When injecting a page fault or EPT violation/misconfiguration, KVM is
> not syncing any shadow PTEs associated with the faulting address,
> including those in previous MMUs that are associated with L1's current
> EPTP (in a nested EPT scenario), nor is it flushing any hardware TLB
> entries.  All this is done by kvm_mmu_invalidate_gva.
> 
> Page faults that are either !PRESENT or RSVD are exempt from the flushing,
> as the CPU is not allowed to cache such translations.
> 
> Signed-off-by: Junaid Shahid <junaids@google.com>
> Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Message-Id: <20200320212833.3507-8-sean.j.christopherson@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 12 ++++++------
>  arch/x86/kvm/vmx/vmx.c    |  2 +-
>  arch/x86/kvm/x86.c        | 11 ++++++++++-
>  3 files changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 522905523bf0..dbca6c3bd0db 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -618,8 +618,17 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
>  	WARN_ON_ONCE(fault->vector != PF_VECTOR);
>  
>  	fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : vcpu->arch.walk_mmu;
> -	fault_mmu->inject_page_fault(vcpu, fault);
>  
> +	/*
> +	 * Invalidate the TLB entry for the faulting address, if it exists,
> +	 * else the access will fault indefinitely (and to emulate hardware).
> +	 */
> +	if ((fault->error_code & PFERR_PRESENT_MASK)
> +	    && !(fault->error_code & PFERR_RSVD_MASK))

What kind of heathen puts && on the new line?  :-D

> +		kvm_mmu_invalidate_gva(vcpu, fault_mmu,
> +				       fault->address, fault_mmu->root_hpa);

Another nit, why have the new line after fault_mmu?  I.e.

		kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
				       fault_mmu->root_hpa);


> +
> +	fault_mmu->inject_page_fault(vcpu, fault);
>  	return fault->nested_page_fault;
>  }
>  EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
> -- 
> 2.18.2
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-03-31 12:16 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-26  9:35 [PATCH 0/3] KVM: x86: sync SPTEs on page/EPT fault injection Paolo Bonzini
2020-03-26  9:35 ` [PATCH 1/3] KVM: x86: introduce kvm_mmu_invalidate_gva Paolo Bonzini
2020-03-28 18:26   ` Sean Christopherson
2020-03-30 10:45     ` Paolo Bonzini
2020-03-30 18:47       ` Sean Christopherson
2020-03-31 10:33         ` Vitaly Kuznetsov
2020-03-31 12:16           ` Paolo Bonzini
2020-03-26  9:35 ` [PATCH 2/3] KVM: x86: cleanup kvm_inject_emulated_page_fault Paolo Bonzini
2020-03-26 13:41   ` Vitaly Kuznetsov
2020-03-26 19:45     ` Paolo Bonzini
2020-03-27 12:48       ` Vitaly Kuznetsov
2020-03-28 18:41   ` Sean Christopherson
2020-03-26  9:35 ` [PATCH 3/3] KVM: x86: Sync SPTEs when injecting page/EPT fault into L1 Paolo Bonzini
2020-03-28 18:29   ` Sean Christopherson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.