linux-kernel.vger.kernel.org archive mirror
* [PATCH RFC 0/7] x86/kvm/nVMX: optimize MMU switch between L1 and L2
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

Currently, when we switch from L1 to L2 we do the following:
- Re-initialize L1 MMU as shadow EPT MMU (nested_ept_init_mmu_context())
- Re-initialize 'nested' MMU (nested_vmx_load_cr3() -> init_kvm_nested_mmu())
- Reload MMU root upon guest entry.

When we switch back we do:
- Re-initialize L1 MMU (nested_vmx_load_cr3() -> init_kvm_tdp_mmu())
- Reload MMU root upon guest entry.

This seems sub-optimal: initializing the MMU is expensive (thanks to
update_permission_bitmask(), update_pkru_bitmask(), ...) and reloading the
MMU root doesn't come for free.

Approach the issue by splitting the L1-normal and L1-nested MMUs and
checking whether an MMU reset is really needed. This spares us about 1000
CPU cycles on a nested vmexit.
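
To illustrate the intent, here is a rough, self-contained sketch with made-up
toy_* names and a simple memcmp() check -- not the actual patch code, which
keeps separate root_mmu/guest_mmu structs and compares the relevant
CR0/CR4/EFER inputs individually:

/*
 * Toy model of the proposed split -- simplified, illustrative only,
 * not the real struct kvm_mmu.
 */
#include <stdbool.h>
#include <string.h>

struct toy_mmu_inputs {
	bool ept_ad, execonly, cr0_wp, cr4_smep, cr4_smap, cr4_pke;
};

struct toy_mmu {
	unsigned long root_hpa;		/* kept valid across switches */
	bool initialized;
	struct toy_mmu_inputs last;	/* inputs used for the last init */
};

struct toy_vcpu {
	struct toy_mmu root_mmu;	/* L1, normal TDP */
	struct toy_mmu guest_mmu;	/* shadow EPT context used for L2 */
	struct toy_mmu *mmu;		/* currently active context */
};

/* Stands in for the expensive update_permission_bitmask() & co. */
static void toy_full_init(struct toy_mmu *mmu, const struct toy_mmu_inputs *in)
{
	mmu->last = *in;
	mmu->initialized = true;
}

/* L1 -> L2: flip the pointer, re-initialize only if the inputs changed. */
static void toy_enter_l2(struct toy_vcpu *vcpu, const struct toy_mmu_inputs *in)
{
	vcpu->mmu = &vcpu->guest_mmu;
	if (!vcpu->mmu->initialized ||
	    memcmp(&vcpu->mmu->last, in, sizeof(*in)))
		toy_full_init(vcpu->mmu, in);
}

/* L2 -> L1: just flip the pointer back; root_mmu kept its roots. */
static void toy_exit_to_l1(struct toy_vcpu *vcpu)
{
	vcpu->mmu = &vcpu->root_mmu;
}

With this shape, switching back to L1 is a pointer flip, and entering L2 only
pays for a full re-initialization when something that feeds the MMU setup has
actually changed.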

RFC part:
- Does this look like a plausible solution?
- SVM nested can probably be optimized in the same way.
- Does mmu_update_needed() cover everything?

Vitaly Kuznetsov (7):
  x86/kvm/mmu: make vcpu->mmu a pointer to the current MMU
  x86/kvm/mmu.c: set get_pdptr hook in kvm_init_shadow_ept_mmu()
  x86/kvm/mmu.c: add kvm_mmu parameter to kvm_mmu_free_roots()
  x86/kvm/mmu: introduce guest_mmu
  x86/kvm/mmu: get rid of redundant kvm_mmu_setup()
  x86/kvm/nVMX: introduce scache for kvm_init_shadow_ept_mmu
  x86/kvm/nVMX: optimize MMU switch from nested_vmx_load_cr3()

 arch/x86/include/asm/kvm_host.h |  36 ++++-
 arch/x86/kvm/cpuid.c            |   2 +-
 arch/x86/kvm/mmu.c              | 282 ++++++++++++++++++++++++++--------------
 arch/x86/kvm/mmu.h              |   2 +-
 arch/x86/kvm/mmu_audit.c        |  12 +-
 arch/x86/kvm/paging_tmpl.h      |  17 +--
 arch/x86/kvm/svm.c              |  20 +--
 arch/x86/kvm/vmx.c              |  52 +++++---
 arch/x86/kvm/x86.c              |  34 ++---
 9 files changed, 292 insertions(+), 165 deletions(-)

-- 
2.14.4



* [PATCH RFC 1/7] x86/kvm/mmu: make vcpu->mmu a pointer to the current MMU
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

As a preparation for the full MMU split between L1 and L2, make
vcpu->arch.mmu a pointer to the currently used MMU. For now, this is always
vcpu->arch.root_mmu. No functional change.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   5 +-
 arch/x86/kvm/mmu.c              | 150 ++++++++++++++++++++--------------------
 arch/x86/kvm/mmu.h              |   2 +-
 arch/x86/kvm/mmu_audit.c        |  12 ++--
 arch/x86/kvm/paging_tmpl.h      |  17 ++---
 arch/x86/kvm/svm.c              |  14 ++--
 arch/x86/kvm/vmx.c              |  13 ++--
 arch/x86/kvm/x86.c              |  20 +++---
 8 files changed, 120 insertions(+), 113 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c13cd28d9d1b..2aa3c43ca5e5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -521,7 +521,10 @@ struct kvm_vcpu_arch {
 	 * the paging mode of the l1 guest. This context is always used to
 	 * handle faults.
 	 */
-	struct kvm_mmu mmu;
+	struct kvm_mmu *mmu;
+
+	/* Non-nested MMU for L1 */
+	struct kvm_mmu root_mmu;
 
 	/*
 	 * Paging state of an L2 guest (used for nested npt)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d594690d8b95..914e4eea487a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2122,7 +2122,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return false;
 	}
 
-	if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+	if (vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
@@ -2316,14 +2316,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	int collisions = 0;
 	LIST_HEAD(invalid_list);
 
-	role = vcpu->arch.mmu.base_role;
+	role = vcpu->arch.mmu->base_role;
 	role.level = level;
 	role.direct = direct;
 	if (role.direct)
 		role.cr4_pae = 0;
 	role.access = access;
-	if (!vcpu->arch.mmu.direct_map
-	    && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+	if (!vcpu->arch.mmu->direct_map
+	    && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
@@ -2396,17 +2396,17 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 			     struct kvm_vcpu *vcpu, u64 addr)
 {
 	iterator->addr = addr;
-	iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
-	iterator->level = vcpu->arch.mmu.shadow_root_level;
+	iterator->shadow_addr = vcpu->arch.mmu->root_hpa;
+	iterator->level = vcpu->arch.mmu->shadow_root_level;
 
 	if (iterator->level == PT64_ROOT_4LEVEL &&
-	    vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
-	    !vcpu->arch.mmu.direct_map)
+	    vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+	    !vcpu->arch.mmu->direct_map)
 		--iterator->level;
 
 	if (iterator->level == PT32E_ROOT_LEVEL) {
 		iterator->shadow_addr
-			= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+			= vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
 		iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
 		--iterator->level;
 		if (!iterator->shadow_addr)
@@ -2974,7 +2974,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
 	int emulate = 0;
 	gfn_t pseudo_gfn;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return 0;
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
@@ -3189,7 +3189,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	u64 spte = 0ull;
 	uint retry_count = 0;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return false;
 
 	if (!page_fault_can_be_fast(error_code))
@@ -3362,7 +3362,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 
 	if (!VALID_PAGE(mmu->root_hpa))
 		return;
@@ -3402,20 +3402,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
+	if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			spin_unlock(&vcpu->kvm->mmu_lock);
 			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, 0, 0,
-				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+				vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
-	} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+		vcpu->arch.mmu->root_hpa = __pa(sp->spt);
+	} else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
 		for (i = 0; i < 4; ++i) {
-			hpa_t root = vcpu->arch.mmu.pae_root[i];
+			hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 			MMU_WARN_ON(VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
@@ -3428,9 +3428,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			root = __pa(sp->spt);
 			++sp->root_count;
 			spin_unlock(&vcpu->kvm->mmu_lock);
-			vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+			vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
 		}
-		vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
 	} else
 		BUG();
 
@@ -3444,7 +3444,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn;
 	int i;
 
-	root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
+	root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
 
 	if (mmu_check_root(vcpu, root_gfn))
 		return 1;
@@ -3453,8 +3453,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * Do we shadow a long mode page table? If so we need to
 	 * write-protect the guests page table root.
 	 */
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 
 		MMU_WARN_ON(VALID_PAGE(root));
 
@@ -3464,11 +3464,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+				vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.mmu.root_hpa = root;
+		vcpu->arch.mmu->root_hpa = root;
 		return 0;
 	}
 
@@ -3478,17 +3478,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * the shadow page table may be a PAE or a long mode page table.
 	 */
 	pm_mask = PT_PRESENT_MASK;
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
+	if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		MMU_WARN_ON(VALID_PAGE(root));
-		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
-			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
+		if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
+			pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
 			if (!(pdptr & PT_PRESENT_MASK)) {
-				vcpu->arch.mmu.pae_root[i] = 0;
+				vcpu->arch.mmu->pae_root[i] = 0;
 				continue;
 			}
 			root_gfn = pdptr >> PAGE_SHIFT;
@@ -3506,16 +3506,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 
-		vcpu->arch.mmu.pae_root[i] = root | pm_mask;
+		vcpu->arch.mmu->pae_root[i] = root | pm_mask;
 	}
-	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+	vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
 
 	/*
 	 * If we shadow a 32 bit page table with a long mode page
 	 * table we enter this path.
 	 */
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
-		if (vcpu->arch.mmu.lm_root == NULL) {
+	if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+		if (vcpu->arch.mmu->lm_root == NULL) {
 			/*
 			 * The additional page necessary for this is only
 			 * allocated on demand.
@@ -3527,12 +3527,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 			if (lm_root == NULL)
 				return 1;
 
-			lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
+			lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
 
-			vcpu->arch.mmu.lm_root = lm_root;
+			vcpu->arch.mmu->lm_root = lm_root;
 		}
 
-		vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
+		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
 	}
 
 	return 0;
@@ -3540,7 +3540,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return mmu_alloc_direct_roots(vcpu);
 	else
 		return mmu_alloc_shadow_roots(vcpu);
@@ -3551,23 +3551,23 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_mmu_page *sp;
 
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 		sp = page_header(root);
 		mmu_sync_children(vcpu, sp);
 		kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 		return;
 	}
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
@@ -3646,7 +3646,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	int root, leaf;
 	bool reserved = false;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
@@ -3663,7 +3663,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		if (!is_shadow_present_pte(spte))
 			break;
 
-		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+		reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
 						    iterator.level);
 	}
 
@@ -3742,7 +3742,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
 	struct kvm_shadow_walk_iterator iterator;
 	u64 spte;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
 	walk_shadow_page_lockless_begin(vcpu);
@@ -3769,7 +3769,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	if (r)
 		return r;
 
-	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
 
 	return nonpaging_map(vcpu, gva & PAGE_MASK,
@@ -3782,8 +3782,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
 	arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 	arch.gfn = gfn;
-	arch.direct_map = vcpu->arch.mmu.direct_map;
-	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
+	arch.direct_map = vcpu->arch.mmu->direct_map;
+	arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 
 	return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
@@ -3888,7 +3888,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	int write = error_code & PFERR_WRITE_MASK;
 	bool map_writable;
 
-	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
@@ -3965,7 +3965,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 static void inject_page_fault(struct kvm_vcpu *vcpu,
 			      struct x86_exception *fault)
 {
-	vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
 }
 
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -4473,7 +4473,7 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
 
 	context->base_role.word = 0;
 	context->base_role.guest_mode = is_guest_mode(vcpu);
@@ -4523,7 +4523,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
 	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
 
 	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
 
@@ -4552,7 +4552,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
 
 	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
 
@@ -4580,7 +4580,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
 static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
 
 	kvm_init_shadow_mmu(vcpu);
 	context->set_cr3           = kvm_x86_ops->set_cr3;
@@ -4598,7 +4598,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
 	/*
-	 * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
+	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
 	 * L1's nested page tables (e.g. EPT12). The nested translation
 	 * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
 	 * L2's page tables as the first level of translation and L1's
@@ -4661,7 +4661,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	if (r)
 		goto out;
 	/* set_cr3() should ensure TLB has been flushed */
-	vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+	vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa);
 out:
 	return r;
 }
@@ -4670,7 +4670,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_roots(vcpu);
-	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4684,7 +4684,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
         }
 
 	++vcpu->kvm->stat.mmu_pte_updated;
-	vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
+	vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -4874,7 +4874,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
 			if (gentry &&
-			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
+			      !((sp->role.word ^ vcpu->arch.mmu->base_role.word)
 			      & mask.word) && rmap_can_add(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (need_remote_flush(entry, *spte))
@@ -4892,7 +4892,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return 0;
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
@@ -4928,10 +4928,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 {
 	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
-	bool direct = vcpu->arch.mmu.direct_map;
+	bool direct = vcpu->arch.mmu->direct_map;
 
 	/* With shadow page tables, fault_address contains a GVA or nGPA.  */
-	if (vcpu->arch.mmu.direct_map) {
+	if (vcpu->arch.mmu->direct_map) {
 		vcpu->arch.gpa_available = true;
 		vcpu->arch.gpa_val = cr2;
 	}
@@ -4946,8 +4946,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	}
 
 	if (r == RET_PF_INVALID) {
-		r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
-					      false);
+		r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+					       lower_32_bits(error_code),
+					       false);
 		WARN_ON(r == RET_PF_INVALID);
 	}
 
@@ -4963,7 +4964,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 * paging in both guests. If true, we simply unprotect the page
 	 * and resume the guest.
 	 */
-	if (vcpu->arch.mmu.direct_map &&
+	if (vcpu->arch.mmu->direct_map &&
 	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
 		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
 		return 1;
@@ -5000,7 +5001,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	vcpu->arch.mmu.invlpg(vcpu, gva);
+	vcpu->arch.mmu->invlpg(vcpu, gva);
 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	++vcpu->stat.invlpg;
 }
@@ -5020,8 +5021,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
-	free_page((unsigned long)vcpu->arch.mmu.pae_root);
-	free_page((unsigned long)vcpu->arch.mmu.lm_root);
+	free_page((unsigned long)vcpu->arch.mmu->pae_root);
+	free_page((unsigned long)vcpu->arch.mmu->lm_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5038,18 +5039,19 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	if (!page)
 		return -ENOMEM;
 
-	vcpu->arch.mmu.pae_root = page_address(page);
+	vcpu->arch.mmu->pae_root = page_address(page);
 	for (i = 0; i < 4; ++i)
-		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+		vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
 
 	return 0;
 }
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu.translate_gpa = translate_gpa;
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
+	vcpu->arch.mmu->root_hpa = INVALID_PAGE;
+	vcpu->arch.mmu->translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
 	return alloc_mmu_pages(vcpu);
@@ -5057,7 +5059,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 
 void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
-	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
 	init_kvm_mmu(vcpu);
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5b408c0ad612..8207c1146f4d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -79,7 +79,7 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
-	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+	if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
 		return 0;
 
 	return kvm_mmu_load(vcpu);
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1272861e77b9..abac7e208853 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -59,19 +59,19 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
 	int i;
 	struct kvm_mmu_page *sp;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 
 		sp = page_header(root);
-		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
+		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
 		return;
 	}
 
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
@@ -122,7 +122,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 	hpa =  pfn << PAGE_SHIFT;
 	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
 		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
-			     "ent %llxn", vcpu->arch.mmu.root_level, pfn,
+			     "ent %llxn", vcpu->arch.mmu->root_level, pfn,
 			     hpa, *sptep);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6288e9d7068e..e86dc5b9d6cd 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -158,14 +158,15 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+	if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
 		goto no_present;
 
 	if (!FNAME(is_present_gpte)(gpte))
 		goto no_present;
 
 	/* if accessed bit is not supported prefetch non accessed gpte */
-	if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
+	if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
+	    !(gpte & PT_GUEST_ACCESSED_MASK))
 		goto no_present;
 
 	return false;
@@ -480,7 +481,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 static int FNAME(walk_addr)(struct guest_walker *walker,
 			    struct kvm_vcpu *vcpu, gva_t addr, u32 access)
 {
-	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+	return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
 					access);
 }
 
@@ -509,7 +510,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 
 	gfn = gpte_to_gfn(gpte);
 	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
-	FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+	FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
 	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 			no_dirty_log && (pte_access & ACC_WRITE_MASK));
 	if (is_error_pfn(pfn))
@@ -604,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	direct_access = gw->pte_access;
 
-	top_level = vcpu->arch.mmu.root_level;
+	top_level = vcpu->arch.mmu->root_level;
 	if (top_level == PT32E_ROOT_LEVEL)
 		top_level = PT32_ROOT_LEVEL;
 	/*
@@ -616,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (FNAME(gpte_changed)(vcpu, gw, top_level))
 		goto out_gpte_changed;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		goto out_gpte_changed;
 
 	for (shadow_walk_init(&it, vcpu, addr);
@@ -871,7 +872,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	 */
 	mmu_topup_memory_caches(vcpu);
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
 		WARN_ON(1);
 		return;
 	}
@@ -1003,7 +1004,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
 		pte_access &= FNAME(gpte_access)(vcpu, gpte);
-		FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+		FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
 
 		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
 		      &nr_present))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f059a73f0fd0..3b3b9839c2b5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2920,18 +2920,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(mmu_is_nested(vcpu));
 	kvm_init_shadow_mmu(vcpu);
-	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
-	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
-	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
-	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
-	vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
-	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
+	vcpu->arch.mmu->set_cr3           = nested_svm_set_tdp_cr3;
+	vcpu->arch.mmu->get_cr3           = nested_svm_get_tdp_cr3;
+	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
+	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
+	vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 559a12b6184d..418277fd1b7c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4755,9 +4755,10 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
 				bool invalidate_gpa)
 {
 	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
-		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 			return;
-		ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
+		ept_sync_context(construct_eptp(vcpu,
+						vcpu->arch.mmu->root_hpa));
 	} else {
 		vpid_sync_context(vpid);
 	}
@@ -10577,9 +10578,9 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
 			nested_ept_ad_enabled(vcpu));
-	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
-	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
-	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+	vcpu->arch.mmu->set_cr3           = vmx_set_cr3;
+	vcpu->arch.mmu->get_cr3           = nested_ept_get_cr3;
+	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
 
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 	return 0;
@@ -10587,7 +10588,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa70205a..a98fafd6ac24 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -502,7 +502,7 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau
 	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
 		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
 	else
-		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+		vcpu->arch.mmu->inject_page_fault(vcpu, fault);
 
 	return fault->nested_page_fault;
 }
@@ -601,7 +601,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if ((pdpte[i] & PT_PRESENT_MASK) &&
 		    (pdpte[i] &
-		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
+		     vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -4700,7 +4700,7 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 
 	/* NPT walks are always user-walks */
 	access |= PFERR_USER_MASK;
-	t_gpa  = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
+	t_gpa  = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
 
 	return t_gpa;
 }
@@ -5780,7 +5780,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	if (emulation_type & EMULTYPE_NO_REEXECUTE)
 		return false;
 
-	if (!vcpu->arch.mmu.direct_map) {
+	if (!vcpu->arch.mmu->direct_map) {
 		/*
 		 * Write permission should be allowed since only
 		 * write access need to be emulated.
@@ -5813,7 +5813,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	kvm_release_pfn_clean(pfn);
 
 	/* The instructions are well-emulated on direct mmu. */
-	if (vcpu->arch.mmu.direct_map) {
+	if (vcpu->arch.mmu->direct_map) {
 		unsigned int indirect_shadow_pages;
 
 		spin_lock(&vcpu->kvm->mmu_lock);
@@ -5877,7 +5877,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	vcpu->arch.last_retry_eip = ctxt->eip;
 	vcpu->arch.last_retry_addr = cr2;
 
-	if (!vcpu->arch.mmu.direct_map)
+	if (!vcpu->arch.mmu->direct_map)
 		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
 	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -9171,7 +9171,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 {
 	int r;
 
-	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
+	if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
 	      work->wakeup_all)
 		return;
 
@@ -9179,11 +9179,11 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	if (unlikely(r))
 		return;
 
-	if (!vcpu->arch.mmu.direct_map &&
-	      work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
+	if (!vcpu->arch.mmu->direct_map &&
+	      work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
 		return;
 
-	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
+	vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
 }
 
 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
-- 
2.14.4



* [PATCH RFC 2/7] x86/kvm/mmu.c: set get_pdptr hook in kvm_init_shadow_ept_mmu()
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

kvm_init_shadow_ept_mmu() doesn't set the get_pdptr() hook; this is not a
problem only because the MMU context is already initialized and the hook
already points to kvm_pdptr_read(). As we intend to use a dedicated MMU for
shadow EPT, set this hook explicitly.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 914e4eea487a..10598dd396ed 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4570,6 +4570,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->direct_map = false;
 	context->base_role.ad_disabled = !accessed_dirty;
 	context->base_role.guest_mode = 1;
+	context->get_pdptr = kvm_pdptr_read;
+
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
-- 
2.14.4



* [PATCH RFC 3/7] x86/kvm/mmu.c: add kvm_mmu parameter to kvm_mmu_free_roots()
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

Add a parameter specifying which MMU's roots we want to free. This will
be used when the nested and non-nested MMUs for L1 are split.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/mmu.c              | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2aa3c43ca5e5..d819947d0a97 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1282,7 +1282,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 10598dd396ed..8c5e5664d830 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3358,11 +3358,10 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	struct kvm_mmu *mmu = vcpu->arch.mmu;
 
 	if (!VALID_PAGE(mmu->root_hpa))
 		return;
@@ -3954,7 +3953,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu, vcpu->arch.mmu);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4671,7 +4670,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu, vcpu->arch.mmu);
 	WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
-- 
2.14.4



* [PATCH RFC 4/7] x86/kvm/mmu: introduce guest_mmu
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

When EPT is used for a nested guest we need to re-init the MMU as a shadow
EPT MMU (nested_ept_init_mmu_context() does that). When we return from L2
to L1, kvm_mmu_reset_context() in nested_vmx_load_cr3() resets the MMU back
to normal TDP mode. Add a special 'guest_mmu' so we can avoid structure
re-initialization and MMU root reload in the future (this is a preparatory
patch).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/mmu.c              | 12 ++++++++----
 arch/x86/kvm/vmx.c              | 29 ++++++++++++++++++++---------
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d819947d0a97..3d0d26142619 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -526,6 +526,9 @@ struct kvm_vcpu_arch {
 	/* Non-nested MMU for L1 */
 	struct kvm_mmu root_mmu;
 
+	/* L1 MMU when running nested */
+	struct kvm_mmu guest_mmu;
+
 	/*
 	 * Paging state of an L2 guest (used for nested npt)
 	 *
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8c5e5664d830..76849eda1a5b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4670,8 +4670,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu, vcpu->arch.mmu);
-	WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu);
+	WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
+	WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -5051,8 +5053,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.mmu = &vcpu->arch.root_mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
-	vcpu->arch.mmu->root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu->translate_gpa = translate_gpa;
+	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
+	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
 	return alloc_mmu_pages(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 418277fd1b7c..5feb52991065 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8012,8 +8012,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
  * Free whatever needs to be freed from vmx->nested when L1 goes down, or
  * just stops using VMX.
  */
-static void free_nested(struct vcpu_vmx *vmx)
+static void free_nested(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
@@ -8045,6 +8047,8 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
+
 	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
@@ -8053,7 +8057,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 {
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
-	free_nested(to_vmx(vcpu));
+	free_nested(vcpu);
 	nested_vmx_succeed(vcpu);
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -8084,6 +8088,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
+
 	kvm_vcpu_write_guest(vcpu,
 			vmptr + offsetof(struct vmcs12, launch_state),
 			&zero, sizeof(zero));
@@ -8428,6 +8434,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		}
 
 		nested_release_vmcs12(vmx);
+
+		kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
 		/*
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
@@ -10244,12 +10252,12 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
  */
 static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       vcpu_load(vcpu);
-       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-       free_nested(vmx);
-       vcpu_put(vcpu);
+	vcpu_load(vcpu);
+	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+	free_nested(vcpu);
+	vcpu_put(vcpu);
 }
 
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -10573,7 +10581,9 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
 		return 1;
 
-	kvm_mmu_unload(vcpu);
+	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
+
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
@@ -10588,6 +10598,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
@@ -12444,7 +12455,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 		to_vmx(vcpu)->nested.nested_run_pending = 0;
 		nested_vmx_vmexit(vcpu, -1, 0, 0);
 	}
-	free_nested(to_vmx(vcpu));
+	free_nested(vcpu);
 }
 
 /*
-- 
2.14.4



* [PATCH RFC 5/7] x86/kvm/mmu: get rid of redundant kvm_mmu_setup()
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

kvm_mmu_setup() is used only once, in kvm_arch_vcpu_setup(), and
kvm_mmu_reset_context() is good enough there: we have no roots to free, so
the end result is not any different. init_kvm_mmu() can now be dropped too.

This is a preparatory change to optimizing kvm_mmu_reset_context().

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/mmu.c              | 17 +++--------------
 arch/x86/kvm/x86.c              |  2 +-
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3d0d26142619..2c0b493e09f7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1128,7 +1128,6 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 76849eda1a5b..fb6652643b15 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4633,8 +4633,10 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	update_last_nonleaf_level(vcpu, g_context);
 }
 
-static void init_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
+	kvm_mmu_unload(vcpu);
+
 	if (mmu_is_nested(vcpu))
 		init_kvm_nested_mmu(vcpu);
 	else if (tdp_enabled)
@@ -4642,12 +4644,6 @@ static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 	else
 		init_kvm_softmmu(vcpu);
 }
-
-void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-	kvm_mmu_unload(vcpu);
-	init_kvm_mmu(vcpu);
-}
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu)
@@ -5062,13 +5058,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 	return alloc_mmu_pages(vcpu);
 }
 
-void kvm_mmu_setup(struct kvm_vcpu *vcpu)
-{
-	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
-
-	init_kvm_mmu(vcpu);
-}
-
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 			struct kvm_memory_slot *slot,
 			struct kvm_page_track_notifier_node *node)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a98fafd6ac24..5510a7f50195 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8333,7 +8333,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvm_vcpu_mtrr_init(vcpu);
 	vcpu_load(vcpu);
 	kvm_vcpu_reset(vcpu, false);
-	kvm_mmu_setup(vcpu);
+	kvm_mmu_reset_context(vcpu);
 	vcpu_put(vcpu);
 	return 0;
 }
-- 
2.14.4



* [PATCH RFC 6/7] x86/kvm/nVMX: introduce scache for kvm_init_shadow_ept_mmu
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

MMU re-initialization is expensive; in particular, update_permission_bitmask()
and update_pkru_bitmask() are.

Cache the data used to set up the shadow EPT MMU and avoid a full re-init
when it is unchanged.
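
As a minimal standalone illustration of the caching pattern (invented
scache_example_* names, not the shadow_ept_mmu_update_needed() hunk below):
remember the inputs of the previous init and report whether any of them
changed, so the caller can return early.

#include <stdbool.h>

struct scache_example {
	unsigned int valid:1;
	unsigned int execonly:1;
	unsigned int ept_ad:1;
};

/* Returns true (full re-init needed) iff any cached input changed. */
static bool scache_example_update(struct scache_example *c,
				  bool execonly, bool ept_ad)
{
	bool changed = !c->valid;

	c->valid = 1;
	if (c->execonly != execonly) {
		c->execonly = execonly;
		changed = true;
	}
	if (c->ept_ad != ept_ad) {
		c->ept_ad = ept_ad;
		changed = true;
	}
	return changed;
}

The first call always reports a change (the cache starts out invalid), so the
very first kvm_init_shadow_ept_mmu() still performs the full initialization.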

kvm_mmu_free_roots() can be dropped from nested_ept_init_mmu_context()
as we always do kvm_mmu_reset_context() in nested_vmx_load_cr3().

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 15 +++++++++++++
 arch/x86/kvm/mmu.c              | 50 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c              |  5 +++--
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c0b493e09f7..fa73cf13c4d0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -325,6 +325,19 @@ struct rsvd_bits_validate {
 	u64 bad_mt_xwr;
 };
 
+/* Source data used to setup MMU */
+struct kvm_mmu_sdata_cache {
+	unsigned int valid:1;
+	unsigned int ept_ad:1;
+	unsigned int execonly:1;
+	unsigned int cr0_wp:1;
+	unsigned int cr4_pae:1;
+	unsigned int cr4_pse:1;
+	unsigned int cr4_pke:1;
+	unsigned int cr4_smap:1;
+	unsigned int cr4_smep:1;
+};
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -387,6 +400,8 @@ struct kvm_mmu {
 	bool nx;
 
 	u64 pdptrs[4]; /* pae */
+
+	struct kvm_mmu_sdata_cache scache;
 };
 
 enum pmc_type {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb6652643b15..eed1773453cd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4548,12 +4548,60 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
+static inline bool shadow_ept_mmu_update_needed(struct kvm_vcpu *vcpu,
+					bool execonly, bool accessed_dirty)
+{
+	struct kvm_mmu *context = vcpu->arch.mmu;
+	bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
+	bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
+	bool cr4_pke = kvm_read_cr4_bits(vcpu, X86_CR4_PKE) != 0;
+	bool cr0_wp = is_write_protection(vcpu);
+	bool cr4_pse = is_pse(vcpu);
+	bool res = false;
+
+	if (!context->scache.valid) {
+		res = true;
+		context->scache.valid = 1;
+	}
+	if (context->scache.ept_ad != accessed_dirty) {
+		context->scache.ept_ad = accessed_dirty;
+		res = true;
+	}
+	if (context->scache.execonly != execonly) {
+		res = true;
+		context->scache.execonly = execonly;
+	}
+	if (context->scache.cr4_smap != cr4_smap) {
+		res = true;
+		context->scache.cr4_smap = cr4_smap;
+	}
+	if (context->scache.cr4_smep != cr4_smep) {
+		res = true;
+		context->scache.cr4_smep = cr4_smep;
+	}
+	if (context->scache.cr4_pse != cr4_pse) {
+		res = true;
+		context->scache.cr4_pse = cr4_pse;
+	}
+	if (context->scache.cr4_pke != cr4_pke) {
+		res = true;
+		context->scache.cr4_pke = cr4_pke;
+	}
+	if (context->scache.cr0_wp != cr0_wp) {
+		res = true;
+		context->scache.cr0_wp = cr0_wp;
+	}
+
+	return res;
+}
+
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty)
 {
 	struct kvm_mmu *context = vcpu->arch.mmu;
 
-	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
+	if (!shadow_ept_mmu_update_needed(vcpu, execonly, accessed_dirty))
+		return;
 
 	context->shadow_root_level = PT64_ROOT_4LEVEL;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5feb52991065..3467665a75d5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10577,12 +10577,13 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 
 static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
+	unsigned long cr3 = nested_ept_get_cr3(vcpu);
+
 	WARN_ON(mmu_is_nested(vcpu));
-	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
+	if (!valid_ept_address(vcpu, cr3))
 		return 1;
 
 	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
-	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu);
 
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
-- 
2.14.4



* [PATCH RFC 7/7] x86/kvm/nVMX: optimize MMU switch from nested_vmx_load_cr3()
From: Vitaly Kuznetsov @ 2018-07-20 13:26 UTC (permalink / raw)
  To: kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Jim Mattson, Liran Alon, linux-kernel

Now we have everything in place to stop doing an MMU reload when we switch
from L1 to L2 and back. Generalize shadow_ept_mmu_update_needed(), making it
suitable for kvm_mmu_reset_context().

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 10 +++++-
 arch/x86/kvm/cpuid.c            |  2 +-
 arch/x86/kvm/mmu.c              | 74 +++++++++++++++++++++++++++++++----------
 arch/x86/kvm/svm.c              |  6 ++--
 arch/x86/kvm/vmx.c              |  7 ++--
 arch/x86/kvm/x86.c              | 14 ++++----
 6 files changed, 81 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fa73cf13c4d0..63ad28c40c1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -327,15 +327,22 @@ struct rsvd_bits_validate {
 
 /* Source data used to setup MMU */
 struct kvm_mmu_sdata_cache {
+	unsigned long cr3;
+
 	unsigned int valid:1;
+	unsigned int smm:1;
 	unsigned int ept_ad:1;
 	unsigned int execonly:1;
+	unsigned int cr0_pg:1;
 	unsigned int cr0_wp:1;
 	unsigned int cr4_pae:1;
 	unsigned int cr4_pse:1;
 	unsigned int cr4_pke:1;
 	unsigned int cr4_smap:1;
 	unsigned int cr4_smep:1;
+	unsigned int cr4_la57:1;
+	unsigned int efer_lma:1;
+	unsigned int efer_nx:1;
 };
 
 /*
@@ -1149,7 +1156,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
 		u64 acc_track_mask, u64 me_mask);
 
-void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu, bool check_if_unchanged);
+
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..b0efd08075d8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -142,7 +142,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, false);
 
 	kvm_pmu_refresh(vcpu);
 	return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eed1773453cd..9c08ee2e517a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4470,10 +4470,8 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	struct kvm_mmu *context = vcpu->arch.mmu;
-
 	context->base_role.word = 0;
 	context->base_role.guest_mode = is_guest_mode(vcpu);
 	context->base_role.smm = is_smm(vcpu);
@@ -4548,21 +4546,30 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-static inline bool shadow_ept_mmu_update_needed(struct kvm_vcpu *vcpu,
-					bool execonly, bool accessed_dirty)
+static inline bool mmu_update_needed(struct kvm_vcpu *vcpu,
+				     struct kvm_mmu *context,
+				     bool execonly, bool accessed_dirty)
 {
-	struct kvm_mmu *context = vcpu->arch.mmu;
 	bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
 	bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
 	bool cr4_pke = kvm_read_cr4_bits(vcpu, X86_CR4_PKE) != 0;
-	bool cr0_wp = is_write_protection(vcpu);
+	bool cr4_la57 = kvm_read_cr4_bits(vcpu, X86_CR4_LA57) != 0;
 	bool cr4_pse = is_pse(vcpu);
+	bool cr0_wp = is_write_protection(vcpu);
+	bool cr0_pg = is_paging(vcpu);
+	bool efer_nx = is_nx(vcpu);
+	bool efer_lma = is_long_mode(vcpu);
+	bool smm = is_smm(vcpu);
 	bool res = false;
 
 	if (!context->scache.valid) {
 		res = true;
 		context->scache.valid = 1;
 	}
+	if (context->scache.smm != smm) {
+		context->scache.smm = smm;
+		res = true;
+	}
 	if (context->scache.ept_ad != accessed_dirty) {
 		context->scache.ept_ad = accessed_dirty;
 		res = true;
@@ -4587,10 +4594,26 @@ static inline bool shadow_ept_mmu_update_needed(struct kvm_vcpu *vcpu,
 		res = true;
 		context->scache.cr4_pke = cr4_pke;
 	}
+	if (context->scache.cr4_la57 != cr4_la57) {
+		res = true;
+		context->scache.cr4_la57 = cr4_la57;
+	}
 	if (context->scache.cr0_wp != cr0_wp) {
 		res = true;
 		context->scache.cr0_wp = cr0_wp;
 	}
+	if (context->scache.cr0_pg != cr0_pg) {
+		res = true;
+		context->scache.cr0_pg = cr0_pg;
+	}
+	if (context->scache.efer_nx != efer_nx) {
+		res = true;
+		context->scache.efer_nx = efer_nx;
+	}
+	if (context->scache.efer_lma != efer_lma) {
+		res = true;
+		context->scache.efer_lma = efer_lma;
+	}
 
 	return res;
 }
@@ -4600,7 +4623,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 {
 	struct kvm_mmu *context = vcpu->arch.mmu;
 
-	if (!shadow_ept_mmu_update_needed(vcpu, execonly, accessed_dirty))
+	if (!mmu_update_needed(vcpu, context, execonly, accessed_dirty))
 		return;
 
 	context->shadow_root_level = PT64_ROOT_4LEVEL;
@@ -4627,10 +4650,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
-static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	struct kvm_mmu *context = vcpu->arch.mmu;
-
 	kvm_init_shadow_mmu(vcpu);
 	context->set_cr3           = kvm_x86_ops->set_cr3;
 	context->get_cr3           = get_cr3;
@@ -4638,10 +4659,9 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 	context->inject_page_fault = kvm_inject_page_fault;
 }
 
-static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *g_context)
 {
-	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
-
 	g_context->get_cr3           = get_cr3;
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
@@ -4681,16 +4701,34 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	update_last_nonleaf_level(vcpu, g_context);
 }
 
-void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu, bool check_if_unchanged)
 {
+	struct kvm_mmu *context = mmu_is_nested(vcpu) ?
+		&vcpu->arch.nested_mmu : vcpu->arch.mmu;
+
+	if (check_if_unchanged && !mmu_update_needed(vcpu, context, 0, 0) &&
+	    context->scache.cr3 == vcpu->arch.mmu->get_cr3(vcpu)) {
+		/*
+		 * Nothing changed but TLB should always be flushed, e.g. when
+		 * we switch between L1 and L2.
+		 */
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		return;
+	} else if (!check_if_unchanged) {
+		context->scache.valid = 0;
+	}
+
 	kvm_mmu_unload(vcpu);
 
 	if (mmu_is_nested(vcpu))
-		init_kvm_nested_mmu(vcpu);
+		init_kvm_nested_mmu(vcpu, context);
 	else if (tdp_enabled)
-		init_kvm_tdp_mmu(vcpu);
+		init_kvm_tdp_mmu(vcpu, context);
 	else
-		init_kvm_softmmu(vcpu);
+		init_kvm_softmmu(vcpu, context);
+
+	if (check_if_unchanged)
+		context->scache.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3b3b9839c2b5..6c1db96971c0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1574,7 +1574,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	 * It also updates the guest-visible cr0 value.
 	 */
 	svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
-	kvm_mmu_reset_context(&svm->vcpu);
+	kvm_mmu_reset_context(&svm->vcpu, false);
 
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
@@ -3380,7 +3380,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_svm_unmap(page);
 
 	nested_svm_uninit_mmu_context(&svm->vcpu);
-	kvm_mmu_reset_context(&svm->vcpu);
+	kvm_mmu_reset_context(&svm->vcpu, false);
 	kvm_mmu_load(&svm->vcpu);
 
 	return 0;
@@ -3466,7 +3466,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
 		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
 
 	/* Guest paging mode is active - reset mmu */
-	kvm_mmu_reset_context(&svm->vcpu);
+	kvm_mmu_reset_context(&svm->vcpu, false);
 
 	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3467665a75d5..a85ed004a4ba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4696,7 +4696,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
 
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, false);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -11123,6 +11123,8 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
 static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
 			       u32 *entry_failure_code)
 {
+	bool mmu_reset_force = false;
+
 	if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
 		if (!nested_cr3_valid(vcpu, cr3)) {
 			*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -11135,6 +11137,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 		 */
 		if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
 		    !nested_ept) {
+			mmu_reset_force = true;
 			if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
 				*entry_failure_code = ENTRY_FAIL_PDPTE;
 				return 1;
@@ -11145,7 +11148,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 		__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 	}
 
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, !mmu_reset_force);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5510a7f50195..3288a7e303ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -695,7 +695,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 
 	if ((cr0 ^ old_cr0) & update_bits)
-		kvm_mmu_reset_context(vcpu);
+		kvm_mmu_reset_context(vcpu, false);
 
 	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
 	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
@@ -836,7 +836,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
 	if (((cr4 ^ old_cr4) & pdptr_bits) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
-		kvm_mmu_reset_context(vcpu);
+		kvm_mmu_reset_context(vcpu, false);
 
 	if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
 		kvm_update_cpuid(vcpu);
@@ -1162,7 +1162,7 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
-		kvm_mmu_reset_context(vcpu);
+		kvm_mmu_reset_context(vcpu, false);
 
 	return 0;
 }
@@ -5898,7 +5898,7 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	}
 
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, false);
 }
 
 static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
@@ -7156,7 +7156,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 		kvm_x86_ops->set_efer(vcpu, 0);
 
 	kvm_update_cpuid(vcpu);
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, false);
 }
 
 static void process_smi(struct kvm_vcpu *vcpu)
@@ -8058,7 +8058,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	if (mmu_reset_needed)
-		kvm_mmu_reset_context(vcpu);
+		kvm_mmu_reset_context(vcpu, false);
 
 	max_bits = KVM_NR_INTERRUPTS;
 	pending_vec = find_first_bit(
@@ -8333,7 +8333,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvm_vcpu_mtrr_init(vcpu);
 	vcpu_load(vcpu);
 	kvm_vcpu_reset(vcpu, false);
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_reset_context(vcpu, false);
 	vcpu_put(vcpu);
 	return 0;
 }
-- 
2.14.4
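
A minimal, self-contained sketch of the fast-path idea applied above: cache
the paging-related register bits that feed MMU setup and re-initialize the
MMU context only when one of them actually changed; otherwise a TLB flush is
enough. Struct layout, names and the whole-struct compare below are
illustrative simplifications of the scache logic in the diff, not kernel code.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Register bits that influence MMU setup (mirrors what mmu_update_needed() tracks). */
struct mmu_state {
	bool smm, ept_ad, execonly;
	bool cr4_smep, cr4_smap, cr4_pke, cr4_la57;
	bool cr0_wp, cr0_pg, efer_nx, efer_lma;
};

/* Cached copy plus a validity flag, in the spirit of context->scache. */
struct scache {
	bool valid;
	struct mmu_state s;
};

/*
 * Decide whether the MMU context must be re-initialized.  The patch compares
 * and refreshes each bit individually; a whole-struct compare is equivalent
 * for the yes/no decision and keeps this sketch short.
 */
static bool update_needed(struct scache *c, const struct mmu_state *cur)
{
	bool res = !c->valid || memcmp(&c->s, cur, sizeof(*cur)) != 0;

	c->s = *cur;
	c->valid = true;
	return res;
}

int main(void)
{
	struct scache cache = { 0 };
	struct mmu_state cur = { .cr0_wp = true, .efer_lma = true };

	/* Cache is invalid on the first call: full re-init. */
	puts(update_needed(&cache, &cur) ? "re-init MMU" : "TLB flush only");
	/* Same state again (e.g. a plain L1<->L2 switch): skip the re-init. */
	puts(update_needed(&cache, &cur) ? "re-init MMU" : "TLB flush only");
	/* CR0.WP toggled: permission bitmasks must be recomputed. */
	cur.cr0_wp = false;
	puts(update_needed(&cache, &cur) ? "re-init MMU" : "TLB flush only");
	return 0;
}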


^ permalink raw reply related	[flat|nested] 8+ messages in thread
