* [RFC PATCH v2 03/31] KVM: arm/arm64: Remove unused params in mmu functions
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

The stage2_flush_xxx functions take a pointer to the kvm struct as their
first parameter, but it is never used. Clean this up before modifying the
mmu code for nested virtualization support.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 virt/kvm/arm/mmu.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)
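
[Editor's note, not part of the patch: a minimal sketch of the walk
after this cleanup, assembled from the hunks below. Each walker now
takes only the table pointer and the address range; the unused struct
kvm pointer drops out of the whole chain. The function name here is
illustrative only -- in the tree this is stage2_flush_memslot().]

static void stage2_flush_range_sketch(struct kvm *kvm, phys_addr_t addr,
				      phys_addr_t end)
{
	pgd_t *pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	phys_addr_t next;

	do {
		next = stage2_pgd_addr_end(addr, end);
		/* descends through stage2_flush_pmds() and
		 * stage2_flush_ptes(), none of which take kvm any more */
		stage2_flush_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}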

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index f2d5b6c..0a5f5ca 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -315,8 +315,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
-			      phys_addr_t addr, phys_addr_t end)
+static void stage2_flush_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
 {
 	pte_t *pte;
 
@@ -327,8 +326,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
-			      phys_addr_t addr, phys_addr_t end)
+static void stage2_flush_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
 {
 	pmd_t *pmd;
 	phys_addr_t next;
@@ -340,13 +338,12 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 			if (pmd_thp_or_huge(*pmd))
 				kvm_flush_dcache_pmd(*pmd);
 			else
-				stage2_flush_ptes(kvm, pmd, addr, next);
+				stage2_flush_ptes(pmd, addr, next);
 		}
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
-			      phys_addr_t addr, phys_addr_t end)
+static void stage2_flush_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 {
 	pud_t *pud;
 	phys_addr_t next;
@@ -358,7 +355,7 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 			if (stage2_pud_huge(*pud))
 				kvm_flush_dcache_pud(*pud);
 			else
-				stage2_flush_pmds(kvm, pud, addr, next);
+				stage2_flush_pmds(pud, addr, next);
 		}
 	} while (pud++, addr = next, addr != end);
 }
@@ -374,7 +371,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		next = stage2_pgd_addr_end(addr, end);
-		stage2_flush_puds(kvm, pgd, addr, next);
+		stage2_flush_puds(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
-- 
1.9.1

* [RFC PATCH v2 04/31] KVM: arm/arm64: Abstract stage-2 MMU state into a separate structure
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

Abstract stage-2 MMU state into a separate structure and change all
callers referring to page tables, VMIDs, and the VTTBR to use this new
indirection.

This is about to become very handy when using shadow stage-2 page
tables.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h    |   7 +-
 arch/arm/include/asm/kvm_host.h   |  26 +++++---
 arch/arm/kvm/hyp/switch.c         |   5 +-
 arch/arm/kvm/hyp/tlb.c            |  18 ++---
 arch/arm64/include/asm/kvm_asm.h  |   7 +-
 arch/arm64/include/asm/kvm_host.h |  10 ++-
 arch/arm64/kvm/hyp/switch.c       |   5 +-
 arch/arm64/kvm/hyp/tlb.c          |  38 +++++------
 virt/kvm/arm/arm.c                |  34 +++++-----
 virt/kvm/arm/mmu.c                | 137 +++++++++++++++++++++-----------------
 10 files changed, 163 insertions(+), 124 deletions(-)
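
[Editor's note, not part of the patch: the shape of the new
indirection, condensed from the hunks below. Stage-2 state moves out
of struct kvm_arch into its own structure, each VM embeds one
instance, and each vcpu caches a pointer to the context the hardware
should load on the next world switch.]

struct kvm_s2_mmu {
	u64    vmid_gen;	/* VMID generation for this context */
	u32    vmid;
	pgd_t *pgd;		/* stage-2 page table */
	u64    vttbr;		/* VTTBR value for the pgd and vmid above */
};

struct kvm_arch {
	struct kvm_s2_mmu mmu;	/* stage-2 paging state for the VM */
	/* ... */
};

struct kvm_vcpu_arch {
	/* ... */
	struct kvm_s2_mmu *hw_mmu;	/* loaded into VTTBR on next switch */
};

[With a single stage-2 context per VM this is a pure indirection; the
per-vcpu hw_mmu pointer is the hook that shadow stage-2 support can
later repoint at a different context.]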

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 14d68a4..71b7255 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -57,6 +57,7 @@
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
+struct kvm_s2_mmu;
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
@@ -64,9 +65,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7e9e6c8..78d826e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -53,9 +53,21 @@
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
-struct kvm_arch {
-	/* VTTBR value associated with below pgd and vmid */
+struct kvm_s2_mmu {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* Stage-2 page table */
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+};
+
+struct kvm_arch {
+	/* Stage 2 paging state for the VM */
+	struct kvm_s2_mmu mmu;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -65,13 +77,6 @@ struct kvm_arch {
 	 * here.
 	 */
 
-	/* The VMID generation used for the virt. memory system */
-	u64    vmid_gen;
-	u32    vmid;
-
-	/* Stage-2 page table */
-	pgd_t *pgd;
-
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 	int max_vcpus;
@@ -185,6 +190,9 @@ struct kvm_vcpu_arch {
 
 	/* Detect first run of a vcpu */
 	bool has_run_once;
+
+	/* Stage 2 paging state used by the hardware on next switch */
+	struct kvm_s2_mmu *hw_mmu;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ebd2dd4..4814671 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -75,8 +75,9 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+
+	write_sysreg(mmu->vttbr, VTTBR);
 	write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 6d810af..56f0a49 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -34,13 +34,13 @@
  * As v7 does not support flushing per IPA, just nuke the whole TLB
  * instead, ignoring the ipa value.
  */
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
+void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	mmu = kern_hyp_va(mmu);
+	write_sysreg(mmu->vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALLIS);
@@ -50,17 +50,17 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 	write_sysreg(0, VTTBR);
 }
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa)
 {
-	__kvm_tlb_flush_vmid(kvm);
+	__kvm_tlb_flush_vmid(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
 {
-	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
-
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	mmu = kern_hyp_va(mmu);
+	write_sysreg(mmu->vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALL);
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 26a64d0..ff6244f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -44,6 +44,7 @@
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
+struct kvm_s2_mmu;
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
@@ -51,9 +52,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 373235c..e7e9f70 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,7 +50,7 @@
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
-struct kvm_arch {
+struct kvm_s2_mmu {
 	/* The VMID generation used for the virt. memory system */
 	u64    vmid_gen;
 	u32    vmid;
@@ -61,6 +61,11 @@ struct kvm_arch {
 
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+};
+
+struct kvm_arch {
+	/* Stage 2 paging state for the VM */
+	struct kvm_s2_mmu mmu;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -329,6 +334,9 @@ struct kvm_vcpu_arch {
 
 	/* Detect first run of a vcpu */
 	bool has_run_once;
+
+	/* Stage 2 paging state used by the hardware on next switch */
+	struct kvm_s2_mmu *hw_mmu;
 };
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 2a64a5c..8b1b3e9 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -181,8 +181,9 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+
+	write_sysreg(mmu->vttbr, vttbr_el2);
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 73464a9..0897678 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,7 +18,7 @@
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm_s2_mmu *mmu)
 {
 	u64 val;
 
@@ -29,16 +29,16 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	write_sysreg(mmu->vttbr, vttbr_el2);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
 	isb();
 }
 
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm_s2_mmu *mmu)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	write_sysreg(mmu->vttbr, vttbr_el2);
 	isb();
 }
 
@@ -47,7 +47,7 @@ static hyp_alternate_select(__tlb_switch_to_guest,
 			    __tlb_switch_to_guest_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm_s2_mmu *mmu)
 {
 	/*
 	 * We're done with the TLB operation, let's restore the host's
@@ -57,7 +57,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 }
 
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm_s2_mmu *mmu)
 {
 	write_sysreg(0, vttbr_el2);
 }
@@ -67,13 +67,14 @@ static hyp_alternate_select(__tlb_switch_to_host,
 			    __tlb_switch_to_host_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -116,36 +117,35 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	if (!has_vhe() && icache_is_vpipt())
 		__flush_icache_all();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
+void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
 {
-	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
-
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 0ff2997..bee27bb 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -138,7 +138,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_vgic_early_init(kvm);
 
 	/* Mark the initial VMID generation invalid */
-	kvm->arch.vmid_gen = 0;
+	kvm->arch.mmu.vmid_gen = 0;
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->arch.max_vcpus = vgic_present ?
@@ -334,6 +334,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	kvm_arm_reset_debug_ptr(vcpu);
 
+	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+
 	return kvm_vgic_vcpu_init(vcpu);
 }
 
@@ -348,7 +350,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 * over-invalidation doesn't affect correctness.
 	 */
 	if (*last_ran != vcpu->vcpu_id) {
-		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, &vcpu->kvm->arch.mmu);
 		*last_ran = vcpu->vcpu_id;
 	}
 
@@ -442,25 +444,26 @@ void force_vm_exit(const cpumask_t *mask)
  * VMID for the new generation, we must flush necessary caches and TLBs on all
  * CPUs.
  */
-static bool need_new_vmid_gen(struct kvm *kvm)
+static bool need_new_vmid_gen(struct kvm_s2_mmu *mmu)
 {
-	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+	return unlikely(mmu->vmid_gen != atomic64_read(&kvm_vmid_gen));
 }
 
 /**
  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
- * @kvm	The guest that we are about to run
+ * @kvm: The guest that we are about to run
+ * @mmu: The stage-2 translation context to update
  *
  * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
  * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
  * caches and TLBs.
  */
-static void update_vttbr(struct kvm *kvm)
+static void update_vttbr(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 {
 	phys_addr_t pgd_phys;
 	u64 vmid;
 
-	if (!need_new_vmid_gen(kvm))
+	if (!need_new_vmid_gen(mmu))
 		return;
 
 	spin_lock(&kvm_vmid_lock);
@@ -470,7 +473,7 @@ static void update_vttbr(struct kvm *kvm)
 	 * already allocated a valid vmid for this vm, then this vcpu should
 	 * use the same vmid.
 	 */
-	if (!need_new_vmid_gen(kvm)) {
+	if (!need_new_vmid_gen(mmu)) {
 		spin_unlock(&kvm_vmid_lock);
 		return;
 	}
@@ -494,16 +497,17 @@ static void update_vttbr(struct kvm *kvm)
 		kvm_call_hyp(__kvm_flush_vm_context);
 	}
 
-	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
-	kvm->arch.vmid = kvm_next_vmid;
+	mmu->vmid_gen = atomic64_read(&kvm_vmid_gen);
+	mmu->vmid = kvm_next_vmid;
 	kvm_next_vmid++;
 	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(mmu->pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
-	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-	kvm->arch.vttbr = pgd_phys | vmid;
+	vmid = ((u64)(mmu->vmid) << VTTBR_VMID_SHIFT) &
+	       VTTBR_VMID_MASK(kvm_vmid_bits);
+	mmu->vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
@@ -638,7 +642,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		cond_resched();
 
-		update_vttbr(vcpu->kvm);
+		update_vttbr(vcpu->kvm, vcpu->arch.hw_mmu);
 
 		check_vcpu_requests(vcpu);
 
@@ -677,7 +681,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+		if (ret <= 0 || need_new_vmid_gen(vcpu->arch.hw_mmu) ||
 		    kvm_request_pending(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			local_irq_enable();
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 0a5f5ca..d8ea1f9 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -64,9 +64,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 }
 
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa);
 }
 
 /*
@@ -103,13 +103,14 @@ static bool kvm_is_device_pfn(unsigned long pfn)
  * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
  * pages in the range dirty.
  */
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr,
+				pmd_t *pmd)
 {
 	if (!pmd_thp_or_huge(*pmd))
 		return;
 
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	put_page(virt_to_page(pmd));
 }
 
@@ -145,31 +146,34 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu,
+				   pgd_t *pgd, phys_addr_t addr)
 {
 	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
 	stage2_pgd_clear(pgd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	stage2_pud_free(pud_table);
 	put_page(virt_to_page(pgd));
 }
 
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu,
+				   pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
 	VM_BUG_ON(stage2_pud_huge(*pud));
 	stage2_pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	stage2_pmd_free(pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu,
+				   pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
 	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
@@ -194,7 +198,7 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  */
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t start_addr = addr;
@@ -206,7 +210,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 			pte_t old_pte = *pte;
 
 			kvm_set_pte(pte, __pte(0));
-			kvm_tlb_flush_vmid_ipa(kvm, addr);
+			kvm_tlb_flush_vmid_ipa(mmu, addr);
 
 			/* No need to invalidate the cache for device mappings */
 			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -217,10 +221,10 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	if (stage2_pte_table_empty(start_pte))
-		clear_stage2_pmd_entry(kvm, pmd, start_addr);
+		clear_stage2_pmd_entry(mmu, pmd, start_addr);
 }
 
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
@@ -234,22 +238,22 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr);
 
 				kvm_flush_dcache_pmd(old_pmd);
 
 				put_page(virt_to_page(pmd));
 			} else {
-				unmap_stage2_ptes(kvm, pmd, addr, next);
+				unmap_stage2_ptes(mmu, pmd, addr, next);
 			}
 		}
 	} while (pmd++, addr = next, addr != end);
 
 	if (stage2_pmd_table_empty(start_pmd))
-		clear_stage2_pud_entry(kvm, pud, start_addr);
+		clear_stage2_pud_entry(mmu, pud, start_addr);
 }
 
-static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
@@ -263,17 +267,17 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 				pud_t old_pud = *pud;
 
 				stage2_pud_clear(pud);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr);
 				kvm_flush_dcache_pud(old_pud);
 				put_page(virt_to_page(pud));
 			} else {
-				unmap_stage2_pmds(kvm, pud, addr, next);
+				unmap_stage2_pmds(mmu, pud, addr, next);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
 
 	if (stage2_pud_table_empty(start_pud))
-		clear_stage2_pgd_entry(kvm, pgd, start_addr);
+		clear_stage2_pgd_entry(mmu, pgd, start_addr);
 }
 
 /**
@@ -292,20 +296,21 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	assert_spin_locked(&kvm->mmu_lock);
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Make sure the page table is still active, as another thread
 		 * could have possibly freed the page table, while we released
 		 * the lock.
 		 */
-		if (!READ_ONCE(kvm->arch.pgd))
+		if (!READ_ONCE(mmu->pgd))
 			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (!stage2_pgd_none(*pgd))
-			unmap_stage2_puds(kvm, pgd, addr, next);
+			unmap_stage2_puds(mmu, pgd, addr, next);
 		/*
 		 * If the range is too large, release the kvm->mmu_lock
 		 * to prevent starvation and lockup detector warnings.
@@ -360,7 +365,7 @@ static void stage2_flush_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 	} while (pud++, addr = next, addr != end);
 }
 
-static void stage2_flush_memslot(struct kvm *kvm,
+static void stage2_flush_memslot(struct kvm_s2_mmu *mmu,
 				 struct kvm_memory_slot *memslot)
 {
 	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
@@ -368,7 +373,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		next = stage2_pgd_addr_end(addr, end);
 		stage2_flush_puds(pgd, addr, next);
@@ -393,7 +398,7 @@ static void stage2_flush_vm(struct kvm *kvm)
 
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots)
-		stage2_flush_memslot(kvm, memslot);
+		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -745,8 +750,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
 	pgd_t *pgd;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
-	if (kvm->arch.pgd != NULL) {
+	if (mmu->pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
@@ -756,7 +762,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	kvm->arch.pgd = pgd;
+	mmu->pgd = pgd;
+
 	return 0;
 }
 
@@ -831,19 +838,20 @@ void stage2_unmap_vm(struct kvm *kvm)
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
  *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * Walks the level-1 page table pointed to by kvm->arch.mmu.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
 	void *pgd = NULL;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	spin_lock(&kvm->mmu_lock);
-	if (kvm->arch.pgd) {
+	if (mmu->pgd) {
 		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-		pgd = READ_ONCE(kvm->arch.pgd);
-		kvm->arch.pgd = NULL;
+		pgd = READ_ONCE(mmu->pgd);
+		mmu->pgd = NULL;
 	}
 	spin_unlock(&kvm->mmu_lock);
 
@@ -852,13 +860,14 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		free_pages_exact(pgd, S2_PGD_SIZE);
 }
 
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -870,13 +879,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pud_offset(pgd, addr);
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pud = stage2_get_pud(kvm, cache, addr);
+	pud = stage2_get_pud(mmu, cache, addr);
 	if (!pud)
 		return NULL;
 
@@ -891,12 +901,13 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pmd_offset(pud, addr);
 }
 
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+			       struct kvm_mmu_memory_cache
 			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
 {
 	pmd_t *pmd, old_pmd;
 
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	VM_BUG_ON(!pmd);
 
 	/*
@@ -913,7 +924,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	old_pmd = *pmd;
 	if (pmd_present(old_pmd)) {
 		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pmd));
 	}
@@ -922,7 +933,8 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	return 0;
 }
 
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+			  struct kvm_mmu_memory_cache *cache,
 			  phys_addr_t addr, const pte_t *new_pte,
 			  unsigned long flags)
 {
@@ -934,7 +946,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	VM_BUG_ON(logging_active && !cache);
 
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	if (!pmd) {
 		/*
 		 * Ignore calls from kvm_set_spte_hva for unallocated
@@ -948,7 +960,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	 * allocate page.
 	 */
 	if (logging_active)
-		stage2_dissolve_pmd(kvm, addr, pmd);
+		stage2_dissolve_pmd(mmu, addr, pmd);
 
 	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
@@ -968,7 +980,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	old_pte = *pte;
 	if (pte_present(old_pte)) {
 		kvm_set_pte(pte, __pte(0));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pte));
 	}
@@ -1028,7 +1040,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+		ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
 						KVM_S2PTE_FLAG_IS_IOMAP);
 		spin_unlock(&kvm->mmu_lock);
 		if (ret)
@@ -1166,12 +1178,13 @@ static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
  * @addr:	Start address of range
  * @end:	End address of range
  */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			    phys_addr_t addr, phys_addr_t end)
 {
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1183,7 +1196,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		 * the lock.
 		 */
 		cond_resched_lock(&kvm->mmu_lock);
-		if (!READ_ONCE(kvm->arch.pgd))
+		if (!READ_ONCE(mmu->pgd))
 			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (stage2_pgd_present(*pgd))
@@ -1212,7 +1225,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1236,7 +1249,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 }
 
 /*
@@ -1292,6 +1305,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pgprot_t mem_type = PAGE_S2;
 	bool logging_active = memslot_is_logging(memslot);
 	unsigned long flags = 0;
+	struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -1388,7 +1402,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
-		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+		ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
@@ -1398,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			mark_page_dirty(kvm, gfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+		ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
 	}
 
 out_unlock:
@@ -1426,7 +1440,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	pmd = stage2_get_pmd(vcpu->arch.hw_mmu, NULL, fault_ipa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
@@ -1594,7 +1608,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	unsigned long end = hva + PAGE_SIZE;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva(hva);
@@ -1605,7 +1619,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva_range(start, end);
@@ -1625,7 +1639,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * therefore stage2_set_pte() never needs to clear out a huge PMD
 	 * through this calling path.
 	 */
-	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
 	return 0;
 }
 
@@ -1635,7 +1649,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	unsigned long end = hva + PAGE_SIZE;
 	pte_t stage2_pte;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return;
 
 	trace_kvm_set_spte_hva(hva);
@@ -1649,7 +1663,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1669,7 +1683,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1898,9 +1912,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
-		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		unmap_stage2_range(kvm, mem->guest_phys_addr,
+				   mem->memory_size);
 	else
-		stage2_flush_memslot(kvm, memslot);
+		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 	spin_unlock(&kvm->mmu_lock);
 out:
 	up_read(&current->mm->mmap_sem);
-- 
1.9.1

 		free_pages_exact(pgd, S2_PGD_SIZE);
 }
 
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -870,13 +879,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pud_offset(pgd, addr);
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pud = stage2_get_pud(kvm, cache, addr);
+	pud = stage2_get_pud(mmu, cache, addr);
 	if (!pud)
 		return NULL;
 
@@ -891,12 +901,13 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pmd_offset(pud, addr);
 }
 
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+			       struct kvm_mmu_memory_cache
 			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
 {
 	pmd_t *pmd, old_pmd;
 
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	VM_BUG_ON(!pmd);
 
 	/*
@@ -913,7 +924,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	old_pmd = *pmd;
 	if (pmd_present(old_pmd)) {
 		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pmd));
 	}
@@ -922,7 +933,8 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	return 0;
 }
 
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+			  struct kvm_mmu_memory_cache *cache,
 			  phys_addr_t addr, const pte_t *new_pte,
 			  unsigned long flags)
 {
@@ -934,7 +946,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	VM_BUG_ON(logging_active && !cache);
 
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	if (!pmd) {
 		/*
 		 * Ignore calls from kvm_set_spte_hva for unallocated
@@ -948,7 +960,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	 * allocate page.
 	 */
 	if (logging_active)
-		stage2_dissolve_pmd(kvm, addr, pmd);
+		stage2_dissolve_pmd(mmu, addr, pmd);
 
 	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
@@ -968,7 +980,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	old_pte = *pte;
 	if (pte_present(old_pte)) {
 		kvm_set_pte(pte, __pte(0));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pte));
 	}
@@ -1028,7 +1040,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+		ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
 						KVM_S2PTE_FLAG_IS_IOMAP);
 		spin_unlock(&kvm->mmu_lock);
 		if (ret)
@@ -1166,12 +1178,13 @@ static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
  * @addr:	Start address of range
  * @end:	End address of range
  */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			    phys_addr_t addr, phys_addr_t end)
 {
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1183,7 +1196,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		 * the lock.
 		 */
 		cond_resched_lock(&kvm->mmu_lock);
-		if (!READ_ONCE(kvm->arch.pgd))
+		if (!READ_ONCE(mmu->pgd))
 			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (stage2_pgd_present(*pgd))
@@ -1212,7 +1225,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1236,7 +1249,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 }
 
 /*
@@ -1292,6 +1305,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pgprot_t mem_type = PAGE_S2;
 	bool logging_active = memslot_is_logging(memslot);
 	unsigned long flags = 0;
+	struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -1388,7 +1402,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
-		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+		ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
@@ -1398,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			mark_page_dirty(kvm, gfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+		ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
 	}
 
 out_unlock:
@@ -1426,7 +1440,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	pmd = stage2_get_pmd(vcpu->arch.hw_mmu, NULL, fault_ipa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
@@ -1594,7 +1608,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	unsigned long end = hva + PAGE_SIZE;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva(hva);
@@ -1605,7 +1619,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva_range(start, end);
@@ -1625,7 +1639,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * therefore stage2_set_pte() never needs to clear out a huge PMD
 	 * through this calling path.
 	 */
-	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
 	return 0;
 }
 
@@ -1635,7 +1649,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	unsigned long end = hva + PAGE_SIZE;
 	pte_t stage2_pte;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return;
 
 	trace_kvm_set_spte_hva(hva);
@@ -1649,7 +1663,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1669,7 +1683,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1898,9 +1912,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
-		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		unmap_stage2_range(kvm, mem->guest_phys_addr,
+				   mem->memory_size);
 	else
-		stage2_flush_memslot(kvm, memslot);
+		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 	spin_unlock(&kvm->mmu_lock);
 out:
 	up_read(&current->mm->mmap_sem);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 04/31] KVM: arm/arm64: Abstract stage-2 MMU state into a separate structure
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: linux-arm-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

Abstract stage-2 MMU state into a separate structure and change all
callers referring to page tables, VMIDs, and the VTTBR to use this new
indirection.

This is about to become very handy when using shadow stage-2 page
tables.
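
For illustration, a caller that used to reach the stage-2 state through
struct kvm_arch directly now goes through the new indirection; a
minimal sketch of the resulting access pattern (field names as
introduced in the diff below):

	/* before: stage-2 state embedded directly in struct kvm_arch */
	pgd = kvm->arch.pgd + stage2_pgd_index(addr);

	/* after: the same state reached via the kvm_s2_mmu structure */
	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
	pgd = mmu->pgd + stage2_pgd_index(addr);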

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h    |   7 +-
 arch/arm/include/asm/kvm_host.h   |  26 +++++---
 arch/arm/kvm/hyp/switch.c         |   5 +-
 arch/arm/kvm/hyp/tlb.c            |  18 ++---
 arch/arm64/include/asm/kvm_asm.h  |   7 +-
 arch/arm64/include/asm/kvm_host.h |  10 ++-
 arch/arm64/kvm/hyp/switch.c       |   5 +-
 arch/arm64/kvm/hyp/tlb.c          |  38 +++++------
 virt/kvm/arm/arm.c                |  34 +++++-----
 virt/kvm/arm/mmu.c                | 137 +++++++++++++++++++++-----------------
 10 files changed, 163 insertions(+), 124 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 14d68a4..71b7255 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -57,6 +57,7 @@
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
+struct kvm_s2_mmu;
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
@@ -64,9 +65,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7e9e6c8..78d826e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -53,9 +53,21 @@
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
-struct kvm_arch {
-	/* VTTBR value associated with below pgd and vmid */
+struct kvm_s2_mmu {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* Stage-2 page table */
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+};
+
+struct kvm_arch {
+	/* Stage 2 paging state for the VM */
+	struct kvm_s2_mmu mmu;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -65,13 +77,6 @@ struct kvm_arch {
 	 * here.
 	 */
 
-	/* The VMID generation used for the virt. memory system */
-	u64    vmid_gen;
-	u32    vmid;
-
-	/* Stage-2 page table */
-	pgd_t *pgd;
-
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 	int max_vcpus;
@@ -185,6 +190,9 @@ struct kvm_vcpu_arch {
 
 	/* Detect first run of a vcpu */
 	bool has_run_once;
+
+	/* Stage 2 paging state used by the hardware on next switch */
+	struct kvm_s2_mmu *hw_mmu;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ebd2dd4..4814671 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -75,8 +75,9 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+
+	write_sysreg(mmu->vttbr, VTTBR);
 	write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 6d810af..56f0a49 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -34,13 +34,13 @@
  * As v7 does not support flushing per IPA, just nuke the whole TLB
  * instead, ignoring the ipa value.
  */
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
+void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	mmu = kern_hyp_va(mmu);
+	write_sysreg(mmu->vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALLIS);
@@ -50,17 +50,17 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 	write_sysreg(0, VTTBR);
 }
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa)
 {
-	__kvm_tlb_flush_vmid(kvm);
+	__kvm_tlb_flush_vmid(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
 {
-	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
-
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	mmu = kern_hyp_va(mmu);
+	write_sysreg(mmu->vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALL);
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 26a64d0..ff6244f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -44,6 +44,7 @@
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
+struct kvm_s2_mmu;
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
@@ -51,9 +52,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 373235c..e7e9f70 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,7 +50,7 @@
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
-struct kvm_arch {
+struct kvm_s2_mmu {
 	/* The VMID generation used for the virt. memory system */
 	u64    vmid_gen;
 	u32    vmid;
@@ -61,6 +61,11 @@ struct kvm_arch {
 
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+};
+
+struct kvm_arch {
+	/* Stage 2 paging state for the VM */
+	struct kvm_s2_mmu mmu;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -329,6 +334,9 @@ struct kvm_vcpu_arch {
 
 	/* Detect first run of a vcpu */
 	bool has_run_once;
+
+	/* Stage 2 paging state used by the hardware on next switch */
+	struct kvm_s2_mmu *hw_mmu;
 };
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 2a64a5c..8b1b3e9 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -181,8 +181,9 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+
+	write_sysreg(mmu->vttbr, vttbr_el2);
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 73464a9..0897678 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,7 +18,7 @@
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm_s2_mmu *mmu)
 {
 	u64 val;
 
@@ -29,16 +29,16 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	write_sysreg(mmu->vttbr, vttbr_el2);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
 	isb();
 }
 
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm_s2_mmu *mmu)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	write_sysreg(mmu->vttbr, vttbr_el2);
 	isb();
 }
 
@@ -47,7 +47,7 @@ static hyp_alternate_select(__tlb_switch_to_guest,
 			    __tlb_switch_to_guest_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm_s2_mmu *mmu)
 {
 	/*
 	 * We're done with the TLB operation, let's restore the host's
@@ -57,7 +57,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 }
 
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm_s2_mmu *mmu)
 {
 	write_sysreg(0, vttbr_el2);
 }
@@ -67,13 +67,14 @@ static hyp_alternate_select(__tlb_switch_to_host,
 			    __tlb_switch_to_host_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -116,36 +117,35 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	if (!has_vhe() && icache_is_vpipt())
 		__flush_icache_all();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
+void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	kvm = kern_hyp_va(kvm);
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
 {
-	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
-
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest()(kvm);
+	mmu = kern_hyp_va(mmu);
+	__tlb_switch_to_guest()(mmu);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	__tlb_switch_to_host()(kvm);
+	__tlb_switch_to_host()(mmu);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 0ff2997..bee27bb 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -138,7 +138,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_vgic_early_init(kvm);
 
 	/* Mark the initial VMID generation invalid */
-	kvm->arch.vmid_gen = 0;
+	kvm->arch.mmu.vmid_gen = 0;
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->arch.max_vcpus = vgic_present ?
@@ -334,6 +334,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	kvm_arm_reset_debug_ptr(vcpu);
 
+	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+
 	return kvm_vgic_vcpu_init(vcpu);
 }
 
@@ -348,7 +350,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 * over-invalidation doesn't affect correctness.
 	 */
 	if (*last_ran != vcpu->vcpu_id) {
-		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, &vcpu->kvm->arch.mmu);
 		*last_ran = vcpu->vcpu_id;
 	}
 
@@ -442,25 +444,26 @@ void force_vm_exit(const cpumask_t *mask)
  * VMID for the new generation, we must flush necessary caches and TLBs on all
  * CPUs.
  */
-static bool need_new_vmid_gen(struct kvm *kvm)
+static bool need_new_vmid_gen(struct kvm_s2_mmu *mmu)
 {
-	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+	return unlikely(mmu->vmid_gen != atomic64_read(&kvm_vmid_gen));
 }
 
 /**
  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
- * @kvm	The guest that we are about to run
+ * @kvm: The guest that we are about to run
+ * @mmu: The stage-2 translation context to update
  *
  * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
  * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
  * caches and TLBs.
  */
-static void update_vttbr(struct kvm *kvm)
+static void update_vttbr(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 {
 	phys_addr_t pgd_phys;
 	u64 vmid;
 
-	if (!need_new_vmid_gen(kvm))
+	if (!need_new_vmid_gen(mmu))
 		return;
 
 	spin_lock(&kvm_vmid_lock);
@@ -470,7 +473,7 @@ static void update_vttbr(struct kvm *kvm)
 	 * already allocated a valid vmid for this vm, then this vcpu should
 	 * use the same vmid.
 	 */
-	if (!need_new_vmid_gen(kvm)) {
+	if (!need_new_vmid_gen(mmu)) {
 		spin_unlock(&kvm_vmid_lock);
 		return;
 	}
@@ -494,16 +497,17 @@ static void update_vttbr(struct kvm *kvm)
 		kvm_call_hyp(__kvm_flush_vm_context);
 	}
 
-	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
-	kvm->arch.vmid = kvm_next_vmid;
+	mmu->vmid_gen = atomic64_read(&kvm_vmid_gen);
+	mmu->vmid = kvm_next_vmid;
 	kvm_next_vmid++;
 	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(mmu->pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
-	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-	kvm->arch.vttbr = pgd_phys | vmid;
+	vmid = ((u64)(mmu->vmid) << VTTBR_VMID_SHIFT) &
+	       VTTBR_VMID_MASK(kvm_vmid_bits);
+	mmu->vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
@@ -638,7 +642,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		cond_resched();
 
-		update_vttbr(vcpu->kvm);
+		update_vttbr(vcpu->kvm, vcpu->arch.hw_mmu);
 
 		check_vcpu_requests(vcpu);
 
@@ -677,7 +681,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+		if (ret <= 0 || need_new_vmid_gen(vcpu->arch.hw_mmu) ||
 		    kvm_request_pending(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			local_irq_enable();
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 0a5f5ca..d8ea1f9 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -64,9 +64,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 }
 
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa);
 }
 
 /*
@@ -103,13 +103,14 @@ static bool kvm_is_device_pfn(unsigned long pfn)
  * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
  * pages in the range dirty.
  */
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr,
+				pmd_t *pmd)
 {
 	if (!pmd_thp_or_huge(*pmd))
 		return;
 
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	put_page(virt_to_page(pmd));
 }
 
@@ -145,31 +146,34 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu,
+				   pgd_t *pgd, phys_addr_t addr)
 {
 	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
 	stage2_pgd_clear(pgd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	stage2_pud_free(pud_table);
 	put_page(virt_to_page(pgd));
 }
 
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu,
+				   pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
 	VM_BUG_ON(stage2_pud_huge(*pud));
 	stage2_pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	stage2_pmd_free(pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu,
+				   pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
 	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr);
 	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
@@ -194,7 +198,7 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  */
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t start_addr = addr;
@@ -206,7 +210,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 			pte_t old_pte = *pte;
 
 			kvm_set_pte(pte, __pte(0));
-			kvm_tlb_flush_vmid_ipa(kvm, addr);
+			kvm_tlb_flush_vmid_ipa(mmu, addr);
 
 			/* No need to invalidate the cache for device mappings */
 			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -217,10 +221,10 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	if (stage2_pte_table_empty(start_pte))
-		clear_stage2_pmd_entry(kvm, pmd, start_addr);
+		clear_stage2_pmd_entry(mmu, pmd, start_addr);
 }
 
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
@@ -234,22 +238,22 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr);
 
 				kvm_flush_dcache_pmd(old_pmd);
 
 				put_page(virt_to_page(pmd));
 			} else {
-				unmap_stage2_ptes(kvm, pmd, addr, next);
+				unmap_stage2_ptes(mmu, pmd, addr, next);
 			}
 		}
 	} while (pmd++, addr = next, addr != end);
 
 	if (stage2_pmd_table_empty(start_pmd))
-		clear_stage2_pud_entry(kvm, pud, start_addr);
+		clear_stage2_pud_entry(mmu, pud, start_addr);
 }
 
-static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
@@ -263,17 +267,17 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 				pud_t old_pud = *pud;
 
 				stage2_pud_clear(pud);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr);
 				kvm_flush_dcache_pud(old_pud);
 				put_page(virt_to_page(pud));
 			} else {
-				unmap_stage2_pmds(kvm, pud, addr, next);
+				unmap_stage2_pmds(mmu, pud, addr, next);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
 
 	if (stage2_pud_table_empty(start_pud))
-		clear_stage2_pgd_entry(kvm, pgd, start_addr);
+		clear_stage2_pgd_entry(mmu, pgd, start_addr);
 }
 
 /**
@@ -292,20 +296,21 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	assert_spin_locked(&kvm->mmu_lock);
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Make sure the page table is still active, as another thread
 		 * could have possibly freed the page table, while we released
 		 * the lock.
 		 */
-		if (!READ_ONCE(kvm->arch.pgd))
+		if (!READ_ONCE(mmu->pgd))
 			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (!stage2_pgd_none(*pgd))
-			unmap_stage2_puds(kvm, pgd, addr, next);
+			unmap_stage2_puds(mmu, pgd, addr, next);
 		/*
 		 * If the range is too large, release the kvm->mmu_lock
 		 * to prevent starvation and lockup detector warnings.
@@ -360,7 +365,7 @@ static void stage2_flush_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 	} while (pud++, addr = next, addr != end);
 }
 
-static void stage2_flush_memslot(struct kvm *kvm,
+static void stage2_flush_memslot(struct kvm_s2_mmu *mmu,
 				 struct kvm_memory_slot *memslot)
 {
 	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
@@ -368,7 +373,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		next = stage2_pgd_addr_end(addr, end);
 		stage2_flush_puds(pgd, addr, next);
@@ -393,7 +398,7 @@ static void stage2_flush_vm(struct kvm *kvm)
 
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots)
-		stage2_flush_memslot(kvm, memslot);
+		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -745,8 +750,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
 	pgd_t *pgd;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
-	if (kvm->arch.pgd != NULL) {
+	if (mmu->pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
@@ -756,7 +762,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	kvm->arch.pgd = pgd;
+	mmu->pgd = pgd;
+
 	return 0;
 }
 
@@ -831,19 +838,20 @@ void stage2_unmap_vm(struct kvm *kvm)
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
  *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * Walks the level-1 page table pointed to by kvm->arch.mmu.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
 	void *pgd = NULL;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	spin_lock(&kvm->mmu_lock);
-	if (kvm->arch.pgd) {
+	if (mmu->pgd) {
 		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-		pgd = READ_ONCE(kvm->arch.pgd);
-		kvm->arch.pgd = NULL;
+		pgd = READ_ONCE(mmu->pgd);
+		mmu->pgd = NULL;
 	}
 	spin_unlock(&kvm->mmu_lock);
 
@@ -852,13 +860,14 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		free_pages_exact(pgd, S2_PGD_SIZE);
 }
 
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -870,13 +879,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pud_offset(pgd, addr);
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu,
+			     struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pud = stage2_get_pud(kvm, cache, addr);
+	pud = stage2_get_pud(mmu, cache, addr);
 	if (!pud)
 		return NULL;
 
@@ -891,12 +901,13 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	return stage2_pmd_offset(pud, addr);
 }
 
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+			       struct kvm_mmu_memory_cache
 			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
 {
 	pmd_t *pmd, old_pmd;
 
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	VM_BUG_ON(!pmd);
 
 	/*
@@ -913,7 +924,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	old_pmd = *pmd;
 	if (pmd_present(old_pmd)) {
 		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pmd));
 	}
@@ -922,7 +933,8 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	return 0;
 }
 
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+			  struct kvm_mmu_memory_cache *cache,
 			  phys_addr_t addr, const pte_t *new_pte,
 			  unsigned long flags)
 {
@@ -934,7 +946,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	VM_BUG_ON(logging_active && !cache);
 
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
-	pmd = stage2_get_pmd(kvm, cache, addr);
+	pmd = stage2_get_pmd(mmu, cache, addr);
 	if (!pmd) {
 		/*
 		 * Ignore calls from kvm_set_spte_hva for unallocated
@@ -948,7 +960,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	 * allocate page.
 	 */
 	if (logging_active)
-		stage2_dissolve_pmd(kvm, addr, pmd);
+		stage2_dissolve_pmd(mmu, addr, pmd);
 
 	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
@@ -968,7 +980,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	old_pte = *pte;
 	if (pte_present(old_pte)) {
 		kvm_set_pte(pte, __pte(0));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr);
 	} else {
 		get_page(virt_to_page(pte));
 	}
@@ -1028,7 +1040,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+		ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
 						KVM_S2PTE_FLAG_IS_IOMAP);
 		spin_unlock(&kvm->mmu_lock);
 		if (ret)
@@ -1166,12 +1178,13 @@ static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
  * @addr:	Start address of range
  * @end:	End address of range
  */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			    phys_addr_t addr, phys_addr_t end)
 {
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = mmu->pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1183,7 +1196,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		 * the lock.
 		 */
 		cond_resched_lock(&kvm->mmu_lock);
-		if (!READ_ONCE(kvm->arch.pgd))
+		if (!READ_ONCE(mmu->pgd))
 			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (stage2_pgd_present(*pgd))
@@ -1212,7 +1225,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1236,7 +1249,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 }
 
 /*
@@ -1292,6 +1305,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pgprot_t mem_type = PAGE_S2;
 	bool logging_active = memslot_is_logging(memslot);
 	unsigned long flags = 0;
+	struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -1388,7 +1402,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
-		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+		ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
@@ -1398,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			mark_page_dirty(kvm, gfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+		ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
 	}
 
 out_unlock:
@@ -1426,7 +1440,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	pmd = stage2_get_pmd(vcpu->arch.hw_mmu, NULL, fault_ipa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
@@ -1594,7 +1608,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	unsigned long end = hva + PAGE_SIZE;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva(hva);
@@ -1605,7 +1619,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return 0;
 
 	trace_kvm_unmap_hva_range(start, end);
@@ -1625,7 +1639,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * therefore stage2_set_pte() never needs to clear out a huge PMD
 	 * through this calling path.
 	 */
-	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
 	return 0;
 }
 
@@ -1635,7 +1649,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	unsigned long end = hva + PAGE_SIZE;
 	pte_t stage2_pte;
 
-	if (!kvm->arch.pgd)
+	if (!kvm->arch.mmu.pgd)
 		return;
 
 	trace_kvm_set_spte_hva(hva);
@@ -1649,7 +1663,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1669,7 +1683,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	pte_t *pte;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	pmd = stage2_get_pmd(&kvm->arch.mmu, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
@@ -1898,9 +1912,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
-		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		unmap_stage2_range(kvm, mem->guest_phys_addr,
+				   mem->memory_size);
 	else
-		stage2_flush_memslot(kvm, memslot);
+		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 	spin_unlock(&kvm->mmu_lock);
 out:
 	up_read(&current->mm->mmap_sem);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 05/31] KVM: arm/arm64: Support mmu for the virtual EL2 execution
  2017-10-03  3:10 ` Jintack Lim
@ 2017-10-03  3:10   ` Jintack Lim
  -1 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

When running a guest hypervisor in virtual EL2, the translation context
has to be separate from the rest of the system, including the guest
EL1/0 translation regime, so we allocate a separate VMID for this mode.

Considering that we now have two different vttbr values due to the
separate VMIDs, it is racy to keep a single vttbr value in the
kvm_s2_mmu structure and share it between multiple vcpus. So remove the
shared vttbr field and set up a per-vcpu hw_vttbr field instead.

Hypercalls that flush the TLB now take the vttbr as a parameter instead
of the mmu, since the mmu structure no longer holds a vttbr.
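
For illustration, with these changes the hardware VTTBR can be derived
on each switch from whichever VMID is active for the current context
(guest EL1/0 vs. virtual EL2); a minimal sketch combining the helpers
added below, vcpu_get_active_vmid() and kvm_get_vttbr():

	struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
	struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu);

	/* pgd base address | VMID field, as composed by kvm_get_vttbr() */
	vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu);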

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    Fixed a bug where hw_vttbr was not initialized correctly in
    kvm_arch_vcpu_init(), at which point the vmid is not allocated yet. This
    prevented the guest from booting on 32-bit arm; on aarch64, hw_vttbr is
    set on each entry, so it was fine.

 arch/arm/include/asm/kvm_asm.h       |  6 ++--
 arch/arm/include/asm/kvm_emulate.h   |  4 +++
 arch/arm/include/asm/kvm_host.h      | 14 +++++---
 arch/arm/include/asm/kvm_mmu.h       | 11 ++++++
 arch/arm/kvm/hyp/switch.c            |  4 +--
 arch/arm/kvm/hyp/tlb.c               | 15 ++++-----
 arch/arm64/include/asm/kvm_asm.h     |  6 ++--
 arch/arm64/include/asm/kvm_emulate.h |  8 +++++
 arch/arm64/include/asm/kvm_host.h    | 14 +++++---
 arch/arm64/include/asm/kvm_mmu.h     | 11 ++++++
 arch/arm64/kvm/hyp/switch.c          |  4 +--
 arch/arm64/kvm/hyp/tlb.c             | 34 +++++++++----------
 virt/kvm/arm/arm.c                   | 65 +++++++++++++++++++++---------------
 virt/kvm/arm/mmu.c                   |  9 +++--
 14 files changed, 128 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 71b7255..23a79bd 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -65,9 +65,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_vmid_ipa(u64 vttbr, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(u64 vttbr);
+extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 29a4dec..24a3fbf 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -293,4 +293,8 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	}
 }
 
+static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->kvm->arch.mmu.vmid;
+}
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 78d826e..33ccdbe 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -53,16 +53,18 @@
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
-struct kvm_s2_mmu {
+struct kvm_s2_vmid {
 	/* The VMID generation used for the virt. memory system */
 	u64    vmid_gen;
 	u32    vmid;
+};
+
+struct kvm_s2_mmu {
+	struct kvm_s2_vmid vmid;
+	struct kvm_s2_vmid el2_vmid;
 
 	/* Stage-2 page table */
 	pgd_t *pgd;
-
-	/* VTTBR value associated with above pgd and vmid */
-	u64    vttbr;
 };
 
 struct kvm_arch {
@@ -193,6 +195,9 @@ struct kvm_vcpu_arch {
 
 	/* Stage 2 paging state used by the hardware on next switch */
 	struct kvm_s2_mmu *hw_mmu;
+
+	/* VTTBR value used by the hardware on next switch */
+	u64 hw_vttbr;
 };
 
 struct kvm_vm_stat {
@@ -239,6 +244,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 {
 }
 
+unsigned int get_kvm_vmid_bits(void);
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index fa6f217..86fdc70 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,17 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
+				struct kvm_s2_mmu *mmu)
+{
+	u64 vmid_field, baddr;
+
+	baddr = virt_to_phys(mmu->pgd);
+	vmid_field = ((u64)vmid->vmid << VTTBR_VMID_SHIFT) &
+		VTTBR_VMID_MASK(get_kvm_vmid_bits());
+	return baddr | vmid_field;
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 4814671..4798e39 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -75,9 +75,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
-
-	write_sysreg(mmu->vttbr, VTTBR);
+	write_sysreg(vcpu->arch.hw_vttbr, VTTBR);
 	write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 56f0a49..562ad0b 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -34,13 +34,12 @@
  * As v7 does not support flushing per IPA, just nuke the whole TLB
  * instead, ignoring the ipa value.
  */
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+void __hyp_text __kvm_tlb_flush_vmid(u64 vttbr)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	mmu = kern_hyp_va(mmu);
-	write_sysreg(mmu->vttbr, VTTBR);
+	write_sysreg(vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALLIS);
@@ -50,17 +49,15 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 	write_sysreg(0, VTTBR);
 }
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
-					 phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(u64 vttbr, phys_addr_t ipa)
 {
-	__kvm_tlb_flush_vmid(mmu);
+	__kvm_tlb_flush_vmid(vttbr);
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __hyp_text __kvm_tlb_flush_local_vmid(u64 vttbr)
 {
 	/* Switch to requested VMID */
-	mmu = kern_hyp_va(mmu);
-	write_sysreg(mmu->vttbr, VTTBR);
+	write_sysreg(vttbr, VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALL);
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ff6244f..e492749 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -52,9 +52,9 @@
 extern char __kvm_hyp_vector[];
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_vmid_ipa(u64 vttbr, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(u64 vttbr);
+extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4776bfc..71a3a04 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -385,4 +385,12 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	return data;		/* Leave LE untouched */
 }
 
+static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(is_hyp_ctxt(vcpu)))
+		return &vcpu->kvm->arch.mmu.el2_vmid;
+
+	return &vcpu->kvm->arch.mmu.vmid;
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e7e9f70..a7edf0e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,17 +50,19 @@
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
-struct kvm_s2_mmu {
+struct kvm_s2_vmid {
 	/* The VMID generation used for the virt. memory system */
 	u64    vmid_gen;
 	u32    vmid;
+};
+
+struct kvm_s2_mmu {
+	struct kvm_s2_vmid vmid;
+	struct kvm_s2_vmid el2_vmid;
 
 	/* 1-level 2nd stage table and lock */
 	spinlock_t pgd_lock;
 	pgd_t *pgd;
-
-	/* VTTBR value associated with above pgd and vmid */
-	u64    vttbr;
 };
 
 struct kvm_arch {
@@ -337,6 +339,9 @@ struct kvm_vcpu_arch {
 
 	/* Stage 2 paging state used by the hardware on next switch */
 	struct kvm_s2_mmu *hw_mmu;
+
+	/* VTTBR value used by the hardware on next switch */
+	u64 hw_vttbr;
 };
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
@@ -394,6 +399,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 {
 }
 
+unsigned int get_kvm_vmid_bits(void);
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a89cc22..21c0299 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -312,5 +312,16 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
+				struct kvm_s2_mmu *mmu)
+{
+	u64 vmid_field, baddr;
+
+	baddr = virt_to_phys(mmu->pgd);
+	vmid_field = ((u64)vmid->vmid << VTTBR_VMID_SHIFT) &
+		VTTBR_VMID_MASK(get_kvm_vmid_bits());
+	return baddr | vmid_field;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 8b1b3e9..3626e76 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -181,9 +181,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s2_mmu *mmu = kern_hyp_va(vcpu->arch.hw_mmu);
-
-	write_sysreg(mmu->vttbr, vttbr_el2);
+	write_sysreg(vcpu->arch.hw_vttbr, vttbr_el2);
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 0897678..680b960 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,7 +18,7 @@
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm_s2_mmu *mmu)
+static void __hyp_text __tlb_switch_to_guest_vhe(u64 vttbr)
 {
 	u64 val;
 
@@ -29,16 +29,16 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm_s2_mmu *mmu)
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 */
-	write_sysreg(mmu->vttbr, vttbr_el2);
+	write_sysreg(vttbr, vttbr_el2);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
 	isb();
 }
 
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm_s2_mmu *mmu)
+static void __hyp_text __tlb_switch_to_guest_nvhe(u64 vttbr)
 {
-	write_sysreg(mmu->vttbr, vttbr_el2);
+	write_sysreg(vttbr, vttbr_el2);
 	isb();
 }
 
@@ -47,7 +47,7 @@ static hyp_alternate_select(__tlb_switch_to_guest,
 			    __tlb_switch_to_guest_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm_s2_mmu *mmu)
+static void __hyp_text __tlb_switch_to_host_vhe(void)
 {
 	/*
 	 * We're done with the TLB operation, let's restore the host's
@@ -57,7 +57,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm_s2_mmu *mmu)
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 }
 
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm_s2_mmu *mmu)
+static void __hyp_text __tlb_switch_to_host_nvhe(void)
 {
 	write_sysreg(0, vttbr_el2);
 }
@@ -67,14 +67,12 @@ static hyp_alternate_select(__tlb_switch_to_host,
 			    __tlb_switch_to_host_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
-					 phys_addr_t ipa)
+void __hyp_text __kvm_tlb_flush_vmid_ipa(u64 vttbr, phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	mmu = kern_hyp_va(mmu);
-	__tlb_switch_to_guest()(mmu);
+	__tlb_switch_to_guest()(vttbr);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -117,35 +115,33 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
 	if (!has_vhe() && icache_is_vpipt())
 		__flush_icache_all();
 
-	__tlb_switch_to_host()(mmu);
+	__tlb_switch_to_host()();
 }
 
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+void __hyp_text __kvm_tlb_flush_vmid(u64 vttbr)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	mmu = kern_hyp_va(mmu);
-	__tlb_switch_to_guest()(mmu);
+	__tlb_switch_to_guest()(vttbr);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	__tlb_switch_to_host()(mmu);
+	__tlb_switch_to_host()();
 }
 
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __hyp_text __kvm_tlb_flush_local_vmid(u64 vttbr)
 {
 	/* Switch to requested VMID */
-	mmu = kern_hyp_va(mmu);
-	__tlb_switch_to_guest()(mmu);
+	__tlb_switch_to_guest()(vttbr);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	__tlb_switch_to_host()(mmu);
+	__tlb_switch_to_host()();
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index bee27bb..41e0654 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -75,6 +75,11 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
 }
 
+unsigned int get_kvm_vmid_bits(void)
+{
+	return kvm_vmid_bits;
+}
+
 /**
  * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
  * Must be called from non-preemptible context
@@ -138,7 +143,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_vgic_early_init(kvm);
 
 	/* Mark the initial VMID generation invalid */
-	kvm->arch.mmu.vmid_gen = 0;
+	kvm->arch.mmu.vmid.vmid_gen = 0;
+	kvm->arch.mmu.el2_vmid.vmid_gen = 0;
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->arch.max_vcpus = vgic_present ?
@@ -325,6 +331,8 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
@@ -334,7 +342,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	kvm_arm_reset_debug_ptr(vcpu);
 
-	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+	vcpu->arch.hw_mmu = mmu;
 
 	return kvm_vgic_vcpu_init(vcpu);
 }
@@ -350,7 +358,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 * over-invalidation doesn't affect correctness.
 	 */
 	if (*last_ran != vcpu->vcpu_id) {
-		kvm_call_hyp(__kvm_tlb_flush_local_vmid, &vcpu->kvm->arch.mmu);
+		struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+		u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vttbr);
 		*last_ran = vcpu->vcpu_id;
 	}
 
@@ -434,36 +445,38 @@ void force_vm_exit(const cpumask_t *mask)
 
 /**
  * need_new_vmid_gen - check that the VMID is still valid
- * @kvm: The VM's VMID to check
+ * @vmid: The VMID to check
  *
  * return true if there is a new generation of VMIDs being used
  *
- * The hardware supports only 256 values with the value zero reserved for the
- * host, so we check if an assigned value belongs to a previous generation,
- * which which requires us to assign a new value. If we're the first to use a
- * VMID for the new generation, we must flush necessary caches and TLBs on all
- * CPUs.
+ * The hardware supports a limited set of values with the value zero reserved
+ * for the host, so we check if an assigned value belongs to a previous
+ * generation, which requires us to assign a new value. If we're the
+ * first to use a VMID for the new generation, we must flush necessary caches
+ * and TLBs on all CPUs.
  */
-static bool need_new_vmid_gen(struct kvm_s2_mmu *mmu)
+static bool need_new_vmid_gen(struct kvm_s2_vmid *vmid)
 {
-	return unlikely(mmu->vmid_gen != atomic64_read(&kvm_vmid_gen));
+	return unlikely(vmid->vmid_gen != atomic64_read(&kvm_vmid_gen));
 }
 
 /**
  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
  * @kvm: The guest that we are about to run
- * @mmu: The stage-2 translation context to update
+ * @vmid: The stage-2 VMID information struct
  *
  * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
  * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
  * caches and TLBs.
  */
-static void update_vttbr(struct kvm *kvm, struct kvm_s2_mmu *mmu)
+static void update_vttbr(struct kvm *kvm, struct kvm_s2_vmid *vmid)
 {
-	phys_addr_t pgd_phys;
-	u64 vmid;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+	struct kvm_vcpu *vcpu;
+	int i = 0;
+	u64 new_vttbr;
 
-	if (!need_new_vmid_gen(mmu))
+	if (!need_new_vmid_gen(vmid))
 		return;
 
 	spin_lock(&kvm_vmid_lock);
@@ -473,7 +486,7 @@ static void update_vttbr(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 	 * already allocated a valid vmid for this vm, then this vcpu should
 	 * use the same vmid.
 	 */
-	if (!need_new_vmid_gen(mmu)) {
+	if (!need_new_vmid_gen(vmid)) {
 		spin_unlock(&kvm_vmid_lock);
 		return;
 	}
@@ -497,17 +510,15 @@ static void update_vttbr(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 		kvm_call_hyp(__kvm_flush_vm_context);
 	}
 
-	mmu->vmid_gen = atomic64_read(&kvm_vmid_gen);
-	mmu->vmid = kvm_next_vmid;
+	vmid->vmid_gen = atomic64_read(&kvm_vmid_gen);
+	vmid->vmid = kvm_next_vmid;
 	kvm_next_vmid++;
 	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
-	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(mmu->pgd);
-	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
-	vmid = ((u64)(mmu->vmid) << VTTBR_VMID_SHIFT) &
-	       VTTBR_VMID_MASK(kvm_vmid_bits);
-	mmu->vttbr = pgd_phys | vmid;
+	new_vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		vcpu->arch.hw_vttbr = new_vttbr;
+	}
 
 	spin_unlock(&kvm_vmid_lock);
 }
@@ -642,7 +653,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		cond_resched();
 
-		update_vttbr(vcpu->kvm, vcpu->arch.hw_mmu);
+		update_vttbr(vcpu->kvm, vcpu_get_active_vmid(vcpu));
 
 		check_vcpu_requests(vcpu);
 
@@ -681,7 +692,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->arch.hw_mmu) ||
+		if (ret <= 0 || need_new_vmid_gen(vcpu_get_active_vmid(vcpu)) ||
 		    kvm_request_pending(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			local_irq_enable();
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index d8ea1f9..0edcf23 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -61,12 +61,17 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
  */
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+	u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+
+	kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
 }
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa);
+	u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, vttbr, ipa);
 }
 
 /*
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
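
Aside: kvm_get_vttbr() above simply packs the stage-2 pgd base address
and the VMID into a single register value. A minimal standalone sketch
of that composition, assuming the arm64 layout where the VMID field
starts at bit 48 (VTTBR_VMID_SHIFT):

	#include <stdint.h>

	#define VTTBR_VMID_SHIFT	48
	#define VTTBR_VMID_MASK(bits)	(((1ULL << (bits)) - 1) << VTTBR_VMID_SHIFT)

	/* Compose a VTTBR value from a pgd base address and a VMID. */
	static uint64_t make_vttbr(uint64_t pgd_phys, uint32_t vmid,
				   unsigned int vmid_bits)
	{
		uint64_t vmid_field = ((uint64_t)vmid << VTTBR_VMID_SHIFT) &
				      VTTBR_VMID_MASK(vmid_bits);

		return pgd_phys | vmid_field;
	}

With vmid_bits == 8 and vmid == 1, for example, the result is simply
pgd_phys | (1ULL << 48).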

* [RFC PATCH v2 06/31] KVM: arm64: Invalidate virtual EL2 TLB entries when needed
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

Sometimes when we are invalidating the TLB for a certain S2 MMU
context, that context can also have an EL2 context associated with it,
and we have to invalidate that too.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 virt/kvm/arm/arm.c |  5 +++++
 virt/kvm/arm/mmu.c | 23 ++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 41e0654..63dd897 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -362,6 +362,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
 
 		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vttbr);
+
+		if (mmu->el2_vmid.vmid) {
+			vttbr = kvm_get_vttbr(&mmu->el2_vmid, mmu);
+			kvm_call_hyp(__kvm_tlb_flush_local_vmid, vttbr);
+		}
 		*last_ran = vcpu->vcpu_id;
 	}
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 0edcf23..184cdc9 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -64,7 +64,21 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 	u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
 
-	kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
+	if (!mmu->el2_vmid.vmid) {
+		/*
+		 * For a normal (i.e. non-nested) guest, flush entries for the
+		 * given VMID.
+		 */
+		kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
+	} else {
+		/*
+		 * When supporting nested virtualization, we can have multiple
+		 * VMIDs in play for each VCPU in the VM, so it's really not
+		 * worth it to try to quiesce the system and flush all the
+		 * VMIDs that may be in use; instead, just nuke the whole thing.
+		 */
+		kvm_call_hyp(__kvm_flush_vm_context);
+	}
 }
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
@@ -72,6 +86,13 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
 	u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
 
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, vttbr, ipa);
+
+	if (!mmu->el2_vmid.vmid) {
+		/* Nothing more to do for a non-nested guest */
+		return;
+	}
+	vttbr = kvm_get_vttbr(&mmu->el2_vmid, mmu);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, vttbr, ipa);
 }
 
 /*
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
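
Aside: the policy above can be summarized as follows: a zero el2_vmid
means the VM never ran in virtual EL2, so a per-VMID flush suffices;
once an EL2 VMID has been allocated, both translation regimes may hold
entries. A hedged sketch (flush_vmid()/flush_all() are illustrative
stand-ins for the kvm_call_hyp() invocations in the patch):

	static void flush_vm_tlbs(struct kvm_s2_mmu *mmu)
	{
		if (!mmu->el2_vmid.vmid) {
			/* Non-nested guest: flush only its VMID. */
			flush_vmid(mmu->vmid.vmid);
			return;
		}

		/* Nested: several VMIDs may be live; nuke them all. */
		flush_all();
	}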

* [RFC PATCH v2 07/31] KVM: arm64: Setup vttbr_el2 on each VM entry
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

Now that the vttbr value will be different depending on the VM's
exception level, we set it on each VM entry.

We only have one mmu instance at this point, but there will be
multiple instances once we come to run nested VMs.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack@cs.columbia.edu>
---
 arch/arm64/kvm/context.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/kvm/context.c b/arch/arm64/kvm/context.c
index a7811e1..afd1702 100644
--- a/arch/arm64/kvm/context.c
+++ b/arch/arm64/kvm/context.c
@@ -18,6 +18,7 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/esr.h>
+#include <asm/kvm_mmu.h>
 
 struct el1_el2_map {
 	enum vcpu_sysreg	el1;
@@ -174,6 +175,15 @@ static void flush_shadow_el1_sysregs(struct kvm_vcpu *vcpu)
 		flush_shadow_el1_sysregs_nvhe(vcpu);
 }
 
+static void setup_s2_mmu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu);
+
+	vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu);
+	vcpu->arch.hw_mmu = mmu;
+}
+
 /*
  * List of EL0 and EL1 registers which we allow the virtual EL2 mode to access
  * directly without trapping. This is possible because the impact of
@@ -323,6 +333,8 @@ void kvm_arm_setup_shadow_state(struct kvm_vcpu *vcpu)
 		setup_mpidr_el1(vcpu);
 		ctxt->hw_sys_regs = ctxt->sys_regs;
 	}
+
+	setup_s2_mmu(vcpu);
 }
 
 /**
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
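
Aside: combining setup_s2_mmu() above with __activate_vm() from the
earlier patch gives the entry-time invariant in one place. The wrapper
below is hypothetical; the real code is split across context.c and
switch.c:

	static void enter_vm_stage2(struct kvm_vcpu *vcpu)
	{
		struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
		/* el2_vmid if the vcpu is in virtual EL2, vmid otherwise */
		struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu);

		vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu);
		write_sysreg(vcpu->arch.hw_vttbr, vttbr_el2);
	}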

* [RFC PATCH v2 08/31] KVM: arm/arm64: Make mmu functions non-static
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

Make the mmu functions non-static so that we can reuse them to
support the mmus of nested VMs.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_mmu.h |  9 ++++
 virt/kvm/arm/mmu.c               | 94 +++++++++++++++++++++++-----------------
 2 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 21c0299..bceaec1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -145,9 +145,18 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
 
 void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
+int __kvm_alloc_stage2_pgd(struct kvm_s2_mmu *mmu);
 void kvm_free_stage2_pgd(struct kvm *kvm);
+void __kvm_free_stage2_pgd(struct kvm *kvm, struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 			  phys_addr_t pa, unsigned long size, bool writable);
+void kvm_unmap_stage2_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			    phys_addr_t start, u64 size);
+void kvm_stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			 phys_addr_t addr, phys_addr_t end);
+void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu,
+			    phys_addr_t start, phys_addr_t end);
+
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 184cdc9..ca10799 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -307,7 +307,7 @@ static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
 }
 
 /**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * kvm_unmap_stage2_range -- Clear stage2 page table entries to unmap a range
  * @kvm:   The VM pointer
  * @start: The intermediate physical base address of the range to unmap
  * @size:  The size of the area to unmap
@@ -317,12 +317,12 @@ static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
  * destroying the VM), otherwise another faulting VCPU may come in and mess
  * with things behind our backs.
  */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+void kvm_unmap_stage2_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+			    phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
-	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	assert_spin_locked(&kvm->mmu_lock);
 	pgd = mmu->pgd + stage2_pgd_index(addr);
@@ -391,11 +391,10 @@ static void stage2_flush_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 	} while (pud++, addr = next, addr != end);
 }
 
-static void stage2_flush_memslot(struct kvm_s2_mmu *mmu,
-				 struct kvm_memory_slot *memslot)
+void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu,
+			    phys_addr_t start, phys_addr_t end)
 {
-	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
-	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t addr = start;
 	phys_addr_t next;
 	pgd_t *pgd;
 
@@ -406,6 +405,15 @@ static void stage2_flush_memslot(struct kvm_s2_mmu *mmu,
 	} while (pgd++, addr = next, addr != end);
 }
 
+static void stage2_flush_memslot(struct kvm_s2_mmu *mmu,
+		struct kvm_memory_slot *memslot)
+{
+	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = start + PAGE_SIZE * memslot->npages;
+
+	kvm_stage2_flush_range(mmu, start, end);
+}
+
 /**
  * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
  * @kvm: The struct kvm pointer
@@ -762,21 +770,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
-/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm:	The KVM struct pointer for the VM.
- *
- * Allocates only the stage-2 HW PGD level table(s) (can support either full
- * 40-bit input addresses or limited to 32-bit input addresses). Clears the
- * allocated pages.
- *
- * Note we don't need locking here as this is only called when the VM is
- * created, which can only be done once.
- */
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
+int __kvm_alloc_stage2_pgd(struct kvm_s2_mmu *mmu)
 {
 	pgd_t *pgd;
-	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	if (mmu->pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
@@ -793,6 +789,22 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	return 0;
 }
 
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Allocates only the stage-2 HW PGD level table(s) (can support either full
+ * 40-bit input addresses or limited to 32-bit input addresses). Clears the
+ * allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+	return __kvm_alloc_stage2_pgd(&kvm->arch.mmu);
+}
+
 static void stage2_unmap_memslot(struct kvm *kvm,
 				 struct kvm_memory_slot *memslot)
 {
@@ -828,7 +840,8 @@ static void stage2_unmap_memslot(struct kvm *kvm,
 
 		if (!(vma->vm_flags & VM_PFNMAP)) {
 			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
-			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+			kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa,
+					       vm_end - vm_start);
 		}
 		hva = vm_end;
 	} while (hva < reg_end);
@@ -860,22 +873,13 @@ void stage2_unmap_vm(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
-/**
- * kvm_free_stage2_pgd - free all stage-2 tables
- * @kvm:	The KVM struct pointer for the VM.
- *
- * Walks the level-1 page table pointed to by kvm->arch.mmu.pgd and frees all
- * underlying level-2 and level-3 tables before freeing the actual level-1 table
- * and setting the struct pointer to NULL.
- */
-void kvm_free_stage2_pgd(struct kvm *kvm)
+void __kvm_free_stage2_pgd(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 {
 	void *pgd = NULL;
-	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
 
 	spin_lock(&kvm->mmu_lock);
 	if (mmu->pgd) {
-		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+		kvm_unmap_stage2_range(kvm, mmu, 0, KVM_PHYS_SIZE);
 		pgd = READ_ONCE(mmu->pgd);
 		mmu->pgd = NULL;
 	}
@@ -885,6 +889,18 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	if (pgd)
 		free_pages_exact(pgd, S2_PGD_SIZE);
 }
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.mmu.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+	__kvm_free_stage2_pgd(kvm, &kvm->arch.mmu);
+}
 
 static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu,
 			     struct kvm_mmu_memory_cache *cache,
@@ -1204,7 +1220,7 @@ static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
  * @addr:	Start address of range
  * @end:	End address of range
  */
-static void stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
+void kvm_stage2_wp_range(struct kvm *kvm, struct kvm_s2_mmu *mmu,
 			    phys_addr_t addr, phys_addr_t end)
 {
 	pgd_t *pgd;
@@ -1251,7 +1267,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
+	kvm_stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1275,7 +1291,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
+	kvm_stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
 }
 
 /*
@@ -1626,7 +1642,7 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 
 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
-	unmap_stage2_range(kvm, gpa, size);
+	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
 	return 0;
 }
 
@@ -1938,8 +1954,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
-		unmap_stage2_range(kvm, mem->guest_phys_addr,
-				   mem->memory_size);
+		kvm_unmap_stage2_range(kvm, &kvm->arch.mmu,
+				       mem->guest_phys_addr, mem->memory_size);
 	else
 		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 	spin_unlock(&kvm->mmu_lock);
@@ -1975,7 +1991,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	phys_addr_t size = slot->npages << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	unmap_stage2_range(kvm, gpa, size);
+	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
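
Aside: with these helpers exported, a caller managing a separate
stage-2 context no longer has to go through kvm->arch.mmu. A hedged
usage sketch (the shadow-mmu caller is hypothetical; only the helper
functions come from the patch):

	static int init_shadow_stage2(struct kvm_s2_mmu *shadow_mmu)
	{
		/* Allocates and clears the level-1 stage-2 table. */
		return __kvm_alloc_stage2_pgd(shadow_mmu);
	}

	static void destroy_shadow_stage2(struct kvm *kvm,
					  struct kvm_s2_mmu *shadow_mmu)
	{
		/* Unmaps [0, KVM_PHYS_SIZE) and frees the level-1 table. */
		__kvm_free_stage2_pgd(kvm, shadow_mmu);
	}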

* [RFC PATCH v2 09/31] KVM: arm/arm64: Manage mmus for nested VMs
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Now that a hypervisor can run in the virtual EL2, the guest hypervisor
can assign any VMID to its own VMs. To avoid VMID conflicts between the
host and the guest(s), the host hypervisor maps each VMID as seen by a
guest hypervisor (i.e. a virtual VMID) to a unique shadow VMID. It also
manages a set of shadow stage-2 page tables for each shadow VMID. All
this information is stored in the kvm_nested_s2_mmu struct.

A host hypervisor manages a list of kvm_nested_s2_mmu objects per VM.
On VM entry, it searches the list for an object using the virtual VMID
as the key.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - This is a merged commit of [RFC 39/55] and [RFC 40/55].
    - Updated the commit message and comments.
    - Defer creating a new nested mmu structure until we enter the VM with stage 2
      paging enabled, rather than on vttbr_el2 write operations as before (a sketch
      of the resulting entry-time flow follows these notes).
    - Use the existing kvm->mmu_lock when iterating nested mmus instead of creating one.
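
A sketch of that entry-time flow, using the names this patch introduces
(the wrapper name itself is hypothetical):

	/* Sketch only: which stage-2 mmu backs the next guest entry. */
	static struct kvm_s2_mmu *sketch_entry_mmu(struct kvm_vcpu *vcpu)
	{
		/*
		 * The guest hypervisor itself, or a guest running with
		 * stage 2 paging disabled, uses the VM's canonical mmu.
		 */
		if (is_hyp_ctxt(vcpu) || !vcpu_nested_stage2_enabled(vcpu))
			return &vcpu->kvm->arch.mmu;

		/*
		 * Only on entry to a nested VM is a shadow mmu looked up
		 * by virtual VMID, and created if it does not exist yet.
		 */
		return get_s2_mmu_nested(vcpu);
	}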

 arch/arm/include/asm/kvm_host.h      |  12 ++++
 arch/arm64/include/asm/kvm_emulate.h |  13 ++---
 arch/arm64/include/asm/kvm_host.h    |  25 ++++++++
 arch/arm64/include/asm/kvm_mmu.h     |  21 +++++++
 arch/arm64/kvm/Makefile              |   1 +
 arch/arm64/kvm/context.c             |   2 +-
 arch/arm64/kvm/mmu-nested.c          | 108 +++++++++++++++++++++++++++++++++++
 virt/kvm/arm/arm.c                   |   1 +
 8 files changed, 174 insertions(+), 9 deletions(-)
 create mode 100644 arch/arm64/kvm/mmu-nested.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 33ccdbe..d84c1c1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -67,6 +67,15 @@ struct kvm_s2_mmu {
 	pgd_t *pgd;
 };
 
+/* Per shadow VMID mmu structure. This is only for nested virtualization */
+struct kvm_nested_s2_mmu {
+	struct kvm_s2_mmu mmu;
+
+	u64 virtual_vttbr;
+
+	struct list_head list;
+};
+
 struct kvm_arch {
 	/* Stage 2 paging state for the VM */
 	struct kvm_s2_mmu mmu;
@@ -79,6 +88,9 @@ struct kvm_arch {
 	 * here.
 	 */
 
+	/* Never used on arm, but present to stay compatible with arm64 */
+	struct list_head nested_mmu_list;
+
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 	int max_vcpus;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 71a3a04..f476576 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -199,6 +199,11 @@ static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
 	return false;
 }
 
+static inline bool vcpu_nested_stage2_enabled(const struct kvm_vcpu *vcpu)
+{
+	return (vcpu_sys_reg(vcpu, HCR_EL2) & HCR_VM);
+}
+
 static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.esr_el2;
@@ -385,12 +390,4 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	return data;		/* Leave LE untouched */
 }
 
-static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(is_hyp_ctxt(vcpu)))
-		return &vcpu->kvm->arch.mmu.el2_vmid;
-
-	return &vcpu->kvm->arch.mmu.vmid;
-}
-
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a7edf0e..0c37e49 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -65,6 +65,28 @@ struct kvm_s2_mmu {
 	pgd_t *pgd;
 };
 
+/* Per shadow VMID mmu structure */
+struct kvm_nested_s2_mmu {
+	struct kvm_s2_mmu mmu;
+
+	/*
+	 * virtual_vttbr contains the vttbr_el2 value set by the guest
+	 * hypervisor. We use its vmid field as the key when searching
+	 * for this mmu object in the list, and ignore the baddr field.
+	 *
+	 * Note that we could instead use the vmid field to find a
+	 * shadow VMID and the baddr field to find a pointer to the
+	 * shadow stage-2 page table, and then combine the two to set
+	 * up hw_vttbr. The only benefit of doing so would be reusing
+	 * shadow stage-2 page tables across different VMIDs, which is
+	 * not the usual case. So we choose the current design for its
+	 * simplicity.
+	 */
+	u64 virtual_vttbr;
+
+	struct list_head list;
+};
+
 struct kvm_arch {
 	/* Stage 2 paging state for the VM */
 	struct kvm_s2_mmu mmu;
@@ -77,6 +99,9 @@ struct kvm_arch {
 
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
+
+	/* Stage 2 shadow paging contexts for nested L2 VM */
+	struct list_head nested_mmu_list;
 };
 
 #define KVM_NR_MEM_OBJS     40
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index bceaec1..452912f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -112,6 +112,7 @@
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/kvm_emulate.h>
 
 static inline unsigned long __kern_hyp_va(unsigned long v)
 {
@@ -321,6 +322,10 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
+struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
+void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
 {
@@ -332,5 +337,21 @@ static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 	return baddr | vmid_field;
 }
 
+static inline u64 get_vmid(u64 vttbr)
+{
+	return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
+	       VTTBR_VMID_SHIFT;
+}
+
+static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s2_mmu *mmu = vcpu_get_active_s2_mmu(vcpu);
+
+	if (unlikely(is_hyp_ctxt(vcpu)))
+		return &mmu->el2_vmid;
+	else
+		return &mmu->vmid;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 0263ef0..5300db0 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -37,4 +37,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
+kvm-$(CONFIG_KVM_ARM_HOST) += mmu-nested.o
 kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o
diff --git a/arch/arm64/kvm/context.c b/arch/arm64/kvm/context.c
index afd1702..762d4a5 100644
--- a/arch/arm64/kvm/context.c
+++ b/arch/arm64/kvm/context.c
@@ -177,7 +177,7 @@ static void flush_shadow_el1_sysregs(struct kvm_vcpu *vcpu)
 
 static void setup_s2_mmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	struct kvm_s2_mmu *mmu = vcpu_get_active_s2_mmu(vcpu);
 	struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu);
 
 	vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu);
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
new file mode 100644
index 0000000..c436daf
--- /dev/null
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 - Columbia University and Linaro Ltd.
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
+
+static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
+						   u64 vttbr)
+{
+	struct kvm_nested_s2_mmu *mmu;
+	u64 virtual_vmid;
+	u64 target_vmid = get_vmid(vttbr);
+	struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
+
+	/* Search the list for an mmu, using the virtual VMID as the key */
+	list_for_each_entry_rcu(mmu, nested_mmu_list, list) {
+		virtual_vmid = get_vmid(mmu->virtual_vttbr);
+		if (target_vmid == virtual_vmid)
+			return mmu;
+	}
+	return NULL;
+}
+
+/**
+ * create_nested_mmu - create mmu for the given virtual VMID
+ *
+ * Called from setup_s2_mmu before entering a nested VM, to ensure that
+ * the shadow stage 2 page table is allocated and valid to use.
+ */
+static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu,
+						   u64 vttbr)
+{
+	struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu;
+	struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
+	bool need_free = false;
+	int ret;
+
+	nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL);
+	if (!nested_mmu)
+		return NULL;
+
+	ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu);
+	if (ret) {
+		kfree(nested_mmu);
+		return NULL;
+	}
+
+	/* The virtual VMID is the search key; set it before publishing */
+	nested_mmu->virtual_vttbr = vttbr;
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	tmp_mmu = lookup_nested_mmu(vcpu, vttbr);
+	if (!tmp_mmu) {
+		list_add_rcu(&nested_mmu->list, nested_mmu_list);
+	} else {
+		/*
+		 * Somebody already put a nested_mmu for this virtual VMID
+		 * on the list behind our back; use theirs instead.
+		 */
+		need_free = true;
+	}
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (need_free) {
+		__kvm_free_stage2_pgd(vcpu->kvm, &nested_mmu->mmu);
+		kfree(nested_mmu);
+		nested_mmu = tmp_mmu;
+	}
+
+	return nested_mmu;
+}
+
+static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
+{
+	u64 vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2);
+	struct kvm_nested_s2_mmu *nested_mmu;
+
+	nested_mmu = lookup_nested_mmu(vcpu, vttbr);
+	if (!nested_mmu)
+		nested_mmu = create_nested_mmu(vcpu, vttbr);
+
+	return &nested_mmu->mmu;
+}
+
+struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu)
+{
+	if (is_hyp_ctxt(vcpu) || !vcpu_nested_stage2_enabled(vcpu))
+		return &vcpu->kvm->arch.mmu;
+
+	return get_s2_mmu_nested(vcpu);
+}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 63dd897..4548d77 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -145,6 +145,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.mmu.vmid.vmid_gen = 0;
 	kvm->arch.mmu.el2_vmid.vmid_gen = 0;
+	INIT_LIST_HEAD(&kvm->arch.nested_mmu_list);
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->arch.max_vcpus = vgic_present ?
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 10/31] KVM: arm/arm64: Unmap/flush shadow stage 2 page tables
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

Unmap/flush shadow stage 2 page tables for the nested VMs as well as the
stage 2 page table for the guest hypervisor.

Note: A bunch of the code in mmu.c relating to MMU notifiers is
currently dealt with in an extremely abrupt way, for example by clearing
out an entire shadow stage-2 table. This will be handled in a more
efficient way using the reverse mapping feature in a later version of
the patch series.
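
As a rough sketch of that abrupt handling, a notifier-driven unmap now
unmaps the VM's own stage 2 precisely but, lacking a reverse map, drops
every shadow stage 2 wholesale (helpers as introduced by this patch;
the handler name itself is hypothetical):

	/* Sketch only: the per-notifier unmap pattern after this patch. */
	static int sketch_unmap_handler(struct kvm *kvm, gpa_t gpa, u64 size)
	{
		/* Precise unmap of the VM's canonical stage 2 ... */
		kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);

		/* ... then clear all shadow stage-2 tables entirely. */
		kvm_nested_s2_clear(kvm);
		return 0;
	}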

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Removed an unnecessary per-vcpu iteration in the kvm_nested_s2_all_vcpus_*()
      functions and dropped all_vcpus from the function names; the list of nested
      mmus is per VM, not per vcpu.
    - Renamed kvm_nested_s2_unmap() to kvm_nested_s2_clear()
    - Renamed kvm_nested_s2_teardown() to kvm_nested_s2_free()
    - Removed the unused kvm_nested_s2_init() function.

 arch/arm/include/asm/kvm_mmu.h   |  6 ++++++
 arch/arm64/include/asm/kvm_mmu.h |  5 +++++
 arch/arm64/kvm/mmu-nested.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/arm.c               |  6 +++++-
 virt/kvm/arm/mmu.c               | 17 +++++++++++++++++
 5 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 86fdc70..d3eafc5 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,12 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+static inline void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) { }
+static inline void kvm_nested_s2_free(struct kvm *kvm) { }
+static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
+static inline void kvm_nested_s2_clear(struct kvm *kvm) { }
+static inline void kvm_nested_s2_flush(struct kvm *kvm) { }
+
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
 {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 452912f..7fc7a83 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -325,6 +325,11 @@ static inline unsigned int kvm_get_vmid_bits(void)
 struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
 void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_free(struct kvm *kvm);
+void kvm_nested_s2_wp(struct kvm *kvm);
+void kvm_nested_s2_clear(struct kvm *kvm);
+void kvm_nested_s2_flush(struct kvm *kvm);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index c436daf..3ee20f2 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2017 - Columbia University and Linaro Ltd.
  * Author: Jintack Lim <jintack.lim@linaro.org>
+ * Author: Christoffer Dall <cdall@cs.columbia.edu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,6 +22,45 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_wp(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_wp_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_clear(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_unmap_stage2_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_flush(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_flush_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+void kvm_nested_s2_free(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		__kvm_free_stage2_pgd(kvm, &nested_mmu->mmu);
+}
+
 static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
 						   u64 vttbr)
 {
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 4548d77..08706f8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -187,6 +187,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	free_percpu(kvm->arch.last_vcpu_ran);
 	kvm->arch.last_vcpu_ran = NULL;
 
+	kvm_nested_s2_free(kvm);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -926,8 +928,10 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	 * Ensure a rebooted VM will fault in RAM pages and detect if the
 	 * guest MMU is turned off and flush the caches as needed.
 	 */
-	if (vcpu->arch.has_run_once)
+	if (vcpu->arch.has_run_once) {
 		stage2_unmap_vm(vcpu->kvm);
+		kvm_nested_s2_clear(vcpu->kvm);
+	}
 
 	vcpu_reset_hcr(vcpu);
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ca10799..3143f81 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -434,6 +434,8 @@ static void stage2_flush_vm(struct kvm *kvm)
 	kvm_for_each_memslot(memslot, slots)
 		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 
+	kvm_nested_s2_flush(kvm);
+
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
@@ -1268,6 +1270,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
+	kvm_nested_s2_wp(kvm);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1306,6 +1309,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t gfn_offset, unsigned long mask)
 {
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+	kvm_nested_s2_wp(kvm);
 }
 
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
@@ -1643,6 +1647,7 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1682,6 +1687,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * through this calling path.
 	 */
 	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1716,6 +1722,11 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	if (pte_none(*pte))
 		return 0;
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of patch series.
+	 */
+
 	return stage2_ptep_test_and_clear_young(pte);
 }
 
@@ -1736,6 +1747,11 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	if (!pte_none(*pte))		/* Just a page... */
 		return pte_young(*pte);
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of patch series.
+	 */
+
 	return 0;
 }
 
@@ -1992,6 +2008,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 10/31] KVM: arm/arm64: Unmap/flush shadow stage 2 page tables
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: kvm, catalin.marinas, will.deacon, linux, linux-kernel, pbonzini,
	linux-arm-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

Unmap/flush shadow stage 2 page tables for the nested VMs as well as the
stage 2 page table for the guest hypervisor.

Note: A bunch of the code in mmu.c relating to MMU notifiers is
currently dealt with in an extremely abrupt way, for example by clearing
out an entire shadow stage-2 table. This will be handled in a more
efficient way using the reverse mapping feature in a later version of
the patch series.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Removed an unnecessary iteration for each vcpu in kvm_nested_s2_all_vcpus_*()
      functions and remove all_vcpus in the function names; a list of nested mmu
      is per VM, not per vcpu.
    - Renamed kvm_nested_s2_unmap() to kvm_nested_s2_clear()
    - Renamed kvm_nested_s2_teardown() to kvm_nested_s2_free()
    - Removed the unused kvm_nested_s2_init() function.

 arch/arm/include/asm/kvm_mmu.h   |  6 ++++++
 arch/arm64/include/asm/kvm_mmu.h |  5 +++++
 arch/arm64/kvm/mmu-nested.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/arm.c               |  6 +++++-
 virt/kvm/arm/mmu.c               | 17 +++++++++++++++++
 5 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 86fdc70..d3eafc5 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,12 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+static inline void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) { }
+static inline void kvm_nested_s2_free(struct kvm *kvm) { }
+static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
+static inline void kvm_nested_s2_clear(struct kvm *kvm) { }
+static inline void kvm_nested_s2_flush(struct kvm *kvm) { }
+
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
 {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 452912f..7fc7a83 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -325,6 +325,11 @@ static inline unsigned int kvm_get_vmid_bits(void)
 struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
 void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_free(struct kvm *kvm);
+void kvm_nested_s2_wp(struct kvm *kvm);
+void kvm_nested_s2_clear(struct kvm *kvm);
+void kvm_nested_s2_flush(struct kvm *kvm);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index c436daf..3ee20f2 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2017 - Columbia University and Linaro Ltd.
  * Author: Jintack Lim <jintack.lim@linaro.org>
+ * Author: Christoffer Dall <cdall@cs.columbia.edu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,6 +22,45 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_wp(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_wp_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_clear(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_unmap_stage2_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_flush(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_flush_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+void kvm_nested_s2_free(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		__kvm_free_stage2_pgd(kvm, &nested_mmu->mmu);
+}
+
 static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
 						   u64 vttbr)
 {
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 4548d77..08706f8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -187,6 +187,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	free_percpu(kvm->arch.last_vcpu_ran);
 	kvm->arch.last_vcpu_ran = NULL;
 
+	kvm_nested_s2_free(kvm);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -926,8 +928,10 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	 * Ensure a rebooted VM will fault in RAM pages and detect if the
 	 * guest MMU is turned off and flush the caches as needed.
 	 */
-	if (vcpu->arch.has_run_once)
+	if (vcpu->arch.has_run_once) {
 		stage2_unmap_vm(vcpu->kvm);
+		kvm_nested_s2_clear(vcpu->kvm);
+	}
 
 	vcpu_reset_hcr(vcpu);
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ca10799..3143f81 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -434,6 +434,8 @@ static void stage2_flush_vm(struct kvm *kvm)
 	kvm_for_each_memslot(memslot, slots)
 		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 
+	kvm_nested_s2_flush(kvm);
+
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
@@ -1268,6 +1270,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
+	kvm_nested_s2_wp(kvm);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1306,6 +1309,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t gfn_offset, unsigned long mask)
 {
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+	kvm_nested_s2_wp(kvm);
 }
 
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
@@ -1643,6 +1647,7 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1682,6 +1687,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * through this calling path.
 	 */
 	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1716,6 +1722,11 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	if (pte_none(*pte))
 		return 0;
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of patch series.
+	 */
+
 	return stage2_ptep_test_and_clear_young(pte);
 }
 
@@ -1736,6 +1747,11 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	if (!pte_none(*pte))		/* Just a page... */
 		return pte_young(*pte);
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of patch series.
+	 */
+
 	return 0;
 }
 
@@ -1992,6 +2008,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 10/31] KVM: arm/arm64: Unmap/flush shadow stage 2 page tables
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: linux-arm-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

Unmap/flush shadow stage 2 page tables for the nested VMs as well as the
stage 2 page table for the guest hypervisor.

Note: A bunch of the code in mmu.c relating to MMU notifiers is
currently dealt with in an extremely abrupt way, for example by clearing
out an entire shadow stage-2 table. This will be handled in a more
efficient way using the reverse mapping feature in a later version of
the patch series.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Removed an unnecessary iteration for each vcpu in kvm_nested_s2_all_vcpus_*()
      functions and remove all_vcpus in the function names; a list of nested mmu
      is per VM, not per vcpu.
    - Renamed kvm_nested_s2_unmap() to kvm_nested_s2_clear()
    - Renamed kvm_nested_s2_teardown() to kvm_nested_s2_free()
    - Removed the unused kvm_nested_s2_init() function.

 arch/arm/include/asm/kvm_mmu.h   |  6 ++++++
 arch/arm64/include/asm/kvm_mmu.h |  5 +++++
 arch/arm64/kvm/mmu-nested.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/arm.c               |  6 +++++-
 virt/kvm/arm/mmu.c               | 17 +++++++++++++++++
 5 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 86fdc70..d3eafc5 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,12 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+static inline void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) { }
+static inline void kvm_nested_s2_free(struct kvm *kvm) { }
+static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
+static inline void kvm_nested_s2_clear(struct kvm *kvm) { }
+static inline void kvm_nested_s2_flush(struct kvm *kvm) { }
+
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
 {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 452912f..7fc7a83 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -325,6 +325,11 @@ static inline unsigned int kvm_get_vmid_bits(void)
 struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
 void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu);
+void kvm_nested_s2_free(struct kvm *kvm);
+void kvm_nested_s2_wp(struct kvm *kvm);
+void kvm_nested_s2_clear(struct kvm *kvm);
+void kvm_nested_s2_flush(struct kvm *kvm);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index c436daf..3ee20f2 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2017 - Columbia University and Linaro Ltd.
  * Author: Jintack Lim <jintack.lim@linaro.org>
+ * Author: Christoffer Dall <cdall@cs.columbia.edu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,6 +22,45 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_wp(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_wp_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_clear(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_unmap_stage2_range(kvm, &nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+/* expects kvm->mmu_lock to be held */
+void kvm_nested_s2_flush(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		kvm_stage2_flush_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE);
+}
+
+void kvm_nested_s2_free(struct kvm *kvm)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct list_head *nested_mmu_list = &kvm->arch.nested_mmu_list;
+
+	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
+		__kvm_free_stage2_pgd(kvm, &nested_mmu->mmu);
+}
+
 static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
 						   u64 vttbr)
 {
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 4548d77..08706f8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -187,6 +187,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	free_percpu(kvm->arch.last_vcpu_ran);
 	kvm->arch.last_vcpu_ran = NULL;
 
+	kvm_nested_s2_free(kvm);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -926,8 +928,10 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	 * Ensure a rebooted VM will fault in RAM pages and detect if the
 	 * guest MMU is turned off and flush the caches as needed.
 	 */
-	if (vcpu->arch.has_run_once)
+	if (vcpu->arch.has_run_once) {
 		stage2_unmap_vm(vcpu->kvm);
+		kvm_nested_s2_clear(vcpu->kvm);
+	}
 
 	vcpu_reset_hcr(vcpu);
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ca10799..3143f81 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -434,6 +434,8 @@ static void stage2_flush_vm(struct kvm *kvm)
 	kvm_for_each_memslot(memslot, slots)
 		stage2_flush_memslot(&kvm->arch.mmu, memslot);
 
+	kvm_nested_s2_flush(kvm);
+
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
@@ -1268,6 +1270,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_stage2_wp_range(kvm, &kvm->arch.mmu, start, end);
+	kvm_nested_s2_wp(kvm);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1306,6 +1309,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t gfn_offset, unsigned long mask)
 {
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+	kvm_nested_s2_wp(kvm);
 }
 
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
@@ -1643,6 +1647,7 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1682,6 +1687,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 	 * through this calling path.
 	 */
 	stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
+	kvm_nested_s2_clear(kvm);
 	return 0;
 }
 
@@ -1716,6 +1722,11 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 	if (pte_none(*pte))
 		return 0;
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of this patch series.
+	 */
+
 	return stage2_ptep_test_and_clear_young(pte);
 }
 
@@ -1736,6 +1747,11 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
 	if (!pte_none(*pte))		/* Just a page... */
 		return pte_young(*pte);
 
+	/*
+	 * TODO: Handle nested_mmu structures here using the reverse mapping in
+	 * a later version of this patch series.
+	 */
+
 	return 0;
 }
 
@@ -1992,6 +2008,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
+	kvm_nested_s2_clear(kvm);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
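
The call sites added above all follow the same shape: take kvm->mmu_lock,
operate on the VM's own stage 2 tables, then apply a (coarser, whole-range)
equivalent to every shadow stage 2 table on the nested_mmu_list. A minimal
sketch of that convention, lifted from the kvm_arch_flush_shadow_memslot()
hunk (for illustration only):

	spin_lock(&kvm->mmu_lock);
	kvm_unmap_stage2_range(kvm, &kvm->arch.mmu, gpa, size);
	kvm_nested_s2_clear(kvm);	/* drop all shadow stage 2 mappings */
	spin_unlock(&kvm->mmu_lock);

kvm_nested_s2_free() is the odd one out: the kvm_arch_destroy_vm() hunk
calls it with no lock taken around it, which matches it being the only
helper above without the "expects kvm->mmu_lock" comment.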

* [RFC PATCH v2 11/31] KVM: arm64: Implement nested Stage-2 page table walk logic
  2017-10-03  3:10 ` Jintack Lim
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

Based on the pseudo-code in the ARM ARM, implement a stage 2 software
page table walker.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Handled different endianness between the host and the guest hypervisor
    - Decoupled the stage-2 PTW from injecting exceptions. This will come in handy
      when we just want to walk the page table.
    - Added esr and upper_attr fields in kvm_s2_trans struct
    - Reworked pa_max() to use KVM_PHYS_SHIFT
    - Updated comment about the contiguous bit

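To make the walk geometry concrete: walk_nested_s2_pgd() below derives a
per-level stride and a starting level from the guest hypervisor's VTCR_EL2
fields. A standalone sketch of just that arithmetic (hypothetical helper and
names, not part of the patch; it assumes SL0 is valid for the granule):

	#include <stdio.h>

	static void s2_walk_geometry(unsigned int pgshift, unsigned int t0sz,
				     unsigned int sl)
	{
		unsigned int stride = pgshift - 3;   /* IPA bits resolved per level */
		unsigned int input_size = 64 - t0sz; /* IPA bits to translate */
		/* 4K granules start at level 2 - SL0; 16K/64K at 3 - SL0 */
		int level = (pgshift == 12) ? 2 - (int)sl : 3 - (int)sl;

		printf("granule=%uK: input=%u bits, stride=%u, start level=%d\n",
		       1U << (pgshift - 10), input_size, stride, level);
	}

	int main(void)
	{
		/* 4K pages, T0SZ=24 (40-bit IPA), SL0=1 */
		s2_walk_geometry(12, 24, 1);
		return 0;
	}

For this configuration the sketch prints "granule=4K: input=40 bits,
stride=9, start level=1": each table level resolves 9 bits of a 40-bit IPA,
starting at level 1, which is the same geometry check_base_s2_limits()
sanity-checks before the walk.
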
 arch/arm/include/asm/kvm_mmu.h   |  16 +++
 arch/arm64/include/asm/esr.h     |   1 +
 arch/arm64/include/asm/kvm_arm.h |   3 +
 arch/arm64/include/asm/kvm_mmu.h |  12 ++
 arch/arm64/kvm/mmu-nested.c      | 241 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 273 insertions(+)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index d3eafc5..5fab21a 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+struct kvm_s2_trans {
+	phys_addr_t output;
+	phys_addr_t block_size;
+	bool writable;
+	bool readable;
+	int level;
+	u32 esr;
+	u64 upper_attr;
+};
+
+static inline int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+				     struct kvm_s2_trans *result)
+{
+	return 0;
+}
+
 static inline void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) { }
 static inline void kvm_nested_s2_free(struct kvm *kvm) { }
 static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 210fde6..bc6610b 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -108,6 +108,7 @@
 #define ESR_ELx_CM 		(UL(1) << 8)
 
 /* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_FSC_ADDRSZ	(0x00)
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a1274b7..3993703 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -104,6 +104,7 @@
 #define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_HD		(1 << 22)
 #define VTCR_EL2_HA		(1 << 21)
+#define VTCR_EL2_PS_SHIFT	TCR_EL2_PS_SHIFT
 #define VTCR_EL2_PS_MASK	TCR_EL2_PS_MASK
 #define VTCR_EL2_TG0_MASK	TCR_TG0_MASK
 #define VTCR_EL2_TG0_4K		TCR_TG0_4K
@@ -177,6 +178,8 @@
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
+#define SCTLR_EE	(UL(1) << 25)
+
 /* Hyp System Trap Register */
 #define HSTR_EL2_T(x)	(1 << x)
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7fc7a83..c4efcd5 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -322,9 +322,21 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+struct kvm_s2_trans {
+	phys_addr_t output;
+	phys_addr_t block_size;
+	bool writable;
+	bool readable;
+	int level;
+	u32 esr;
+	u64 upper_attr;
+};
+
 struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
 void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
+int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+		       struct kvm_s2_trans *result);
 void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu);
 void kvm_nested_s2_free(struct kvm *kvm);
 void kvm_nested_s2_wp(struct kvm *kvm);
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index 3ee20f2..fb694b7 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -22,6 +22,247 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 
+struct s2_walk_info {
+	unsigned int pgshift;
+	unsigned int pgsize;
+	unsigned int ps;
+	unsigned int sl;
+	unsigned int t0sz;
+};
+
+static unsigned int ps_to_output_size(unsigned int ps)
+{
+	switch (ps) {
+	case 0: return 32;
+	case 1: return 36;
+	case 2: return 40;
+	case 3: return 42;
+	case 4: return 44;
+	case 5:
+	default:
+		return 48;
+	}
+}
+
+static unsigned int pa_max(void)
+{
+	 /* We always emulate a VM with maximum PA size of KVM_PHYS_SIZE. */
+	return KVM_PHYS_SHIFT;
+}
+
+static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
+{
+	u32 esr;
+
+	esr = kvm_vcpu_get_hsr(vcpu) & ~ESR_ELx_FSC;
+	esr |= fsc;
+	esr |= level & 0x3;
+	return esr;
+}
+
+static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi,
+				int level, int input_size, int stride)
+{
+	int start_size;
+
+	/* Check translation limits */
+	switch (wi->pgsize) {
+	case SZ_64K:
+		if (level == 0 || (level == 1 && pa_max() <= 42))
+			return -EFAULT;
+		break;
+	case SZ_16K:
+		if (level == 0 || (level == 1 && pa_max() <= 40))
+			return -EFAULT;
+		break;
+	case SZ_4K:
+		if (level < 0 || (level == 0 && pa_max() <= 42))
+			return -EFAULT;
+		break;
+	}
+
+	/* Check input size limits */
+	if (input_size > pa_max() &&
+	    (!vcpu_mode_is_32bit(vcpu) || input_size > 40))
+		return -EFAULT;
+
+	/* Check number of entries in starting level table */
+	start_size = input_size - ((3 - level) * stride + wi->pgshift);
+	if (start_size < 1 || start_size > stride + 4)
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Check if output is within boundaries */
+static int check_output_size(struct kvm_vcpu *vcpu, struct s2_walk_info *wi,
+			     phys_addr_t output)
+{
+	unsigned int output_size = ps_to_output_size(wi->ps);
+
+	if (output_size > pa_max())
+		output_size = pa_max();
+
+	if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * This is essentially a C-version of the pseudo code from the ARM ARM
+ * AArch64.TranslationTableWalk  function.  I strongly recommend looking at
+ * that pseudocode in trying to understand this.
+ *
+ * Must be called with the kvm->srcu read lock held
+ */
+static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
+			      struct s2_walk_info *wi, struct kvm_s2_trans *out)
+{
+	u64 vttbr = vcpu->arch.ctxt.sys_regs[VTTBR_EL2];
+	int first_block_level, level, stride, input_size, base_lower_bound;
+	phys_addr_t base_addr;
+	unsigned int addr_top, addr_bottom;
+	u64 desc;  /* page table entry */
+	int ret;
+	phys_addr_t paddr;
+
+	switch (wi->pgsize) {
+	case SZ_64K:
+	case SZ_16K:
+		level = 3 - wi->sl;
+		first_block_level = 2;
+		break;
+	case SZ_4K:
+		level = 2 - wi->sl;
+		first_block_level = 1;
+		break;
+	default:
+		/* GCC is braindead: this case is unreachable */
+		WARN(1, "Page size is none of 4K, 16K or 64K");
+		return -EFAULT;
+	}
+
+	stride = wi->pgshift - 3;
+	input_size = 64 - wi->t0sz;
+	if (input_size > 48 || input_size < 25)
+		return -EFAULT;
+
+	ret = check_base_s2_limits(vcpu, wi, level, input_size, stride);
+	if (WARN_ON(ret))
+		return ret;
+
+	if (check_output_size(vcpu, wi, vttbr)) {
+		out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_ADDRSZ);
+		return 1;
+	}
+
+	base_lower_bound = 3 + input_size - ((3 - level) * stride +
+			   wi->pgshift);
+	base_addr = vttbr & GENMASK_ULL(47, base_lower_bound);
+
+	addr_top = input_size - 1;
+
+	while (1) {
+		phys_addr_t index;
+
+		addr_bottom = (3 - level) * stride + wi->pgshift;
+		index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
+			>> (addr_bottom - 3);
+
+		paddr = base_addr | index;
+		ret = kvm_read_guest(vcpu->kvm, paddr, &desc, sizeof(desc));
+		if (ret < 0)
+			return ret;
+
+		/*
+		 * Handle reversed descriptors if endianness differs between the
+		 * host and the guest hypervisor.
+		 */
+		if (vcpu_sys_reg(vcpu, SCTLR_EL2) & SCTLR_EE)
+			desc = be64_to_cpu(desc);
+		else
+			desc = le64_to_cpu(desc);
+
+		/* Check for valid descriptor at this point */
+		if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+			out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_FAULT);
+			return 1;
+		}
+
+		/* We're at the final level or block translation level */
+		if ((desc & 3) == 1 || level == 3)
+			break;
+
+		if (check_output_size(vcpu, wi, desc)) {
+			out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_ADDRSZ);
+			return 1;
+		}
+
+		base_addr = desc & GENMASK_ULL(47, wi->pgshift);
+
+		level += 1;
+		addr_top = addr_bottom - 1;
+	}
+
+	if (level < first_block_level) {
+		out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_FAULT);
+		return 1;
+	}
+
+	/*
+	 * We don't use the contiguous bit in the stage-2 ptes, so skip check
+	 * for misprogramming of the contiguous bit.
+	 */
+
+	if (check_output_size(vcpu, wi, desc)) {
+		out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_ADDRSZ);
+		return 1;
+	}
+
+	if (!(desc & BIT(10))) {
+		out->esr = esr_s2_fault(vcpu, level, ESR_ELx_FSC_ACCESS);
+		return 1;
+	}
+
+	/* Calculate and return the result */
+	paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
+		(ipa & GENMASK_ULL(addr_bottom - 1, 0));
+	out->output = paddr;
+	out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
+	out->readable = desc & (0b01 << 6);
+	out->writable = desc & (0b10 << 6);
+	out->level = level;
+	out->upper_attr = desc & GENMASK_ULL(63, 52);
+	return 0;
+}
+
+int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+		       struct kvm_s2_trans *result)
+{
+	u64 vtcr = vcpu->arch.ctxt.sys_regs[VTCR_EL2];
+	struct s2_walk_info wi;
+
+	if (!nested_virt_in_use(vcpu))
+		return 0;
+
+	wi.t0sz = vtcr & TCR_EL2_T0SZ_MASK;
+
+	switch (vtcr & VTCR_EL2_TG0_MASK) {
+	case VTCR_EL2_TG0_4K:
+		wi.pgshift = 12;	 break;
+	case VTCR_EL2_TG0_16K:
+		wi.pgshift = 14;	 break;
+	case VTCR_EL2_TG0_64K:
+	default:
+		wi.pgshift = 16;	 break;
+	}
+	wi.pgsize = 1UL << wi.pgshift;
+	wi.ps = (vtcr & VTCR_EL2_PS_MASK) >> VTCR_EL2_PS_SHIFT;
+	wi.sl = (vtcr & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT;
+
+	return walk_nested_s2_pgd(vcpu, gipa, &wi, result);
+}
+
 /* expects kvm->mmu_lock to be held */
 void kvm_nested_s2_wp(struct kvm *kvm)
 {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 12/31] KVM: arm/arm64: Handle shadow stage 2 page faults
  2017-10-03  3:10 ` Jintack Lim
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

If we are faulting on a shadow stage 2 translation, we first walk the
guest hypervisor's stage 2 page table to see if it has a mapping. If
not, we inject a stage 2 page fault to the virtual EL2. Otherwise, we
create a mapping in the shadow stage 2 page table.

Note that we have to deal with two IPAs when we get a shadow stage 2
page fault. One is the address we faulted on, and is in the L2 guest
phys space. The other is from the guest stage-2 page table walk, and is
in the L1 guest phys space.  To differentiate them, we rename the
variables so that fault_ipa is used for the former and ipa is used for the
latter.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Added a common function to inject s2 faults.
    - Align L1 IPA as well as L2 IPA in transparent_hugepage_adjust(). This will
    come in handy when creating a rmap entry with both IPAs.

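The heart of the change is the fault path: for a fault taken while the L2
guest runs, the faulting L2 IPA must first be translated through the guest
hypervisor's stage 2 tables before the host can pick a memslot. Condensed
from the kvm_handle_guest_abort() hunk below (error handling trimmed, for
illustration only):

	if (kvm_is_shadow_s2_fault(vcpu)) {
		nested_trans.esr = 0;
		ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
		if (nested_trans.esr)	/* walk faulted: forward to virtual EL2 */
			kvm_inject_s2_fault(vcpu, nested_trans.esr);
		if (ret)
			goto out_unlock;
		ipa = nested_trans.output;	/* L2 IPA -> L1 IPA */
	}
	gfn = ipa >> PAGE_SHIFT;	/* memslot lookup is by L1 IPA */

Note that fault_ipa (the L2 address) is still what user_mem_abort() and
handle_access_fault() receive, since the shadow stage 2 entry has to be
installed at the address the L2 guest actually faulted on.
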
 arch/arm/include/asm/kvm_emulate.h   |  7 ++++
 arch/arm/include/asm/kvm_mmu.h       |  4 ++
 arch/arm64/include/asm/kvm_emulate.h |  5 +++
 arch/arm64/include/asm/kvm_mmu.h     |  1 +
 arch/arm64/kvm/mmu-nested.c          |  8 ++++
 virt/kvm/arm/mmio.c                  | 12 +++---
 virt/kvm/arm/mmu.c                   | 75 +++++++++++++++++++++++++++++-------
 7 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 24a3fbf..8136464 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -297,4 +297,11 @@ static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
 {
 	return &vcpu->kvm->arch.mmu.vmid;
 }
+
+/* The 32-bit arm architecture doesn't support nested virtualization */
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5fab21a..6a22846 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -242,6 +242,10 @@ static inline void kvm_nested_s2_free(struct kvm *kvm) { }
 static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
 static inline void kvm_nested_s2_clear(struct kvm *kvm) { }
 static inline void kvm_nested_s2_flush(struct kvm *kvm) { }
+static inline int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	return 0;
+}
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f476576..c66554b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -390,4 +390,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	return data;		/* Leave LE untouched */
 }
 
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return vcpu_nested_stage2_enabled(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index c4efcd5..425e4a2 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -342,6 +342,7 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 void kvm_nested_s2_wp(struct kvm *kvm);
 void kvm_nested_s2_clear(struct kvm *kvm);
 void kvm_nested_s2_flush(struct kvm *kvm);
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index fb694b7..75570cc 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -60,6 +60,14 @@ static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
 	return esr;
 }
 
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	vcpu->arch.ctxt.sys_regs[FAR_EL2] = vcpu->arch.fault.far_el2;
+	vcpu->arch.ctxt.sys_regs[HPFAR_EL2] = vcpu->arch.fault.hpfar_el2;
+
+	return kvm_inject_nested_sync(vcpu, esr_el2);
+}
+
 static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi,
 				int level, int input_size, int stride)
 {
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715f..a1009c2 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -153,7 +153,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 }
 
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		 phys_addr_t fault_ipa)
+		 phys_addr_t ipa)
 {
 	unsigned long data;
 	unsigned long rt;
@@ -182,22 +182,22 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, ipa, data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
-		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       ipa, 0);
 
-		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, ipa, len,
 				      data_buf);
 	}
 
 	/* Now prepare kvm_run for the potential return to userland. */
 	run->mmio.is_write	= is_write;
-	run->mmio.phys_addr	= fault_ipa;
+	run->mmio.phys_addr	= ipa;
 	run->mmio.len		= len;
 
 	if (!ret) {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 3143f81..25d3d73 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1098,7 +1098,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	return ret;
 }
 
-static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
+static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap,
+					phys_addr_t *fault_ipap)
 {
 	kvm_pfn_t pfn = *pfnp;
 	gfn_t gfn = *ipap >> PAGE_SHIFT;
@@ -1126,6 +1127,7 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
 		mask = PTRS_PER_PMD - 1;
 		VM_BUG_ON((gfn & mask) != (pfn & mask));
 		if (pfn & mask) {
+			*fault_ipap &= PMD_MASK;
 			*ipap &= PMD_MASK;
 			kvm_release_pfn_clean(pfn);
 			pfn &= ~mask;
@@ -1337,13 +1339,15 @@ static void kvm_send_hwpoison_signal(unsigned long address,
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot, unsigned long hva,
-			  unsigned long fault_status)
+			  struct kvm_s2_trans *nested,
+			  struct kvm_memory_slot *memslot,
+			  unsigned long hva, unsigned long fault_status)
 {
 	int ret;
 	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
-	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	phys_addr_t ipa = fault_ipa;
+	gfn_t gfn;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -1368,9 +1372,23 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma) && !logging_active) {
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		ipa = nested->output;
+
+		/*
+		 * If we're about to create a shadow stage 2 entry, then we
+		 * can only create huge mappings if the guest hypervisor also
+		 * uses a huge mapping.
+		 */
+		if (nested->block_size != PMD_SIZE)
+			force_pte = true;
+	}
+	gfn = ipa >> PAGE_SHIFT;
+
+	if (!force_pte && is_vm_hugetlb_page(vma) && !logging_active) {
 		hugetlb = true;
-		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+		gfn = (ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
 		 * Pages belonging to memslots that don't have the same
@@ -1438,7 +1456,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		goto out_unlock;
 
 	if (!hugetlb && !force_pte)
-		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+		hugetlb = transparent_hugepage_adjust(&pfn, &ipa, &fault_ipa);
 
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
@@ -1525,8 +1543,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	unsigned long fault_status;
-	phys_addr_t fault_ipa;
+	phys_addr_t fault_ipa; /* The address we faulted on */
+	phys_addr_t ipa; /* Always the IPA in the L1 guest phys space */
 	struct kvm_memory_slot *memslot;
+	struct kvm_s2_trans nested_trans;
 	unsigned long hva;
 	bool is_iabt, write_fault, writable;
 	gfn_t gfn;
@@ -1538,7 +1558,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		return 1;
 	}
 
-	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 
 	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
@@ -1547,6 +1567,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
 	    fault_status != FSC_ACCESS) {
+		/*
+		 * We must never see an address size fault on a shadow stage 2
+		 * page table walk, because we would have injected an address
+		 * size fault when we walked the nested s2 page table and not
+		 * created the shadow entry.
+		 */
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1556,7 +1582,27 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	/*
+	 * We may have faulted on a shadow stage 2 page table if we are
+	 * running a nested guest.  In this case, we have to resolve the L2
+	 * IPA to the L1 IPA first, before knowing what kind of memory should
+	 * back the L1 IPA.
+	 *
+	 * If the shadow stage 2 page table walk faults, then we simply inject
+	 * this to the guest and carry on.
+	 */
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		nested_trans.esr = 0;
+		ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+		if (nested_trans.esr)
+			kvm_inject_s2_fault(vcpu, nested_trans.esr);
+		if (ret)
+			goto out_unlock;
+
+		ipa = nested_trans.output;
+	}
+
+	gfn = ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
 	write_fault = kvm_is_write_fault(vcpu);
@@ -1590,13 +1636,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * faulting VA. This is always 12 bits, irrespective
 		 * of the page size.
 		 */
-		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-		ret = io_mem_abort(vcpu, run, fault_ipa);
+		ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+		ret = io_mem_abort(vcpu, run, ipa);
 		goto out_unlock;
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+	VM_BUG_ON(ipa >= KVM_PHYS_SIZE);
 
 	if (fault_status == FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -1604,7 +1650,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, &nested_trans,
+			     memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 12/31] KVM: arm/arm64: Handle shadow stage 2 page faults
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: kvm, catalin.marinas, will.deacon, linux, linux-kernel, pbonzini,
	linux-arm-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

If we are faulting on a shadow stage 2 translation, we first walk the
guest hypervisor's stage 2 page table to see if it has a mapping. If
not, we inject a stage 2 page fault to the virtual EL2. Otherwise, we
create a mapping in the shadow stage 2 page table.

Note that we have to deal with two IPAs when we got a showdow stage 2
page fault. One is the address we faulted on, and is in the L2 guest
phys space. The other is from the guest stage-2 page table walk, and is
in the L1 guest phys space.  To differentiate them, we rename variable
names so that fault_ipa is used for the former and ipa is used for the
latter.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    - Added a common function to inject s2 faults.
    - Align L1 IPA as well as L2 IPA in transparent_hugepage_adjust(). This will
    come in handy when creating a rmap entry with both IPAs.

 arch/arm/include/asm/kvm_emulate.h   |  7 ++++
 arch/arm/include/asm/kvm_mmu.h       |  4 ++
 arch/arm64/include/asm/kvm_emulate.h |  5 +++
 arch/arm64/include/asm/kvm_mmu.h     |  1 +
 arch/arm64/kvm/mmu-nested.c          |  8 ++++
 virt/kvm/arm/mmio.c                  | 12 +++---
 virt/kvm/arm/mmu.c                   | 75 +++++++++++++++++++++++++++++-------
 7 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 24a3fbf..8136464 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -297,4 +297,11 @@ static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu)
 {
 	return &vcpu->kvm->arch.mmu.vmid;
 }
+
+/* arm architecture doesn't support the nesting */
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5fab21a..6a22846 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -242,6 +242,10 @@ static inline void kvm_nested_s2_free(struct kvm *kvm) { }
 static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
 static inline void kvm_nested_s2_clear(struct kvm *kvm) { }
 static inline void kvm_nested_s2_flush(struct kvm *kvm) { }
+static inline int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	return 0;
+}
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f476576..c66554b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -390,4 +390,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	return data;		/* Leave LE untouched */
 }
 
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return vcpu_nested_stage2_enabled(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index c4efcd5..425e4a2 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -342,6 +342,7 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 void kvm_nested_s2_wp(struct kvm *kvm);
 void kvm_nested_s2_clear(struct kvm *kvm);
 void kvm_nested_s2_flush(struct kvm *kvm);
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index fb694b7..75570cc 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -60,6 +60,14 @@ static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
 	return esr;
 }
 
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	vcpu->arch.ctxt.sys_regs[FAR_EL2] = vcpu->arch.fault.far_el2;
+	vcpu->arch.ctxt.sys_regs[HPFAR_EL2] = vcpu->arch.fault.hpfar_el2;
+
+	return kvm_inject_nested_sync(vcpu, esr_el2);
+}
+
 static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi,
 				int level, int input_size, int stride)
 {
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715f..a1009c2 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -153,7 +153,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 }
 
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		 phys_addr_t fault_ipa)
+		 phys_addr_t ipa)
 {
 	unsigned long data;
 	unsigned long rt;
@@ -182,22 +182,22 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, ipa, data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
-		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       ipa, 0);
 
-		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, ipa, len,
 				      data_buf);
 	}
 
 	/* Now prepare kvm_run for the potential return to userland. */
 	run->mmio.is_write	= is_write;
-	run->mmio.phys_addr	= fault_ipa;
+	run->mmio.phys_addr	= ipa;
 	run->mmio.len		= len;
 
 	if (!ret) {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 3143f81..25d3d73 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1098,7 +1098,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	return ret;
 }
 
-static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
+static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap,
+					phys_addr_t *fault_ipap)
 {
 	kvm_pfn_t pfn = *pfnp;
 	gfn_t gfn = *ipap >> PAGE_SHIFT;
@@ -1126,6 +1127,7 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
 		mask = PTRS_PER_PMD - 1;
 		VM_BUG_ON((gfn & mask) != (pfn & mask));
 		if (pfn & mask) {
+			*fault_ipap &= PMD_MASK;
 			*ipap &= PMD_MASK;
 			kvm_release_pfn_clean(pfn);
 			pfn &= ~mask;
@@ -1337,13 +1339,15 @@ static void kvm_send_hwpoison_signal(unsigned long address,
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot, unsigned long hva,
-			  unsigned long fault_status)
+			  struct kvm_s2_trans *nested,
+			  struct kvm_memory_slot *memslot,
+			  unsigned long hva, unsigned long fault_status)
 {
 	int ret;
 	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
-	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	phys_addr_t ipa = fault_ipa;
+	gfn_t gfn;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -1368,9 +1372,23 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma) && !logging_active) {
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		ipa = nested->output;
+
+		/*
+		 * If we're about to create a shadow stage 2 entry, then we
+		 * can only create huge mappings if the guest hypervisor also
+		 * uses a huge mapping.
+		 */
+		if (nested->block_size != PMD_SIZE)
+			force_pte = true;
+	}
+	gfn = ipa >> PAGE_SHIFT;
+
+
+	if (!force_pte && is_vm_hugetlb_page(vma) && !logging_active) {
 		hugetlb = true;
-		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+		gfn = (ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
 		 * Pages belonging to memslots that don't have the same
@@ -1438,7 +1456,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		goto out_unlock;
 
 	if (!hugetlb && !force_pte)
-		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+		hugetlb = transparent_hugepage_adjust(&pfn, &ipa, &fault_ipa);
 
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
@@ -1525,8 +1543,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	unsigned long fault_status;
-	phys_addr_t fault_ipa;
+	phys_addr_t fault_ipa; /* The address we faulted on */
+	phys_addr_t ipa; /* Always the IPA in the L1 guest phys space */
 	struct kvm_memory_slot *memslot;
+	struct kvm_s2_trans nested_trans;
 	unsigned long hva;
 	bool is_iabt, write_fault, writable;
 	gfn_t gfn;
@@ -1538,7 +1558,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		return 1;
 	}
 
-	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 
 	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
@@ -1547,6 +1567,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
 	    fault_status != FSC_ACCESS) {
+		/*
+		 * We must never see an address size fault on shadow stage 2
+		 * page table walk, because we would have injected an addr
+		 * size fault when we walked the nested s2 page and not
+		 * create the shadow entry.
+		 */
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1556,7 +1582,27 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	/*
+	 * We may have faulted on a shadow stage 2 page table if we are
+	 * running a nested guest.  In this case, we have to resolve the L2
+	 * IPA to the L1 IPA first, before knowing what kind of memory should
+	 * back the L1 IPA.
+	 *
+	 * If the shadow stage 2 page table walk faults, then we simply inject
+	 * this to the guest and carry on.
+	 */
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		nested_trans.esr = 0;
+		ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+		if (nested_trans.esr)
+			kvm_inject_s2_fault(vcpu, nested_trans.esr);
+		if (ret)
+			goto out_unlock;
+
+		ipa = nested_trans.output;
+	}
+
+	gfn = ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
 	write_fault = kvm_is_write_fault(vcpu);
@@ -1590,13 +1636,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * faulting VA. This is always 12 bits, irrespective
 		 * of the page size.
 		 */
-		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-		ret = io_mem_abort(vcpu, run, fault_ipa);
+		ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+		ret = io_mem_abort(vcpu, run, ipa);
 		goto out_unlock;
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+	VM_BUG_ON(ipa >= KVM_PHYS_SIZE);
 
 	if (fault_status == FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -1604,7 +1650,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, &nested_trans,
+			     memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
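
To make the two-IPA handling concrete, here is a minimal sketch of the
resolution path, condensed from the kvm_handle_guest_abort() hunks above;
the helper name is ours for illustration, and the kvm_s2_trans fields
(esr, output) are the ones this series introduces.

/*
 * Sketch only: resolve the faulting IPA into an L1 IPA before the
 * usual memslot lookup runs. Condensed from kvm_handle_guest_abort();
 * resolve_l1_ipa() is an illustrative name, not part of the patch.
 */
static int resolve_l1_ipa(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_s2_trans *trans, phys_addr_t *ipa)
{
	int ret;

	*ipa = fault_ipa;	/* non-nested: the fault IPA is the L1 IPA */

	if (!kvm_is_shadow_s2_fault(vcpu))
		return 0;

	trans->esr = 0;
	ret = kvm_walk_nested_s2(vcpu, fault_ipa, trans);
	if (trans->esr)		/* the guest hypervisor's tables faulted */
		kvm_inject_s2_fault(vcpu, trans->esr);
	if (ret)
		return ret;

	*ipa = trans->output;	/* L2 IPA translated to the L1 IPA */
	return 0;
}

Note also the force_pte rule in user_mem_abort(): a shadow huge mapping is
only created when the guest hypervisor's own block size is PMD_SIZE,
because a shadow entry must never map more than the guest hypervisor's
stage 2 entry covers.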

* [RFC PATCH v2 13/31] KVM: arm/arm64: Move kvm_is_write_fault to header file
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel

From: Christoffer Dall <christoffer.dall@linaro.org>

Move this little function to the header files for arm/arm64 so other
code can make use of it directly.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 8 ++++++++
 arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++
 virt/kvm/arm/mmu.c                   | 8 --------
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 8136464..9b745d9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -223,6 +223,14 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index c66554b..4c47bc7 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -307,6 +307,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 	return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 25d3d73..74941ad 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1141,14 +1141,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap,
 	return false;
 }
 
-static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
-{
-	if (kvm_vcpu_trap_is_iabt(vcpu))
-		return false;
-
-	return kvm_vcpu_dabt_iswrite(vcpu);
-}
-
 /**
  * stage2_wp_ptes - write protect PMD range
  * @pmd:	pointer to pmd entry
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
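
The first caller outside mmu.c arrives in the next patch, where
kvm_s2_handle_perm_fault() in mmu-nested.c classifies the abort. As a
minimal sketch of what the header move enables (the function below is
illustrative, not from the series):

#include <asm/kvm_emulate.h>

/* Illustrative: does this abort violate the given stage 2 permissions? */
static bool fault_violates_perms(struct kvm_vcpu *vcpu,
				 bool readable, bool writable)
{
	bool write_fault = kvm_is_write_fault(vcpu);

	/* Instruction aborts never count as writes; data aborts may. */
	return (write_fault && !writable) || (!write_fault && !readable);
}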

* [RFC PATCH v2 14/31] KVM: arm/arm64: Forward the guest hypervisor's stage 2 permission faults
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

We can have discrepancies between the nested stage 2 page table and the
shadow one in a couple of cases.  For example, the guest hypervisor can
mark a page writable while the host hypervisor maps the page read-only
in the shadow page table, when using something like KSM at the host
level.  In this case, a write fault is handled directly by the host
hypervisor.  But we could also simply have a read-only page mapped
read-only in both tables, in which case the host hypervisor cannot do
anything other than tell the guest hypervisor about the fault.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  7 +++++++
 arch/arm64/include/asm/kvm_mmu.h |  2 ++
 arch/arm64/kvm/mmu-nested.c      | 22 ++++++++++++++++++++++
 virt/kvm/arm/mmu.c               |  7 +++++++
 4 files changed, 38 insertions(+)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 6a22846..1c5b652 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -237,6 +237,13 @@ static inline int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 	return 0;
 }
 
+static inline int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+					   phys_addr_t fault_ipa,
+					   struct kvm_s2_trans *trans)
+{
+	return 0;
+}
+
 static inline void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) { }
 static inline void kvm_nested_s2_free(struct kvm *kvm) { }
 static inline void kvm_nested_s2_wp(struct kvm *kvm) { }
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 425e4a2..239bb89 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -337,6 +337,8 @@ struct kvm_s2_trans {
 void update_nested_s2_mmu(struct kvm_vcpu *vcpu);
 int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 		       struct kvm_s2_trans *result);
+int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			     struct kvm_s2_trans *trans);
 void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu);
 void kvm_nested_s2_free(struct kvm *kvm);
 void kvm_nested_s2_wp(struct kvm *kvm);
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index 75570cc..a440d7b 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -271,6 +271,28 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 	return walk_nested_s2_pgd(vcpu, gipa, &wi, result);
 }
 
+/*
+ * Returns non-zero if the permission fault was handled by injecting it into
+ * the next-level (guest) hypervisor.
+ */
+int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			     struct kvm_s2_trans *trans)
+{
+	unsigned long fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+	bool write_fault = kvm_is_write_fault(vcpu);
+
+	if (fault_status != FSC_PERM)
+		return 0;
+
+	if ((write_fault && !trans->writable) ||
+	    (!write_fault && !trans->readable)) {
+		trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM);
+		return 1;
+	}
+
+	return 0;
+}
+
 /* expects kvm->mmu_lock to be held */
 void kvm_nested_s2_wp(struct kvm *kvm)
 {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 74941ad..4fb7b3b 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1591,6 +1591,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (ret)
 			goto out_unlock;
 
+		nested_trans.esr = 0;
+		ret = kvm_s2_handle_perm_fault(vcpu, fault_ipa, &nested_trans);
+		if (nested_trans.esr)
+			kvm_inject_s2_fault(vcpu, nested_trans.esr);
+		if (ret)
+			goto out_unlock;
+
 		ipa = nested_trans.output;
 	}
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
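
The decision in kvm_s2_handle_perm_fault() reduces to a small table,
restated here for reference; it only applies to FSC_PERM faults, exactly
as the early return in the patch enforces.

/*
 * FSC_PERM disposition, given the guest hypervisor's stage 2 attributes:
 *
 *   fault   trans->writable   trans->readable   action
 *   write   1                 -                 return 0: host handles it
 *   write   0                 -                 set ESR_ELx_FSC_PERM, return 1
 *   read    -                 1                 return 0: host handles it
 *   read    -                 0                 set ESR_ELx_FSC_PERM, return 1
 *
 * A non-zero return makes kvm_handle_guest_abort() bail out once the
 * ESR has been injected into the virtual EL2.
 */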

* [RFC PATCH v2 15/31] KVM: arm64: Move system register helper functions around
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Jintack Lim <jintack@cs.columbia.edu>

We are about to add a framework to handle system instruction traps. To
reuse existing helper functions, let's move them around.

No functional change.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 89 ++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 395b964..541bb97 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,51 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+#define reg_to_match_value(x)						\
+	({								\
+		unsigned long val;					\
+		val  = (x)->Op0 << 14;					\
+		val |= (x)->Op1 << 11;					\
+		val |= (x)->CRn << 7;					\
+		val |= (x)->CRm << 3;					\
+		val |= (x)->Op2;					\
+		val;							\
+	 })
+
+static int match_sys_reg(const void *key, const void *elt)
+{
+	const unsigned long pval = (unsigned long)key;
+	const struct sys_reg_desc *r = elt;
+
+	return pval - reg_to_match_value(r);
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	unsigned long pval = reg_to_match_value(params);
+
+	return bsearch((void *)pval, table, num, sizeof(table[0]),
+		       match_sys_reg);
+}
+
+static void perform_access(struct kvm_vcpu *vcpu,
+			   struct sys_reg_params *params,
+			   const struct sys_reg_desc *r)
+{
+	/*
+	 * Not having an accessor means that we have configured a trap
+	 * that we don't know how to handle. This certainly qualifies
+	 * as a gross bug that should be fixed right away.
+	 */
+	BUG_ON(!r->access);
+
+	/* Skip instruction if instructed so */
+	if (likely(r->access(vcpu, params, r)))
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+}
+
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
@@ -1968,56 +2013,12 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
 	}
 }
 
-#define reg_to_match_value(x)						\
-	({								\
-		unsigned long val;					\
-		val  = (x)->Op0 << 14;					\
-		val |= (x)->Op1 << 11;					\
-		val |= (x)->CRn << 7;					\
-		val |= (x)->CRm << 3;					\
-		val |= (x)->Op2;					\
-		val;							\
-	 })
-
-static int match_sys_reg(const void *key, const void *elt)
-{
-	const unsigned long pval = (unsigned long)key;
-	const struct sys_reg_desc *r = elt;
-
-	return pval - reg_to_match_value(r);
-}
-
-static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
-					 const struct sys_reg_desc table[],
-					 unsigned int num)
-{
-	unsigned long pval = reg_to_match_value(params);
-
-	return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
-}
-
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
-static void perform_access(struct kvm_vcpu *vcpu,
-			   struct sys_reg_params *params,
-			   const struct sys_reg_desc *r)
-{
-	/*
-	 * Not having an accessor means that we have configured a trap
-	 * that we don't know how to handle. This certainly qualifies
-	 * as a gross bug that should be fixed right away.
-	 */
-	BUG_ON(!r->access);
-
-	/* Skip instruction if instructed so */
-	if (likely(r->access(vcpu, params, r)))
-		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-}
-
 /*
  * emulate_cp --  tries to match a sys_reg access in a handling table, and
  *                call the corresponding trap handler.
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
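
Since find_reg() is about to serve the system instruction path as well,
it is worth seeing how the search key is built. A worked example of the
reg_to_match_value() packing, using the AT S1E1R encoding (Op0=1, Op1=0,
CRn=7, CRm=8, Op2=0) as an assumed input:

/*
 * reg_to_match_value() packing for AT S1E1R (Op0=1, Op1=0, CRn=7,
 * CRm=8, Op2=0):
 *
 *   val = (1 << 14) | (0 << 11) | (7 << 7) | (8 << 3) | 0
 *       = 16384 + 896 + 64
 *       = 17344
 *
 * Packing the fields most-significant-first keeps the integer order
 * identical to the (Op0, Op1, CRn, CRm, Op2) ordering of the tables,
 * which is what the bsearch() in find_reg() relies on.
 */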

* [RFC PATCH v2 16/31] KVM: arm64: Introduce sys_reg_desc.forward_trap
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

This introduces a forward_trap function pointer in sys_reg_desc, used to
determine whether we need to forward system instruction traps to the
virtual EL2. The implementations of forward_trap for individual system
instructions will be added in later patches.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 8 ++++++++
 arch/arm64/kvm/sys_regs.h | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 541bb97..88ce172 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1661,6 +1661,14 @@ static void perform_access(struct kvm_vcpu *vcpu,
 	 */
 	BUG_ON(!r->access);
 
+	/*
+	 * Forward this trap to the virtual EL2 if the guest hypervisor has
+	 * configured the current instruction to trap.
+	 */
+	if (nested_virt_in_use(vcpu) && r->forward_trap
+	    && unlikely(r->forward_trap(vcpu)))
+		return;
+
 	/* Skip instruction if instructed so */
 	if (likely(r->access(vcpu, params, r)))
 		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 827717b..6dd4008 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -61,6 +61,12 @@ struct sys_reg_desc {
 			const struct kvm_one_reg *reg, void __user *uaddr);
 	int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 			const struct kvm_one_reg *reg, void __user *uaddr);
+
+	/*
+	 * Forward the trap to the virtual EL2 if the guest hypervisor has
+	 * configured the current instruction to trap.
+	 */
+	bool (*forward_trap)(struct kvm_vcpu *vcpu);
 };
 
 static inline void print_sys_reg_instr(const struct sys_reg_params *p)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
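
A sketch of the shape the later patches take: a forward_trap hook tests
the corresponding control bit in the virtual HCR_EL2. The forward_traps()
helper below is an assumption for illustration; this patch itself only
adds the hook.

/*
 * Sketch only: forward AT instruction traps to the virtual EL2 when
 * the guest hypervisor set HCR_EL2.AT. Assumes a forward_traps()
 * helper that tests the given bit in the virtual HCR_EL2 value.
 */
static bool forward_at_traps(struct kvm_vcpu *vcpu)
{
	return forward_traps(vcpu, HCR_AT);
}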

* [RFC PATCH v2 17/31] KVM: arm64: Rework the system instruction emulation framework
@ 2017-10-03  3:10 ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Rework the system instruction emulation framework to potentially handle
all system instruction traps other than MSR/MRS instructions. These are
the AT and TLBI instructions, controlled by the HCR_EL2.NV, AT, and
TTLB bits.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 66 ++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 88ce172..481bea64 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,11 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+#define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
+	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
+static struct sys_reg_desc sys_insn_descs[] = {
+};
+
 #define reg_to_match_value(x)						\
 	({								\
 		unsigned long val;					\
@@ -1674,6 +1679,25 @@ static void perform_access(struct kvm_vcpu *vcpu,
 		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 }
 
+static int emulate_sys_instr(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
+{
+
+	const struct sys_reg_desc *r;
+
+	/* Search from the system instruction table. */
+	r = find_reg(p, sys_insn_descs, ARRAY_SIZE(sys_insn_descs));
+
+	if (likely(r)) {
+		perform_access(vcpu, p, r);
+	} else {
+		kvm_err("Unsupported guest sys instruction at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(p);
+		kvm_inject_undefined(vcpu);
+	}
+	return 1;
+}
+
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
@@ -2236,47 +2260,6 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int emulate_tlbi(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	/* TODO: support tlbi instruction emulation*/
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int emulate_at(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	/* TODO: support address translation instruction emulation */
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int emulate_sys_instr(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	int ret = 0;
-
-	/*
-	 * Forward this trap to the virtual EL2 if the virtual HCR_EL2.NV
-	 * bit is set.
-	 */
-	if (forward_nv_traps(vcpu))
-		return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
-
-	/* TLB maintenance instructions*/
-	if (params->CRn == 0b1000)
-		ret = emulate_tlbi(vcpu, params);
-	/* Address Translation instructions */
-	else if (params->CRn == 0b0111 && params->CRm == 0b1000)
-		ret = emulate_at(vcpu, params);
-
-	if (ret)
-		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-
-	return ret;
-}
-
 static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
 			      const struct sys_reg_desc *table, size_t num)
 {
@@ -2754,6 +2737,7 @@ void kvm_sys_reg_table_init(void)
 	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
 	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
 	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+	BUG_ON(check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs)));
 
 	/* We abuse the reset function to overwrite the table itself. */
 	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
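
With the framework in place, later patches only have to fill in
sys_insn_descs. A hypothetical entry, with placeholder handler and
forwarding hook names, would look like this:

/* Hypothetical population of the (currently empty) instruction table. */
static struct sys_reg_desc sys_insn_descs[] = {
	/* AT S1E1R: emulate the walk, or forward per virtual HCR_EL2.AT */
	SYS_INSN_TO_DESC(AT_S1E1R, handle_s1e01, forward_at_traps),
};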

* [RFC PATCH v2 17/31] KVM: arm64: Rework the system instruction emulation framework
@ 2017-10-03  3:10   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:10 UTC (permalink / raw)
  To: linux-arm-kernel

Rework the system instruction emulation framework to handle potentially
all system instruction traps other than MSR/MRS instructions. Those
system instructions would be AT and TLBI instructions controlled by
HCR_EL2.NV, AT, and TTLB bits.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 66 ++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 88ce172..481bea64 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,11 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+#define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
+	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
+static struct sys_reg_desc sys_insn_descs[] = {
+};
+
 #define reg_to_match_value(x)						\
 	({								\
 		unsigned long val;					\
@@ -1674,6 +1679,25 @@ static void perform_access(struct kvm_vcpu *vcpu,
 		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 }
 
+static int emulate_sys_instr(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
+{
+
+	const struct sys_reg_desc *r;
+
+	/* Search from the system instruction table. */
+	r = find_reg(p, sys_insn_descs, ARRAY_SIZE(sys_insn_descs));
+
+	if (likely(r)) {
+		perform_access(vcpu, p, r);
+	} else {
+		kvm_err("Unsupported guest sys instruction at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(p);
+		kvm_inject_undefined(vcpu);
+	}
+	return 1;
+}
+
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
@@ -2236,47 +2260,6 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int emulate_tlbi(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	/* TODO: support tlbi instruction emulation*/
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int emulate_at(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	/* TODO: support address translation instruction emulation */
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int emulate_sys_instr(struct kvm_vcpu *vcpu,
-			     struct sys_reg_params *params)
-{
-	int ret = 0;
-
-	/*
-	 * Forward this trap to the virtual EL2 if the virtual HCR_EL2.NV
-	 * bit is set.
-	 */
-	if (forward_nv_traps(vcpu))
-		return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
-
-	/* TLB maintenance instructions*/
-	if (params->CRn == 0b1000)
-		ret = emulate_tlbi(vcpu, params);
-	/* Address Translation instructions */
-	else if (params->CRn == 0b0111 && params->CRm == 0b1000)
-		ret = emulate_at(vcpu, params);
-
-	if (ret)
-		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-
-	return ret;
-}
-
 static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
 			      const struct sys_reg_desc *table, size_t num)
 {
@@ -2754,6 +2737,7 @@ void kvm_sys_reg_table_init(void)
 	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
 	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
 	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+	BUG_ON(check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs)));
 
 	/* We abuse the reset function to overwrite the table itself. */
 	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 18/31] KVM: arm64: Enumerate AT and TLBI instructions to emulate
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

List all system instructions to emulate. This patch only introduces the
definitions; the emulation handlers will be added in subsequent patches.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
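As a reading aid (not part of the patch): assuming sys_insn() packs its
operands with the usual sysreg field shifts, the encodings introduced below
expand as follows. The shift values are an assumption here, not taken from
this series.

/*
 * Hedged sketch of the encoding, assuming Op0/Op1/CRn/CRm/Op2 shifts of
 * 19/16/12/8/5 as in sys_reg():
 *
 *	sys_insn(op0, op1, crn, crm, op2) ==
 *		(op0 << 19) | (op1 << 16) | (crn << 12) |
 *		(crm << 8)  | (op2 << 5)
 *
 * e.g. AT_S1E2R  = sys_insn(1, 4, 7, 8, 0)  -> "at s1e2r"  (Op1 == 4: EL2)
 *      TLBI_VAE2 = sys_insn(1, 4, 8, 7, 1)  -> "tlbi vae2"
 */
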
 arch/arm64/include/asm/sysreg.h | 38 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.c       | 26 ++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index a051d42..53df733 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -367,6 +367,44 @@
 
 #define SYS_SP_EL2			sys_reg(3, 6, 4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define AT_S1E1R	sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define AT_S1E1W	sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define AT_S1E0R	sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define AT_S1E0W	sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define AT_S1E1RP	sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define AT_S1E1WP	sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define AT_S1E2R	sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define AT_S1E2W	sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define AT_S12E1R	sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define AT_S12E1W	sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define AT_S12E0R	sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define AT_S12E0W	sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define TLBI_Op0	1
+#define TLBI_Op1_EL2	4	/* Accessible from EL2 or higher */
+#define TLBI_CRn	8
+#define tlbi_insn_el2(CRm, Op2)	sys_insn(TLBI_Op0, TLBI_Op1_EL2, TLBI_CRn, (CRm), (Op2))
+
+#define TLBI_IPAS2E1IS	tlbi_insn_el2(0, 1)
+#define TLBI_IPAS2LE1IS	tlbi_insn_el2(0, 5)
+#define TLBI_ALLE2IS	tlbi_insn_el2(3, 0)
+#define TLBI_VAE2IS	tlbi_insn_el2(3, 1)
+#define TLBI_ALLE1IS	tlbi_insn_el2(3, 4)
+#define TLBI_VALE2IS	tlbi_insn_el2(3, 5)
+#define TLBI_VMALLS12E1IS	tlbi_insn_el2(3, 6)
+#define TLBI_IPAS2E1	tlbi_insn_el2(4, 1)
+#define TLBI_IPAS2LE1	tlbi_insn_el2(4, 5)
+#define TLBI_ALLE2	tlbi_insn_el2(7, 0)
+#define TLBI_VAE2	tlbi_insn_el2(7, 1)
+#define TLBI_ALLE1	tlbi_insn_el2(7, 4)
+#define TLBI_VALE2	tlbi_insn_el2(7, 5)
+#define TLBI_VMALLS12E1	tlbi_insn_el2(7, 6)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_EE    (1 << 25)
 #define SCTLR_ELx_I	(1 << 12)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 481bea64..8d04926 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1624,6 +1624,32 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 #define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
 	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
 static struct sys_reg_desc sys_insn_descs[] = {
+	SYS_INSN_TO_DESC(AT_S1E1R, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0R, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1RP, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1WP, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E2R, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E2W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S12E1R, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S12E1W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S12E0R, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S12E0W, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VAE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VALE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VAE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VALE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, NULL, NULL),
 };
 
 #define reg_to_match_value(x)						\
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 19/31] KVM: arm64: Describe AT instruction emulation design
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

This design overview will help readers digest the subsequent patches that
implement AT instruction emulation.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
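As a reading aid (not part of the patch): the second table below boils down
to a single predicate. A minimal sketch, assuming the vcpu_el2_e2h_is_set()
and vcpu_el2_tge_is_set() helpers introduced earlier in this series:

/*
 * Hedged sketch: pick the EL1 register context for S1E[01]x emulation
 * per the '2. EL0/EL1 AT instructions' table below.
 */
static bool at_s1e01_uses_vel2_context(const struct kvm_vcpu *vcpu)
{
	/* (vE2H, vTGE) == (1, 1): the EL2 translation regime applies */
	return vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu);
}
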
 arch/arm64/kvm/sys_regs.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 8d04926..d8728cc 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,72 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+/*
+ * AT instruction emulation
+ *
+ * We emulate AT instructions executed in the virtual EL2.
+ * The basic strategy for stage-1 translation emulation is to load the proper
+ * context, which depends on the trapped instruction and the virtual HCR_EL2,
+ * into the EL1 virtual memory control registers and execute S1E[01]
+ * instructions in EL2. See below for more detail.
+ *
+ * For the stage-2 translation, which is necessary for S12E[01] emulation,
+ * we walk the guest hypervisor's stage-2 page table in software.
+ *
+ * The stage-1 translation emulations can be divided into two groups depending
+ * on the translation regime.
+ *
+ * 1. EL2 AT instructions: S1E2x
+ * +-----------------------------------------------------------------------+
+ * |                             |         Setting for the emulation       |
+ * | Virtual HCR_EL2.E2H on trap |-----------------------------------------+
+ * |                             | Phys EL1 regs | Phys NV, NV1 | Phys TGE |
+ * |-----------------------------------------------------------------------|
+ * |             0               |     vEL2      |    (1, 1)    |    0     |
+ * |             1               |     vEL2      |    (0, 0)    |    0     |
+ * +-----------------------------------------------------------------------+
+ *
+ * We emulate the EL2 AT instructions by loading virtual EL2 context
+ * to the EL1 virtual memory control registers and executing corresponding
+ * EL1 AT instructions.
+ *
+ * We set the physical NV and NV1 bits to use the EL2 page table format for a
+ * non-VHE guest hypervisor (i.e. HCR_EL2.E2H == 0). Since a VHE guest
+ * hypervisor uses the EL1 page table format, we don't set those bits.
+ *
+ * We clear the physical TGE bit so that the EL2 translation regime is not
+ * used when the host has the VHE feature.
+ *
+ *
+ * 2. EL0/EL1 AT instructions: S1E[01]x, S12E1x
+ * +----------------------------------------------------------------------+
+ * |   Virtual HCR_EL2 on trap  |        Setting for the emulation        |
+ * |----------------------------------------------------------------------+
+ * | (vE2H, vTGE) | (vNV, vNV1) | Phys EL1 regs | Phys NV, NV1 | Phys TGE |
+ * |----------------------------------------------------------------------|
+ * |    (0, 0)*   |   (0, 0)    |      vEL1     |    (0, 0)    |    0     |
+ * |    (0, 0)    |   (1, 1)    |      vEL1     |    (1, 1)    |    0     |
+ * |    (1, 1)    |   (0, 0)    |      vEL2     |    (0, 0)    |    0     |
+ * |    (1, 1)    |   (1, 1)    |      vEL2     |    (1, 1)    |    0     |
+ * +----------------------------------------------------------------------+
+ *
+ * *(0, 0) in the 'Virtual HCR_EL2 on trap' column stands for any pair other
+ *  than (1, 1); we write (0, 0) in the table for readability.
+ *
+ * We set the physical EL1 virtual memory control registers depending on the
+ * (vE2H, vTGE) pair. When the pair is (0, 0), where AT instructions are
+ * supposed to use the EL0/EL1 translation regime, we load the EL1 registers
+ * with the virtual EL1 registers (i.e. the EL1 registers from the guest
+ * hypervisor's point of view). When the pair is (1, 1), AT instructions are
+ * defined to apply the EL2 translation regime; to emulate this, we load the
+ * EL1 registers with the virtual EL2 context (i.e. the shadow registers).
+ *
+ * We respect the virtual NV and NV1 bits for the emulation. When those bits
+ * are set, the guest hypervisor wants to use the EL2 page table format for
+ * the EL1 translation regime. We emulate this by setting the physical
+ * NV and NV1 bits.
+ */
+
 #define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
 	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
 static struct sys_reg_desc sys_insn_descs[] = {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 20/31] KVM: arm64: Implement AT instruction handling
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Implement AT instruction handling logic in EL2. This will be used to
emulate AT instructions executed in the virtual EL2.

AT instruction emulation works by loading the proper processor context,
which depends on the trapped instruction and the virtual HCR_EL2, into
the EL1 virtual memory control registers and then executing the AT
instruction. Note that ctxt->hw_sys_regs is expected to hold the proper
processor context before calling the handling function (__kvm_at_insn)
implemented in this patch.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
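As a usage note (not part of the patch): a handler running at EL1 is
expected to point ctxt->hw_sys_regs at the register file to load before
invoking __kvm_at_insn through kvm_call_hyp(). A minimal sketch, with
sketch_at_s1e1r() being a hypothetical name:

/* Hedged sketch: translate 'vaddr' for the vEL1 regime via AT S1E1R. */
static u64 sketch_at_s1e1r(struct kvm_vcpu *vcpu, unsigned long vaddr)
{
	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;

	ctxt->hw_sys_regs = ctxt->sys_regs;	/* load the vEL1 registers */
	kvm_call_hyp(__kvm_at_insn, vcpu, vaddr, false /* EL1 regime */,
		     AT_S1E1R);

	return ctxt->sys_regs[PAR_EL1];		/* result left in vPAR_EL1 */
}
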
 arch/arm64/include/asm/kvm_asm.h |   3 +
 arch/arm64/kvm/hyp/Makefile      |   1 +
 arch/arm64/kvm/hyp/at.c          | 131 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp/at.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index e492749..4bded9d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -56,6 +56,9 @@
 extern void __kvm_tlb_flush_vmid(u64 vttbr);
 extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
 
+extern void __kvm_at_insn(struct kvm_vcpu *vcpu, unsigned long vaddr,
+			  bool el2_regime, int sys_encoding);
+
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 14c4e3b..1b03adb 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += switch.o
 obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
 obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
+obj-$(CONFIG_KVM_ARM_HOST) += at.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
 
diff --git a/arch/arm64/kvm/hyp/at.c b/arch/arm64/kvm/hyp/at.c
new file mode 100644
index 0000000..d491d94
--- /dev/null
+++ b/arch/arm64/kvm/hyp/at.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2017 - Linaro Ltd
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/kvm_hyp.h>
+
+static void __hyp_text __save_vmregs(struct kvm_cpu_context *ctxt)
+{
+	u64 *sys_regs = kern_hyp_va(ctxt->hw_sys_regs);
+
+	sys_regs[TTBR0_EL1]     = read_sysreg_el1(ttbr0);
+	sys_regs[TTBR1_EL1]     = read_sysreg_el1(ttbr1);
+	sys_regs[TCR_EL1]       = read_sysreg_el1(tcr);
+	sys_regs[SCTLR_EL1]     = read_sysreg_el1(sctlr);
+}
+
+static void __hyp_text __restore_vmregs(struct kvm_cpu_context *ctxt)
+{
+	u64 *sys_regs = kern_hyp_va(ctxt->hw_sys_regs);
+
+	write_sysreg_el1(sys_regs[TTBR0_EL1],   ttbr0);
+	write_sysreg_el1(sys_regs[TTBR1_EL1],   ttbr1);
+	write_sysreg_el1(sys_regs[TCR_EL1],     tcr);
+	write_sysreg_el1(sys_regs[SCTLR_EL1],   sctlr);
+}
+
+void __hyp_text __at_switch_to_guest_nvhe(struct kvm_vcpu *vcpu,
+					  bool el2_regime)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	u64 val;
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	__save_vmregs(host_ctxt);
+	__restore_vmregs(guest_ctxt);
+
+	val = read_sysreg(hcr_el2);
+	if (el2_regime)
+		val |= (HCR_NV | HCR_NV1);
+	write_sysreg(val, hcr_el2);
+}
+
+void __hyp_text __at_switch_to_guest_vhe(struct kvm_vcpu *vcpu, bool el2_regime)
+{
+	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+	u64 val;
+
+	__restore_vmregs(guest_ctxt);
+
+	val = read_sysreg(hcr_el2);
+	val &= ~HCR_TGE;
+	if (el2_regime)
+		val |= (HCR_NV | HCR_NV1);
+	write_sysreg(val, hcr_el2);
+}
+
+/*
+ * Switching to guest.
+ *
+ * 1. [nvhe] Save host vm regs
+ * 2. [both] Restore guest vm regs
+ * 3. [both] Set HCR_EL2.NV/NV1 bit if necessary
+ * 4. [vhe]  Clear HCR_EL2.TGE
+ */
+static hyp_alternate_select(__at_switch_to_guest,
+			    __at_switch_to_guest_nvhe, __at_switch_to_guest_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __kvm_at_insn(struct kvm_vcpu *vcpu, unsigned long vaddr,
+			      bool el2_regime, int sys_encoding)
+{
+	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+	struct kvm_cpu_context *host_ctxt;
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+
+	__at_switch_to_guest()(vcpu, el2_regime);
+
+	switch (sys_encoding) {
+	case AT_S1E1R:
+	case AT_S1E2R:
+		asm volatile("at s1e1r, %0" : : "r" (vaddr));
+		break;
+	case AT_S1E1W:
+	case AT_S1E2W:
+		asm volatile("at s1e1w, %0" : : "r" (vaddr));
+		break;
+	case AT_S1E0R:
+		asm volatile("at s1e0r, %0" : : "r" (vaddr));
+		break;
+	case AT_S1E0W:
+		asm volatile("at s1e0w, %0" : : "r" (vaddr));
+		break;
+	case AT_S1E1RP:
+		asm volatile("at s1e1rp, %0" : : "r" (vaddr));
+		break;
+	case AT_S1E1WP:
+		asm volatile("at s1e1wp, %0" : : "r" (vaddr));
+		break;
+	default:
+		break;
+	}
+
+	/* Save the translation result to the virtual machine's context */
+	ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+
+	/* Switch to the host */
+	if (has_vhe()) {
+		write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+	} else {
+		/* We don't save guest vm regs; we didn't make any changes */
+		__restore_vmregs(host_ctxt);
+		write_sysreg(HCR_RW, hcr_el2);
+	}
+}
-- 
1.9.1
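
On the hyp_alternate_select() construct above (a note, not from the patch):
under the assumption that it resolves the callee once via the
ARM64_HAS_VIRT_HOST_EXTN capability, it behaves roughly like this
hypothetical helper, minus the runtime branch:

typedef void (*at_switch_fn)(struct kvm_vcpu *, bool);

/* Hedged sketch; the real macro patches the choice in via alternatives. */
static at_switch_fn sketch_at_switch_to_guest(void)
{
	return has_vhe() ? __at_switch_to_guest_vhe
			 : __at_switch_to_guest_nvhe;
}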

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 21/31] KVM: arm64: Emulate AT S1E[01] instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Emulate AT S1E[01] instructions by issuing the same instructions in EL2. We
set the physical EL1 registers and the NV and NV1 bits as described in the
AT instruction emulation overview.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
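As a reading aid (not part of the patch): from the guest hypervisor's point
of view the round trip looks roughly as follows, with PAR_EL1.F (bit 0)
clear indicating a successful translation; sketch_at_succeeded() is a
hypothetical name.

/*
 * Hedged sketch: the guest hypervisor executes
 *	at  s1e1r, x0		// traps to KVM, emulated by handle_s1e01()
 *	mrs x1, par_el1		// reads back the emulated result
 * and KVM must make the PAR_EL1 read return the value saved by
 * __kvm_at_insn().
 */
static bool sketch_at_succeeded(u64 par)
{
	return !(par & 1);	/* PAR_EL1.F == 0: translation succeeded */
}
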
 arch/arm64/include/asm/kvm_emulate.h | 11 +++++++++++
 arch/arm64/kvm/sys_regs.c            | 32 ++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4c47bc7..a494db2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -185,6 +185,17 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
 	return (vcpu_sys_reg(vcpu, HCR_EL2) & HCR_TGE);
 }
 
+
+/*
+ * When the NV and NV1 bits are set, the EL2 page table format is used for the
+ * EL1 translation regime.
+ */
+static inline bool vcpu_el2_format_used(const struct kvm_vcpu *vcpu)
+{
+	return ((vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) &&
+		(vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV1));
+}
+
 static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
 {
 	/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d8728cc..a82274f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,26 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+static bool handle_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+	bool el2_format;
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	/* See '2. EL0/EL1 AT instructions: S1E[01]x, S12E1x' table. */
+	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
+		ctxt->hw_sys_regs = ctxt->shadow_sys_regs;
+	else
+		ctxt->hw_sys_regs = ctxt->sys_regs;
+
+	el2_format = vcpu_el2_format_used(vcpu);
+
+	kvm_call_hyp(__kvm_at_insn, vcpu, p->regval, el2_format, sys_encoding);
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1690,12 +1710,12 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 #define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
 	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
 static struct sys_reg_desc sys_insn_descs[] = {
-	SYS_INSN_TO_DESC(AT_S1E1R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1W, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0W, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1RP, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1WP, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1R, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1W, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0R, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2R, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2W, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S12E1R, NULL, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 21/31] KVM: arm64: Emulate AT S1E[01] instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: linux-arm-kernel

Emulate AT S1E[01] instructions by issuing the same instructions in EL2. We
set the physical EL1 registers, NV and NV1 bits as described in the AT
instruction emulation overview.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 11 +++++++++++
 arch/arm64/kvm/sys_regs.c            | 32 ++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4c47bc7..a494db2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -185,6 +185,17 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
 	return (vcpu_sys_reg(vcpu, HCR_EL2) & HCR_TGE);
 }
 
+
+/*
+ * When the NV and NV1 bits are set, the EL2 page table format is used for the
+ * EL1 translation regime.
+ */
+static inline bool vcpu_el2_format_used(const struct kvm_vcpu *vcpu)
+{
+	return ((vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) &&
+		(vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV1));
+}
+
 static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
 {
 	/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d8728cc..a82274f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1621,6 +1621,26 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
 };
 
+static bool handle_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+	bool el2_format;
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	/* See '2. EL0/EL1 AT instructions: S1E[01]x, S12E1x' table. */
+	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
+		ctxt->hw_sys_regs = ctxt->shadow_sys_regs;
+	else
+		ctxt->hw_sys_regs = ctxt->sys_regs;
+
+	el2_format = vcpu_el2_format_used(vcpu);
+
+	kvm_call_hyp(__kvm_at_insn, vcpu, p->regval, el2_format, sys_encoding);
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1690,12 +1710,12 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
 #define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
 	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
 static struct sys_reg_desc sys_insn_descs[] = {
-	SYS_INSN_TO_DESC(AT_S1E1R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1W, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0W, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1RP, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1WP, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1R, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1W, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0R, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2R, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2W, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S12E1R, NULL, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [RFC PATCH v2 22/31] KVM: arm64: Emulate AT S1E2 instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Emulate AT S1E2 instructions by issuing the corresponding S1E1
instructions in EL2. We set the physical EL1 registers and the HCR_EL2
register as described in the AT instruction emulation overview.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a82274f..cb46db5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1641,6 +1641,21 @@ static bool handle_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+	bool el2_format;
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	/* See the '1. EL2 AT instructions: S1E2x' table */
+	ctxt->hw_sys_regs = ctxt->shadow_sys_regs;
+	el2_format = !vcpu_el2_e2h_is_set(vcpu);
+
+	kvm_call_hyp(__kvm_at_insn, vcpu, p->regval, el2_format, sys_encoding);
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1716,8 +1731,8 @@ static bool handle_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, NULL),
 	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, NULL),
 	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E2R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S1E2W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, NULL),
+	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, NULL),
 	SYS_INSN_TO_DESC(AT_S12E1R, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S12E1W, NULL, NULL),
 	SYS_INSN_TO_DESC(AT_S12E0R, NULL, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
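
For reference, the two decisions made by handle_s1e01() and handle_s1e2(),
namely which register file backs the translation and whether the EL2 page
table format applies, reduce to a pair of predicates on the virtual HCR_EL2
bits. A stand-alone restatement of those tables (illustration only, not
kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Virtual HCR_EL2 bits relevant to AT emulation (illustration only). */
struct vhcr {
	bool e2h, tge, nv, nv1;
};

/* S1E[01]x: the EL1 regime lives in the shadow registers only when the
 * guest hypervisor runs with E2H+TGE (a VHE host with no separate EL1). */
static bool s1e01_uses_shadow_regs(struct vhcr v)
{
	return v.e2h && v.tge;
}

/* S1E[01]x: the EL1 regime uses the EL2 page table format when both NV
 * and NV1 are set. */
static bool s1e01_uses_el2_format(struct vhcr v)
{
	return v.nv && v.nv1;
}

/* S1E2x: always backed by the shadow registers; the EL2 page table
 * format applies unless E2H gives EL2 a VHE-style, EL1-like regime. */
static bool s1e2_uses_el2_format(struct vhcr v)
{
	return !v.e2h;
}

int main(void)
{
	struct vhcr v = { .e2h = true, .tge = true };

	printf("S1E01 shadow regs: %d\n", s1e01_uses_shadow_regs(v));
	printf("S1E01 EL2 format:  %d\n", s1e01_uses_el2_format(v));
	printf("S1E2 EL2 format:   %d\n", s1e2_uses_el2_format(v));
	return 0;
}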

* [RFC PATCH v2 23/31] KVM: arm64: Emulate AT S12E[01] instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Emulating AT S12E[01] instructions involves two steps. First, do the
stage-1 translation by reusing the existing AT emulation functions. Then
do the stage-2 translation by walking the guest hypervisor's stage-2
page table in software. Finally, record the combined translation result
in PAR_EL1.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  1 +
 arch/arm64/kvm/sys_regs.c        | 99 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3993703..e160895 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -111,6 +111,7 @@
 #define VTCR_EL2_TG0_16K	TCR_TG0_16K
 #define VTCR_EL2_TG0_64K	TCR_TG0_64K
 #define VTCR_EL2_SH0_MASK	TCR_SH0_MASK
+#define VTCR_EL2_SH0_SHIFT	TCR_SH0_SHIFT
 #define VTCR_EL2_SH0_INNER	TCR_SH0_INNER
 #define VTCR_EL2_ORGN0_MASK	TCR_ORGN0_MASK
 #define VTCR_EL2_ORGN0_WBWA	TCR_ORGN0_WBWA
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index cb46db5..7950ee0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1656,6 +1656,97 @@ static bool handle_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static u64 setup_par_aborted(u32 esr)
+{
+	u64 par = 0;
+
+	/* S [9]: fault in the stage 2 translation */
+	par |= (1 << 9);
+	/* FST [6:1]: Fault status code  */
+	par |= (esr << 1);
+	/* F [0]: translation is aborted */
+	par |= 1;
+
+	return par;
+}
+
+static u64 setup_par_completed(struct kvm_vcpu *vcpu, struct kvm_s2_trans *out)
+{
+	u64 par, vtcr_sh0;
+
+	/* F [0]: Translation is completed successfully */
+	par = 0;
+	/* ATTR [63:56] */
+	par |= out->upper_attr;
+	/* PA [47:12] */
+	par |= out->output & GENMASK_ULL(47, 12);
+	/* RES1 [11] */
+	par |= (1UL << 11);
+	/* SH [8:7]: Shareability attribute */
+	vtcr_sh0 = vcpu_sys_reg(vcpu, VTCR_EL2) & VTCR_EL2_SH0_MASK;
+	par |= (vtcr_sh0 >> VTCR_EL2_SH0_SHIFT) << 7;
+
+	return par;
+}
+
+static bool handle_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+		       const struct sys_reg_desc *r, bool write)
+{
+	u64 par, va;
+	u32 esr;
+	phys_addr_t ipa;
+	struct kvm_s2_trans out;
+	int ret;
+
+	/* Do the stage-1 translation */
+	handle_s1e01(vcpu, p, r);
+	par = vcpu_sys_reg(vcpu, PAR_EL1);
+	if (par & 1) {
+		/* The stage-1 translation aborted */
+		return true;
+	}
+
+	/* Do the stage-2 translation */
+	va = p->regval;
+	ipa = (par & GENMASK_ULL(47, 12)) | (va & GENMASK_ULL(11, 0));
+	out.esr = 0;
+	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+	if (ret < 0)
+		return false;
+
+	/* Check if the stage-2 PTW is aborted */
+	if (out.esr) {
+		esr = out.esr;
+		goto s2_trans_abort;
+	}
+
+	/* Check the access permission */
+	if ((!write && !out.readable) || (write && !out.writable)) {
+		esr = ESR_ELx_FSC_PERM;
+		esr |= out.level & 0x3;
+		goto s2_trans_abort;
+	}
+
+	vcpu_sys_reg(vcpu, PAR_EL1) = setup_par_completed(vcpu, &out);
+	return true;
+
+s2_trans_abort:
+	vcpu_sys_reg(vcpu, PAR_EL1) = setup_par_aborted(esr);
+	return true;
+}
+
+static bool handle_s12r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	return handle_s12(vcpu, p, r, false);
+}
+
+static bool handle_s12w(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	return handle_s12(vcpu, p, r, true);
+}
+
 /*
  * AT instruction emulation
  *
@@ -1733,10 +1824,10 @@ static bool handle_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, NULL),
 	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, NULL),
-	SYS_INSN_TO_DESC(AT_S12E1R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S12E1W, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S12E0R, NULL, NULL),
-	SYS_INSN_TO_DESC(AT_S12E0W, NULL, NULL),
+	SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, NULL),
+	SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, NULL),
+	SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, NULL),
+	SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2IS, NULL, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
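
To make the PAR_EL1 packing in setup_par_completed() concrete, here is a
small stand-alone sketch that builds a successful-translation PAR value the
same way; the sample output address, attributes, and shareability value are
made up for illustration.

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t output = 0x80123000ULL;	/* sample output PA */
	uint64_t upper_attr = 0xffULL << 56;	/* sample ATTR field */
	uint64_t sh0 = 0x3;			/* sample VTCR.SH0 value */
	uint64_t par = 0;			/* F [0] = 0: success */

	par |= upper_attr;			/* ATTR [63:56] */
	par |= output & GENMASK_ULL(47, 12);	/* PA [47:12] */
	par |= 1ULL << 11;			/* RES1 [11] */
	par |= sh0 << 7;			/* SH [8:7] */

	printf("PAR_EL1 = 0x%016llx\n", (unsigned long long)par);
	return 0;
}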

* [RFC PATCH v2 24/31] KVM: arm64: Emulate TLBI ALLE2(IS) instruction
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Emulate the TLBI ALLE2(IS) instruction executed in the virtual EL2. Since
we run the virtual EL2 in EL1, we invalidate the EL1&0 regime stage 1 TLB
entries after setting vttbr_el2 to the VMID of the virtual EL2.

Note that we could emulate TLBI ALLE2IS precisely by invalidating only
stage 1 TLB entries via the TLBI VMALLE1IS instruction, but to keep it
simple we reuse the existing function, __kvm_tlb_flush_vmid(), which
invalidates both stage 1 and stage 2 TLB entries.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7950ee0..90329b7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1747,6 +1747,37 @@ static bool handle_s12w(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return handle_s12(vcpu, p, r, true);
 }
 
+static bool handle_alle2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	u64 vttbr = kvm_get_vttbr(&mmu->el2_vmid, mmu);
+
+	/*
+	 * To emulate invalidating all EL2 regime stage 1 TLB entries,
+	 * invalidate EL1&0 regime stage 1 TLB entries with the virtual EL2's
+	 * VMID.
+	 */
+	kvm_call_hyp(__kvm_tlb_flush_local_vmid, vttbr);
+	return true;
+}
+
+static bool handle_alle2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	u64 vttbr = kvm_get_vttbr(&mmu->el2_vmid, mmu);
+
+	/*
+	 * To emulate invalidating all EL2 regime stage 1 TLB entries for all
+	 * PEs, executing TLBI VMALLE1IS is enough. But we reuse the existing
+	 * interface for simplicity; invalidating stage 2 entries doesn't
+	 * affect correctness.
+	 */
+	kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1830,14 +1861,14 @@ static bool handle_s12w(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2, NULL, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
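
The emulation above relies on the VTTBR_EL2 value pairing the stage 2 table
base with the VMID that tags the TLB entries being invalidated. A stand-alone
sketch of that pairing, assuming 8-bit VMIDs in VTTBR bits [55:48]; the base
address and VMID are made up for illustration.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t baddr = 0x40000000ULL;	/* sample stage 2 pgd address */
	uint64_t vmid  = 5;		/* VMID the host assigned to vEL2 */
	uint64_t vttbr = baddr | (vmid << 48);

	printf("VTTBR_EL2 = 0x%016llx\n", (unsigned long long)vttbr);
	return 0;
}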

* [RFC PATCH v2 25/31] KVM: arm64: Emulate TLBI VAE2* instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Emulate the TLBI VAE2* instructions executed in the virtual EL2. Based on
the same principle as the TLBI ALLE2 instruction, we can simply emulate
these instructions by executing the corresponding VAE1* instructions with
the virtual EL2's VMID assigned by the host hypervisor.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |  1 +
 arch/arm64/kvm/hyp/tlb.c         | 28 ++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.c        | 25 +++++++++++++++++++++----
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 4bded9d..cd7fb85 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -55,6 +55,7 @@
 extern void __kvm_tlb_flush_vmid_ipa(u64 vttbr, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(u64 vttbr);
 extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
+extern void __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding);
 
 extern void __kvm_at_insn(struct kvm_vcpu *vcpu, unsigned long vaddr,
 			  bool el2_regime, int sys_encoding);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 680b960..bd8b92c 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -151,3 +151,31 @@ void __hyp_text __kvm_flush_vm_context(void)
 	asm volatile("ic ialluis" : : );
 	dsb(ish);
 }
+
+void __hyp_text __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding)
+{
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest()(vttbr);
+
+	/* Execute the EL1 version of TLBI VAE2* instruction */
+	switch (sys_encoding) {
+	case TLBI_VAE2IS:
+		__tlbi(vae1is, va);
+		break;
+	case TLBI_VALE2IS:
+		__tlbi(vale1is, va);
+		break;
+	case TLBI_VAE2:
+		__tlbi(vae1, va);
+		break;
+	case TLBI_VALE2:
+		__tlbi(vale1, va);
+		break;
+	default:
+		break;
+	}
+	dsb(nsh);
+	isb();
+
+	__tlb_switch_to_host()();
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 90329b7..a1ae8fb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1778,6 +1778,23 @@ static bool handle_alle2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_vae2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+		       const struct sys_reg_desc *r)
+{
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	u64 vttbr = kvm_get_vttbr(&mmu->el2_vmid, mmu);
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	/*
+	 * Based on the same principle as TLBI ALLE2 instruction emulation, we
+	 * emulate TLBI VAE2* instructions by executing corresponding TLBI VAE1*
+	 * instructions with the virtual EL2's VMID assigned by the host
+	 * hypervisor.
+	 */
+	kvm_call_hyp(__kvm_tlb_vae2, vttbr, p->regval, sys_encoding);
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1862,16 +1879,16 @@ static bool handle_alle2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, NULL),
-	SYS_INSN_TO_DESC(TLBI_VAE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1IS, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_VALE2IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VAE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_VALE2, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, NULL, NULL),
 };
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
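
The va value passed to __kvm_tlb_vae2() is the guest's Xt operand. For TLBI
by VA, Xt carries VA[55:12] in its low 44 bits (the E1 variants additionally
carry an ASID in the top bits, unused for the E2 regime). A stand-alone
sketch of how a guest hypervisor forms that operand; the address is a sample
value for illustration.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t va = 0xffff800000123000ULL;		/* sample EL2 VA */
	uint64_t xt = (va >> 12) & ((1ULL << 44) - 1);	/* VA[55:12] */

	printf("TLBI VAE2 operand = 0x%016llx\n", (unsigned long long)xt);
	return 0;
}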

* [RFC PATCH v2 26/31] KVM: arm64: Emulate TLBI ALLE1(IS)
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

The TLBI ALLE1(IS) instruction invalidates all EL1&0 regime stage 1 and 2
TLB entries (on all PEs in the same Inner Shareable domain). To emulate
these instructions, we first need to clear all the mappings in the shadow
page tables, since executing them implies a change of mappings in the
stage 2 page tables maintained by the guest hypervisor. We then need to
invalidate all EL1&0 regime stage 1 and 2 TLB entries of all VMIDs, which
were assigned by the host hypervisor, for this VM.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a1ae8fb..5a82de9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1795,6 +1795,31 @@ static bool handle_vae2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	u64 vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+
+	if (vcpu->kvm->arch.mmu.vmid.vmid_gen) {
+		/*
+		 * Invalidate the stage 1 and 2 TLB entries for the host OS
+		 * in a VM only if there is one.
+		 */
+		kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
+	}
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	/*
+	 * Clear all mappings in the shadow page tables and invalidate the stage
+	 * 1 and 2 TLB entries via kvm_tlb_flush_vmid_ipa().
+	 */
+	kvm_nested_s2_clear(vcpu->kvm);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1880,14 +1905,14 @@ static bool handle_vae2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, NULL, NULL),
 };
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
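
The handler above is a two-phase operation: flush the hardware TLB for the
VMID backing the guest hypervisor's own EL1&0 context (only if that context
ever ran, i.e. vmid_gen is non-zero), then tear down the shadow stage 2
tables, which invalidates the remaining VMIDs entry by entry as it unmaps.
A stub restatement of that ordering (illustration only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static void flush_hw_tlb(unsigned int vmid)
{
	printf("TLBI: drop all entries tagged with VMID %u\n", vmid);
}

static void clear_shadow_stage2(void)
{
	/* Unmapping invalidates per IPA as it goes, mirroring
	 * kvm_tlb_flush_vmid_ipa() in the patch. */
	printf("unmap shadow stage 2 tables for every nested VMID\n");
}

int main(void)
{
	bool host_os_ran = true;	/* vmid_gen != 0 in the patch */

	if (host_os_ran)
		flush_hw_tlb(1);	/* sample host-assigned VMID */
	clear_shadow_stage2();
	return 0;
}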

* [RFC PATCH v2 27/31] KVM: arm64: Emulate TLBI VMALLS12E1(IS) instruction
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Based on the same principle as TLBI ALLE1(IS) emulation, we clear the
mappings in the shadow stage-2 page tables and invalidate TLB entries.
But this time we do it only for the current VMID from the guest
hypervisor's perspective, not for all VMIDs.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_mmu.h |  2 ++
 arch/arm64/kvm/mmu-nested.c      | 23 +++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.c        | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 239bb89..6681be1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -345,6 +345,8 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 void kvm_nested_s2_clear(struct kvm *kvm);
 void kvm_nested_s2_flush(struct kvm *kvm);
 int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+bool kvm_nested_s2_clear_curr_vmid(struct kvm_vcpu *vcpu, phys_addr_t start,
+				   u64 size);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index a440d7b..2189f2b 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -349,6 +349,29 @@ static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
 	return NULL;
 }
 
+/*
+ * Clear mappings in the shadow stage 2 page tables for the current VMID from
+ * the perspective of the guest hypervisor.
+ * This function expects kvm->mmu_lock to be held.
+ */
+bool kvm_nested_s2_clear_curr_vmid(struct kvm_vcpu *vcpu, phys_addr_t start,
+				   u64 size)
+{
+	struct kvm_nested_s2_mmu *nested_mmu;
+	u64 vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2);
+
+	/*
+	 * Look up the mmu that backs the current VMID from the guest
+	 * hypervisor's point of view.
+	 */
+	nested_mmu = lookup_nested_mmu(vcpu, vttbr);
+	if (!nested_mmu)
+		return false;
+
+	kvm_unmap_stage2_range(vcpu->kvm, &nested_mmu->mmu, start, size);
+	return true;
+}
+
 /**
  * create_nested_mmu - create mmu for the given virtual VMID
  *
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5a82de9..5fd47ad 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1820,6 +1820,35 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+				const struct sys_reg_desc *r)
+{
+	u64 vttbr;
+	struct kvm_s2_mmu *mmu;
+	bool ret;
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	/*
+	 * Clear mappings in the shadow page tables and invalidate the stage
+	 * 1 and 2 TLB entries via kvm_tlb_flush_vmid_ipa() for the current
+	 * VMID.
+	 */
+	ret = kvm_nested_s2_clear_curr_vmid(vcpu, 0, KVM_PHYS_SIZE);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (!ret) {
+		/*
+		 * Invalidate TLB entries explicitly for the case that the
+		 * current VMID is for the host OS in the VM; we don't manage
+		 * shadow stage 2 page tables for it.
+		 */
+		mmu = &vcpu->kvm->arch.mmu;
+		vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+		kvm_call_hyp(__kvm_tlb_flush_vmid, vttbr);
+	}
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1907,14 +1936,14 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, handle_vmalls12e1is, NULL),
 };
 
 #define reg_to_match_value(x)						\
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
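
To see what drives the VMID lookup above, consider the sequence a
guest hypervisor typically performs before such an invalidation; this
is a hypothetical sketch (the helper name is illustrative), assuming
the kernel's usual read_sysreg()/write_sysreg() and barrier macros:

	/* Guest hypervisor: flush stage 1+2 entries of one nested VM. */
	static void guest_flush_vm(u64 vm_vttbr)
	{
		u64 old = read_sysreg(vttbr_el2);

		write_sysreg(vm_vttbr, vttbr_el2);	/* select the VMID */
		isb();
		asm volatile("tlbi vmalls12e1is" : : : "memory");
		dsb(ish);
		write_sysreg(old, vttbr_el2);
		isb();
	}

When the TLBI traps, the virtual VTTBR_EL2 still holds vm_vttbr, so
lookup_nested_mmu() can match its VMID against the shadow MMUs; that
is the case in which kvm_nested_s2_clear_curr_vmid() returns true.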

* [RFC PATCH v2 28/31] KVM: arm64: Emulate TLBI IPAS2E1* instructions
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Based on the same principle as the TLBI ALLE1(IS) and TLBI
VMALLS12E1(IS) emulation, we clear the mappings in the shadow stage-2
page tables and invalidate TLB entries. This time we do it only for a
single mapping, for the current VMID from the guest hypervisor's
point of view.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5fd47ad..eb91f00 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1849,6 +1849,36 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			     const struct sys_reg_desc *r)
+{
+	u64 vttbr;
+	struct kvm_s2_mmu *mmu;
+	bool ret;
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	/*
+	 * Clear a mapping in the shadow page tables and invalidate the stage
+	 * 2 TLB entries via kvm_tlb_flush_vmid_ipa() for the current
+	 * VMID and the given ipa.
+	 */
+	ret = kvm_nested_s2_clear_curr_vmid(vcpu, p->regval, PAGE_SIZE);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (!ret) {
+		/*
+		 * Invalidate TLB entries explicitly for the case that the
+		 * current VMID is for the host OS in the VM; we don't manage
+		 * shadow stage 2 page tables for it.
+		 */
+		mmu = &vcpu->kvm->arch.mmu;
+		vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, vttbr, p->regval);
+	}
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1930,15 +1960,15 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, NULL),
 	SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, NULL),
 	SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, handle_ipas2e1is, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, handle_ipas2e1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2E1, NULL, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, NULL, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1, handle_ipas2e1is, NULL),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, handle_ipas2e1is, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
 	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, NULL),
 	SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, NULL),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
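
One architectural detail worth keeping in mind when reading the
handler above: for the TLBI IPAS2E1* instructions, Xt[35:0] carries
IPA[47:12], i.e. the IPA shifted right by 12. A hypothetical
guest-side helper makes the encoding concrete (the function name is
illustrative):

	/* Guest hypervisor: invalidate stage 2 entries for one IPA. */
	static void guest_flush_ipa(phys_addr_t ipa)
	{
		u64 xt = ipa >> 12;	/* Xt[35:0] = IPA[47:12] */

		asm volatile("tlbi ipas2e1is, %0" : : "r" (xt) : "memory");
		dsb(ish);
	}

The trapped register value (p->regval in the handler) is therefore in
this encoded form rather than a byte address.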

* [RFC PATCH v2 29/31] KVM: arm64: Respect the virtual HCR_EL2.AT and NV setting
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Forward system instruction traps to the virtual EL2 if a corresponding
bit in the virtual HCR_EL2 is set.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---

Notes:
    v1-->v2:
    This is a new commit.  We can rework the existing forward_nv_traps()
    and forward_nv1_traps() defined in the rfc-v2 cpu patches to reuse
    the forward_traps() function.

 arch/arm64/include/asm/kvm_arm.h |  1 +
 arch/arm64/kvm/sys_regs.c        | 69 +++++++++++++++++++++++++---------------
 2 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index e160895..925edfd 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_AT		(UL(1) << 44)
 #define HCR_NV1		(UL(1) << 43)
 #define HCR_NV		(UL(1) << 42)
 #define HCR_E2H		(UL(1) << 34)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index eb91f00..89e73af 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -966,6 +966,23 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+	bool control_bit_set;
+
+	control_bit_set = vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+	if (!vcpu_mode_el2(vcpu) && control_bit_set) {
+		kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
+		return true;
+	}
+	return false;
+}
+
+static bool forward_at_traps(struct kvm_vcpu *vcpu)
+{
+	return forward_traps(vcpu, HCR_AT);
+}
+
 /* This function is to support the recursive nested virtualization */
 bool forward_nv_traps(struct kvm_vcpu *vcpu)
 {
@@ -1948,32 +1965,32 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 #define SYS_INSN_TO_DESC(insn, access_fn, forward_fn)	\
 	{ SYS_DESC((insn)), (access_fn), NULL, 0, 0, NULL, NULL, (forward_fn) }
 static struct sys_reg_desc sys_insn_descs[] = {
-	SYS_INSN_TO_DESC(AT_S1E1R, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1W, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0R, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, NULL),
-	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, NULL),
-	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, NULL),
-	SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, NULL),
-	SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, NULL),
-	SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, NULL),
-	SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, handle_ipas2e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, handle_ipas2e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, NULL),
-	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2E1, handle_ipas2e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, handle_ipas2e1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, NULL),
-	SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, NULL),
-	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, handle_vmalls12e1is, NULL),
+	SYS_INSN_TO_DESC(AT_S1E1R, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E1W, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E0R, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, forward_at_traps),
+	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, forward_nv_traps),
+	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, forward_nv_traps),
+	SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, forward_nv_traps),
+	SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, forward_nv_traps),
+	SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, forward_nv_traps),
+	SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, handle_ipas2e1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, handle_ipas2e1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_IPAS2E1, handle_ipas2e1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_IPAS2LE1, handle_ipas2e1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, forward_nv_traps),
+	SYS_INSN_TO_DESC(TLBI_VMALLS12E1, handle_vmalls12e1is, forward_nv_traps),
 };
 
 #define reg_to_match_value(x)						\
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
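
The forward_fn slot filled in above is meant to be consulted before
the access function runs. A rough sketch of the dispatch order this
implies; the function and field names here are illustrative, not the
actual ones in sys_regs.c:

	static int emulate_sys_insn(struct kvm_vcpu *vcpu,
				    struct sys_reg_params *p,
				    const struct sys_reg_desc *r)
	{
		/* Give the virtual EL2 a chance to handle its own trap. */
		if (r->forward_fn && r->forward_fn(vcpu))
			return 1;	/* exception injected into virtual EL2 */

		/* Otherwise, emulate the instruction on the guest's behalf. */
		return r->access(vcpu, p, r);
	}

So when an L1 hypervisor has set HCR_EL2.AT (or is subject to the NV
traps), the trap is reflected into the virtual EL2 rather than being
emulated by the host.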

* [RFC PATCH v2 30/31] KVM: arm64: Emulate TLBI instructions accessible from EL1
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

Even though a guest hypervisor can execute the TLBI instructions that
are accessible at EL1 without trapping, doing so is wrong: all of
those TLBI instructions operate on the current VMID, and while a
guest hypervisor is running, the current VMID is the one assigned to
the hypervisor itself, not the one taken from the virtual vttbr_el2.
So letting a guest hypervisor execute those TLBI instructions
directly invalidates its own TLB entries and leaves the stale TLB
entries it actually targeted in place.

Therefore we trap and emulate those TLBI instructions. The emulation
is simple: we find the shadow VMID mapped to the virtual vttbr_el2,
set it in the physical vttbr_el2, and then execute the same
instruction in EL2.

We don't set the HCR_EL2.TTLB bit yet.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |  1 +
 arch/arm64/include/asm/kvm_mmu.h |  1 +
 arch/arm64/include/asm/sysreg.h  | 15 ++++++++++++
 arch/arm64/kvm/hyp/tlb.c         | 52 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/mmu-nested.c      |  3 +--
 arch/arm64/kvm/sys_regs.c        | 50 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cd7fb85..ce331d7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -56,6 +56,7 @@
 extern void __kvm_tlb_flush_vmid(u64 vttbr);
 extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
 extern void __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding);
+extern void __kvm_tlb_el1_instr(u64 vttbr, u64 val, u64 sys_encoding);
 
 extern void __kvm_at_insn(struct kvm_vcpu *vcpu, unsigned long vaddr,
 			  bool el2_regime, int sys_encoding);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6681be1..601f431 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -347,6 +347,7 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 bool kvm_nested_s2_clear_curr_vmid(struct kvm_vcpu *vcpu, phys_addr_t start,
 				   u64 size);
+struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 53df733..fd6b98a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -386,10 +386,25 @@
 
 /* TLBI instructions */
 #define TLBI_Op0	1
+#define TLBI_Op1_EL1	0	/* Accessible from EL1 or higher */
 #define TLBI_Op1_EL2	4	/* Accessible from EL2 or higher */
 #define TLBI_CRn	8
+#define tlbi_insn_el1(CRm, Op2)	sys_insn(TLBI_Op0, TLBI_Op1_EL1, TLBI_CRn, (CRm), (Op2))
 #define tlbi_insn_el2(CRm, Op2)	sys_insn(TLBI_Op0, TLBI_Op1_EL2, TLBI_CRn, (CRm), (Op2))
 
+#define TLBI_VMALLE1IS	tlbi_insn_el1(3, 0)
+#define TLBI_VAE1IS	tlbi_insn_el1(3, 1)
+#define TLBI_ASIDE1IS	tlbi_insn_el1(3, 2)
+#define TLBI_VAAE1IS	tlbi_insn_el1(3, 3)
+#define TLBI_VALE1IS	tlbi_insn_el1(3, 5)
+#define TLBI_VAALE1IS	tlbi_insn_el1(3, 7)
+#define TLBI_VMALLE1	tlbi_insn_el1(7, 0)
+#define TLBI_VAE1	tlbi_insn_el1(7, 1)
+#define TLBI_ASIDE1	tlbi_insn_el1(7, 2)
+#define TLBI_VAAE1	tlbi_insn_el1(7, 3)
+#define TLBI_VALE1	tlbi_insn_el1(7, 5)
+#define TLBI_VAALE1	tlbi_insn_el1(7, 7)
+
 #define TLBI_IPAS2E1IS	tlbi_insn_el2(0, 1)
 #define TLBI_IPAS2LE1IS	tlbi_insn_el2(0, 5)
 #define TLBI_ALLE2IS	tlbi_insn_el2(3, 0)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index bd8b92c..096c234 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -179,3 +179,55 @@ void __hyp_text __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding)
 
 	__tlb_switch_to_host()();
 }
+
+void __hyp_text __kvm_tlb_el1_instr(u64 vttbr, u64 val, u64 sys_encoding)
+{
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest()(vttbr);
+
+	/* Execute the same instruction as the guest hypervisor did */
+	switch (sys_encoding) {
+	case TLBI_VMALLE1IS:
+		__tlbi(vmalle1is);
+		break;
+	case TLBI_VAE1IS:
+		__tlbi(vae1is, val);
+		break;
+	case TLBI_ASIDE1IS:
+		__tlbi(aside1is, val);
+		break;
+	case TLBI_VAAE1IS:
+		__tlbi(vaae1is, val);
+		break;
+	case TLBI_VALE1IS:
+		__tlbi(vale1is, val);
+		break;
+	case TLBI_VAALE1IS:
+		__tlbi(vaale1is, val);
+		break;
+	case TLBI_VMALLE1:
+		__tlbi(vmalle1);
+		break;
+	case TLBI_VAE1:
+		__tlbi(vae1, val);
+		break;
+	case TLBI_ASIDE1:
+		__tlbi(aside1, val);
+		break;
+	case TLBI_VAAE1:
+		__tlbi(vaae1, val);
+		break;
+	case TLBI_VALE1:
+		__tlbi(vale1, val);
+		break;
+	case TLBI_VAALE1:
+		__tlbi(vaale1, val);
+		break;
+	default:
+		break;
+	}
+	dsb(nsh);
+	isb();
+
+	__tlb_switch_to_host()();
+}
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index 2189f2b..8826eaa 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -332,8 +332,7 @@ void kvm_nested_s2_free(struct kvm *kvm)
 		__kvm_free_stage2_pgd(kvm, &nested_mmu->mmu);
 }
 
-static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
-						   u64 vttbr)
+struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr)
 {
 	struct kvm_nested_s2_mmu *mmu;
 	u64 virtual_vmid;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 89e73af..1dcbe70 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -983,6 +983,11 @@ static bool forward_at_traps(struct kvm_vcpu *vcpu)
 	return forward_traps(vcpu, HCR_AT);
 }
 
+static bool forward_ttlb_traps(struct kvm_vcpu *vcpu)
+{
+	return forward_traps(vcpu, HCR_TTLB);
+}
+
 /* This function is to support the recursive nested virtualization */
 bool forward_nv_traps(struct kvm_vcpu *vcpu)
 {
@@ -1896,6 +1901,37 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	u64 virtual_vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2);
+	u64 vttbr;
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	nested_mmu = lookup_nested_mmu(vcpu, virtual_vttbr);
+	if (!nested_mmu) {
+		/*
+		 * If we can't find a shadow VMID, then either the virtual
+		 * VMID belongs to the host OS in the VM, or the nested VM
+		 * using that virtual VMID has never run. (Note that we
+		 * create a shadow VMID when entering a VM.) For the former
+		 * we can flush the TLB entries belonging to the host OS in
+		 * the VM; for the latter nothing needs doing. Since we can't
+		 * tell the two cases apart, do what the former requires.
+		 */
+		mmu = &vcpu->kvm->arch.mmu;
+	} else {
+		mmu = &nested_mmu->mmu;
+	}
+
+	vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+	kvm_call_hyp(__kvm_tlb_el1_instr, vttbr, p->regval, sys_encoding);
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1971,6 +2007,20 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, forward_at_traps),
 	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, forward_at_traps),
 	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, forward_at_traps),
+
+	SYS_INSN_TO_DESC(TLBI_VMALLE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_ASIDE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VMALLE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_ASIDE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAAE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAALE1, handle_tlbi_el1, forward_ttlb_traps),
+
 	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, forward_nv_traps),
 	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, forward_nv_traps),
 	SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, forward_nv_traps),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
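
To make the new tlbi_insn_el1() encodings concrete: each EL1 TLBI
instruction is identified by its (Op0, Op1, CRn, CRm, Op2) tuple,
with only CRm and Op2 varying across the twelve instructions. One
worked example:

	/*
	 * TLBI_VAE1IS == tlbi_insn_el1(3, 1)
	 *             == sys_insn(Op0=1, Op1=0, CRn=8, CRm=3, Op2=1)
	 *
	 * handle_tlbi_el1() rebuilds the same tuple from the trapped
	 * access via sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2),
	 * which is what lets the switch in __kvm_tlb_el1_instr() match
	 * the trapped instruction against these constants directly.
	 */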

* [RFC PATCH v2 30/31] KVM: arm64: Emulate TLBI instructions accesible from EL1
@ 2017-10-03  3:11   ` Jintack Lim
  0 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: kvm, catalin.marinas, will.deacon, linux, linux-kernel, pbonzini,
	linux-arm-kernel

Even though a guest hypervisor can execute TLBI instructions that are
accesible at EL1 without trap, it's wrong; All those TLBI instructions
work based on current VMID, and when running a guest hypervisor current
VMID is the one for itself, not the one from the virtual vttbr_el2. So
letting a guest hypervisor execute those TLBI instructions results in
invalidating its own TLB entries and leaving invalid TLB entries
unhandled.

Therefore we trap and emulate those TLBI instructions. The emulation is
simple; we find a shadow VMID mapped to the virtual vttbr_el2, set it in
the physical vttbr_el2, then execute the same instruction in EL2.

We don't set HCR_EL2.TTLB bit yet.

Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |  1 +
 arch/arm64/include/asm/kvm_mmu.h |  1 +
 arch/arm64/include/asm/sysreg.h  | 15 ++++++++++++
 arch/arm64/kvm/hyp/tlb.c         | 52 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/mmu-nested.c      |  3 +--
 arch/arm64/kvm/sys_regs.c        | 50 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cd7fb85..ce331d7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -56,6 +56,7 @@
 extern void __kvm_tlb_flush_vmid(u64 vttbr);
 extern void __kvm_tlb_flush_local_vmid(u64 vttbr);
 extern void __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding);
+extern void __kvm_tlb_el1_instr(u64 vttbr, u64 val, u64 sys_encoding);
 
 extern void __kvm_at_insn(struct kvm_vcpu *vcpu, unsigned long vaddr,
 			  bool el2_regime, int sys_encoding);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6681be1..601f431 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -347,6 +347,7 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 bool kvm_nested_s2_clear_curr_vmid(struct kvm_vcpu *vcpu, phys_addr_t start,
 				   u64 size);
+struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
 
 static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
 				struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 53df733..fd6b98a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -386,10 +386,25 @@
 
 /* TLBI instructions */
 #define TLBI_Op0	1
+#define TLBI_Op1_EL1	0	/* Accessible from EL1 or higher */
 #define TLBI_Op1_EL2	4	/* Accessible from EL2 or higher */
 #define TLBI_CRn	8
+#define tlbi_insn_el1(CRm, Op2)	sys_insn(TLBI_Op0, TLBI_Op1_EL1, TLBI_CRn, (CRm), (Op2))
 #define tlbi_insn_el2(CRm, Op2)	sys_insn(TLBI_Op0, TLBI_Op1_EL2, TLBI_CRn, (CRm), (Op2))
 
+#define TLBI_VMALLE1IS	tlbi_insn_el1(3, 0)
+#define TLBI_VAE1IS	tlbi_insn_el1(3, 1)
+#define TLBI_ASIDE1IS	tlbi_insn_el1(3, 2)
+#define TLBI_VAAE1IS	tlbi_insn_el1(3, 3)
+#define TLBI_VALE1IS	tlbi_insn_el1(3, 5)
+#define TLBI_VAALE1IS	tlbi_insn_el1(3, 7)
+#define TLBI_VMALLE1	tlbi_insn_el1(7, 0)
+#define TLBI_VAE1	tlbi_insn_el1(7, 1)
+#define TLBI_ASIDE1	tlbi_insn_el1(7, 2)
+#define TLBI_VAAE1	tlbi_insn_el1(7, 3)
+#define TLBI_VALE1	tlbi_insn_el1(7, 5)
+#define TLBI_VAALE1	tlbi_insn_el1(7, 7)
+
 #define TLBI_IPAS2E1IS	tlbi_insn_el2(0, 1)
 #define TLBI_IPAS2LE1IS	tlbi_insn_el2(0, 5)
 #define TLBI_ALLE2IS	tlbi_insn_el2(3, 0)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index bd8b92c..096c234 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -179,3 +179,55 @@ void __hyp_text __kvm_tlb_vae2(u64 vttbr, u64 va, u64 sys_encoding)
 
 	__tlb_switch_to_host()();
 }
+
+void __hyp_text __kvm_tlb_el1_instr(u64 vttbr, u64 val, u64 sys_encoding)
+{
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest()(vttbr);
+
+	/* Execute the same instruction as the guest hypervisor did */
+	switch (sys_encoding) {
+	case TLBI_VMALLE1IS:
+		__tlbi(vmalle1is);
+		break;
+	case TLBI_VAE1IS:
+		__tlbi(vae1is, val);
+		break;
+	case TLBI_ASIDE1IS:
+		__tlbi(aside1is, val);
+		break;
+	case TLBI_VAAE1IS:
+		__tlbi(vaae1is, val);
+		break;
+	case TLBI_VALE1IS:
+		__tlbi(vale1is, val);
+		break;
+	case TLBI_VAALE1IS:
+		__tlbi(vaale1is, val);
+		break;
+	case TLBI_VMALLE1:
+		__tlbi(vmalle1);
+		break;
+	case TLBI_VAE1:
+		__tlbi(vae1, val);
+		break;
+	case TLBI_ASIDE1:
+		__tlbi(aside1, val);
+		break;
+	case TLBI_VAAE1:
+		__tlbi(vaae1, val);
+		break;
+	case TLBI_VALE1:
+		__tlbi(vale1, val);
+		break;
+	case TLBI_VAALE1:
+		__tlbi(vaale1, val);
+		break;
+	default:
+		break;
+	}
+	dsb(nsh);
+	isb();
+
+	__tlb_switch_to_host()();
+}
diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
index 2189f2b..8826eaa 100644
--- a/arch/arm64/kvm/mmu-nested.c
+++ b/arch/arm64/kvm/mmu-nested.c
@@ -332,8 +332,7 @@ void kvm_nested_s2_free(struct kvm *kvm)
 		__kvm_free_stage2_pgd(kvm, &nested_mmu->mmu);
 }
 
-static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu,
-						   u64 vttbr)
+struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr)
 {
 	struct kvm_nested_s2_mmu *mmu;
 	u64 virtual_vmid;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 89e73af..1dcbe70 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -983,6 +983,11 @@ static bool forward_at_traps(struct kvm_vcpu *vcpu)
 	return forward_traps(vcpu, HCR_AT);
 }
 
+static bool forward_ttlb_traps(struct kvm_vcpu *vcpu)
+{
+	return forward_traps(vcpu, HCR_TTLB);
+}
+
 /* This function is to support the recursive nested virtualization */
 bool forward_nv_traps(struct kvm_vcpu *vcpu)
 {
@@ -1896,6 +1901,37 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	u64 virtual_vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2);
+	u64 vttbr;
+	struct kvm_nested_s2_mmu *nested_mmu;
+	struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+	int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+	nested_mmu = lookup_nested_mmu(vcpu, virtual_vttbr);
+	if (!nested_mmu) {
+		/*
+		 * If we can't find a shadow VMID, it is either the virtual
+		 * VMID is for the host OS or the nested VM having the virtual
+		 * VMID is never executed. (Note that we create a showdow VMID
+		 * when entering a VM.) For the former, we can flush TLB
+		 * entries belonging to the host OS in a VM. For the latter, we
+		 * don't have to do anything. Since we can't differentiate
+		 * between those cases, just do what we can do for the former.
+		 */
+		mmu = &vcpu->kvm->arch.mmu;
+	} else {
+		mmu = &nested_mmu->mmu;
+	}
+
+	vttbr = kvm_get_vttbr(&mmu->vmid, mmu);
+	kvm_call_hyp(__kvm_tlb_el1_instr, vttbr, p->regval, sys_encoding);
+
+	return true;
+}
+
 /*
  * AT instruction emulation
  *
@@ -1971,6 +2007,20 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	SYS_INSN_TO_DESC(AT_S1E0W, handle_s1e01, forward_at_traps),
 	SYS_INSN_TO_DESC(AT_S1E1RP, handle_s1e01, forward_at_traps),
 	SYS_INSN_TO_DESC(AT_S1E1WP, handle_s1e01, forward_at_traps),
+
+	SYS_INSN_TO_DESC(TLBI_VMALLE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_ASIDE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VMALLE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_ASIDE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAAE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VALE1, handle_tlbi_el1, forward_ttlb_traps),
+	SYS_INSN_TO_DESC(TLBI_VAALE1, handle_tlbi_el1, forward_ttlb_traps),
+
 	SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, forward_nv_traps),
 	SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, forward_nv_traps),
 	SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, forward_nv_traps),
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
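
For readers following the dispatch above: handle_tlbi_el1() re-encodes the
trapped instruction's Op0/Op1/CRn/CRm/Op2 fields into the same value as the
TLBI_* defines, so the hyp-side switch can re-issue the exact instruction
under the shadow VMID. A standalone sketch of that encoding scheme (the bit
positions mirror the kernel's sys_reg() layout; all names here are
illustrative, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Op0 at bit 19, Op1 at 16, CRn at 12, CRm at 8, Op2 at 5 */
#define SYS_INSN(op0, op1, crn, crm, op2)			\
	(((uint32_t)(op0) << 19) | ((uint32_t)(op1) << 16) |	\
	 ((uint32_t)(crn) << 12) | ((uint32_t)(crm) << 8)  |	\
	 ((uint32_t)(op2) << 5))

#define TLBI_VAE1IS	SYS_INSN(1, 0, 8, 3, 1)	/* TLBI VAE1IS, Xt */
#define TLBI_VMALLE1	SYS_INSN(1, 0, 8, 7, 0)	/* TLBI VMALLE1 */

int main(void)
{
	/* Fields as they would be decoded from the trap syndrome */
	uint32_t trapped = SYS_INSN(1, 0, 8, 3, 1);

	switch (trapped) {
	case TLBI_VAE1IS:
		printf("re-issue TLBI VAE1IS under the shadow VMID\n");
		break;
	case TLBI_VMALLE1:
		printf("re-issue TLBI VMALLE1 under the shadow VMID\n");
		break;
	default:
		printf("unhandled encoding %#x\n", (unsigned int)trapped);
	}
	return 0;
}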


* [RFC PATCH v2 31/31] KVM: arm64: Fixes to toggle_cache for nesting
  2017-10-03  3:10 ` Jintack Lim
@ 2017-10-03  3:11   ` Jintack Lim
  -1 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03  3:11 UTC (permalink / raw)
  To: christoffer.dall, marc.zyngier, kvmarm
  Cc: jintack, pbonzini, rkrcmar, catalin.marinas, will.deacon, linux,
	mark.rutland, linux-arm-kernel, kvm, linux-kernel, Jintack Lim

From: Christoffer Dall <christoffer.dall@linaro.org>

So far we have been flushing almost the entire universe whenever a VM
would load/unload SCTLR_EL1 and the two versions of that register had
different MMU-enable settings.  This turned out to be so slow that it
prevented forward progress for a nested VM, because a scheduler timer
tick interrupt would always be pending by the time we reached the
nested VM.

To avoid this problem, we consider the SCTLR_EL2 when evaluating if
caches are on or off when entering virtual EL2 (because this is the
value that we end up shadowing onto the hardware EL1 register).

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
---
 arch/arm64/include/asm/kvm_mmu.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 601f431..7a1c581 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -240,7 +240,10 @@ static inline bool kvm_page_empty(void *ptr)
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
-	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+	if (vcpu_mode_el2(vcpu))
+		return (vcpu_sys_reg(vcpu, SCTLR_EL2) & 0b101) == 0b101;
+	else
+		return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
 static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 91+ messages in thread
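
The 0b101 mask in vcpu_has_cache_enabled() above tests two architectural
SCTLR_ELx bits: M (bit 0, stage-1 MMU enable) and C (bit 2, data/unified
cache enable). A minimal sketch of that test with the bits named; the
identifiers are ours, only the bit positions are architectural:

#include <stdbool.h>
#include <stdint.h>

#define SCTLR_ELx_M	(UINT64_C(1) << 0)	/* stage-1 MMU enable */
#define SCTLR_ELx_C	(UINT64_C(1) << 2)	/* data cache enable */

/* Caches count as "on" only when both the MMU and the data cache are
 * enabled in the SCTLR governing the vcpu's current translation regime:
 * SCTLR_EL2 when the vcpu runs in virtual EL2, SCTLR_EL1 otherwise. */
static inline bool cache_enabled(uint64_t sctlr)
{
	return (sctlr & (SCTLR_ELx_M | SCTLR_ELx_C)) ==
	       (SCTLR_ELx_M | SCTLR_ELx_C);
}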


* Re: [RFC PATCH v2 19/31] KVM: arm64: Describe AT instruction emulation design
  2017-10-03  3:11   ` Jintack Lim
@ 2017-10-03 17:37     ` James Morse
  -1 siblings, 0 replies; 91+ messages in thread
From: James Morse @ 2017-10-03 17:37 UTC (permalink / raw)
  To: Jintack Lim
  Cc: christoffer.dall, marc.zyngier, kvmarm, kvm, catalin.marinas,
	will.deacon, linux, linux-kernel, pbonzini, linux-arm-kernel

Hi Jintack,

On 03/10/17 04:11, Jintack Lim wrote:
> This design overview will help to digest the subsequent patches that
> implement AT instruction emulation.

> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 8d04926..d8728cc 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1621,6 +1621,72 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
>  	{ SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
>  };
>  
> +/*
> + * AT instruction emulation
> + *
> + * We emulate AT instructions executed in the virtual EL2.

> + * Basic strategy for the stage-1 translation emulation is to load proper
> + * context, which depends on the trapped instruction and the virtual HCR_EL2,
> + * to the EL1 virtual memory control registers and execute S1E[01] instructions
> + * in EL2. See below for more detail.

What happens if the guest memory containing some stage-1 page table has been
unmapped from stage2? (e.g. it's swapped to disk.)

(there is some background to this: I tried to implement the kvm_translate
ioctl() using this approach, running 'at s1e1*' from EL2. I ran into problems
when parts of the guest's stage1 page tables had been unmapped from stage2.)

From memory, I found that the AT instructions would fault-in those pages when
run from EL1, but when executing the same instruction at EL2 they just failed
without any hint of which IPA needed mapping in.

I can try digging for any left-over code if we want to set up a test case for this...


Thanks,

James

^ permalink raw reply	[flat|nested] 91+ messages in thread


* Re: [RFC PATCH v2 19/31] KVM: arm64: Describe AT instruction emulation design
  2017-10-03 17:37     ` James Morse
@ 2017-10-03 21:11       ` Jintack Lim
  -1 siblings, 0 replies; 91+ messages in thread
From: Jintack Lim @ 2017-10-03 21:11 UTC (permalink / raw)
  To: James Morse
  Cc: KVM General, Marc Zyngier, Catalin Marinas, Will Deacon, linux,
	lkml - Kernel Mailing List, arm-mail-list, Paolo Bonzini, kvmarm

Hi James,

On Tue, Oct 3, 2017 at 1:37 PM, James Morse <james.morse@arm.com> wrote:
> Hi Jintack,
>
> On 03/10/17 04:11, Jintack Lim wrote:
>> This design overview will help to digest the subsequent patches that
>> implement AT instruction emulation.
>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 8d04926..d8728cc 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -1621,6 +1621,72 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
>>       { SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
>>  };
>>
>> +/*
>> + * AT instruction emulation
>> + *
>> + * We emulate AT instructions executed in the virtual EL2.
>
>> + * Basic strategy for the stage-1 translation emulation is to load proper
>> + * context, which depends on the trapped instruction and the virtual HCR_EL2,
>> + * to the EL1 virtual memory control registers and execute S1E[01] instructions
>> + * in EL2. See below for more detail.
>
> What happens if the guest memory containing some stage1-page-table has been
> unmapped from stage2? (e.g. its swapped to disk).
>
> (there is some background to this: I tried to implement the kvm_translate
> ioctl() using this approach, running 'at s1e1*' from EL2. I ran into problems
> when parts of the guest's stage1 page tables had been unmapped from stage2.)
>
> From memory, I found that the AT instructions would fault-in those pages when
> run from EL1, but when executing the same instruction at EL2 they just failed
> without any hint of which IPA needed mapping in.

I think I haven't encountered this case yet, probably because I
usually don't set a swap partition.

In fact, I couldn't find pseudocode for the AT instructions. If you
happen to have it, is the behavior you observed described in the ARM
ARM?

Thanks,
Jintack

>
> I can try digging for any left over code if we want to setup a test case for this...
>
>
> Thanks,
>
> James
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
>

^ permalink raw reply	[flat|nested] 91+ messages in thread


* Re: [RFC PATCH v2 19/31] KVM: arm64: Describe AT instruction emulation design
  2017-10-03 21:11       ` Jintack Lim
@ 2017-10-04  9:13         ` Marc Zyngier
  -1 siblings, 0 replies; 91+ messages in thread
From: Marc Zyngier @ 2017-10-04  9:13 UTC (permalink / raw)
  To: Jintack Lim, James Morse
  Cc: KVM General, Catalin Marinas, Will Deacon, linux,
	lkml - Kernel Mailing List, arm-mail-list, Paolo Bonzini, kvmarm

On 03/10/17 22:11, Jintack Lim wrote:
> Hi James,
> 
> On Tue, Oct 3, 2017 at 1:37 PM, James Morse <james.morse@arm.com> wrote:
>> Hi Jintack,
>>
>> On 03/10/17 04:11, Jintack Lim wrote:
>>> This design overview will help to digest the subsequent patches that
>>> implement AT instruction emulation.
>>
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 8d04926..d8728cc 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -1621,6 +1621,72 @@ static bool access_id_aa64mmfr0_el1(struct kvm_vcpu *v,
>>>       { SYS_DESC(SYS_SP_EL2), NULL, reset_special, SP_EL2, 0},
>>>  };
>>>
>>> +/*
>>> + * AT instruction emulation
>>> + *
>>> + * We emulate AT instructions executed in the virtual EL2.
>>
>>> + * Basic strategy for the stage-1 translation emulation is to load proper
>>> + * context, which depends on the trapped instruction and the virtual HCR_EL2,
>>> + * to the EL1 virtual memory control registers and execute S1E[01] instructions
>>> + * in EL2. See below for more detail.
>>
>> What happens if the guest memory containing some stage1-page-table has been
>> unmapped from stage2? (e.g. its swapped to disk).
>>
>> (there is some background to this: I tried to implement the kvm_translate
>> ioctl() using this approach, running 'at s1e1*' from EL2. I ran into problems
>> when parts of the guest's stage1 page tables had been unmapped from stage2.)
>>
>> From memory, I found that the AT instructions would fault-in those pages when
>> run from EL1, but when executing the same instruction at EL2 they just failed
>> without any hint of which IPA needed mapping in.

Let me see if I follow:

AT S1E1 at EL1 should only generate a fault if the page table walking
itself generates a fault (the guest page tables have been swapped out),
and the fault is taken to EL2. At that point, that's a normal
translation fault, which EL2 can easily resolve and restart the AT
instruction. This is in fact no different from a faulting load/store.

Doing the same thing at EL2 would indeed simply indicate a failed
translation without generating a fault, which I think is what you're
observing. After all, it is the hypervisor that unmapped those pages; it
might as well properly track what is happening.

It is a bit of an odd case because the AT here is executed at vEL2
(EL1), and trapped to EL2 because of the NV bits. If it wasn't trapped,
everything would just work. In this case, I can't see any other way but
to walk the S1PT by hand, having put all the other vcpus on hold to
avoid concurrent modifications... Yes, this sucks. If only AT could do
partial walks...

The saving grace is that this only happens in the unmapped S1PT case.
The above can be used as a fallback if the AT S1 from EL2 actually fails.
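
A rough shape of that fallback in kernel context, sketched with a
hypothetical software walker (kvm_walk_guest_s1() is our name, nothing
in this series provides it):

static int guest_va_to_par(struct kvm_vcpu *vcpu, u64 va, u64 *par)
{
	/* Try the hardware walk first */
	asm volatile("at s1e1r, %0" : : "r" (va));
	isb();
	*par = read_sysreg(par_el1);

	if (!(*par & 1))	/* PAR_EL1.F == 0: translation succeeded */
		return 0;

	/*
	 * AT failed, possibly because the guest's S1 tables are
	 * unmapped at S2: walk them in software, faulting pages in
	 * through the usual abort path, with the other vcpus held
	 * off as described above.
	 */
	return kvm_walk_guest_s1(vcpu, va, par);
}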

> I think I haven't encountered this case yet, probably because I
> usually don't set a swap partition.
> 
> In fact, I couldn't find pseudocode for AT instructions. If you
> happened to have one, is that behavior you observed described in ARM
> ARM?

See J1.1.5 in the ARMv8 ARM Rev B.a, and the various comments indicating
how this applies to Address Translation instructions. There is also some
description of what is expected from the AT instructions in D4.2.11.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 91+ messages in thread


