All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Cc: tech@virtualopensystems.com
Subject: [PATCH v7 10/12] ARM: KVM: Handle guest faults in KVM
Date: Mon, 12 Mar 2012 02:52:53 -0400	[thread overview]
Message-ID: <20120312065253.8074.63510.stgit@ubuntu> (raw)
In-Reply-To: <20120312065134.8074.36949.stgit@ubuntu>

From: Christoffer Dall <cdall@cs.columbia.edu>

Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.

Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.

Leverages MMU notifiers on KVM/ARM by supporting the kvm_unmap_hva() operation,
where we remove the HVA from the 2nd stage translation. All other KVM MMU
notifier hooks are NOPs.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_asm.h        |    3 +
 arch/arm/include/asm/kvm_host.h       |   19 ++++
 arch/arm/include/asm/pgtable-3level.h |    8 ++
 arch/arm/include/asm/pgtable.h        |    4 +
 arch/arm/kvm/Kconfig                  |    1 
 arch/arm/kvm/interrupts.S             |   37 ++++++++
 arch/arm/kvm/mmu.c                    |  162 +++++++++++++++++++++++++++++++++
 arch/arm/mm/mmu.c                     |    3 +
 8 files changed, 236 insertions(+), 1 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 69afdf3..c7cbe24 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -32,6 +32,7 @@
 #define SMCHYP_HVBAR_W 0xfffffff0
 
 #ifndef __ASSEMBLY__
+struct kvm;
 struct kvm_vcpu;
 
 extern char __kvm_hyp_init[];
@@ -42,6 +43,8 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
 extern void __kvm_flush_vm_context(void);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 72ba708..241a950 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -114,4 +114,23 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
  */
 #define L_PGD_SWAPPER		(_AT(pgdval_t, 1) << 55)	/* swapper_pg_dir entry */
 
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ		(_AT(pteval_t, 1) << 6)	/* HAP[0] */
+#define L_PTE2_WRITE		(_AT(pteval_t, 1) << 7)	/* HAP[1] */
+#define L_PTE2_NORM_WB		(_AT(pteval_t, 3) << 4)	/* MemAttr[3:2] */
+#define L_PTE2_INNER_WB		(_AT(pteval_t, 3) << 2)	/* MemAttr[1:0] */
+
 #ifndef __ASSEMBLY__
 
 #define pud_none(pud)		(!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 4b72287..2561a8b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t		pgprot_user;
 extern pgprot_t		pgprot_kernel;
+extern pgprot_t		pgprot_guest;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
@@ -83,6 +84,9 @@ extern pgprot_t		pgprot_kernel;
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST		_MOD_PROT(pgprot_guest, L_PTE2_READ | \
+					  L_PTE2_WRITE | L_PTE2_NORM_WB | \
+					  L_PTE2_INNER_WB)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 83abbe0..7fa50d3 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
 	depends on KVM
 	depends on MMU
 	depends on CPU_V7 && ARM_VIRT_EXT
+	select	MMU_NOTIFIER
 	---help---
 	  Provides host support for ARM processors.
 
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index fbc0bec..0cf4965 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,9 +35,46 @@ __kvm_hyp_code_start:
 	.globl __kvm_hyp_code_start
 
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@  Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations).  If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+	hvc	#0			@ Switch to Hyp mode
+	push	{r2, r3}
+
+	add	r0, r0, #KVM_VTTBR
+	ldrd	r2, r3, [r0]
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	isb
+	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
+	dsb
+	isb
+	mov	r2, #0
+	mov	r3, #0
+	mcrr	p15, 6, r2, r3, c2	@ Back to VMID #0
+	isb
+
+	pop	{r2, r3}
+	hvc	#0			@ Back to SVC
+	mov	pc, lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 @ Flush TLBs and instruction caches of current CPU for all VMIDs
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 
+/*
+ * void __kvm_flush_vm_context(void);
+ */
 ENTRY(__kvm_flush_vm_context)
 	hvc	#0			@ switch to hyp-mode
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 94411a6..7eae6c2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
 #include <asm/pgalloc.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
 
 pgd_t *kvm_hyp_pgd;
 DEFINE_MUTEX(kvm_hyp_pgd_mutex);
@@ -169,6 +170,9 @@ out:
  * Allocates the 1st level table only of size defined by PGD2_ORDER (can
  * support either full 40-bit input addresses or limited to 32-bit input
  * addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
@@ -230,6 +234,9 @@ static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
  * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
@@ -265,7 +272,201 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
+/* Empty entry: passing &null_pte to stage2_set_pte() clears a mapping */
+static const pte_t null_pte;
+
+/**
+ * stage2_set_pte - install or clear a 2nd stage page table entry
+ * @kvm:	the VM whose 2nd stage tables (kvm->arch.pgd) are updated
+ * @addr:	the guest physical address the entry translates
+ * @new_pte:	the entry to write, or &null_pte to clear the mapping
+ *
+ * When installing a mapping, missing level 2 and level 3 tables are
+ * allocated on the way down. When clearing one (as kvm_unmap_hva() does for
+ * addresses the guest may never have faulted in), a missing table means
+ * nothing is mapped at @addr, so the function returns without allocating.
+ *
+ * Returns 0 on success or -ENOMEM if a table could not be allocated.
+ */
+static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr, const pte_t *new_pte)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Create 2nd stage page table mapping - Level 1 */
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud)) {
+		/* Clearing an address that was never mapped is a no-op */
+		if (new_pte == &null_pte)
+			return 0;
+		pmd = pmd_alloc_one(NULL, addr);
+		if (!pmd) {
+			kvm_err("Cannot allocate 2nd stage pmd\n");
+			return -ENOMEM;
+		}
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(addr);
+	} else
+		pmd = pmd_offset(pud, addr);
+
+	/* Create 2nd stage page table mapping - Level 2 */
+	if (pmd_none(*pmd)) {
+		/* Likewise: no level 3 table means nothing to clear */
+		if (new_pte == &null_pte)
+			return 0;
+		pte = pte_alloc_one_kernel(NULL, addr);
+		if (!pte) {
+			kvm_err("Cannot allocate 2nd stage pte\n");
+			return -ENOMEM;
+		}
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(addr);
+	} else
+		pte = pte_offset_kernel(pmd, addr);
+
+	/* Create 2nd stage page table mapping - Level 3 */
+	set_pte_ext(pte, *new_pte, 0);
+
+	return 0;
+}
+
+/**
+ * user_mem_abort - map the host page backing a guest frame into stage 2
+ * @vcpu:	the VCPU pointer
+ * @fault_ipa:	the guest physical address that faulted
+ * @gfn:	the guest frame number of @fault_ipa
+ * @memslot:	the memory slot containing @gfn (not used here)
+ *
+ * Looks up the host page frame for @gfn and, holding the VM's pgd_mutex,
+ * installs a PAGE_KVM_GUEST entry for it at @fault_ipa.
+ *
+ * Returns 0 on success, -EFAULT if @gfn has no usable host mapping, or the
+ * error returned by stage2_set_pte().
+ */
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+	pte_t new_pte;
+	pfn_t pfn;
+	int ret;
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (is_error_pfn(pfn)) {
+		put_page(pfn_to_page(pfn));
+		kvm_err("Guest gfn %u (0x%08x) does not have "
+				"corresponding host mapping\n",
+				(unsigned int)gfn,
+				(unsigned int)gfn << PAGE_SHIFT);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+	ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
+	if (ret)
+		put_page(pfn_to_page(pfn));
+	mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+	return ret;
+}
+
+#define HSR_ABT_FS	(0x3f)
+#define HPFAR_MASK	(~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:	the VCPU pointer
+ * @run:	the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	return -EINVAL;
+	unsigned long hsr_ec;
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot = NULL;
+	bool is_iabt;
+	gfn_t gfn;
+
+	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+	is_iabt = (hsr_ec == HSR_EC_IABT);
+
+	/* Check that the second stage fault is a translation fault */
+	fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+	if ((fault_status & 0x3c) != 0x4) {
+		kvm_err("Unsupported fault status: %lx\n",
+				fault_status & 0x3c);
+		return -EFAULT;
+	}
+
+	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		if (is_iabt) {
+			kvm_err("Inst. abort on I/O address %08lx\n",
+				(unsigned long)fault_ipa);
+			return -EFAULT;
+		}
+
+		kvm_pr_unimpl("I/O address abort...\n");
+		return 0;
+	}
+
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!memslot->user_alloc) {
+		kvm_err("non user-alloc memslots not supported\n");
+		return -EINVAL;
+	}
+
+	return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+}
+
+/**
+ * kvm_unmap_hva - remove a host virtual address from the 2nd stage tables
+ * @kvm:	the VM pointer
+ * @hva:	the host virtual address being unmapped
+ *
+ * Clears the 2nd stage entry for @hva in every memslot that contains it and,
+ * if any slot matched, flushes the TLB entries for this VM's VMID.
+ *
+ * Always returns 0.
+ */
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int needs_stage2_flush = 0;
+
+	slots = kvm_memslots(kvm);
+
+	/* we only care about the pages that the guest sees */
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gpa_t gpa_offset = hva - start;
+			gpa_t gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
+
+			stage2_set_pte(kvm, gpa, &null_pte);
+			needs_stage2_flush = 1;
+		}
+	}
+
+	if (needs_stage2_flush)
+		__kvm_tlb_flush_vmid(kvm);
+
+	return 0;
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 94c5a0c..1a2df14 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -54,9 +54,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
 
 struct cachepolicy {
 	const char	policy[16];
@@ -504,6 +506,7 @@ static void __init build_mem_type_table(void)
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
 				 L_PTE_DIRTY | kern_pgprot);
+	pgprot_guest  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;


  parent reply	other threads:[~2012-03-12  6:52 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-12  6:51 [PATCH v7 00/12] KVM/ARM Implementation Christoffer Dall
2012-03-12  6:51 ` [PATCH v7 01/12] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-03-23  0:41   ` [PATCH] ARM: KVM: Check the cpuid we're being asked to emulate Rusty Russell
2012-05-14 22:57     ` Christoffer Dall
2012-05-16 23:58       ` Rusty Russell
2012-05-20 18:34         ` Christoffer Dall
2012-05-21  1:13           ` Rusty Russell
2012-03-12  6:52 ` [PATCH v7 02/12] KVM: Guard mmu_notifier specific code with CONFIG_MMU_NOTIFIER Christoffer Dall
2012-03-12 15:50   ` Avi Kivity
2012-03-12  6:52 ` [PATCH v7 03/12] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 04/12] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 05/12] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 06/12] ARM: KVM: Memory virtualization setup Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 07/12] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 08/12] ARM: KVM: World-switch implementation Christoffer Dall
2012-03-23  0:23   ` Rusty Russell
2012-03-28 13:05     ` Avi Kivity
2012-03-28 21:57       ` Rusty Russell
2012-03-29 10:49         ` Avi Kivity
2012-05-14 18:08           ` Christoffer Dall
2012-03-12  6:52 ` [PATCH v7 09/12] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-03-12  6:52 ` Christoffer Dall [this message]
2012-03-12 15:31   ` [Android-virt] [PATCH v7 10/12] ARM: KVM: Handle guest faults in KVM Marc Zyngier
2012-03-12 16:23     ` Christoffer Dall
2012-03-12 16:28       ` Marc Zyngier
2012-03-12  6:53 ` [PATCH v7 11/12] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-03-12  6:53 ` [PATCH v7 12/12] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-03-12 17:36 ` [PATCH v7 00/12] KVM/ARM Implementation Avi Kivity
2012-03-23  0:40 ` [PATCH] ARM: KVM: Remove l2ctlr write Rusty Russell
2012-05-14 22:59   ` Christoffer Dall
2012-03-29  5:11 ` [PATCH 0/3] Emulation cleanups Rusty, Russell <rusty.russell
2012-03-29  5:15   ` [PATCH 1/3] ARM: KVM: Remove l2ctlr write Rusty Russell
2012-03-29  5:17   ` [PATCH 2/3] ARM: KVM: Fake up performance counters a little more precisely Rusty Russell
2012-05-14 22:49     ` Christoffer Dall
2012-05-17  0:12       ` Rusty Russell
2012-03-29  5:17   ` [PATCH 3/3] ARM: KVM: Check the cpuid we're being asked to emulate Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120312065253.8074.63510.stgit@ubuntu \
    --to=c.dall@virtualopensystems.com \
    --cc=android-virt@lists.cs.columbia.edu \
    --cc=kvm@vger.kernel.org \
    --cc=tech@virtualopensystems.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.