All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 13/13] KVM: PPC: e500: MMU API
@ 2011-05-17 23:42 Scott Wood
  2011-05-19 14:10 ` Alexander Graf
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Scott Wood @ 2011-05-17 23:42 UTC (permalink / raw)
  To: kvm-ppc

This implements a shared-memory API for giving Qemu access to the guest's
TLB.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 Documentation/kvm/api.txt           |   79 +++++++-
 arch/powerpc/include/asm/kvm.h      |   35 +++
 arch/powerpc/include/asm/kvm_e500.h |   23 +-
 arch/powerpc/include/asm/kvm_ppc.h  |    7 +
 arch/powerpc/kvm/e500.c             |    5 +-
 arch/powerpc/kvm/e500_emulate.c     |   12 +-
 arch/powerpc/kvm/e500_tlb.c         |  418 ++++++++++++++++++++++++++---------
 arch/powerpc/kvm/e500_tlb.h         |   55 ++---
 arch/powerpc/kvm/powerpc.c          |   28 +++
 include/linux/kvm.h                 |   19 ++
 10 files changed, 515 insertions(+), 166 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 42542eb..8f2de15 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1268,7 +1268,7 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
-4.54 KVM_SET_TSC_KHZ
+4.55 KVM_SET_TSC_KHZ
 
 Capability: KVM_CAP_TSC_CONTROL
 Architectures: x86
@@ -1279,7 +1279,7 @@ Returns: 0 on success, -1 on error
 Specifies the tsc frequency for the virtual machine. The unit of the
 frequency is KHz.
 
-4.55 KVM_GET_TSC_KHZ
+4.56 KVM_GET_TSC_KHZ
 
 Capability: KVM_CAP_GET_TSC_KHZ
 Architectures: x86
@@ -1291,6 +1291,81 @@ Returns the tsc frequency of the guest. The unit of the return value is
 KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
 error.
 
+4.57 KVM_CONFIG_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_config_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM.  The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures.  The "array_len" field is a
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array.  It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM.  Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_fsl_booke_tlb_params".
+ - The "array" field points to an array of type "struct
+   kvm_fsl_booke_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+   entries in the second TLB.
+ - Within TLB0 entries are ordered first by increasing set number.  Within a
+   set, entries are ordered by way (increasing ESEL).
+   The hash for determining set number is: (MAS2 >> 12) & (num_sets - 1)
+   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - Within TLB1, entries are ordered by increasing ESEL.
+
+4.58 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array.  This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything.  It must
+be set to the number of set bits in the bitmap.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index d2ca5ed..2419be2 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+struct kvm_fsl_booke_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;
+};
+
+struct kvm_fsl_booke_tlb_params {
+	/*
+	 * book3e defines 4 TLBs, but current FSL Book E chips implement
+	 * only the first two.  The second two entries in tlb_sizes[]
+	 * and tlb_ways[] are reserved and must be zero.
+	 *
+	 * A tlb_ways value of zero means the array is fully associative.
+	 * Only TLB0 may be configured with a different associativity.  The
+	 * number of ways of TLB0 must be a power of two between 2 and 16.
+	 *
+	 * The size of TLB0 must be a multiple of the number of ways, and
+	 * the number of sets must be a power of two.
+	 *
+	 * The size of TLB1 may not exceed 64 entries.
+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 *
+	 * TLB0 supports 4 KiB pages.
+	 * The page sizes supported by TLB1 are as indicated by
+	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).
+	 */
+	__u32 tlb_sizes[4];
+	__u8 tlb_ways[4];
+	__u32 reserved[11];
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index adbfca9..fab626d 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -22,13 +22,6 @@
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
-struct tlbe{
-	u32 mas1;
-	u32 mas2;
-	u32 mas3;
-	u32 mas7;
-};
-
 #define E500_TLB_VALID 1
 #define E500_TLB_DIRTY 2
 
@@ -40,8 +33,11 @@ struct tlbe_priv {
 struct vcpu_id_table;
 
 struct kvmppc_vcpu_e500 {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe *gtlb_arch[E500_TLB_NUM];
+	/* Unmodified copy of the guest's TLB -- shared with Qemu. */
+	struct kvm_fsl_booke_tlb_entry *gtlb_arch;
+
+	/* Starting entry number in gtlb_arch[] */
+	int gtlb_offset[E500_TLB_NUM];
 
 	/* KVM internal information associated with each guest TLB entry */
 	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
@@ -49,6 +45,9 @@ struct kvmppc_vcpu_e500 {
 	unsigned int gtlb_size[E500_TLB_NUM];
 	unsigned int gtlb_nv[E500_TLB_NUM];
 
+	unsigned int gtlb0_ways;
+	unsigned int gtlb0_sets;
+
 	u32 host_pid[E500_PID_NUM];
 	u32 pid[E500_PID_NUM];
 	u32 svr;
@@ -56,11 +55,10 @@ struct kvmppc_vcpu_e500 {
 	u32 mas0;
 	u32 mas1;
 	u32 mas2;
-	u32 mas3;
+	u64 mas7_3;
 	u32 mas4;
 	u32 mas5;
 	u32 mas6;
-	u32 mas7;
 
 	/* vcpu id table */
 	struct vcpu_id_table *idt;
@@ -73,6 +71,9 @@ struct kvmppc_vcpu_e500 {
 	u32 tlb1cfg;
 	u64 mcar;
 
+	struct page **shared_tlb_pages;
+	int num_shared_tlb_pages;
+
 	struct kvm_vcpu vcpu;
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c662f14..bb3d418 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -152,4 +152,11 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *cfg);
+
+void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 43923c3..628f723 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -110,7 +110,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->u.e.mas0 = vcpu_e500->mas0;
 	sregs->u.e.mas1 = vcpu_e500->mas1;
 	sregs->u.e.mas2 = vcpu_e500->mas2;
-	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas7_3 = vcpu_e500->mas7_3;
 	sregs->u.e.mas4 = vcpu_e500->mas4;
 	sregs->u.e.mas6 = vcpu_e500->mas6;
 
@@ -143,8 +143,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 		vcpu_e500->mas0 = sregs->u.e.mas0;
 		vcpu_e500->mas1 = sregs->u.e.mas1;
 		vcpu_e500->mas2 = sregs->u.e.mas2;
-		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
-		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas7_3 = sregs->u.e.mas7_3;
 		vcpu_e500->mas4 = sregs->u.e.mas4;
 		vcpu_e500->mas6 = sregs->u.e.mas6;
 	}
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 69cd665..4fb0ebb 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -91,13 +91,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_MAS2:
 		vcpu_e500->mas2 = spr_val; break;
 	case SPRN_MAS3:
-		vcpu_e500->mas3 = spr_val; break;
+		vcpu_e500->mas7_3 &= ~(u64)0xffffffff;
+		vcpu_e500->mas7_3 |= spr_val;
+		break;
 	case SPRN_MAS4:
 		vcpu_e500->mas4 = spr_val; break;
 	case SPRN_MAS6:
 		vcpu_e500->mas6 = spr_val; break;
 	case SPRN_MAS7:
-		vcpu_e500->mas7 = spr_val; break;
+		vcpu_e500->mas7_3 &= (u64)0xffffffff;
+		vcpu_e500->mas7_3 |= (u64)spr_val << 32;
+		break;
 	case SPRN_L1CSR0:
 		vcpu_e500->l1csr0 = spr_val;
 		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -154,13 +158,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MAS2:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
 	case SPRN_MAS3:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+		kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break;
 	case SPRN_MAS4:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
 	case SPRN_MAS6:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
 	case SPRN_MAS7:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break;
 
 	case SPRN_TLB0CFG:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d099d93..008f770 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -18,6 +18,11 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_e500.h>
 
@@ -63,6 +68,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
 
 static unsigned int tlb1_entry_num;
 
+static struct kvm_fsl_booke_tlb_entry *
+get_entry(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	int offset = vcpu_e500->gtlb_offset[tlbsel];
+	return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
 /*
  * Allocate a free shadow id and setup a valid sid mapping in given entry.
  * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
@@ -192,20 +204,23 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *tlbe;
+	struct kvm_fsl_booke_tlb_entry *tlbe;
 	int i, tlbsel;
 
-	printk("| %8s | %8s | %8s | %8s | %8s |\n",
-			"nr", "mas1", "mas2", "mas3", "mas7");
+	printk("| %8s | %8s | %16s | %16s |\n",
+	       "nr", "mas1", "mas2", "mas7_3");
 
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		int offset = vcpu_e500->gtlb_offset[tlbsel];
+
 		printk("Guest TLB%d:\n", tlbsel);
 		for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
-			tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
+			tlbe = &vcpu_e500->gtlb_arch[offset + i];
 			if (tlbe->mas1 & MAS1_VALID)
-				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
-					tlbsel, i, tlbe->mas1, tlbe->mas2,
-					tlbe->mas3, tlbe->mas7);
+				printk(" G[%d][%3d] |  %08X | %016llX | %016llX |\n",
+				       tlbsel, i, tlbe->mas1,
+				       (unsigned long long)tlbe->mas2,
+				       (unsigned long long)tlbe->mas7_3);
 		}
 	}
 }
@@ -216,7 +231,7 @@ static inline unsigned int tlb0_get_next_victim(
 	unsigned int victim;
 
 	victim = vcpu_e500->gtlb_nv[0]++;
-	if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+	if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb0_ways))
 		vcpu_e500->gtlb_nv[0] = 0;
 
 	return victim;
@@ -228,9 +243,9 @@ static inline unsigned int tlb1_max_shadow_size(void)
 	return tlb1_entry_num - tlbcam_index - 1;
 }
 
-static inline int tlbe_is_writable(struct tlbe *tlbe)
+static inline int tlbe_is_writable(struct kvm_fsl_booke_tlb_entry *tlbe)
 {
-	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }
 
 static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -261,40 +276,40 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
+static inline void __write_host_tlbe(struct kvm_fsl_booke_tlb_entry *stlbe,
+				     uint32_t mas0)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 	mtspr(SPRN_MAS0, mas0);
 	mtspr(SPRN_MAS1, stlbe->mas1);
-	mtspr(SPRN_MAS2, stlbe->mas2);
-	mtspr(SPRN_MAS3, stlbe->mas3);
-	mtspr(SPRN_MAS7, stlbe->mas7);
+	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
 	asm volatile("isync; tlbwe" : : : "memory");
 	local_irq_restore(flags);
 }
 
 static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+		int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
 {
 	if (tlbsel == 0) {
-		__write_host_tlbe(stlbe,
-				  MAS0_TLBSEL(0) |
-				  MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
+		int way = esel & (vcpu_e500->gtlb0_ways - 1);
+		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way));
 	} else {
 		__write_host_tlbe(stlbe,
 				  MAS0_TLBSEL(1) |
 				  MAS0_ESEL(to_htlb1_esel(esel)));
 	}
 	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
+			     (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32));
 }
 
 void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe magic;
+	struct kvm_fsl_booke_tlb_entry magic;
 	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
 	unsigned int stid;
 	pfn_t pfn;
@@ -308,9 +323,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
 		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
-	magic.mas3 = (pfn << PAGE_SHIFT) |
-		     MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
-	magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
 
 	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
 	preempt_enable();
@@ -331,7 +345,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
 static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 					 int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_fsl_booke_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 	struct vcpu_id_table *idt = vcpu_e500->idt;
 	unsigned int pr, tid, ts, pid;
 	u32 val, eaddr;
@@ -377,25 +392,50 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 }
 
+static int tlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+	int set_base;
+
+	set_base = (addr >> PAGE_SHIFT) & (vcpu_e500->gtlb0_sets - 1);
+	set_base *= vcpu_e500->gtlb0_ways;
+
+	return set_base;
+}
+
+static int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
+{
+	int esel = get_tlb_esel_bit(vcpu_e500);
+
+	if (tlbsel == 0) {
+		esel &= vcpu_e500->gtlb0_ways - 1;
+		esel += tlb0_set_base(vcpu_e500, vcpu_e500->mas2);
+	} else {
+		esel &= vcpu_e500->gtlb_size[1] - 1;
+	}
+
+	return esel;
+}
+
 /* Search the guest TLB for a matching entry. */
 static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 		gva_t eaddr, int tlbsel, unsigned int pid, int as)
 {
 	int size = vcpu_e500->gtlb_size[tlbsel];
-	int set_base;
+	int set_base, offset;
 	int i;
 
 	if (tlbsel == 0) {
-		int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
-		set_base = (eaddr >> PAGE_SHIFT) & mask;
-		set_base *= KVM_E500_TLB0_WAY_NUM;
-		size = KVM_E500_TLB0_WAY_NUM;
+		set_base = tlb0_set_base(vcpu_e500, eaddr);
+		size = vcpu_e500->gtlb0_ways;
 	} else {
 		set_base = 0;
 	}
 
+	offset = vcpu_e500->gtlb_offset[tlbsel];
+
 	for (i = 0; i < size; i++) {
-		struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
+		struct kvm_fsl_booke_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -421,7 +461,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 }
 
 static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
-					  struct tlbe *gtlbe,
+					  struct kvm_fsl_booke_tlb_entry *gtlbe,
 					  pfn_t pfn)
 {
 	priv->pfn = pfn;
@@ -463,17 +503,17 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 		| MAS1_TSIZE(tsized);
 	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
 		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
-	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
 		| (get_cur_pid(vcpu) << 16)
 		| (as ? MAS6_SAS : 0);
-	vcpu_e500->mas7 = 0;
 }
 
 static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-					   struct tlbe *gtlbe, int tsize,
-					   struct tlbe_priv *priv,
-					   u64 gvaddr, struct tlbe *stlbe)
+					   struct kvm_fsl_booke_tlb_entry *gtlbe,
+					   int tsize, struct tlbe_priv *priv,
+					   u64 gvaddr,
+					   struct kvm_fsl_booke_tlb_entry *stlbe)
 {
 	pfn_t pfn = priv->pfn;
 	unsigned int stid;
@@ -488,16 +528,14 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
+	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+		| e500_shadow_mas3_attrib(gtlbe->mas7_3,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
 }
 
-
 static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
-	struct tlbe *stlbe)
+	u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
+	int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
 {
 	struct kvm_memory_slot *slot;
 	unsigned long pfn, hva;
@@ -609,11 +647,11 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 /* XXX only map the one-one case, for now use TLB0 */
 static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-				int esel, struct tlbe *stlbe)
+				int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
 {
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
 
-	gtlbe = &vcpu_e500->gtlb_arch[0][esel];
+	gtlbe = get_entry(vcpu_e500, 0, esel);
 
 	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
 			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
@@ -626,7 +664,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
  * the shadow TLB. */
 /* XXX for both one-one and one-to-many , for now use TLB1 */
 static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
+		u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
+		struct kvm_fsl_booke_tlb_entry *stlbe)
 {
 	unsigned int victim;
 
@@ -652,7 +691,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
 				struct kvmppc_vcpu_e500 *vcpu_e500,
 				int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_fsl_booke_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 
 	if (unlikely(get_tlb_iprot(gtlbe)))
 		return -1;
@@ -715,18 +755,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int tlbsel, esel;
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 	vcpu_e500->mas0 &= ~MAS0_NV(~0);
 	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 	vcpu_e500->mas1 = gtlbe->mas1;
 	vcpu_e500->mas2 = gtlbe->mas2;
-	vcpu_e500->mas3 = gtlbe->mas3;
-	vcpu_e500->mas7 = gtlbe->mas7;
+	vcpu_e500->mas7_3 = gtlbe->mas7_3;
 
 	return EMULATE_DONE;
 }
@@ -737,7 +776,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	int as = !!get_cur_sas(vcpu_e500);
 	unsigned int pid = get_cur_spid(vcpu_e500);
 	int esel, tlbsel;
-	struct tlbe *gtlbe = NULL;
+	struct kvm_fsl_booke_tlb_entry *gtlbe = NULL;
 	gva_t ea;
 
 	ea = kvmppc_get_gpr(vcpu, rb);
@@ -745,7 +784,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
 		if (esel >= 0) {
-			gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 			break;
 		}
 	}
@@ -755,8 +794,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 		vcpu_e500->mas1 = gtlbe->mas1;
 		vcpu_e500->mas2 = gtlbe->mas2;
-		vcpu_e500->mas3 = gtlbe->mas3;
-		vcpu_e500->mas7 = gtlbe->mas7;
+		vcpu_e500->mas7_3 = gtlbe->mas7_3;
 	} else {
 		int victim;
 
@@ -771,8 +809,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
 		vcpu_e500->mas2 &= MAS2_EPN;
 		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
-		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
-		vcpu_e500->mas7 = 0;
+		vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	}
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
@@ -782,28 +819,27 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
 	int tlbsel, esel;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	if (get_tlb_v(gtlbe))
 		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
-	gtlbe->mas3 = vcpu_e500->mas3;
-	gtlbe->mas7 = vcpu_e500->mas7;
+	gtlbe->mas7_3 = vcpu_e500->mas7_3;
 
 	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
-			     gtlbe->mas3, gtlbe->mas7);
+			     (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32));
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
+		struct kvm_fsl_booke_tlb_entry stlbe;
 		int stlbsel, sesel;
 		u64 eaddr;
 		u64 raddr;
@@ -877,9 +913,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
 			gva_t eaddr)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe =
-		&vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
-	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
+	u64 pgmask;
+
+	gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+	pgmask = get_tlb_bytes(gtlbe) - 1;
 
 	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
@@ -893,12 +931,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	struct tlbe_priv *priv;
-	struct tlbe *gtlbe, stlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe, stlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
 	int stlbsel, sesel;
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	preempt_disable();
 	switch (tlbsel) {
@@ -956,51 +994,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
 
 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	struct tlbe *tlbe;
+	struct kvm_fsl_booke_tlb_entry *tlbe;
 
 	/* Insert large initial mapping for guest. */
-	tlbe = &vcpu_e500->gtlb_arch[1][0];
+	tlbe = get_entry(vcpu_e500, 1, 0);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
-	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
 
 	/* 4K map for serial output. Used by kernel wrapper. */
-	tlbe = &vcpu_e500->gtlb_arch[1][1];
+	tlbe = get_entry(vcpu_e500, 1, 1);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
-	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int stlbsel, i;
+
+	for (stlbsel = 0; stlbsel < 2; stlbsel++) {
+		for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
+			struct tlbe_priv *priv =
+				&vcpu_e500->gtlb_priv[stlbsel][i];
+			kvmppc_e500_priv_release(priv);
+		}
+	}
+}
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	clear_tlb_privs(vcpu_e500);
+
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++)
+			put_page(vcpu_e500->shared_tlb_pages[i]);
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_fsl_booke_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+		return -EINVAL;
+	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||
+	    params.tlb_ways[3] != 0)
+		return -EINVAL;
+
+	if (!is_power_of_2(params.tlb_ways[0]))
+		return -EINVAL;
+
+	sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+	if (!is_power_of_2(sets))
+		return -EINVAL;
+
+	array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+	array_len *= sizeof(struct kvm_fsl_booke_tlb_entry);
+
+	if (cfg->array_len < array_len)
+		return -EINVAL;
+
+	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+		    cfg->array / PAGE_SIZE;
+	pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+	if (ret < 0)
+		goto err_pages;
+
+	if (ret != num_pages) {
+		num_pages = ret;
+		ret = -EFAULT;
+		goto err_put_page;
+	}
+
+	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+	if (!virt)
+		goto err_put_page;
+
+	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+			   GFP_KERNEL);
+	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+			   GFP_KERNEL);
+
+	if (!privs[0] || !privs[1])
+		goto err_put_page;
+
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+	free_gtlb(vcpu_e500);
+
+	vcpu_e500->gtlb_priv[0] = privs[0];
+	vcpu_e500->gtlb_priv[1] = privs[1];
+
+	vcpu_e500->gtlb_arch = (struct kvm_fsl_booke_tlb_entry *)
+		(virt + (cfg->array & (PAGE_SIZE - 1)));
+
+	vcpu_e500->gtlb_size[0] = params.tlb_sizes[0];
+	vcpu_e500->gtlb_size[1] = params.tlb_sizes[1];
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+	if (params.tlb_sizes[0] <= 2048)
+		vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
+
+	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+	vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
+
+	vcpu_e500->shared_tlb_pages = pages;
+	vcpu_e500->num_shared_tlb_pages = num_pages;
+
+	vcpu_e500->gtlb0_ways = params.tlb_ways[0];
+	vcpu_e500->gtlb0_sets = sets;
+
+	return 0;
+
+err_put_page:
+	kfree(privs[0]);
+	kfree(privs[1]);
+
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+
+err_pages:
+	kfree(pages);
+	return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *dirty)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	clear_tlb_privs(vcpu_e500);
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
+	return 0;
+}
+
+void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int i;
+
+	/*
+	 * We may have modified the guest TLB, so mark it dirty.
+	 * We only do it on an actual return to userspace, to avoid
+	 * adding more overhead to getting scheduled out -- and avoid
+	 * any locking issues with getting preempted in the middle of
+	 * KVM_CONFIG_TLB, etc.
+	 */
+
+	for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++)
+		set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
 }
 
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
+	int entry_size = sizeof(struct kvm_fsl_booke_tlb_entry);
+	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
 	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
 
 	vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
-	vcpu_e500->gtlb_arch[0] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[0] == NULL)
-		goto err_out;
-
 	vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
-	vcpu_e500->gtlb_arch[1] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[1] == NULL)
-		goto err_out_guest0;
 
-	vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
+	vcpu_e500->gtlb0_ways = KVM_E500_TLB0_WAY_NUM;
+	vcpu_e500->gtlb0_sets = KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+	vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+	if (!vcpu_e500->gtlb_arch)
+		return -ENOMEM;
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
+
+	vcpu_e500->gtlb_priv[0] =
 		kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
 	if (vcpu_e500->gtlb_priv[0] == NULL)
-		goto err_out_guest1;
-	vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
+		goto err;
+	vcpu_e500->gtlb_priv[1] =
 		kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
 
 	if (vcpu_e500->gtlb_priv[1] == NULL)
-		goto err_out_priv0;
+		goto err;
 
 	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
-		goto err_out_priv1;
+		goto err;
 
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
@@ -1010,31 +1226,13 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	return 0;
 
-err_out_priv1:
-	kfree(vcpu_e500->gtlb_priv[1]);
-err_out_priv0:
-	kfree(vcpu_e500->gtlb_priv[0]);
-err_out_guest1:
-	kfree(vcpu_e500->gtlb_arch[1]);
-err_out_guest0:
-	kfree(vcpu_e500->gtlb_arch[0]);
-err_out:
+err:
+	free_gtlb(vcpu_e500);
 	return -1;
 }
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	int stlbsel, i;
-
-	/* release all privs */
-	for (stlbsel = 0; stlbsel < 2; stlbsel++)
-		for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
-			struct tlbe_priv *priv =
-				&vcpu_e500->gtlb_priv[stlbsel][i];
-			kvmppc_e500_priv_release(priv);
-		}
-
 	kvmppc_e500_id_table_free(vcpu_e500);
-	kfree(vcpu_e500->gtlb_arch[1]);
-	kfree(vcpu_e500->gtlb_arch[0]);
+	free_gtlb(vcpu_e500);
 }
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 59b88e9..81f79ff 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -20,13 +20,9 @@
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
 
-#define KVM_E500_TLB0_WAY_SIZE_BIT	7	/* Fixed */
-#define KVM_E500_TLB0_WAY_SIZE		(1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
-#define KVM_E500_TLB0_WAY_SIZE_MASK	(KVM_E500_TLB0_WAY_SIZE - 1)
-
-#define KVM_E500_TLB0_WAY_NUM_BIT	1	/* No greater than 7 */
-#define KVM_E500_TLB0_WAY_NUM		(1UL << KVM_E500_TLB0_WAY_NUM_BIT)
-#define KVM_E500_TLB0_WAY_NUM_MASK	(KVM_E500_TLB0_WAY_NUM - 1)
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE		128
+#define KVM_E500_TLB0_WAY_NUM		2
 
 #define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
 #define KVM_E500_TLB1_SIZE  16
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_size(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 7) & 0x1f;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return tlbe->mas2 & 0xfffff000;
 }
 
-static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+static inline u64 get_tlb_bytes(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1ULL << 10 << pgsize;
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	u64 bytes = get_tlb_bytes(tlbe);
 	return get_tlb_eaddr(tlbe) + bytes - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
-	u64 rpn = tlbe->mas7;
-	return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+	return tlbe->mas7_3 & ~0xfffULL;
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_tid(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 16) & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_ts(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 12) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_v(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 31) & 0x1;
 }
 
-static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_iprot(const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 30) & 0x1;
 }
@@ -155,25 +155,8 @@ static inline unsigned int get_tlb_esel_bit(
 	return (vcpu_e500->mas0 >> 16) & 0xfff;
 }
 
-static inline unsigned int get_tlb_esel(
-		const struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel)
-{
-	unsigned int esel = get_tlb_esel_bit(vcpu_e500);
-
-	if (tlbsel == 0) {
-		esel &= KVM_E500_TLB0_WAY_NUM_MASK;
-		esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
-				<< KVM_E500_TLB0_WAY_NUM_BIT;
-	} else {
-		esel &= KVM_E500_TLB1_SIZE - 1;
-	}
-
-	return esel;
-}
-
 static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-			const struct tlbe *tlbe)
+			const struct kvm_fsl_booke_tlb_entry *tlbe)
 {
 	gpa_t gpa;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d5beb72..212ca2e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
+#ifdef CONFIG_KVM_E500
+	case KVM_CAP_SW_TLB:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -503,6 +506,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	kvm_guest_exit();
 	local_irq_enable();
 
+#ifdef CONFIG_KVM_E500
+	kvmppc_core_heavy_exit(vcpu);
+#endif
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
@@ -583,6 +590,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+
+#ifdef CONFIG_KVM_E500
+	case KVM_CONFIG_TLB: {
+		struct kvm_config_tlb cfg;
+		r = -EFAULT;
+		if (copy_from_user(&cfg, argp, sizeof(cfg)))
+			goto out;
+		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+		break;
+	}
+
+	case KVM_DIRTY_TLB: {
+		struct kvm_dirty_tlb dirty;
+		r = -EFAULT;
+		if (copy_from_user(&dirty, argp, sizeof(dirty)))
+			goto out;
+		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		break;
+	}
+#endif
+
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 55ef181..daa79867 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -544,6 +544,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_TSC_CONTROL 60
 #define KVM_CAP_GET_TSC_KHZ 61
 #define KVM_CAP_PPC_BOOKE_SREGS 62
+#define KVM_CAP_SW_TLB 63
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -623,6 +624,21 @@ struct kvm_clock_data {
 	__u32 pad[9];
 };
 
+#define KVM_MMU_FSL_BOOKE_NOHV		0
+#define KVM_MMU_FSL_BOOKE_HV		1
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -746,6 +762,9 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_XCRS */
 #define KVM_GET_XCRS		  _IOR(KVMIO,  0xa6, struct kvm_xcrs)
 #define KVM_SET_XCRS		  _IOW(KVMIO,  0xa7, struct kvm_xcrs)
+/* Available with KVM_CAP_SW_TLB */
+#define KVM_CONFIG_TLB		  _IOW(KVMIO,  0xa8, struct kvm_config_tlb)
+#define KVM_DIRTY_TLB		  _IOW(KVMIO,  0xa9, struct kvm_dirty_tlb)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 13/13] KVM: PPC: e500: MMU API
  2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
@ 2011-05-19 14:10 ` Alexander Graf
  2011-05-19 16:28 ` Scott Wood
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Alexander Graf @ 2011-05-19 14:10 UTC (permalink / raw)
  To: kvm-ppc

On 05/18/2011 01:42 AM, Scott Wood wrote:
> This implements a shared-memory API for giving Qemu access to the guest's
> TLB.
>
> Signed-off-by: Scott Wood<scottwood@freescale.com>
> ---
>   Documentation/kvm/api.txt           |   79 +++++++-
>   arch/powerpc/include/asm/kvm.h      |   35 +++
>   arch/powerpc/include/asm/kvm_e500.h |   23 +-
>   arch/powerpc/include/asm/kvm_ppc.h  |    7 +
>   arch/powerpc/kvm/e500.c             |    5 +-
>   arch/powerpc/kvm/e500_emulate.c     |   12 +-
>   arch/powerpc/kvm/e500_tlb.c         |  418 ++++++++++++++++++++++++++---------
>   arch/powerpc/kvm/e500_tlb.h         |   55 ++---
>   arch/powerpc/kvm/powerpc.c          |   28 +++
>   include/linux/kvm.h                 |   19 ++
>   10 files changed, 515 insertions(+), 166 deletions(-)
>
> diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
> index 42542eb..8f2de15 100644
> --- a/Documentation/kvm/api.txt
> +++ b/Documentation/kvm/api.txt
> @@ -1268,7 +1268,7 @@ struct kvm_assigned_msix_entry {
>   	__u16 padding[3];
>   };
>
> -4.54 KVM_SET_TSC_KHZ
> +4.55 KVM_SET_TSC_KHZ
>
>   Capability: KVM_CAP_TSC_CONTROL
>   Architectures: x86
> @@ -1279,7 +1279,7 @@ Returns: 0 on success, -1 on error
>   Specifies the tsc frequency for the virtual machine. The unit of the
>   frequency is KHz.
>
> -4.55 KVM_GET_TSC_KHZ
> +4.56 KVM_GET_TSC_KHZ
>
>   Capability: KVM_CAP_GET_TSC_KHZ
>   Architectures: x86
> @@ -1291,6 +1291,81 @@ Returns the tsc frequency of the guest. The unit of the return value is
>   KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
>   error.
>
> +4.57 KVM_CONFIG_TLB
> +
> +Capability: KVM_CAP_SW_TLB
> +Architectures: ppc
> +Type: vcpu ioctl
> +Parameters: struct kvm_config_tlb (in)
> +Returns: 0 on success, -1 on error
> +
> +struct kvm_config_tlb {
> +	__u64 params;
> +	__u64 array;
> +	__u32 mmu_type;
> +	__u32 array_len;
> +};
> +
> +Configures the virtual CPU's TLB array, establishing a shared memory area
> +between userspace and KVM.  The "params" and "array" fields are userspace
> +addresses of mmu-type-specific data structures.  The "array_len" field is an
> +safety mechanism, and should be set to the size in bytes of the memory that
> +userspace has reserved for the array.  It must be at least the size dictated
> +by "mmu_type" and "params".
> +
> +While KVM_RUN is active, the shared region is under control of KVM.  Its
> +contents are undefined, and any modification by userspace results in
> +boundedly undefined behavior.
> +
> +On return from KVM_RUN, the shared region will reflect the current state of
> +the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
> +to tell KVM which entries have been changed, prior to calling KVM_RUN again
> +on this vcpu.
> +
> +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:

Can we rename those KVM_MMU_BOOKE_206_NOHV and KVM_MMU_BOOKE_206_HV? 
Same for the function names. It's just easier to read and more accurate. 
Also I don't like how it says KVM_MMU_FSL :). It's really a booke interface.

> + - The "params" field is of type "struct kvm_fsl_booke_tlb_params".

I suppose params is a pointer?

> + - The "array" field points to an array of type "struct
> +   kvm_fsl_booke_tlb_entry".
> + - The array consists of all entries in the first TLB, followed by all
> +   entries in the second TLB.
> + - Within TLB0 entries are ordered first by increasing set number.  Within a
> +   set, entries are ordered by way (increasing ESEL).
> +   The hash for determining set number is: (MAS2>>  12)&  (num_sets - 1)
> +   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
> + - Within TLB1, entries are ordered by increasing ESEL.

Don't special-case it. TLB1 basically contains of only a single set. The 
MMU interface here shouldn't be too e500 specific :).

> +
> +4.58 KVM_DIRTY_TLB
> +
> +Capability: KVM_CAP_SW_TLB
> +Architectures: ppc
> +Type: vcpu ioctl
> +Parameters: struct kvm_dirty_tlb (in)
> +Returns: 0 on success, -1 on error
> +
> +struct kvm_dirty_tlb {
> +	__u64 bitmap;
> +	__u32 num_dirty;
> +};
> +
> +This must be called whenever userspace has changed an entry in the shared
> +TLB, prior to calling KVM_RUN on the associated vcpu.
> +
> +The "bitmap" field is the userspace address of an array.  This array
> +consists of a number of bits, equal to the total number of TLB entries as
> +determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
> +nearest multiple of 64.
> +
> +Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
> +array.
> +
> +The array is little-endian: the bit 0 is the least significant bit of the
> +first byte, bit 8 is the least significant bit of the second byte, etc.
> +This avoids any complications with differing word sizes.
> +
> +The "num_dirty" field is a performance hint for KVM to determine whether it
> +should skip processing the bitmap and just invalidate everything.  It must
> +be set to the number of set bits in the bitmap.
> +
>   5. The kvm_run structure
>
>   Application code obtains a pointer to the kvm_run structure by
> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> index d2ca5ed..2419be2 100644
> --- a/arch/powerpc/include/asm/kvm.h
> +++ b/arch/powerpc/include/asm/kvm.h
> @@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
>   #define KVM_INTERRUPT_UNSET	-2U
>   #define KVM_INTERRUPT_SET_LEVEL	-3U
>
> +struct kvm_fsl_booke_tlb_entry {
> +	__u32 mas8;
> +	__u32 mas1;
> +	__u64 mas2;
> +	__u64 mas7_3;

Is there a good reason to not include the other MAS registers as defined 
by the architecture? That'd make the interface easily extensible for the 
future and should only eat a few additional bytes, no?

> +};
> +
> +struct kvm_fsl_booke_tlb_params {
> +	/*
> +	 * book3e defines 4 TLBs, but current FSL Book E chips implement
> +	 * only the first two.  The second two entries in tlb_sizes[]
> +	 * and tlb_ways[] are reserved and must be zero.

I don't think that part is necessary. It's really an implementation detail.

> +	 *
> +	 * A tlb_ways value of zero means the array is fully associative.
> +	 * Only TLB0 may be configured with a different associativity.  The
> +	 * number of ways of TLB0 must be a power of two between 2 and 16.
> +	 *
> +	 * The size of TLB0 must be a multiple of the number of ways, and
> +	 * the number of sets must be a power of two.
> +	 *
> +	 * The size of TLB1 may not exceed 64 entries.

Why not?

> +	 *
> +	 * KVM will adjust TLBnCFG based on the sizes configured here,
> +	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
> +	 * set to zero.
> +	 *
> +	 * TLB0 supports 4 KiB pages.
> +	 * The page sizes supported by TLB1 are as indicated by
> +	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).

This is not part of the interface, is it?

> +	 */
> +	__u32 tlb_sizes[4];
> +	__u8 tlb_ways[4];
> +	__u32 reserved[11];
> +};
> +
>   #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
> index adbfca9..fab626d 100644
> --- a/arch/powerpc/include/asm/kvm_e500.h
> +++ b/arch/powerpc/include/asm/kvm_e500.h
> @@ -22,13 +22,6 @@
>   #define E500_PID_NUM   3
>   #define E500_TLB_NUM   2
>
> -struct tlbe{
> -	u32 mas1;
> -	u32 mas2;
> -	u32 mas3;
> -	u32 mas7;
> -};
> -
>   #define E500_TLB_VALID 1
>   #define E500_TLB_DIRTY 2
>
> @@ -40,8 +33,11 @@ struct tlbe_priv {
>   struct vcpu_id_table;
>
>   struct kvmppc_vcpu_e500 {
> -	/* Unmodified copy of the guest's TLB. */
> -	struct tlbe *gtlb_arch[E500_TLB_NUM];
> +	/* Unmodified copy of the guest's TLB -- shared with Qemu. */

s/Qemu/user space/

> +	struct kvm_fsl_booke_tlb_entry *gtlb_arch;
> +
> +	/* Starting entry number in gtlb_arch[] */
> +	int gtlb_offset[E500_TLB_NUM];
>
>   	/* KVM internal information associated with each guest TLB entry */
>   	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
> @@ -49,6 +45,9 @@ struct kvmppc_vcpu_e500 {
>   	unsigned int gtlb_size[E500_TLB_NUM];
>   	unsigned int gtlb_nv[E500_TLB_NUM];
>
> +	unsigned int gtlb0_ways;
> +	unsigned int gtlb0_sets;
> +
>   	u32 host_pid[E500_PID_NUM];
>   	u32 pid[E500_PID_NUM];
>   	u32 svr;
> @@ -56,11 +55,10 @@ struct kvmppc_vcpu_e500 {
>   	u32 mas0;
>   	u32 mas1;
>   	u32 mas2;
> -	u32 mas3;
> +	u64 mas7_3;
>   	u32 mas4;
>   	u32 mas5;
>   	u32 mas6;
> -	u32 mas7;
>
>   	/* vcpu id table */
>   	struct vcpu_id_table *idt;
> @@ -73,6 +71,9 @@ struct kvmppc_vcpu_e500 {
>   	u32 tlb1cfg;
>   	u64 mcar;
>
> +	struct page **shared_tlb_pages;
> +	int num_shared_tlb_pages;
> +
>   	struct kvm_vcpu vcpu;
>   };
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index c662f14..bb3d418 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -152,4 +152,11 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
>
>   void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
>
> +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
> +			      struct kvm_config_tlb *cfg);
> +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> +			     struct kvm_dirty_tlb *cfg);
> +
> +void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu);
> +
>   #endif /* __POWERPC_KVM_PPC_H__ */
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 43923c3..628f723 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -110,7 +110,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
>   	sregs->u.e.mas0 = vcpu_e500->mas0;
>   	sregs->u.e.mas1 = vcpu_e500->mas1;
>   	sregs->u.e.mas2 = vcpu_e500->mas2;
> -	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7<<  32) | vcpu_e500->mas3;
> +	sregs->u.e.mas7_3 = vcpu_e500->mas7_3;
>   	sregs->u.e.mas4 = vcpu_e500->mas4;
>   	sregs->u.e.mas6 = vcpu_e500->mas6;
>
> @@ -143,8 +143,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
>   		vcpu_e500->mas0 = sregs->u.e.mas0;
>   		vcpu_e500->mas1 = sregs->u.e.mas1;
>   		vcpu_e500->mas2 = sregs->u.e.mas2;
> -		vcpu_e500->mas7 = sregs->u.e.mas7_3>>  32;
> -		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
> +		vcpu_e500->mas7_3 = sregs->u.e.mas7_3;
>   		vcpu_e500->mas4 = sregs->u.e.mas4;
>   		vcpu_e500->mas6 = sregs->u.e.mas6;
>   	}
> diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
> index 69cd665..4fb0ebb 100644
> --- a/arch/powerpc/kvm/e500_emulate.c
> +++ b/arch/powerpc/kvm/e500_emulate.c
> @@ -91,13 +91,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
>   	case SPRN_MAS2:
>   		vcpu_e500->mas2 = spr_val; break;
>   	case SPRN_MAS3:
> -		vcpu_e500->mas3 = spr_val; break;
> +		vcpu_e500->mas7_3&= ~(u64)0xffffffff;
> +		vcpu_e500->mas7_3 |= spr_val;
> +		break;
>   	case SPRN_MAS4:
>   		vcpu_e500->mas4 = spr_val; break;
>   	case SPRN_MAS6:
>   		vcpu_e500->mas6 = spr_val; break;
>   	case SPRN_MAS7:
> -		vcpu_e500->mas7 = spr_val; break;
> +		vcpu_e500->mas7_3&= (u64)0xffffffff;
> +		vcpu_e500->mas7_3 |= (u64)spr_val<<  32;
> +		break;
>   	case SPRN_L1CSR0:
>   		vcpu_e500->l1csr0 = spr_val;
>   		vcpu_e500->l1csr0&= ~(L1CSR0_DCFI | L1CSR0_CLFC);
> @@ -154,13 +158,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
>   	case SPRN_MAS2:
>   		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
>   	case SPRN_MAS3:
> -		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
> +		kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break;
>   	case SPRN_MAS4:
>   		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
>   	case SPRN_MAS6:
>   		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
>   	case SPRN_MAS7:
> -		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
> +		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3>>  32); break;
>
>   	case SPRN_TLB0CFG:
>   		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
> diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
> index d099d93..008f770 100644
> --- a/arch/powerpc/kvm/e500_tlb.c
> +++ b/arch/powerpc/kvm/e500_tlb.c
> @@ -18,6 +18,11 @@
>   #include<linux/kvm.h>
>   #include<linux/kvm_host.h>
>   #include<linux/highmem.h>
> +#include<linux/log2.h>
> +#include<linux/uaccess.h>
> +#include<linux/sched.h>
> +#include<linux/rwsem.h>
> +#include<linux/vmalloc.h>
>   #include<asm/kvm_ppc.h>
>   #include<asm/kvm_e500.h>
>
> @@ -63,6 +68,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
>
>   static unsigned int tlb1_entry_num;
>
> +static struct kvm_fsl_booke_tlb_entry *
> +get_entry(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
> +{
> +	int offset = vcpu_e500->gtlb_offset[tlbsel];
> +	return&vcpu_e500->gtlb_arch[offset + entry];
> +}
> +
>   /*
>    * Allocate a free shadow id and setup a valid sid mapping in given entry.
>    * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
> @@ -192,20 +204,23 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
>   void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe *tlbe;
> +	struct kvm_fsl_booke_tlb_entry *tlbe;
>   	int i, tlbsel;
>
> -	printk("| %8s | %8s | %8s | %8s | %8s |\n",
> -			"nr", "mas1", "mas2", "mas3", "mas7");
> +	printk("| %8s | %8s | %16s | %16s |\n",
> +	       "nr", "mas1", "mas2", "mas7_3");
>
>   	for (tlbsel = 0; tlbsel<  2; tlbsel++) {
> +		int offset = vcpu_e500->gtlb_offset[tlbsel];
> +
>   		printk("Guest TLB%d:\n", tlbsel);
>   		for (i = 0; i<  vcpu_e500->gtlb_size[tlbsel]; i++) {
> -			tlbe =&vcpu_e500->gtlb_arch[tlbsel][i];
> +			tlbe =&vcpu_e500->gtlb_arch[offset + i];
>   			if (tlbe->mas1&  MAS1_VALID)
> -				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
> -					tlbsel, i, tlbe->mas1, tlbe->mas2,
> -					tlbe->mas3, tlbe->mas7);
> +				printk(" G[%d][%3d] |  %08X | %016llX | %016llX |\n",
> +				       tlbsel, i, tlbe->mas1,
> +				       (unsigned long long)tlbe->mas2,
> +				       (unsigned long long)tlbe->mas7_3);
>   		}
>   	}
>   }
> @@ -216,7 +231,7 @@ static inline unsigned int tlb0_get_next_victim(
>   	unsigned int victim;
>
>   	victim = vcpu_e500->gtlb_nv[0]++;
> -	if (unlikely(vcpu_e500->gtlb_nv[0]>= KVM_E500_TLB0_WAY_NUM))
> +	if (unlikely(vcpu_e500->gtlb_nv[0]>= vcpu_e500->gtlb0_ways))
>   		vcpu_e500->gtlb_nv[0] = 0;
>
>   	return victim;
> @@ -228,9 +243,9 @@ static inline unsigned int tlb1_max_shadow_size(void)
>   	return tlb1_entry_num - tlbcam_index - 1;
>   }
>
> -static inline int tlbe_is_writable(struct tlbe *tlbe)
> +static inline int tlbe_is_writable(struct kvm_fsl_booke_tlb_entry *tlbe)
>   {
> -	return tlbe->mas3&  (MAS3_SW|MAS3_UW);
> +	return tlbe->mas7_3&  (MAS3_SW|MAS3_UW);
>   }
>
>   static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
> @@ -261,40 +276,40 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>   /*
>    * writing shadow tlb entry to host TLB
>    */
> -static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
> +static inline void __write_host_tlbe(struct kvm_fsl_booke_tlb_entry *stlbe,
> +				     uint32_t mas0)
>   {
>   	unsigned long flags;
>
>   	local_irq_save(flags);
>   	mtspr(SPRN_MAS0, mas0);
>   	mtspr(SPRN_MAS1, stlbe->mas1);
> -	mtspr(SPRN_MAS2, stlbe->mas2);
> -	mtspr(SPRN_MAS3, stlbe->mas3);
> -	mtspr(SPRN_MAS7, stlbe->mas7);
> +	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
> +	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
> +	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3>>  32));
>   	asm volatile("isync; tlbwe" : : : "memory");
>   	local_irq_restore(flags);
>   }
>
>   static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int tlbsel, int esel, struct tlbe *stlbe)
> +		int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
>   {
>   	if (tlbsel == 0) {
> -		__write_host_tlbe(stlbe,
> -				  MAS0_TLBSEL(0) |
> -				  MAS0_ESEL(esel&  (KVM_E500_TLB0_WAY_NUM - 1)));
> +		int way = esel&  (vcpu_e500->gtlb0_ways - 1);
> +		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way));
>   	} else {
>   		__write_host_tlbe(stlbe,
>   				  MAS0_TLBSEL(1) |
>   				  MAS0_ESEL(to_htlb1_esel(esel)));
>   	}
>   	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
> -			     stlbe->mas3, stlbe->mas7);
> +			     (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3>>  32));
>   }
>
>   void kvmppc_map_magic(struct kvm_vcpu *vcpu)
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe magic;
> +	struct kvm_fsl_booke_tlb_entry magic;
>   	ulong shared_page = ((ulong)vcpu->arch.shared)&  PAGE_MASK;
>   	unsigned int stid;
>   	pfn_t pfn;
> @@ -308,9 +323,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
>   	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
>   		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
>   	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
> -	magic.mas3 = (pfn<<  PAGE_SHIFT) |
> -		     MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
> -	magic.mas7 = pfn>>  (32 - PAGE_SHIFT);
> +	magic.mas7_3 = ((u64)pfn<<  PAGE_SHIFT) |
> +		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
>
>   	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
>   	preempt_enable();
> @@ -331,7 +345,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
>   static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
>   					 int tlbsel, int esel)
>   {
> -	struct tlbe *gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +	struct kvm_fsl_booke_tlb_entry *gtlbe =
> +		get_entry(vcpu_e500, tlbsel, esel);
>   	struct vcpu_id_table *idt = vcpu_e500->idt;
>   	unsigned int pr, tid, ts, pid;
>   	u32 val, eaddr;
> @@ -377,25 +392,50 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
>   	}
>   }
>
> +static int tlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
> +{
> +	int set_base;
> +
> +	set_base = (addr>>  PAGE_SHIFT)&  (vcpu_e500->gtlb0_sets - 1);
> +	set_base *= vcpu_e500->gtlb0_ways;
> +
> +	return set_base;
> +}
> +
> +static int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
> +{
> +	int esel = get_tlb_esel_bit(vcpu_e500);
> +
> +	if (tlbsel == 0) {
> +		esel&= vcpu_e500->gtlb0_ways - 1;
> +		esel += tlb0_set_base(vcpu_e500, vcpu_e500->mas2);
> +	} else {
> +		esel&= vcpu_e500->gtlb_size[1] - 1;
> +	}
> +
> +	return esel;
> +}
> +
>   /* Search the guest TLB for a matching entry. */
>   static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
>   		gva_t eaddr, int tlbsel, unsigned int pid, int as)
>   {
>   	int size = vcpu_e500->gtlb_size[tlbsel];
> -	int set_base;
> +	int set_base, offset;
>   	int i;
>
>   	if (tlbsel == 0) {
> -		int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
> -		set_base = (eaddr>>  PAGE_SHIFT)&  mask;
> -		set_base *= KVM_E500_TLB0_WAY_NUM;
> -		size = KVM_E500_TLB0_WAY_NUM;
> +		set_base = tlb0_set_base(vcpu_e500, eaddr);
> +		size = vcpu_e500->gtlb0_ways;
>   	} else {
>   		set_base = 0;
>   	}
>
> +	offset = vcpu_e500->gtlb_offset[tlbsel];
> +
>   	for (i = 0; i<  size; i++) {
> -		struct tlbe *tlbe =&vcpu_e500->gtlb_arch[tlbsel][set_base + i];
> +		struct kvm_fsl_booke_tlb_entry *tlbe =
> +			&vcpu_e500->gtlb_arch[offset + set_base + i];
>   		unsigned int tid;
>
>   		if (eaddr<  get_tlb_eaddr(tlbe))
> @@ -421,7 +461,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
>   }
>
>   static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
> -					  struct tlbe *gtlbe,
> +					  struct kvm_fsl_booke_tlb_entry *gtlbe,
>   					  pfn_t pfn)
>   {
>   	priv->pfn = pfn;
> @@ -463,17 +503,17 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
>   		| MAS1_TSIZE(tsized);
>   	vcpu_e500->mas2 = (eaddr&  MAS2_EPN)
>   		| (vcpu_e500->mas4&  MAS2_ATTRIB_MASK);
> -	vcpu_e500->mas3&= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
> +	vcpu_e500->mas7_3&= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
>   	vcpu_e500->mas6 = (vcpu_e500->mas6&  MAS6_SPID1)
>   		| (get_cur_pid(vcpu)<<  16)
>   		| (as ? MAS6_SAS : 0);
> -	vcpu_e500->mas7 = 0;
>   }
>
>   static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -					   struct tlbe *gtlbe, int tsize,
> -					   struct tlbe_priv *priv,
> -					   u64 gvaddr, struct tlbe *stlbe)
> +					   struct kvm_fsl_booke_tlb_entry *gtlbe,
> +					   int tsize, struct tlbe_priv *priv,
> +					   u64 gvaddr,
> +					   struct kvm_fsl_booke_tlb_entry *stlbe)
>   {
>   	pfn_t pfn = priv->pfn;
>   	unsigned int stid;
> @@ -488,16 +528,14 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
>   	stlbe->mas2 = (gvaddr&  MAS2_EPN)
>   		| e500_shadow_mas2_attrib(gtlbe->mas2,
>   				vcpu_e500->vcpu.arch.shared->msr&  MSR_PR);
> -	stlbe->mas3 = ((pfn<<  PAGE_SHIFT)&  MAS3_RPN)
> -		| e500_shadow_mas3_attrib(gtlbe->mas3,
> +	stlbe->mas7_3 = ((u64)pfn<<  PAGE_SHIFT)
> +		| e500_shadow_mas3_attrib(gtlbe->mas7_3,
>   				vcpu_e500->vcpu.arch.shared->msr&  MSR_PR);
> -	stlbe->mas7 = (pfn>>  (32 - PAGE_SHIFT))&  MAS7_RPN;
>   }
>
> -
>   static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
> -	struct tlbe *stlbe)
> +	u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
> +	int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
>   {
>   	struct kvm_memory_slot *slot;
>   	unsigned long pfn, hva;
> @@ -609,11 +647,11 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
>
>   /* XXX only map the one-one case, for now use TLB0 */
>   static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -				int esel, struct tlbe *stlbe)
> +				int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
>   {
> -	struct tlbe *gtlbe;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe;
>
> -	gtlbe =&vcpu_e500->gtlb_arch[0][esel];
> +	gtlbe = get_entry(vcpu_e500, 0, esel);
>
>   	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
>   			get_tlb_raddr(gtlbe)>>  PAGE_SHIFT,
> @@ -626,7 +664,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
>    * the shadow TLB. */
>   /* XXX for both one-one and one-to-many , for now use TLB1 */
>   static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
> +		u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
> +		struct kvm_fsl_booke_tlb_entry *stlbe)
>   {
>   	unsigned int victim;
>
> @@ -652,7 +691,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
>   				struct kvmppc_vcpu_e500 *vcpu_e500,
>   				int tlbsel, int esel)
>   {
> -	struct tlbe *gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +	struct kvm_fsl_booke_tlb_entry *gtlbe =
> +		get_entry(vcpu_e500, tlbsel, esel);
>
>   	if (unlikely(get_tlb_iprot(gtlbe)))
>   		return -1;
> @@ -715,18 +755,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
>   	int tlbsel, esel;
> -	struct tlbe *gtlbe;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe;
>
>   	tlbsel = get_tlb_tlbsel(vcpu_e500);
>   	esel = get_tlb_esel(vcpu_e500, tlbsel);
>
> -	gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
>   	vcpu_e500->mas0&= ~MAS0_NV(~0);
>   	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
>   	vcpu_e500->mas1 = gtlbe->mas1;
>   	vcpu_e500->mas2 = gtlbe->mas2;
> -	vcpu_e500->mas3 = gtlbe->mas3;
> -	vcpu_e500->mas7 = gtlbe->mas7;
> +	vcpu_e500->mas7_3 = gtlbe->mas7_3;
>
>   	return EMULATE_DONE;
>   }
> @@ -737,7 +776,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
>   	int as = !!get_cur_sas(vcpu_e500);
>   	unsigned int pid = get_cur_spid(vcpu_e500);
>   	int esel, tlbsel;
> -	struct tlbe *gtlbe = NULL;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe = NULL;
>   	gva_t ea;
>
>   	ea = kvmppc_get_gpr(vcpu, rb);
> @@ -745,7 +784,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
>   	for (tlbsel = 0; tlbsel<  2; tlbsel++) {
>   		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
>   		if (esel>= 0) {
> -			gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
>   			break;
>   		}
>   	}
> @@ -755,8 +794,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
>   			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
>   		vcpu_e500->mas1 = gtlbe->mas1;
>   		vcpu_e500->mas2 = gtlbe->mas2;
> -		vcpu_e500->mas3 = gtlbe->mas3;
> -		vcpu_e500->mas7 = gtlbe->mas7;
> +		vcpu_e500->mas7_3 = gtlbe->mas7_3;
>   	} else {
>   		int victim;
>
> @@ -771,8 +809,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
>   			| (vcpu_e500->mas4&  MAS4_TSIZED(~0));
>   		vcpu_e500->mas2&= MAS2_EPN;
>   		vcpu_e500->mas2 |= vcpu_e500->mas4&  MAS2_ATTRIB_MASK;
> -		vcpu_e500->mas3&= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
> -		vcpu_e500->mas7 = 0;
> +		vcpu_e500->mas7_3&= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
>   	}
>
>   	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
> @@ -782,28 +819,27 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
>   int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe *gtlbe;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe;
>   	int tlbsel, esel;
>
>   	tlbsel = get_tlb_tlbsel(vcpu_e500);
>   	esel = get_tlb_esel(vcpu_e500, tlbsel);
>
> -	gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
>
>   	if (get_tlb_v(gtlbe))
>   		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
>
>   	gtlbe->mas1 = vcpu_e500->mas1;
>   	gtlbe->mas2 = vcpu_e500->mas2;
> -	gtlbe->mas3 = vcpu_e500->mas3;
> -	gtlbe->mas7 = vcpu_e500->mas7;
> +	gtlbe->mas7_3 = vcpu_e500->mas7_3;
>
>   	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
> -			     gtlbe->mas3, gtlbe->mas7);
> +			     (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3>>  32));
>
>   	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
>   	if (tlbe_is_host_safe(vcpu, gtlbe)) {
> -		struct tlbe stlbe;
> +		struct kvm_fsl_booke_tlb_entry stlbe;
>   		int stlbsel, sesel;
>   		u64 eaddr;
>   		u64 raddr;
> @@ -877,9 +913,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
>   			gva_t eaddr)
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe *gtlbe =
> -		&vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
> -	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe;
> +	u64 pgmask;
> +
> +	gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
> +	pgmask = get_tlb_bytes(gtlbe) - 1;
>
>   	return get_tlb_raddr(gtlbe) | (eaddr&  pgmask);
>   }
> @@ -893,12 +931,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
>   {
>   	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
>   	struct tlbe_priv *priv;
> -	struct tlbe *gtlbe, stlbe;
> +	struct kvm_fsl_booke_tlb_entry *gtlbe, stlbe;
>   	int tlbsel = tlbsel_of(index);
>   	int esel = esel_of(index);
>   	int stlbsel, sesel;
>
> -	gtlbe =&vcpu_e500->gtlb_arch[tlbsel][esel];
> +	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
>
>   	preempt_disable();
>   	switch (tlbsel) {
> @@ -956,51 +994,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
>
>   void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
>   {
> -	struct tlbe *tlbe;
> +	struct kvm_fsl_booke_tlb_entry *tlbe;
>
>   	/* Insert large initial mapping for guest. */
> -	tlbe =&vcpu_e500->gtlb_arch[1][0];
> +	tlbe = get_entry(vcpu_e500, 1, 0);
>   	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
>   	tlbe->mas2 = 0;
> -	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
> -	tlbe->mas7 = 0;
> +	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
>
>   	/* 4K map for serial output. Used by kernel wrapper. */

Which kernel wrapper? At the end of the day, these initial TLB values 
should be pushed down from Qemu btw. They really don't belong here. A 
different guest (u-boot for example) might need completely different 
mappings.

> -	tlbe =&vcpu_e500->gtlb_arch[1][1];
> +	tlbe = get_entry(vcpu_e500, 1, 1);
>   	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
>   	tlbe->mas2 = (0xe0004500&  0xFFFFF000) | MAS2_I | MAS2_G;
> -	tlbe->mas3 = (0xe0004500&  0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
> -	tlbe->mas7 = 0;
> +	tlbe->mas7_3 = (0xe0004500&  0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
> +}
> +
> +static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
> +{
> +	int stlbsel, i;
> +
> +	for (stlbsel = 0; stlbsel<  2; stlbsel++) {
> +		for (i = 0; i<  vcpu_e500->gtlb_size[stlbsel]; i++) {
> +			struct tlbe_priv *priv =
> +				&vcpu_e500->gtlb_priv[stlbsel][i];
> +			kvmppc_e500_priv_release(priv);
> +		}
> +	}
> +}
> +
> +static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
> +{
> +	int i;
> +
> +	clear_tlb_privs(vcpu_e500);
> +
> +	kfree(vcpu_e500->gtlb_priv[0]);
> +	kfree(vcpu_e500->gtlb_priv[1]);
> +
> +	if (vcpu_e500->shared_tlb_pages) {
> +		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
> +					  PAGE_SIZE)));
> +
> +		for (i = 0; i<  vcpu_e500->num_shared_tlb_pages; i++)
> +			put_page(vcpu_e500->shared_tlb_pages[i]);
> +
> +		vcpu_e500->num_shared_tlb_pages = 0;
> +		vcpu_e500->shared_tlb_pages = NULL;
> +	} else {
> +		kfree(vcpu_e500->gtlb_arch);
> +	}
> +
> +	vcpu_e500->gtlb_arch = NULL;
> +}
> +
> +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
> +			      struct kvm_config_tlb *cfg)
> +{
> +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> +	struct kvm_fsl_booke_tlb_params params;
> +	char *virt;
> +	struct page **pages;
> +	struct tlbe_priv *privs[2] = {};
> +	size_t array_len;
> +	u32 sets;
> +	int num_pages, ret, i;
> +
> +	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
> +			   sizeof(params)))
> +		return -EFAULT;
> +
> +	if (params.tlb_sizes[1]>  64)
> +		return -EINVAL;
> +	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
> +		return -EINVAL;
> +	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||

Hrm - I always thought that basically the TLB ID would be:

   (some bits of the EA) || (ESEL)

So in the TLB1 case, "some bits" would mean "no bits" and ESEL is all of 
the information we get. And ESEL is ways, no?


Alex


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 13/13] KVM: PPC: e500: MMU API
  2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
  2011-05-19 14:10 ` Alexander Graf
@ 2011-05-19 16:28 ` Scott Wood
  2011-05-19 17:37 ` Alexander Graf
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Scott Wood @ 2011-05-19 16:28 UTC (permalink / raw)
  To: kvm-ppc

On Thu, 19 May 2011 16:10:48 +0200
Alexander Graf <agraf@suse.de> wrote:

> On 05/18/2011 01:42 AM, Scott Wood wrote:
> > +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
> 
> Can we rename those KVM_MMU_BOOKE_206_NOHV and KVM_MMU_BOOKE_206_HV? 
> Same for the function names. It's just easier to read and more accurate. 
> Also I don't like how it says KVM_MMU_FSL :). It's really a booke interface.

That's more or less how I had it initially.  In our earlier discussion I
changed it to FSL because of assumptions about TLB layout, such as the set
hash:

http://www.spinics.net/lists/kvm-ppc/msg02641.html
http://www.spinics.net/lists/kvm-ppc/msg02645.html

> > + - The "params" field is of type "struct kvm_fsl_booke_tlb_params".
> 
> I suppose params is a pointer?

Yes.

> > + - The "array" field points to an array of type "struct
> > +   kvm_fsl_booke_tlb_entry".
> > + - The array consists of all entries in the first TLB, followed by all
> > +   entries in the second TLB.
> > + - Within TLB0 entries are ordered first by increasing set number.  Within a
> > +   set, entries are ordered by way (increasing ESEL).
> > +   The hash for determining set number is: (MAS2>>  12)&  (num_sets - 1)
> > +   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
> > + - Within TLB1, entries are ordered by increasing ESEL.
> 
> Don't special-case it. TLB1 basically consists of only a single set. The 
> MMU interface here shouldn't be too e500 specific :).

How do I define the hash for determining set number without being
e500-specific?

> > diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> > index d2ca5ed..2419be2 100644
> > --- a/arch/powerpc/include/asm/kvm.h
> > +++ b/arch/powerpc/include/asm/kvm.h
> > @@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
> >   #define KVM_INTERRUPT_UNSET	-2U
> >   #define KVM_INTERRUPT_SET_LEVEL	-3U
> >
> > +struct kvm_fsl_booke_tlb_entry {
> > +	__u32 mas8;
> > +	__u32 mas1;
> > +	__u64 mas2;
> > +	__u64 mas7_3;
> 
> Is there a good reason to not include the other MAS registers as defined 
> by the architecture? That'd make the interface easily extensible for the 
> future and should only eat a few additional bytes, no?

Those registers aren't part of a TLB entry, they're used for identifying
the slot or searching.

> > +struct kvm_fsl_booke_tlb_params {
> > +	/*
> > +	 * book3e defines 4 TLBs, but current FSL Book E chips implement
> > +	 * only the first two.  The second two entries in tlb_sizes[]
> > +	 * and tlb_ways[] are reserved and must be zero.
> 
> I don't think that part is necessary. It's really an implementation detail.

It is not an implementation detail.  KVM on FSL Book E does not support
TLB2 or TLB3.  I had more generic wording when it was not FSL specific,
where the supported TLBs were determined by reading the TLB config
registers.

> > +	 *
> > +	 * A tlb_ways value of zero means the array is fully associative.
> > +	 * Only TLB0 may be configured with a different associativity.  The
> > +	 * number of ways of TLB0 must be a power of two between 2 and 16.
> > +	 *
> > +	 * The size of TLB0 must be a multiple of the number of ways, and
> > +	 * the number of sets must be a power of two.
> > +	 *
> > +	 * The size of TLB1 may not exceed 64 entries.
> 
> Why not?

I wanted to set some limit, at least as long as we process TLB1 linearly
rather than use some fancy data structure.  64 is the most we currently
have in hardware, and I don't think we have any plans to increase that
while maintaining full associativity.

If we need to change it later we can -- either with a capability, or by
having qemu iterate to find the max.

> > +	 * KVM will adjust TLBnCFG based on the sizes configured here,
> > +	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
> > +	 * set to zero.
> > +	 *
> > +	 * TLB0 supports 4 KiB pages.
> > +	 * The page sizes supported by TLB1 are as indicated by
> > +	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).
> 
> This is not part of the interface, is it?

It provides a way for qemu to discover what page sizes KVM will deal with.

> > @@ -956,51 +994,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
> >
> >   void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
> >   {
> > -	struct tlbe *tlbe;
> > +	struct kvm_fsl_booke_tlb_entry *tlbe;
> >
> >   	/* Insert large initial mapping for guest. */
> > -	tlbe =&vcpu_e500->gtlb_arch[1][0];
> > +	tlbe = get_entry(vcpu_e500, 1, 0);
> >   	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
> >   	tlbe->mas2 = 0;
> > -	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
> > -	tlbe->mas7 = 0;
> > +	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
> >
> >   	/* 4K map for serial output. Used by kernel wrapper. */
> 
> Which kernel wrapper? At the end of the day, these initial TLB values 
> should be pushed down from Qemu btw. They really don't belong here. A 
> different guest (u-boot for example) might need completely different 
> mappings.

That was not added by this patchset...  and yes, the plan is for qemu to
set the initial TLB using the MMU interface.  The entries created here are
just for legacy qemu.

> > +
> > +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
> > +			      struct kvm_config_tlb *cfg)
> > +{
> > +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> > +	struct kvm_fsl_booke_tlb_params params;
> > +	char *virt;
> > +	struct page **pages;
> > +	struct tlbe_priv *privs[2] = {};
> > +	size_t array_len;
> > +	u32 sets;
> > +	int num_pages, ret, i;
> > +
> > +	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
> > +		return -EINVAL;
> > +
> > +	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
> > +			   sizeof(params)))
> > +		return -EFAULT;
> > +
> > +	if (params.tlb_sizes[1]>  64)
> > +		return -EINVAL;
> > +	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
> > +		return -EINVAL;
> >> +	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||
> 
> Hrm - I always thought that basically the TLB ID would be:
> 
>    (some bits of the EA) || (ESEL)
> 
> So in the TLB1 case, "some bits" would mean "no bits" and ESEL is all of 
> the information we get. And ESEL is ways, no?

TLBnCFG allows ASSOC to be either zero or equal to NENTRY to indicate
full associativity.  In this API I chose just one way of representing it
for simplicity.  I can pick ASSOC = NENTRY instead if you prefer.

-Scott


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 13/13] KVM: PPC: e500: MMU API
  2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
  2011-05-19 14:10 ` Alexander Graf
  2011-05-19 16:28 ` Scott Wood
@ 2011-05-19 17:37 ` Alexander Graf
  2011-05-19 18:12 ` Scott Wood
  2011-06-02 23:17 ` Scott Wood
  4 siblings, 0 replies; 6+ messages in thread
From: Alexander Graf @ 2011-05-19 17:37 UTC (permalink / raw)
  To: kvm-ppc


On 19.05.2011, at 18:28, Scott Wood wrote:

> On Thu, 19 May 2011 16:10:48 +0200
> Alexander Graf <agraf@suse.de> wrote:
> 
>> On 05/18/2011 01:42 AM, Scott Wood wrote:
>>> +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
>> 
>> Can we rename those KVM_MMU_BOOKE_206_NOHV and KVM_MMU_BOOKE_206_HV? 
>> Same for the function names. It's just easier to read and more accurate. 
>> Also I don't like how it says KVM_MMU_FSL :). It's really a booke interface.
> 
> That's more or less how I had it initially.  In our earlier discussion I
> changed it to FSL because of assumptions about TLB layout, such as the set
> hash:
> 
> http://www.spinics.net/lists/kvm-ppc/msg02641.html
> http://www.spinics.net/lists/kvm-ppc/msg02645.html

Ah, that discussion was about calling it book3e, and that name is too generic. If the interface could satisfy any book3e 2.06 MMU, then calling it BOOKE_206 would be just fine. If it can only handle e500, call it FSL or E500 - whichever fits you better. I'd prefer if the interface was clever enough to also be used by non-FSL 2.06 implementations, in case any of those follow, if it's not too much additional overhead.

> 
>>> + - The "params" field is of type "struct kvm_fsl_booke_tlb_params".
>> 
>> I suppose params is a pointer?
> 
> Yes.
> 
>>> + - The "array" field points to an array of type "struct
>>> +   kvm_fsl_booke_tlb_entry".
>>> + - The array consists of all entries in the first TLB, followed by all
>>> +   entries in the second TLB.
>>> + - Within TLB0 entries are ordered first by increasing set number.  Within a
>>> +   set, entries are ordered by way (increasing ESEL).
>>> +   The hash for determining set number is: (MAS2>>  12)&  (num_sets - 1)
>>> +   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
>>> + - Within TLB1, entries are ordered by increasing ESEL.
>> 
>> Don't special-case it. TLB1 basically contains of only a single set. The 
>> MMU interface here shouldn't be too e500 specific :).
> 
> How do I define the hash for determining set number without being
> e500-specific?

Hm. Isn't the hash always the way you put it above? Maybe I misread the spec :). The only obvious thing it states there are the corner cases O_o.

> 
>>> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
>>> index d2ca5ed..2419be2 100644
>>> --- a/arch/powerpc/include/asm/kvm.h
>>> +++ b/arch/powerpc/include/asm/kvm.h
>>> @@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
>>>  #define KVM_INTERRUPT_UNSET	-2U
>>>  #define KVM_INTERRUPT_SET_LEVEL	-3U
>>> 
>>> +struct kvm_fsl_booke_tlb_entry {
>>> +	__u32 mas8;
>>> +	__u32 mas1;
>>> +	__u64 mas2;
>>> +	__u64 mas7_3;
>> 
>> Is there a good reason to not include the other MAS registers as defined 
>> by the architecture? That'd make the interface easily extensible for the 
>> future and should only eat a few additional bytes, no?
> 
> Those registers aren't part of a TLB entry, they're used for identifying
> the slot or searching.

Ah, yeah :). Sorry for the fuss.

> 
>>> +struct kvm_fsl_booke_tlb_params {
>>> +	/*
>>> +	 * book3e defines 4 TLBs, but current FSL Book E chips implement
>>> +	 * only the first two.  The second two entries in tlb_sizes[]
>>> +	 * and tlb_ways[] are reserved and must be zero.
>> 
>> I don't think that part is necessary. It's really an implementation detail.
> 
> It is not an implementation detail.  KVM on FSL Book E does not support
> TLB2 or TLB3.  I had more generic wording when it was not FSL specific,
> where the supported TLBs were determined by reading the TLB config
> registers.

Either the struct is 100% e500 specific, then you only need 2 TLBs in there, or it's generic, then the actual implementation of TLB2 and TLB3 are open and as of yet unimplemented, but not reserved for eternity :).

> 
>>> +	 *
>>> +	 * A tlb_ways value of zero means the array is fully associative.
>>> +	 * Only TLB0 may be configured with a different associativity.  The
>>> +	 * number of ways of TLB0 must be a power of two between 2 and 16.
>>> +	 *
>>> +	 * The size of TLB0 must be a multiple of the number of ways, and
>>> +	 * the number of sets must be a power of two.
>>> +	 *
>>> +	 * The size of TLB1 may not exceed 64 entries.
>> 
>> Why not?
> 
> I wanted to set some limit, at least as long as we process TLB1 linearly
> rather than use some fancy data structure.  64 is the most we currently
> have in hardware, and I don't think we have any plans to increase that
> while maintaining full associativity.
> 
> If we need to change it later we can -- either with a capability, or by
> having qemu iterate to find the max.

Ok :).

> 
>>> +	 * KVM will adjust TLBnCFG based on the sizes configured here,
>>> +	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
>>> +	 * set to zero.
>>> +	 *
>>> +	 * TLB0 supports 4 KiB pages.
>>> +	 * The page sizes supported by TLB1 are as indicated by
>>> +	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).
>> 
>> This is not part of the interface, is it?
> 
> It provides a way for qemu to discover what page sizes KVM will deal with.

Hrm. So where does it get MMUCFG from?

> 
>>> @@ -956,51 +994,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
>>> 
>>>  void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
>>>  {
>>> -	struct tlbe *tlbe;
>>> +	struct kvm_fsl_booke_tlb_entry *tlbe;
>>> 
>>>  	/* Insert large initial mapping for guest. */
>>> -	tlbe =&vcpu_e500->gtlb_arch[1][0];
>>> +	tlbe = get_entry(vcpu_e500, 1, 0);
>>>  	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
>>>  	tlbe->mas2 = 0;
>>> -	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
>>> -	tlbe->mas7 = 0;
>>> +	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
>>> 
>>>  	/* 4K map for serial output. Used by kernel wrapper. */
>> 
>> Which kernel wrapper? At the end of the day, these initial TLB values 
>> should be pushed down from Qemu btw. They really don't belong here. A 
>> different guest (u-boot for example) might need completely different 
>> mappings.
> 
> That was not added by this patchset...  and yes, the plan is for qemu to
> set the initial TLB using the MMU interface.  The entries created here are
> just for legacy qemu.

It just came to mind when looking through the patch - of course the code needs to stay for old user space, but I wanted to make sure we're on the same page wrt where we're going with TLB inits :).

> 
>>> +
>>> +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
>>> +			      struct kvm_config_tlb *cfg)
>>> +{
>>> +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
>>> +	struct kvm_fsl_booke_tlb_params params;
>>> +	char *virt;
>>> +	struct page **pages;
>>> +	struct tlbe_priv *privs[2] = {};
>>> +	size_t array_len;
>>> +	u32 sets;
>>> +	int num_pages, ret, i;
>>> +
>>> +	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
>>> +		return -EINVAL;
>>> +
>>> +	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
>>> +			   sizeof(params)))
>>> +		return -EFAULT;
>>> +
>>> +	if (params.tlb_sizes[1]>  64)
>>> +		return -EINVAL;
>>> +	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
>>> +		return -EINVAL;
> >>> +	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||
>> 
>> Hrm - I always thought that basically the TLB ID would be:
>> 
>>   (some bits of the EA) || (ESEL)
>> 
>> So in the TLB1 case, "some bits" would mean "no bits" and ESEL is all of 
>> the information we get. And ESEL is ways, no?
> 
> TLBnCFG allows ASSOC to be either zero or equal to NENTRY to indicate
> full associativity.  In this API I chose just one way of representing it
> for simplicity.  I can pick ASSOC = NENTRY instead if you prefer.

Ah, ok. I'd find it easier to understand if we just always stick to (EA) || (ESEL) :).


Alex


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 13/13] KVM: PPC: e500: MMU API
  2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
                   ` (2 preceding siblings ...)
  2011-05-19 17:37 ` Alexander Graf
@ 2011-05-19 18:12 ` Scott Wood
  2011-06-02 23:17 ` Scott Wood
  4 siblings, 0 replies; 6+ messages in thread
From: Scott Wood @ 2011-05-19 18:12 UTC (permalink / raw)
  To: kvm-ppc

On Thu, 19 May 2011 19:37:05 +0200
Alexander Graf <agraf@suse.de> wrote:

> 
> On 19.05.2011, at 18:28, Scott Wood wrote:
> 
> > On Thu, 19 May 2011 16:10:48 +0200
> > Alexander Graf <agraf@suse.de> wrote:
> > 
> >> On 05/18/2011 01:42 AM, Scott Wood wrote:
> >>> +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
> >> 
> >> Can we rename those KVM_MMU_BOOKE_206_NOHV and KVM_MMU_BOOKE_206_HV? 
> >> Same for the function names. It's just easier to read and more accurate. 
> >> Also I don't like how it says KVM_MMU_FSL :). It's really a booke interface.
> > 
> > That's more or less how I had it initially.  In our earlier discussion I
> > changed it to FSL because of assumptions about TLB layout, such as the set
> > hash:
> > 
> > http://www.spinics.net/lists/kvm-ppc/msg02641.html
> > http://www.spinics.net/lists/kvm-ppc/msg02645.html
> 
> Ah, that discussion was about calling it book3e, and that name is too generic. If the interface could satisfy any book3e 2.06 MMU, then calling it BOOKE_206 would be just fine.

OK, I was considering 2.06+ implicit in "book3e" versus "booke", as that
seems to be the naming scheme that other parts of the kernel are using, so
I misinterpreted the objection.

> If it can only handle e500, call it FSL or E500 - whichever fits you
> better. I'd prefer if the interface was clever enough to also be used by
> non-FSL 2.06 implementations, in case any of those follow, if it's not too
> much additional overhead.

I think the main difficulty in making it arbitrary book3e 2.06 is the set
hash.

Note that most of this can be reused with new MMU IDs that specify
different hash (or other) behavior.

> >>> + - The "array" field points to an array of type "struct
> >>> +   kvm_fsl_booke_tlb_entry".
> >>> + - The array consists of all entries in the first TLB, followed by all
> >>> +   entries in the second TLB.
> >>> + - Within TLB0 entries are ordered first by increasing set number.  Within a
> >>> +   set, entries are ordered by way (increasing ESEL).
> >>> +   The hash for determining set number is: (MAS2>>  12)&  (num_sets - 1)
> >>> +   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
> >>> + - Within TLB1, entries are ordered by increasing ESEL.
> >> 
> >> Don't special-case it. TLB1 basically contains of only a single set. The 
> >> MMU interface here shouldn't be too e500 specific :).
> > 
> > How do I define the hash for determining set number without being
> > e500-specific?
> 
> Hm. Isn't the hash always the way you put it above? Maybe I misread the spec :). The only obvious thing it states there are the corner cases O_o.

The ISA never defines the hash, it just says that ESEL selects from the
set of possible entries for a given MAS2[EPN].  MAS1[TID] and
MAS1[TSIZE] can also be used as hash inputs if HES is supported.  Using the
low bits of the page number is the obvious thing to do, but not the only
thing allowed.

> >>> +struct kvm_fsl_booke_tlb_params {
> >>> +	/*
> >>> +	 * book3e defines 4 TLBs, but current FSL Book E chips implement
> >>> +	 * only the first two.  The second two entries in tlb_sizes[]
> >>> +	 * and tlb_ways[] are reserved and must be zero.
> >> 
> >> I don't think that part is necessary. It's really an implementation detail.
> > 
> > It is not an implementation detail.  KVM on FSL Book E does not support
> > TLB2 or TLB3.  I had more generic wording when it was not FSL specific,
> > where the supported TLBs were determined by reading the TLB config
> > registers.
> 
> Either the struct is 100% e500 specific, then you only need 2 TLBs in there, or it's generic, then the actual implementation of TLB2 and TLB3 are open and as of yet unimplemented, but not reserved for eternity :).

Ah.  Well, perhaps this struct (and tlb_entry) should be book3e 2.06
even if the MMU ID is FSL-specific.

> >>> +	 * KVM will adjust TLBnCFG based on the sizes configured here,
> >>> +	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
> >>> +	 * set to zero.
> >>> +	 *
> >>> +	 * TLB0 supports 4 KiB pages.
> >>> +	 * The page sizes supported by TLB1 are as indicated by
> >>> +	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).
> >> 
> >> This is not part of the interface, is it?
> > 
> > It provides a way for qemu to discover what page sizes KVM will deal with.
> 
> Hrm. So where does it get MMUCFG from?

KVM_GET_SREGS

-Scott


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 13/13] KVM: PPC: e500: MMU API
  2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
                   ` (3 preceding siblings ...)
  2011-05-19 18:12 ` Scott Wood
@ 2011-06-02 23:17 ` Scott Wood
  4 siblings, 0 replies; 6+ messages in thread
From: Scott Wood @ 2011-06-02 23:17 UTC (permalink / raw)
  To: kvm-ppc

This implements a shared-memory API for giving host userspace access to
the guest's TLB.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 Documentation/virtual/kvm/api.txt   |   84 +++++++-
 arch/powerpc/include/asm/kvm.h      |   35 +++
 arch/powerpc/include/asm/kvm_e500.h |   23 +-
 arch/powerpc/include/asm/kvm_ppc.h  |    7 +
 arch/powerpc/kvm/e500.c             |    5 +-
 arch/powerpc/kvm/e500_emulate.c     |   12 +-
 arch/powerpc/kvm/e500_tlb.c         |  418 ++++++++++++++++++++++++++---------
 arch/powerpc/kvm/e500_tlb.h         |   55 ++---
 arch/powerpc/kvm/powerpc.c          |   28 +++
 include/linux/kvm.h                 |   19 ++
 10 files changed, 517 insertions(+), 169 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3755a39..08ad12c 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1268,7 +1268,7 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
-4.54 KVM_SET_TSC_KHZ
+4.55 KVM_SET_TSC_KHZ
 
 Capability: KVM_CAP_TSC_CONTROL
 Architectures: x86
@@ -1279,7 +1279,7 @@ Returns: 0 on success, -1 on error
 Specifies the tsc frequency for the virtual machine. The unit of the
 frequency is KHz.
 
-4.55 KVM_GET_TSC_KHZ
+4.56 KVM_GET_TSC_KHZ
 
 Capability: KVM_CAP_GET_TSC_KHZ
 Architectures: x86
@@ -1291,7 +1291,7 @@ Returns the tsc frequency of the guest. The unit of the return value is
 KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
 error.
 
-4.56 KVM_GET_LAPIC
+4.57 KVM_GET_LAPIC
 
 Capability: KVM_CAP_IRQCHIP
 Architectures: x86
@@ -1307,7 +1307,7 @@ struct kvm_lapic_state {
 Reads the Local APIC registers and copies them into the input argument.  The
 data format and layout are the same as documented in the architecture manual.
 
-4.57 KVM_SET_LAPIC
+4.58 KVM_SET_LAPIC
 
 Capability: KVM_CAP_IRQCHIP
 Architectures: x86
@@ -1323,7 +1323,7 @@ struct kvm_lapic_state {
 Copies the input argument into the Local APIC registers.  The data format
 and layout are the same as documented in the architecture manual.
 
-4.56 KVM_IOEVENTFD
+4.59 KVM_IOEVENTFD
 
 Capability: KVM_CAP_IOEVENTFD
 Architectures: all
@@ -1353,6 +1353,80 @@ The following flags are defined:
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+4.60 KVM_CONFIG_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_config_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM.  The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures.  The "array_len" field is a
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array.  It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM.  Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_book3e_206_tlb_params".
+ - The "array" field points to an array of type "struct
+   kvm_book3e_206_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+   entries in the second TLB.
+ - Within a TLB, entries are ordered first by increasing set number.  Within a
+   set, entries are ordered by way (increasing ESEL).
+ - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
+   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+
+4.61 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array.  This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything.  It must
+be set to the number of set bits in the bitmap.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index d2ca5ed..1a6dedf 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+struct kvm_book3e_206_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+	/*
+	 * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+	 *
+	 * - The number of ways of TLB0 must be a power of two between 2 and
+	 *   16.
+	 * - TLB1 must be fully associative.
+	 * - The size of TLB0 must be a multiple of the number of ways, and
+	 *   the number of sets must be a power of two.
+	 * - The size of TLB1 may not exceed 64 entries.
+	 * - TLB0 supports 4 KiB pages.
+	 * - The page sizes supported by TLB1 are as indicated by
+	 *   TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+	 *   as returned by KVM_GET_SREGS.
+	 * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+	 *   and tlb_ways[] must be zero.
+	 *
+	 * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 */
+	__u32 tlb_sizes[4];
+	__u32 tlb_ways[4];
+	__u32 reserved[8];
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index adbfca9..060fabe 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -22,13 +22,6 @@
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
-struct tlbe{
-	u32 mas1;
-	u32 mas2;
-	u32 mas3;
-	u32 mas7;
-};
-
 #define E500_TLB_VALID 1
 #define E500_TLB_DIRTY 2
 
@@ -40,8 +33,11 @@ struct tlbe_priv {
 struct vcpu_id_table;
 
 struct kvmppc_vcpu_e500 {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe *gtlb_arch[E500_TLB_NUM];
+	/* Unmodified copy of the guest's TLB -- shared with host userspace. */
+	struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+	/* Starting entry number in gtlb_arch[] */
+	int gtlb_offset[E500_TLB_NUM];
 
 	/* KVM internal information associated with each guest TLB entry */
 	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
@@ -49,6 +45,9 @@ struct kvmppc_vcpu_e500 {
 	unsigned int gtlb_size[E500_TLB_NUM];
 	unsigned int gtlb_nv[E500_TLB_NUM];
 
+	unsigned int gtlb0_ways;
+	unsigned int gtlb0_sets;
+
 	u32 host_pid[E500_PID_NUM];
 	u32 pid[E500_PID_NUM];
 	u32 svr;
@@ -56,11 +55,10 @@ struct kvmppc_vcpu_e500 {
 	u32 mas0;
 	u32 mas1;
 	u32 mas2;
-	u32 mas3;
+	u64 mas7_3;
 	u32 mas4;
 	u32 mas5;
 	u32 mas6;
-	u32 mas7;
 
 	/* vcpu id table */
 	struct vcpu_id_table *idt;
@@ -73,6 +71,9 @@ struct kvmppc_vcpu_e500 {
 	u32 tlb1cfg;
 	u64 mcar;
 
+	struct page **shared_tlb_pages;
+	int num_shared_tlb_pages;
+
 	struct kvm_vcpu vcpu;
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c662f14..bb3d418 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -152,4 +152,11 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *cfg);
+
+void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 43923c3..628f723 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -110,7 +110,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->u.e.mas0 = vcpu_e500->mas0;
 	sregs->u.e.mas1 = vcpu_e500->mas1;
 	sregs->u.e.mas2 = vcpu_e500->mas2;
-	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas7_3 = vcpu_e500->mas7_3;
 	sregs->u.e.mas4 = vcpu_e500->mas4;
 	sregs->u.e.mas6 = vcpu_e500->mas6;
 
@@ -143,8 +143,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 		vcpu_e500->mas0 = sregs->u.e.mas0;
 		vcpu_e500->mas1 = sregs->u.e.mas1;
 		vcpu_e500->mas2 = sregs->u.e.mas2;
-		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
-		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas7_3 = sregs->u.e.mas7_3;
 		vcpu_e500->mas4 = sregs->u.e.mas4;
 		vcpu_e500->mas6 = sregs->u.e.mas6;
 	}
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index d48ae39..e0d3609 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -95,13 +95,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_MAS2:
 		vcpu_e500->mas2 = spr_val; break;
 	case SPRN_MAS3:
-		vcpu_e500->mas3 = spr_val; break;
+		vcpu_e500->mas7_3 &= ~(u64)0xffffffff;
+		vcpu_e500->mas7_3 |= spr_val;
+		break;
 	case SPRN_MAS4:
 		vcpu_e500->mas4 = spr_val; break;
 	case SPRN_MAS6:
 		vcpu_e500->mas6 = spr_val; break;
 	case SPRN_MAS7:
-		vcpu_e500->mas7 = spr_val; break;
+		vcpu_e500->mas7_3 &= (u64)0xffffffff;
+		vcpu_e500->mas7_3 |= (u64)spr_val << 32;
+		break;
 	case SPRN_L1CSR0:
 		vcpu_e500->l1csr0 = spr_val;
 		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -158,13 +162,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MAS2:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
 	case SPRN_MAS3:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+		kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break;
 	case SPRN_MAS4:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
 	case SPRN_MAS6:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
 	case SPRN_MAS7:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break;
 
 	case SPRN_TLB0CFG:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 88346cf..284763a 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -18,6 +18,11 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_e500.h>
 
@@ -65,6 +70,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
 
 static unsigned int tlb1_entry_num;
 
+static struct kvm_book3e_206_tlb_entry *
+get_entry(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	int offset = vcpu_e500->gtlb_offset[tlbsel];
+	return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
 /*
  * Allocate a free shadow id and setup a valid sid mapping in given entry.
  * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
@@ -221,20 +233,23 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *tlbe;
+	struct kvm_book3e_206_tlb_entry *tlbe;
 	int i, tlbsel;
 
-	printk("| %8s | %8s | %8s | %8s | %8s |\n",
-			"nr", "mas1", "mas2", "mas3", "mas7");
+	printk("| %8s | %8s | %16s | %16s |\n",
+	       "nr", "mas1", "mas2", "mas7_3");
 
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		int offset = vcpu_e500->gtlb_offset[tlbsel];
+
 		printk("Guest TLB%d:\n", tlbsel);
 		for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
-			tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
+			tlbe = &vcpu_e500->gtlb_arch[offset + i];
 			if (tlbe->mas1 & MAS1_VALID)
-				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
-					tlbsel, i, tlbe->mas1, tlbe->mas2,
-					tlbe->mas3, tlbe->mas7);
+				printk(" G[%d][%3d] |  %08X | %016llX | %016llX |\n",
+				       tlbsel, i, tlbe->mas1,
+				       (unsigned long long)tlbe->mas2,
+				       (unsigned long long)tlbe->mas7_3);
 		}
 	}
 }
@@ -245,7 +260,7 @@ static inline unsigned int tlb0_get_next_victim(
 	unsigned int victim;
 
 	victim = vcpu_e500->gtlb_nv[0]++;
-	if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+	if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb0_ways))
 		vcpu_e500->gtlb_nv[0] = 0;
 
 	return victim;
@@ -257,9 +272,9 @@ static inline unsigned int tlb1_max_shadow_size(void)
 	return tlb1_entry_num - tlbcam_index - 1;
 }
 
-static inline int tlbe_is_writable(struct tlbe *tlbe)
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 {
-	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }
 
 static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -290,40 +305,40 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+				     uint32_t mas0)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 	mtspr(SPRN_MAS0, mas0);
 	mtspr(SPRN_MAS1, stlbe->mas1);
-	mtspr(SPRN_MAS2, stlbe->mas2);
-	mtspr(SPRN_MAS3, stlbe->mas3);
-	mtspr(SPRN_MAS7, stlbe->mas7);
+	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
 	asm volatile("isync; tlbwe" : : : "memory");
 	local_irq_restore(flags);
 }
 
 static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+		int tlbsel, int esel, struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	if (tlbsel == 0) {
-		__write_host_tlbe(stlbe,
-				  MAS0_TLBSEL(0) |
-				  MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
+		int way = esel & (vcpu_e500->gtlb0_ways - 1);
+		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way));
 	} else {
 		__write_host_tlbe(stlbe,
 				  MAS0_TLBSEL(1) |
 				  MAS0_ESEL(to_htlb1_esel(esel)));
 	}
 	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
+			     (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32));
 }
 
 void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe magic;
+	struct kvm_book3e_206_tlb_entry magic;
 	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
 	unsigned int stid;
 	pfn_t pfn;
@@ -337,9 +352,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
 		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
-	magic.mas3 = (pfn << PAGE_SHIFT) |
-		     MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
-	magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
 
 	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
 	preempt_enable();
@@ -360,7 +374,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
 static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 					 int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 	struct vcpu_id_table *idt = vcpu_e500->idt;
 	unsigned int pr, tid, ts, pid;
 	u32 val, eaddr;
@@ -414,25 +429,50 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 	preempt_enable();
 }
 
+static int tlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+	int set_base;
+
+	set_base = (addr >> PAGE_SHIFT) & (vcpu_e500->gtlb0_sets - 1);
+	set_base *= vcpu_e500->gtlb0_ways;
+
+	return set_base;
+}
+
+static int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
+{
+	int esel = get_tlb_esel_bit(vcpu_e500);
+
+	if (tlbsel == 0) {
+		esel &= vcpu_e500->gtlb0_ways - 1;
+		esel += tlb0_set_base(vcpu_e500, vcpu_e500->mas2);
+	} else {
+		esel &= vcpu_e500->gtlb_size[1] - 1;
+	}
+
+	return esel;
+}
+
 /* Search the guest TLB for a matching entry. */
 static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 		gva_t eaddr, int tlbsel, unsigned int pid, int as)
 {
 	int size = vcpu_e500->gtlb_size[tlbsel];
-	int set_base;
+	int set_base, offset;
 	int i;
 
 	if (tlbsel == 0) {
-		int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
-		set_base = (eaddr >> PAGE_SHIFT) & mask;
-		set_base *= KVM_E500_TLB0_WAY_NUM;
-		size = KVM_E500_TLB0_WAY_NUM;
+		set_base = tlb0_set_base(vcpu_e500, eaddr);
+		size = vcpu_e500->gtlb0_ways;
 	} else {
 		set_base = 0;
 	}
 
+	offset = vcpu_e500->gtlb_offset[tlbsel];
+
 	for (i = 0; i < size; i++) {
-		struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
+		struct kvm_book3e_206_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -458,7 +498,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 }
 
 static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
-					  struct tlbe *gtlbe,
+					  struct kvm_book3e_206_tlb_entry *gtlbe,
 					  pfn_t pfn)
 {
 	priv->pfn = pfn;
@@ -500,17 +540,17 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 		| MAS1_TSIZE(tsized);
 	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
 		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
-	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
 		| (get_cur_pid(vcpu) << 16)
 		| (as ? MAS6_SAS : 0);
-	vcpu_e500->mas7 = 0;
 }
 
 static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-					   struct tlbe *gtlbe, int tsize,
-					   struct tlbe_priv *priv,
-					   u64 gvaddr, struct tlbe *stlbe)
+					   struct kvm_book3e_206_tlb_entry *gtlbe,
+					   int tsize, struct tlbe_priv *priv,
+					   u64 gvaddr,
+					   struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	pfn_t pfn = priv->pfn;
 	unsigned int stid;
@@ -525,16 +565,14 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
+	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+		| e500_shadow_mas3_attrib(gtlbe->mas7_3,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
 }
 
-
 static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
-	struct tlbe *stlbe)
+	u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tlbsel, int esel, struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	struct kvm_memory_slot *slot;
 	unsigned long pfn, hva;
@@ -646,11 +684,11 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 /* XXX only map the one-one case, for now use TLB0 */
 static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-				int esel, struct tlbe *stlbe)
+				int esel, struct kvm_book3e_206_tlb_entry *stlbe)
 {
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 
-	gtlbe = &vcpu_e500->gtlb_arch[0][esel];
+	gtlbe = get_entry(vcpu_e500, 0, esel);
 
 	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
 			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
@@ -663,7 +701,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
  * the shadow TLB. */
 /* XXX for both one-one and one-to-many , for now use TLB1 */
 static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
+		u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+		struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	unsigned int victim;
 
@@ -689,7 +728,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
 				struct kvmppc_vcpu_e500 *vcpu_e500,
 				int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 
 	if (unlikely(get_tlb_iprot(gtlbe)))
 		return -1;
@@ -752,18 +792,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int tlbsel, esel;
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 	vcpu_e500->mas0 &= ~MAS0_NV(~0);
 	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 	vcpu_e500->mas1 = gtlbe->mas1;
 	vcpu_e500->mas2 = gtlbe->mas2;
-	vcpu_e500->mas3 = gtlbe->mas3;
-	vcpu_e500->mas7 = gtlbe->mas7;
+	vcpu_e500->mas7_3 = gtlbe->mas7_3;
 
 	return EMULATE_DONE;
 }
@@ -774,7 +813,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	int as = !!get_cur_sas(vcpu_e500);
 	unsigned int pid = get_cur_spid(vcpu_e500);
 	int esel, tlbsel;
-	struct tlbe *gtlbe = NULL;
+	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
 	gva_t ea;
 
 	ea = kvmppc_get_gpr(vcpu, rb);
@@ -782,7 +821,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
 		if (esel >= 0) {
-			gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 			break;
 		}
 	}
@@ -792,8 +831,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 		vcpu_e500->mas1 = gtlbe->mas1;
 		vcpu_e500->mas2 = gtlbe->mas2;
-		vcpu_e500->mas3 = gtlbe->mas3;
-		vcpu_e500->mas7 = gtlbe->mas7;
+		vcpu_e500->mas7_3 = gtlbe->mas7_3;
 	} else {
 		int victim;
 
@@ -808,8 +846,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
 		vcpu_e500->mas2 &= MAS2_EPN;
 		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
-		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
-		vcpu_e500->mas7 = 0;
+		vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	}
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
@@ -819,28 +856,27 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 	int tlbsel, esel;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	if (get_tlb_v(gtlbe))
 		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
-	gtlbe->mas3 = vcpu_e500->mas3;
-	gtlbe->mas7 = vcpu_e500->mas7;
+	gtlbe->mas7_3 = vcpu_e500->mas7_3;
 
 	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
-			     gtlbe->mas3, gtlbe->mas7);
+			     (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32));
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
+		struct kvm_book3e_206_tlb_entry stlbe;
 		int stlbsel, sesel;
 		u64 eaddr;
 		u64 raddr;
@@ -914,9 +950,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
 			gva_t eaddr)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe =
-		&vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
-	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
+	u64 pgmask;
+
+	gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+	pgmask = get_tlb_bytes(gtlbe) - 1;
 
 	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
@@ -930,12 +968,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	struct tlbe_priv *priv;
-	struct tlbe *gtlbe, stlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
 	int stlbsel, sesel;
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	preempt_disable();
 	switch (tlbsel) {
@@ -993,51 +1031,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
 
 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	struct tlbe *tlbe;
+	struct kvm_book3e_206_tlb_entry *tlbe;
 
 	/* Insert large initial mapping for guest. */
-	tlbe = &vcpu_e500->gtlb_arch[1][0];
+	tlbe = get_entry(vcpu_e500, 1, 0);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
-	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
 
 	/* 4K map for serial output. Used by kernel wrapper. */
-	tlbe = &vcpu_e500->gtlb_arch[1][1];
+	tlbe = get_entry(vcpu_e500, 1, 1);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
-	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int stlbsel, i;
+
+	for (stlbsel = 0; stlbsel < 2; stlbsel++) {
+		for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
+			struct tlbe_priv *priv =
+				&vcpu_e500->gtlb_priv[stlbsel][i];
+			kvmppc_e500_priv_release(priv);
+		}
+	}
+}
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	clear_tlb_privs(vcpu_e500);
+
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++)
+			put_page(vcpu_e500->shared_tlb_pages[i]);
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+		return -EINVAL;
+	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||
+	    params.tlb_ways[3] != 0)
+		return -EINVAL;
+
+	if (!is_power_of_2(params.tlb_ways[0]))
+		return -EINVAL;
+
+	sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+	if (!is_power_of_2(sets))
+		return -EINVAL;
+
+	array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+	array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+	if (cfg->array_len < array_len)
+		return -EINVAL;
+
+	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+		    cfg->array / PAGE_SIZE;
+	pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+	if (ret < 0)
+		goto err_pages;
+
+	if (ret != num_pages) {
+		num_pages = ret;
+		ret = -EFAULT;
+		goto err_put_page;
+	}
+
+	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+	if (!virt)
+		goto err_put_page;
+
+	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+			   GFP_KERNEL);
+	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+			   GFP_KERNEL);
+
+	if (!privs[0] || !privs[1])
+		goto err_put_page;
+
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+	free_gtlb(vcpu_e500);
+
+	vcpu_e500->gtlb_priv[0] = privs[0];
+	vcpu_e500->gtlb_priv[1] = privs[1];
+
+	vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+		(virt + (cfg->array & (PAGE_SIZE - 1)));
+
+	vcpu_e500->gtlb_size[0] = params.tlb_sizes[0];
+	vcpu_e500->gtlb_size[1] = params.tlb_sizes[1];
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+	if (params.tlb_sizes[0] <= 2048)
+		vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
+
+	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+	vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
+
+	vcpu_e500->shared_tlb_pages = pages;
+	vcpu_e500->num_shared_tlb_pages = num_pages;
+
+	vcpu_e500->gtlb0_ways = params.tlb_ways[0];
+	vcpu_e500->gtlb0_sets = sets;
+
+	return 0;
+
+err_put_page:
+	kfree(privs[0]);
+	kfree(privs[1]);
+
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+
+err_pages:
+	kfree(pages);
+	return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *dirty)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	clear_tlb_privs(vcpu_e500);
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
+	return 0;
+}
+
+void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int i;
+
+	/*
+	 * We may have modified the guest TLB, so mark it dirty.
+	 * We only do it on an actual return to userspace, to avoid
+	 * adding more overhead to getting scheduled out -- and avoid
+	 * any locking issues with getting preempted in the middle of
+	 * KVM_CONFIG_TLB, etc.
+	 */
+
+	for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++)
+		set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
 }
 
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
+	int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
+	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
 	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
 
 	vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
-	vcpu_e500->gtlb_arch[0] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[0] == NULL)
-		goto err_out;
-
 	vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
-	vcpu_e500->gtlb_arch[1] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[1] == NULL)
-		goto err_out_guest0;
 
-	vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
+	vcpu_e500->gtlb0_ways = KVM_E500_TLB0_WAY_NUM;
+	vcpu_e500->gtlb0_sets = KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+	vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+	if (!vcpu_e500->gtlb_arch)
+		return -ENOMEM;
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
+
+	vcpu_e500->gtlb_priv[0] =
 		kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
 	if (vcpu_e500->gtlb_priv[0] == NULL)
-		goto err_out_guest1;
-	vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
+		goto err;
+	vcpu_e500->gtlb_priv[1] =
 		kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
 
 	if (vcpu_e500->gtlb_priv[1] == NULL)
-		goto err_out_priv0;
+		goto err;
 
 	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
-		goto err_out_priv1;
+		goto err;
 
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
@@ -1047,31 +1263,13 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	return 0;
 
-err_out_priv1:
-	kfree(vcpu_e500->gtlb_priv[1]);
-err_out_priv0:
-	kfree(vcpu_e500->gtlb_priv[0]);
-err_out_guest1:
-	kfree(vcpu_e500->gtlb_arch[1]);
-err_out_guest0:
-	kfree(vcpu_e500->gtlb_arch[0]);
-err_out:
+err:
+	free_gtlb(vcpu_e500);
 	return -1;
 }
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	int stlbsel, i;
-
-	/* release all privs */
-	for (stlbsel = 0; stlbsel < 2; stlbsel++)
-		for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
-			struct tlbe_priv *priv =
-				&vcpu_e500->gtlb_priv[stlbsel][i];
-			kvmppc_e500_priv_release(priv);
-		}
-
 	kvmppc_e500_id_table_free(vcpu_e500);
-	kfree(vcpu_e500->gtlb_arch[1]);
-	kfree(vcpu_e500->gtlb_arch[0]);
+	free_gtlb(vcpu_e500);
 }
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 59b88e9..2c29640 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -20,13 +20,9 @@
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
 
-#define KVM_E500_TLB0_WAY_SIZE_BIT	7	/* Fixed */
-#define KVM_E500_TLB0_WAY_SIZE		(1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
-#define KVM_E500_TLB0_WAY_SIZE_MASK	(KVM_E500_TLB0_WAY_SIZE - 1)
-
-#define KVM_E500_TLB0_WAY_NUM_BIT	1	/* No greater than 7 */
-#define KVM_E500_TLB0_WAY_NUM		(1UL << KVM_E500_TLB0_WAY_NUM_BIT)
-#define KVM_E500_TLB0_WAY_NUM_MASK	(KVM_E500_TLB0_WAY_NUM - 1)
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE		128
+#define KVM_E500_TLB0_WAY_NUM		2
 
 #define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
 #define KVM_E500_TLB1_SIZE  16
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 7) & 0x1f;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return tlbe->mas2 & 0xfffff000;
 }
 
-static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1ULL << 10 << pgsize;
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	u64 bytes = get_tlb_bytes(tlbe);
 	return get_tlb_eaddr(tlbe) + bytes - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
-	u64 rpn = tlbe->mas7;
-	return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+	return tlbe->mas7_3 & ~0xfffULL;
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 16) & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 12) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 31) & 0x1;
 }
 
-static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 30) & 0x1;
 }
@@ -155,25 +155,8 @@ static inline unsigned int get_tlb_esel_bit(
 	return (vcpu_e500->mas0 >> 16) & 0xfff;
 }
 
-static inline unsigned int get_tlb_esel(
-		const struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel)
-{
-	unsigned int esel = get_tlb_esel_bit(vcpu_e500);
-
-	if (tlbsel == 0) {
-		esel &= KVM_E500_TLB0_WAY_NUM_MASK;
-		esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
-				<< KVM_E500_TLB0_WAY_NUM_BIT;
-	} else {
-		esel &= KVM_E500_TLB1_SIZE - 1;
-	}
-
-	return esel;
-}
-
 static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-			const struct tlbe *tlbe)
+			const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	gpa_t gpa;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d5beb72..212ca2e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
+#ifdef CONFIG_KVM_E500
+	case KVM_CAP_SW_TLB:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -503,6 +506,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	kvm_guest_exit();
 	local_irq_enable();
 
+#ifdef CONFIG_KVM_E500
+	kvmppc_core_heavy_exit(vcpu);
+#endif
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
@@ -583,6 +590,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+
+#ifdef CONFIG_KVM_E500
+	case KVM_CONFIG_TLB: {
+		struct kvm_config_tlb cfg;
+		r = -EFAULT;
+		if (copy_from_user(&cfg, argp, sizeof(cfg)))
+			goto out;
+		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+		break;
+	}
+
+	case KVM_DIRTY_TLB: {
+		struct kvm_dirty_tlb dirty;
+		r = -EFAULT;
+		if (copy_from_user(&dirty, argp, sizeof(dirty)))
+			goto out;
+		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		break;
+	}
+#endif
+
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 55ef181..daa79867 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -544,6 +544,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_TSC_CONTROL 60
 #define KVM_CAP_GET_TSC_KHZ 61
 #define KVM_CAP_PPC_BOOKE_SREGS 62
+#define KVM_CAP_SW_TLB 1002
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -623,6 +624,21 @@ struct kvm_clock_data {
 	__u32 pad[9];
 };
 
+#define KVM_MMU_FSL_BOOKE_NOHV		0
+#define KVM_MMU_FSL_BOOKE_HV		1
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -746,6 +762,9 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_XCRS */
 #define KVM_GET_XCRS		  _IOR(KVMIO,  0xa6, struct kvm_xcrs)
 #define KVM_SET_XCRS		  _IOW(KVMIO,  0xa7, struct kvm_xcrs)
+/* Available with KVM_CAP_SW_TLB */
+#define KVM_CONFIG_TLB		  _IOW(KVMIO,  0xa8, struct kvm_config_tlb)
+#define KVM_DIRTY_TLB		  _IOW(KVMIO,  0xa9, struct kvm_dirty_tlb)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-06-02 23:17 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-17 23:42 [PATCH 13/13] KVM: PPC: e500: MMU API Scott Wood
2011-05-19 14:10 ` Alexander Graf
2011-05-19 16:28 ` Scott Wood
2011-05-19 17:37 ` Alexander Graf
2011-05-19 18:12 ` Scott Wood
2011-06-02 23:17 ` Scott Wood

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.