* [PATCH 0/2] kvm/e500v2: MMU optimization
From: Liu Yu @ 2010-09-08  9:40 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: agraf

This patchset maps guest TLB1 entries into host TLB0.
It includes:
[PATCH 1/2] kvm/e500v2: Remove shadow tlb
[PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0

Patch 1 is needed because it makes things simpler and more flexible.
KVM also works with only patch 1 applied.

* [PATCH 1/2] kvm/e500v2: Remove shadow tlb
From: Liu Yu @ 2010-09-08  9:40 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: agraf, Liu Yu

It is unnecessary to keep a shadow TLB:
first, the shadow TLB keeps fixed values, which makes things inflexible;
second, removing the shadow TLB saves a lot of memory.

This patch removes the shadow TLB and instead calculates the shadow TLB
entry value right before writing it to hardware.

It also introduces the new struct tlbe_ref to track the relation
between a guest TLB entry and its backing page.
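
A condensed sketch of the resulting flow (illustrative only, built from the
helpers this patch introduces below; stlbsel/sesel/gfn/eaddr stand for the
usual shadow TLB selector, entry index, guest frame number and guest
address):

	struct tlbe stlbe;	/* the shadow entry now lives on the stack */
	struct tlbe_ref *ref = &vcpu_e500->shadow_refs[stlbsel][sesel];

	/* Pin the backing page and record page/gtlbe in the ref slot. */
	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, stlbsel, sesel);
	/* Compute the hardware entry from gtlbe and ref just before use... */
	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
	/* ...and write it straight into the host TLB. */
	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);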

Signed-off-by: Liu Yu <yu.liu@freescale.com>
---
 arch/powerpc/include/asm/kvm_e500.h |    7 +-
 arch/powerpc/kvm/e500_tlb.c         |  287 +++++++++++++----------------------
 2 files changed, 108 insertions(+), 186 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26f..cb785f9 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -29,13 +29,18 @@ struct tlbe{
 	u32 mas7;
 };
 
+struct tlbe_ref {
+	struct page *page;
+	struct tlbe *gtlbe;
+};
+
 struct kvmppc_vcpu_e500 {
 	/* Unmodified copy of the guest's TLB. */
 	struct tlbe *guest_tlb[E500_TLB_NUM];
 	/* TLB that's actually used when the guest is running. */
 	struct tlbe *shadow_tlb[E500_TLB_NUM];
 	/* Pages which are referenced in the shadow TLB. */
-	struct page **shadow_pages[E500_TLB_NUM];
+	struct tlbe_ref *shadow_refs[E500_TLB_NUM];
 
 	unsigned int guest_tlb_size[E500_TLB_NUM];
 	unsigned int shadow_tlb_size[E500_TLB_NUM];
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index f11ca0f..0b657af 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008, 2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, yu.liu@freescale.com
  *
@@ -48,17 +48,6 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 					tlbe->mas3, tlbe->mas7);
 		}
 	}
-
-	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
-		printk("Shadow TLB%d:\n", tlbsel);
-		for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
-			tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
-			if (tlbe->mas1 & MAS1_VALID)
-				printk(" S[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
-					tlbsel, i, tlbe->mas1, tlbe->mas2,
-					tlbe->mas3, tlbe->mas7);
-		}
-	}
 }
 
 static inline unsigned int tlb0_get_next_victim(
@@ -121,10 +110,8 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
 }
 
 static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel)
+		int tlbsel, int esel, struct tlbe *stlbe)
 {
-	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
 	local_irq_disable();
 	if (tlbsel == 0) {
 		__write_host_tlbe(stlbe);
@@ -139,28 +126,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
 		mtspr(SPRN_MAS0, mas0);
 	}
 	local_irq_enable();
+	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			stlbe->mas3, stlbe->mas7);
 }
 
 void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	int i;
-	unsigned register mas0;
-
-	/* Load all valid TLB1 entries to reduce guest tlb miss fault */
-	local_irq_disable();
-	mas0 = mfspr(SPRN_MAS0);
-	for (i = 0; i < tlb1_max_shadow_size(); i++) {
-		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
-
-		if (get_tlb_v(stlbe)) {
-			mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
-					| MAS0_ESEL(to_htlb1_esel(i)));
-			__write_host_tlbe(stlbe);
-		}
-	}
-	mtspr(SPRN_MAS0, mas0);
-	local_irq_enable();
 }
 
 void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
@@ -202,16 +173,19 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 }
 
 static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel)
+		int stlbsel, int sesel)
 {
-	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-	struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
+	struct tlbe_ref *ref;
+	struct page *page;
+
+	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
+	page = ref->page;
 
 	if (page) {
-		vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
+		ref->page = NULL;
 
-		if (get_tlb_v(stlbe)) {
-			if (tlbe_is_writable(stlbe))
+		if (get_tlb_v(ref->gtlbe)) {
+			if (tlbe_is_writable(ref->gtlbe))
 				kvm_release_page_dirty(page);
 			else
 				kvm_release_page_clean(page);
@@ -219,46 +193,19 @@ static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 }
 
-static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel)
-{
-	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
-	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
-	stlbe->mas1 = 0;
-	/* XXX doesn't compile */
-#if 0
-	trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
-#endif
-}
-
 static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
-		gva_t eaddr, gva_t eend, u32 tid)
+		int esel)
 {
-	unsigned int pid = tid & 0xff;
+	struct tlbe stlbe;
 	unsigned int i;
 
-	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
-		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
-		unsigned int tid;
-
-		if (!get_tlb_v(stlbe))
-			continue;
-
-		if (eend < get_tlb_eaddr(stlbe))
-			continue;
-
-		if (eaddr > get_tlb_end(stlbe))
-			continue;
-
-		tid = get_tlb_tid(stlbe);
-		if (tid && (tid != pid))
-			continue;
+	stlbe.mas1 = 0;
+	for (i = 0; i < KVM_E500_TLB1_SIZE; i++) {
+		struct tlbe_ref *ref =
+			&vcpu_e500->shadow_refs[1][i];
 
-		kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
-		write_host_tlbe(vcpu_e500, 1, i);
+		if (ref->gtlbe == &vcpu_e500->guest_tlb[1][esel])
+			write_host_tlbe(vcpu_e500, 1, i, &stlbe);
 	}
 }
 
@@ -289,14 +236,29 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	vcpu_e500->mas7 = 0;
 }
 
+static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+		struct tlbe *gtlbe, struct tlbe_ref *ref,
+		u64 gvaddr, struct tlbe *stlbe)
+{
+	hpa_t hpaddr = page_to_phys(ref->page);
+
+	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
+	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
+		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+	stlbe->mas2 = (gvaddr & MAS2_EPN)
+		| e500_shadow_mas2_attrib(gtlbe->mas2,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe->mas3 = (hpaddr & MAS3_RPN)
+		| e500_shadow_mas3_attrib(gtlbe->mas3,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+}
+
 static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+	gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel)
 {
 	struct page *new_page;
-	struct tlbe *stlbe;
-	hpa_t hpaddr;
-
-	stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+	struct tlbe_ref *ref;
 
 	/* Get reference to new page. */
 	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
@@ -305,39 +267,24 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 		kvm_release_page_clean(new_page);
 		return;
 	}
-	hpaddr = page_to_phys(new_page);
 
 	/* Drop reference to old page. */
-	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
-
-	vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+	kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel);
 
-	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
-		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
-	stlbe->mas2 = (gvaddr & MAS2_EPN)
-		| e500_shadow_mas2_attrib(gtlbe->mas2,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas3 = (hpaddr & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
-
-	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
+	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
+	ref->page = new_page;
+	ref->gtlbe = gtlbe;
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel)
+static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel)
 {
 	struct tlbe *gtlbe;
 
-	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+	gtlbe = &vcpu_e500->guest_tlb[0][esel];
 
-	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
-			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
-			gtlbe, tlbsel, esel);
+	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+			gtlbe, 0, esel);
 
 	return esel;
 }
@@ -355,7 +302,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
 		vcpu_e500->guest_tlb_nv[1] = 0;
 
-	kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
+	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim);
 
 	return victim;
 }
@@ -366,33 +313,19 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 {
 	if (usermode) {
-		struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-		int i;
-
-		/* XXX Replace loop with fancy data structures. */
-		for (i = 0; i < tlb1_max_shadow_size(); i++)
-			kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
-
 		_tlbil_all();
 	}
 }
 
-static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel)
+static inline int kvmppc_e500_gtlbe_invalidate(
+				struct kvmppc_vcpu_e500 *vcpu_e500,
+				int tlbsel, int esel)
 {
 	struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
 
 	if (unlikely(get_tlb_iprot(gtlbe)))
 		return -1;
 
-	if (tlbsel == 1) {
-		kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
-				get_tlb_end(gtlbe),
-				get_tlb_tid(gtlbe));
-	} else {
-		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
-	}
-
 	gtlbe->mas1 = 0;
 
 	return 0;
@@ -515,23 +448,16 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	u64 eaddr;
-	u64 raddr;
-	u32 tid;
 	struct tlbe *gtlbe;
-	int tlbsel, esel, stlbsel, sesel;
+	int tlbsel, esel;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
 
-	if (get_tlb_v(gtlbe) && tlbsel == 1) {
-		eaddr = get_tlb_eaddr(gtlbe);
-		tid = get_tlb_tid(gtlbe);
-		kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
-				get_tlb_end(gtlbe), tid);
-	}
+	if (get_tlb_v(gtlbe) && tlbsel == 1)
+		kvmppc_e500_tlb1_invalidate(vcpu_e500, esel);
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
@@ -543,35 +469,37 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
+		struct tlbe stlbe;
+		int stlbsel, sesel;
+		struct tlbe_ref *ref;
+		u64 eaddr;
+
 		switch (tlbsel) {
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
 			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
+			eaddr =  get_tlb_eaddr(gtlbe);
 			stlbsel = 0;
-			sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+			sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel);
+
+			ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
+
+			kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe,
+					        ref, eaddr, &stlbe);
+			write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
 
 			break;
 
 		case 1:
-			/* TLB1 */
-			eaddr = get_tlb_eaddr(gtlbe);
-			raddr = get_tlb_raddr(gtlbe);
-
-			/* Create a 4KB mapping on the host.
-			 * If the guest wanted a large page,
-			 * only the first 4KB is mapped here and the rest
-			 * are mapped on the fly. */
-			stlbsel = 1;
-			sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
-					raddr >> PAGE_SHIFT, gtlbe);
+			/* Large page breaks into 4KB pages.
+			 * And all these 4KB pages will be mapped on the fly. */
 			break;
 
 		default:
 			BUG();
 		}
-		write_host_tlbe(vcpu_e500, stlbsel, sesel);
 	}
 
 	return EMULATE_DONE;
@@ -618,25 +546,20 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	int tlbsel, i;
-
-	for (tlbsel = 0; tlbsel < 2; tlbsel++)
-		for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
-			kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
-
-	/* discard all guest mapping */
-	_tlbil_all();
 }
 
 void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 			unsigned int index)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct tlbe_ref *ref;
+	struct tlbe *gtlbe, stlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
 	int stlbsel, sesel;
 
+	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
 	switch (tlbsel) {
 	case 0:
 		stlbsel = 0;
@@ -645,8 +568,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 
 	case 1: {
 		gfn_t gfn = gpaddr >> PAGE_SHIFT;
-		struct tlbe *gtlbe
-			= &vcpu_e500->guest_tlb[tlbsel][esel];
 
 		stlbsel = 1;
 		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
@@ -657,7 +578,11 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 		BUG();
 		break;
 	}
-	write_host_tlbe(vcpu_e500, stlbsel, sesel);
+
+	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
+
+	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
+	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
 }
 
 int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -704,33 +629,21 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (vcpu_e500->guest_tlb[0] == NULL)
 		goto err_out;
 
-	vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
-	vcpu_e500->shadow_tlb[0] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_tlb[0] == NULL)
-		goto err_out_guest0;
-
 	vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
 	vcpu_e500->guest_tlb[1] =
 		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
 	if (vcpu_e500->guest_tlb[1] == NULL)
-		goto err_out_shadow0;
+		goto err_out_guest0;
 
-	vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
-	vcpu_e500->shadow_tlb[1] =
-		kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
-	if (vcpu_e500->shadow_tlb[1] == NULL)
+	vcpu_e500->shadow_refs[0] = (struct tlbe_ref *)
+		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+	if (vcpu_e500->shadow_refs[0] == NULL)
 		goto err_out_guest1;
 
-	vcpu_e500->shadow_pages[0] = (struct page **)
-		kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_pages[0] == NULL)
-		goto err_out_shadow1;
-
-	vcpu_e500->shadow_pages[1] = (struct page **)
-		kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
-	if (vcpu_e500->shadow_pages[1] == NULL)
-		goto err_out_page0;
+	vcpu_e500->shadow_refs[1] = (struct tlbe_ref *)
+		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
+	if (vcpu_e500->shadow_refs[1] == NULL)
+		goto err_out_ref0;
 
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
@@ -740,14 +653,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	return 0;
 
-err_out_page0:
-	kfree(vcpu_e500->shadow_pages[0]);
-err_out_shadow1:
-	kfree(vcpu_e500->shadow_tlb[1]);
+err_out_ref0:
+	kfree(vcpu_e500->shadow_refs[0]);
 err_out_guest1:
 	kfree(vcpu_e500->guest_tlb[1]);
-err_out_shadow0:
-	kfree(vcpu_e500->shadow_tlb[0]);
 err_out_guest0:
 	kfree(vcpu_e500->guest_tlb[0]);
 err_out:
@@ -756,10 +665,18 @@ err_out:
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	kfree(vcpu_e500->shadow_pages[1]);
-	kfree(vcpu_e500->shadow_pages[0]);
-	kfree(vcpu_e500->shadow_tlb[1]);
+	int stlbsel, i;
+
+	/* release all pages */
+	for (stlbsel = 0; stlbsel < 2; stlbsel++)
+		for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++)
+			kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i);
+
+	/* discard all guest mapping */
+	_tlbil_all();
+
+	kfree(vcpu_e500->shadow_refs[1]);
+	kfree(vcpu_e500->shadow_refs[0]);
 	kfree(vcpu_e500->guest_tlb[1]);
-	kfree(vcpu_e500->shadow_tlb[0]);
 	kfree(vcpu_e500->guest_tlb[0]);
 }
-- 
1.6.4

* [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
From: Liu Yu @ 2010-09-08  9:40 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: agraf, Liu Yu

Currently, guest TLB1 is mapped to host TLB1.
As the host kernel only provides non-contiguous 4K pages,
we have to break each large guest mapping into 4K shadow mappings.
These 4K shadow mappings are then mapped into host TLB1 on the fly.
As host TLB1 only has 13 free entries, this causes serious TLB misses.

Since e500v2 has a large number of TLB0 entries,
it should help to map those 4K shadow mappings into host TLB0 instead.
To achieve this, we need to decouple the guest TLB from the host TLB,
so that guest TLB1 mappings can be routed to any host TLB0 entry freely.

Pages and mappings are now tracked per host TLB entry rather than per
guest TLB entry: this patch removes the link between pages and guest
TLB entries, and keeps a host_tlb0_ref array in each vcpu to track the
pages. Mapping guest TLB1 to host TLB0 then becomes straightforward.

In a guest ramdisk boot test (where the guest mainly uses TLB1),
this patch reduces the number of TLB misses by about 90%.
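
For illustration, a fixed-geometry sketch of how a host TLB0 victim entry
maps to a slot in host_tlb0_ref (not part of the patch; it assumes the
512-entry, 4-way set-associative TLB0, i.e. 128 sets, that the 0x7F000 EPN
mask in get_tlb0_entry_offset() below implies):

	/* host_tlb0_assoc == 4, host_tlb0_assoc_bit == 2 */
	static unsigned int tlb0_ref_slot(u32 eaddr, u32 esel)
	{
		unsigned int set = (eaddr >> 12) & 0x7f; /* EPN bits 18:12 pick the set */
		unsigned int way = esel & 0x3;           /* MAS0[ESEL] picks the way */

		return (set << 2) | way;                 /* one slot per hardware entry */
	}

Each hardware TLB0 entry thus owns exactly one tracking slot, so replacing
an entry in hardware also identifies which old page reference to drop.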

Signed-off-by: Liu Yu <yu.liu@freescale.com>
---
 arch/powerpc/include/asm/kvm_e500.h |    7 +-
 arch/powerpc/kvm/e500.c             |    4 +
 arch/powerpc/kvm/e500_tlb.c         |  280 ++++++++++++-----------------------
 arch/powerpc/kvm/e500_tlb.h         |    1 +
 4 files changed, 104 insertions(+), 188 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index cb785f9..16c0ed0 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -37,13 +37,10 @@ struct tlbe_ref {
 struct kvmppc_vcpu_e500 {
 	/* Unmodified copy of the guest's TLB. */
 	struct tlbe *guest_tlb[E500_TLB_NUM];
-	/* TLB that's actually used when the guest is running. */
-	struct tlbe *shadow_tlb[E500_TLB_NUM];
-	/* Pages which are referenced in the shadow TLB. */
-	struct tlbe_ref *shadow_refs[E500_TLB_NUM];
+	/* Pages which are referenced in host TLB. */
+	struct tlbe_ref *host_tlb0_ref;
 
 	unsigned int guest_tlb_size[E500_TLB_NUM];
-	unsigned int shadow_tlb_size[E500_TLB_NUM];
 	unsigned int guest_tlb_nv[E500_TLB_NUM];
 
 	u32 host_pid[E500_PID_NUM];
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e8a00b0..14af6d7 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -146,6 +146,10 @@ static int __init kvmppc_e500_init(void)
 	if (r)
 		return r;
 
+	r = kvmppc_e500_mmu_init();
+	if (r)
+		return r;
+
 	/* copy extra E500 exception handlers */
 	ivor[0] = mfspr(SPRN_IVOR32);
 	ivor[1] = mfspr(SPRN_IVOR33);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0b657af..a6c2320 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -25,9 +25,15 @@
 #include "e500_tlb.h"
 #include "trace.h"
 
-#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+static unsigned int host_tlb0_entry_num;
+static unsigned int host_tlb0_assoc;
+static unsigned int host_tlb0_assoc_bit;
 
-static unsigned int tlb1_entry_num;
+static inline unsigned int get_tlb0_entry_offset(u32 eaddr, u32 esel)
+{
+	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
+			(esel & (host_tlb0_assoc - 1)));
+}
 
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
 	return victim;
 }
 
-static inline unsigned int tlb1_max_shadow_size(void)
-{
-	return tlb1_entry_num - tlbcam_index;
-}
-
 static inline int tlbe_is_writable(struct tlbe *tlbe)
 {
 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
@@ -100,7 +101,7 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe)
+static inline void __host_tlbe_write(struct tlbe *stlbe)
 {
 	mtspr(SPRN_MAS1, stlbe->mas1);
 	mtspr(SPRN_MAS2, stlbe->mas2);
@@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
 	__asm__ __volatile__ ("tlbwe\n" : : );
 }
 
-static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
+                                   u32 gvaddr, struct tlbe *stlbe)
 {
-	local_irq_disable();
-	if (tlbsel == 0) {
-		__write_host_tlbe(stlbe);
-	} else {
-		unsigned register mas0;
+	unsigned register mas0;
 
-		mas0 = mfspr(SPRN_MAS0);
+	local_irq_disable();
 
-		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
-		__write_host_tlbe(stlbe);
+	mas0 = mfspr(SPRN_MAS0);
+	__host_tlbe_write(stlbe);
 
-		mtspr(SPRN_MAS0, mas0);
-	}
 	local_irq_enable();
-	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+
+	trace_kvm_stlb_write(mas0, stlbe->mas1, stlbe->mas2,
 			stlbe->mas3, stlbe->mas7);
+
+	return (mas0 & 0x0FFF0000) >> 16;
 }
 
 void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
@@ -172,41 +170,17 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 	return -1;
 }
 
-static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int stlbsel, int sesel)
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
-	struct tlbe_ref *ref;
-	struct page *page;
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-	page = ref->page;
-
-	if (page) {
-		ref->page = NULL;
+	struct page *pg = ref->page;
 
-		if (get_tlb_v(ref->gtlbe)) {
-			if (tlbe_is_writable(ref->gtlbe))
-				kvm_release_page_dirty(page);
-			else
-				kvm_release_page_clean(page);
-		}
+	if (pg) {
+		if (get_tlb_v(ref->gtlbe) && tlbe_is_writable(ref->gtlbe))
+			kvm_release_page_dirty(pg);
+		else
+			kvm_release_page_clean(pg);
 	}
-}
 
-static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int esel)
-{
-	struct tlbe stlbe;
-	unsigned int i;
-
-	stlbe.mas1 = 0;
-	for (i = 0; i < KVM_E500_TLB1_SIZE; i++) {
-		struct tlbe_ref *ref =
-			&vcpu_e500->shadow_refs[1][i];
-
-		if (ref->gtlbe == &vcpu_e500->guest_tlb[1][esel])
-			write_host_tlbe(vcpu_e500, 1, i, &stlbe);
-	}
 }
 
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
@@ -236,77 +210,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	vcpu_e500->mas7 = 0;
 }
 
-static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		struct tlbe *gtlbe, struct tlbe_ref *ref,
-		u64 gvaddr, struct tlbe *stlbe)
-{
-	hpa_t hpaddr = page_to_phys(ref->page);
-
-	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
-		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
-	stlbe->mas2 = (gvaddr & MAS2_EPN)
-		| e500_shadow_mas2_attrib(gtlbe->mas2,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas3 = (hpaddr & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
-}
-
-static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel)
-{
-	struct page *new_page;
-	struct tlbe_ref *ref;
-
-	/* Get reference to new page. */
-	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
-	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
-		kvm_release_page_clean(new_page);
-		return;
-	}
-
-	/* Drop reference to old page. */
-	kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel);
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-	ref->page = new_page;
-	ref->gtlbe = gtlbe;
-}
-
-/* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel)
-{
-	struct tlbe *gtlbe;
-
-	gtlbe = &vcpu_e500->guest_tlb[0][esel];
-
-	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
-			gtlbe, 0, esel);
-
-	return esel;
-}
-
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-/* XXX for both one-one and one-to-many , for now use TLB1 */
-static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
-{
-	unsigned int victim;
-
-	victim = vcpu_e500->guest_tlb_nv[1]++;
-
-	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
-		vcpu_e500->guest_tlb_nv[1] = 0;
-
-	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim);
-
-	return victim;
-}
-
 /* Invalidate all guest kernel mappings when enter usermode,
  * so that when they fault back in they will get the
  * proper permission bits. */
@@ -445,6 +348,52 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_e500_mmu_map_tlb0(struct kvmppc_vcpu_e500 *vcpu_e500,
+                                    struct tlbe *gtlbe,
+				    gfn_t gfn,
+				    u64 gvaddr)
+{
+	struct page *new_page;
+	struct tlbe_ref *ref;
+	u32 esel;
+	hpa_t hpaddr;
+	struct tlbe stlbe;
+
+	/* Find related page. */
+	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+	if (is_error_page(new_page)) {
+		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", gfn);
+		kvm_release_page_clean(new_page);
+		return -1;
+	}
+
+	hpaddr = page_to_phys(new_page);
+
+	/* Setup the shadow tlb entry */
+	/* Force TS=1 IPROT=0 TSIZE=4KB for all mappings go to TLB0. */
+	stlbe.mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
+		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+	stlbe.mas2 = (gvaddr & MAS2_EPN)
+		| e500_shadow_mas2_attrib(gtlbe->mas2,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe.mas3 = (hpaddr & MAS3_RPN)
+		| e500_shadow_mas3_attrib(gtlbe->mas3,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe.mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+	esel = host_tlb0_write(vcpu_e500, gvaddr, &stlbe);
+
+	ref = &vcpu_e500->host_tlb0_ref[get_tlb0_entry_offset(gvaddr, esel)];
+
+	/* Release old page and setup new page */
+	kvmppc_e500_ref_release(ref);
+
+	ref->page = new_page;
+	ref->gtlbe = gtlbe;
+
+	return 0;
+}
+
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -457,7 +406,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
 
 	if (get_tlb_v(gtlbe) && tlbsel == 1)
-		kvmppc_e500_tlb1_invalidate(vcpu_e500, esel);
+		_tlbil_all();
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
@@ -469,27 +418,15 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
-		int stlbsel, sesel;
-		struct tlbe_ref *ref;
-		u64 eaddr;
-
 		switch (tlbsel) {
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
 			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
-			eaddr =  get_tlb_eaddr(gtlbe);
-			stlbsel = 0;
-			sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel);
-
-			ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-
-			kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe,
-					        ref, eaddr, &stlbe);
-			write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
-
+			kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe,
+                                    get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+                                    get_tlb_eaddr(gtlbe));
 			break;
 
 		case 1:
@@ -552,37 +489,15 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 			unsigned int index)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe_ref *ref;
-	struct tlbe *gtlbe, stlbe;
+	struct tlbe *gtlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
-	int stlbsel, sesel;
+	gfn_t gfn;
 
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+	gfn = gpaddr >> PAGE_SHIFT;
 
-	switch (tlbsel) {
-	case 0:
-		stlbsel = 0;
-		sesel = esel;
-		break;
-
-	case 1: {
-		gfn_t gfn = gpaddr >> PAGE_SHIFT;
-
-		stlbsel = 1;
-		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
-		break;
-	}
-
-	default:
-		BUG();
-		break;
-	}
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-
-	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
-	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
+	kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe, gfn, eaddr);
 }
 
 int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -621,8 +536,6 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
-
 	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
 	vcpu_e500->guest_tlb[0] =
 		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
@@ -635,16 +548,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (vcpu_e500->guest_tlb[1] == NULL)
 		goto err_out_guest0;
 
-	vcpu_e500->shadow_refs[0] = (struct tlbe_ref *)
-		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_refs[0] == NULL)
+	vcpu_e500->host_tlb0_ref = kzalloc(sizeof(struct tlbe_ref) * host_tlb0_entry_num, GFP_KERNEL);
+	if (vcpu_e500->host_tlb0_ref == NULL)
 		goto err_out_guest1;
 
-	vcpu_e500->shadow_refs[1] = (struct tlbe_ref *)
-		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_refs[1] == NULL)
-		goto err_out_ref0;
-
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
 	vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
@@ -653,8 +560,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	return 0;
 
-err_out_ref0:
-	kfree(vcpu_e500->shadow_refs[0]);
 err_out_guest1:
 	kfree(vcpu_e500->guest_tlb[1]);
 err_out_guest0:
@@ -665,18 +570,27 @@ err_out:
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	int stlbsel, i;
+	int i;
 
 	/* release all pages */
-	for (stlbsel = 0; stlbsel < 2; stlbsel++)
-		for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++)
-			kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i);
+	for (i = 0; i < host_tlb0_entry_num; i++) {
+		struct tlbe_ref *ref = &vcpu_e500->host_tlb0_ref[i];
+
+		kvmppc_e500_ref_release(ref);
+	}
 
 	/* discard all guest mapping */
 	_tlbil_all();
 
-	kfree(vcpu_e500->shadow_refs[1]);
-	kfree(vcpu_e500->shadow_refs[0]);
 	kfree(vcpu_e500->guest_tlb[1]);
 	kfree(vcpu_e500->guest_tlb[0]);
 }
+
+int kvmppc_e500_mmu_init(void)
+{
+	host_tlb0_entry_num = mfspr(SPRN_TLB0CFG) & 0xFFF;
+	host_tlb0_assoc = (mfspr(SPRN_TLB0CFG) >> 24) & 0xFF;
+	for (; host_tlb0_assoc >> (host_tlb0_assoc_bit + 1); host_tlb0_assoc_bit++);
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index d28e301..851e523 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -55,6 +55,7 @@ extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
 extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+extern int kvmppc_e500_mmu_init(void);
 
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
-- 
1.6.4



^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
@ 2010-09-08  9:40       ` Liu Yu
  0 siblings, 0 replies; 46+ messages in thread
From: Liu Yu @ 2010-09-08  9:40 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: agraf, Liu Yu

Current guest TLB1 is mapped to host TLB1.
As host kernel only provides 4K uncontinuous pages,
we have to break guest large mapping into 4K shadow mappings.
These 4K shadow mappings are then mapped into host TLB1 on fly.
As host TLB1 only has 13 free entries, there's serious tlb miss.

Since e500v2 has a big number of TLB0 entries,
it should be help to map those 4K shadow mappings to host TLB0.
To achieve this, we need to unlink guest tlb and host tlb,
So that guest TLB1 mappings can route to any host TLB0 entries freely.

Pages/mappings are considerred in the same kind as host tlb entry.
This patch remove the link between pages and guest tlb entry to do the unlink.
And keep host_tlb0_ref in each vcpu to trace pages.
Then it's easy to map guest TLB1 to host TLB0.

In guest ramdisk boot test(guest mainly uses TLB1),
with this patch, the tlb miss number get down 90%.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
---
 arch/powerpc/include/asm/kvm_e500.h |    7 +-
 arch/powerpc/kvm/e500.c             |    4 +
 arch/powerpc/kvm/e500_tlb.c         |  280 ++++++++++++-----------------------
 arch/powerpc/kvm/e500_tlb.h         |    1 +
 4 files changed, 104 insertions(+), 188 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index cb785f9..16c0ed0 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -37,13 +37,10 @@ struct tlbe_ref {
 struct kvmppc_vcpu_e500 {
 	/* Unmodified copy of the guest's TLB. */
 	struct tlbe *guest_tlb[E500_TLB_NUM];
-	/* TLB that's actually used when the guest is running. */
-	struct tlbe *shadow_tlb[E500_TLB_NUM];
-	/* Pages which are referenced in the shadow TLB. */
-	struct tlbe_ref *shadow_refs[E500_TLB_NUM];
+	/* Pages which are referenced in host TLB. */
+	struct tlbe_ref *host_tlb0_ref;
 
 	unsigned int guest_tlb_size[E500_TLB_NUM];
-	unsigned int shadow_tlb_size[E500_TLB_NUM];
 	unsigned int guest_tlb_nv[E500_TLB_NUM];
 
 	u32 host_pid[E500_PID_NUM];
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e8a00b0..14af6d7 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -146,6 +146,10 @@ static int __init kvmppc_e500_init(void)
 	if (r)
 		return r;
 
+	r = kvmppc_e500_mmu_init();
+	if (r)
+		return r;
+
 	/* copy extra E500 exception handlers */
 	ivor[0] = mfspr(SPRN_IVOR32);
 	ivor[1] = mfspr(SPRN_IVOR33);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0b657af..a6c2320 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -25,9 +25,15 @@
 #include "e500_tlb.h"
 #include "trace.h"
 
-#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+static unsigned int host_tlb0_entry_num;
+static unsigned int host_tlb0_assoc;
+static unsigned int host_tlb0_assoc_bit;
 
-static unsigned int tlb1_entry_num;
+static inline unsigned int get_tlb0_entry_offset(u32 eaddr, u32 esel)
+{
+	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
+			(esel & (host_tlb0_assoc - 1)));
+}
 
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
 	return victim;
 }
 
-static inline unsigned int tlb1_max_shadow_size(void)
-{
-	return tlb1_entry_num - tlbcam_index;
-}
-
 static inline int tlbe_is_writable(struct tlbe *tlbe)
 {
 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
@@ -100,7 +101,7 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe)
+static inline void __host_tlbe_write(struct tlbe *stlbe)
 {
 	mtspr(SPRN_MAS1, stlbe->mas1);
 	mtspr(SPRN_MAS2, stlbe->mas2);
@@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
 	__asm__ __volatile__ ("tlbwe\n" : : );
 }
 
-static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
+                                   u32 gvaddr, struct tlbe *stlbe)
 {
-	local_irq_disable();
-	if (tlbsel = 0) {
-		__write_host_tlbe(stlbe);
-	} else {
-		unsigned register mas0;
+	unsigned register mas0;
 
-		mas0 = mfspr(SPRN_MAS0);
+	local_irq_disable();
 
-		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
-		__write_host_tlbe(stlbe);
+	mas0 = mfspr(SPRN_MAS0);
+	__host_tlbe_write(stlbe);
 
-		mtspr(SPRN_MAS0, mas0);
-	}
 	local_irq_enable();
-	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+
+	trace_kvm_stlb_write(mas0, stlbe->mas1, stlbe->mas2,
 			stlbe->mas3, stlbe->mas7);
+
+	return (mas0 & 0x0FFF0000) >> 16;
 }
 
 void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
@@ -172,41 +170,17 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 	return -1;
 }
 
-static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int stlbsel, int sesel)
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
-	struct tlbe_ref *ref;
-	struct page *page;
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-	page = ref->page;
-
-	if (page) {
-		ref->page = NULL;
+	struct page *pg = ref->page;
 
-		if (get_tlb_v(ref->gtlbe)) {
-			if (tlbe_is_writable(ref->gtlbe))
-				kvm_release_page_dirty(page);
-			else
-				kvm_release_page_clean(page);
-		}
+	if (pg) {
+		if (get_tlb_v(ref->gtlbe) && tlbe_is_writable(ref->gtlbe))
+			kvm_release_page_dirty(pg);
+		else
+			kvm_release_page_clean(pg);
 	}
-}
 
-static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int esel)
-{
-	struct tlbe stlbe;
-	unsigned int i;
-
-	stlbe.mas1 = 0;
-	for (i = 0; i < KVM_E500_TLB1_SIZE; i++) {
-		struct tlbe_ref *ref -			&vcpu_e500->shadow_refs[1][i];
-
-		if (ref->gtlbe = &vcpu_e500->guest_tlb[1][esel])
-			write_host_tlbe(vcpu_e500, 1, i, &stlbe);
-	}
 }
 
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
@@ -236,77 +210,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	vcpu_e500->mas7 = 0;
 }
 
-static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		struct tlbe *gtlbe, struct tlbe_ref *ref,
-		u64 gvaddr, struct tlbe *stlbe)
-{
-	hpa_t hpaddr = page_to_phys(ref->page);
-
-	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
-		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
-	stlbe->mas2 = (gvaddr & MAS2_EPN)
-		| e500_shadow_mas2_attrib(gtlbe->mas2,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas3 = (hpaddr & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
-				vcpu_e500->vcpu.arch.msr & MSR_PR);
-	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
-}
-
-static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel)
-{
-	struct page *new_page;
-	struct tlbe_ref *ref;
-
-	/* Get reference to new page. */
-	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
-	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
-		kvm_release_page_clean(new_page);
-		return;
-	}
-
-	/* Drop reference to old page. */
-	kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel);
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-	ref->page = new_page;
-	ref->gtlbe = gtlbe;
-}
-
-/* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel)
-{
-	struct tlbe *gtlbe;
-
-	gtlbe = &vcpu_e500->guest_tlb[0][esel];
-
-	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
-			gtlbe, 0, esel);
-
-	return esel;
-}
-
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-/* XXX for both one-one and one-to-many , for now use TLB1 */
-static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
-{
-	unsigned int victim;
-
-	victim = vcpu_e500->guest_tlb_nv[1]++;
-
-	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
-		vcpu_e500->guest_tlb_nv[1] = 0;
-
-	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim);
-
-	return victim;
-}
-
 /* Invalidate all guest kernel mappings when enter usermode,
  * so that when they fault back in they will get the
  * proper permission bits. */
@@ -445,6 +348,52 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_e500_mmu_map_tlb0(struct kvmppc_vcpu_e500 *vcpu_e500,
+                                    struct tlbe *gtlbe,
+				    gfn_t gfn,
+				    u64 gvaddr)
+{
+	struct page *new_page;
+	struct tlbe_ref *ref;
+	u32 esel;
+	hpa_t hpaddr;
+	struct tlbe stlbe;
+
+	/* Find related page. */
+	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+	if (is_error_page(new_page)) {
+		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", gfn);
+		kvm_release_page_clean(new_page);
+		return -1;
+	}
+
+	hpaddr = page_to_phys(new_page);
+
+	/* Setup the shadow tlb entry */
+	/* Force TS=1 IPROT=0 TSIZE=4KB for all mappings go to TLB0. */
+	stlbe.mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
+		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+	stlbe.mas2 = (gvaddr & MAS2_EPN)
+		| e500_shadow_mas2_attrib(gtlbe->mas2,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe.mas3 = (hpaddr & MAS3_RPN)
+		| e500_shadow_mas3_attrib(gtlbe->mas3,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe.mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+	esel = host_tlb0_write(vcpu_e500, gvaddr, &stlbe);
+
+	ref = &vcpu_e500->host_tlb0_ref[get_tlb0_entry_offset(gvaddr, esel)];
+
+	/* Release old page and setup new page */
+	kvmppc_e500_ref_release(ref);
+
+	ref->page = new_page;
+	ref->gtlbe = gtlbe;
+
+	return 0;
+}
+
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -457,7 +406,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
 
 	if (get_tlb_v(gtlbe) && tlbsel == 1)
-		kvmppc_e500_tlb1_invalidate(vcpu_e500, esel);
+		_tlbil_all();
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
@@ -469,27 +418,15 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
-		int stlbsel, sesel;
-		struct tlbe_ref *ref;
-		u64 eaddr;
-
 		switch (tlbsel) {
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
 			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
-			eaddr =  get_tlb_eaddr(gtlbe);
-			stlbsel = 0;
-			sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel);
-
-			ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-
-			kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe,
-					        ref, eaddr, &stlbe);
-			write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
-
+			kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe,
+                                    get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+                                    get_tlb_eaddr(gtlbe));
 			break;
 
 		case 1:
@@ -552,37 +489,15 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 			unsigned int index)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe_ref *ref;
-	struct tlbe *gtlbe, stlbe;
+	struct tlbe *gtlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
-	int stlbsel, sesel;
+	gfn_t gfn;
 
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+	gfn = gpaddr >> PAGE_SHIFT;
 
-	switch (tlbsel) {
-	case 0:
-		stlbsel = 0;
-		sesel = esel;
-		break;
-
-	case 1: {
-		gfn_t gfn = gpaddr >> PAGE_SHIFT;
-
-		stlbsel = 1;
-		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
-		break;
-	}
-
-	default:
-		BUG();
-		break;
-	}
-
-	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
-
-	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
-	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
+	kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe, gfn, eaddr);
 }
 
 int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -621,8 +536,6 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
-
 	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
 	vcpu_e500->guest_tlb[0] =
 		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
@@ -635,16 +548,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (vcpu_e500->guest_tlb[1] == NULL)
 		goto err_out_guest0;
 
-	vcpu_e500->shadow_refs[0] = (struct tlbe_ref *)
-		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_refs[0] == NULL)
+	vcpu_e500->host_tlb0_ref = kzalloc(sizeof(struct tlbe_ref) * host_tlb0_entry_num, GFP_KERNEL);
+	if (vcpu_e500->host_tlb0_ref == NULL)
 		goto err_out_guest1;
 
-	vcpu_e500->shadow_refs[1] = (struct tlbe_ref *)
-		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-	if (vcpu_e500->shadow_refs[1] == NULL)
-		goto err_out_ref0;
-
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
 	vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
@@ -653,8 +560,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	return 0;
 
-err_out_ref0:
-	kfree(vcpu_e500->shadow_refs[0]);
 err_out_guest1:
 	kfree(vcpu_e500->guest_tlb[1]);
 err_out_guest0:
@@ -665,18 +570,27 @@ err_out:
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	int stlbsel, i;
+	int i;
 
 	/* release all pages */
-	for (stlbsel = 0; stlbsel < 2; stlbsel++)
-		for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++)
-			kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i);
+	for (i = 0; i < host_tlb0_entry_num; i++) {
+		struct tlbe_ref *ref = &vcpu_e500->host_tlb0_ref[i];
+
+		kvmppc_e500_ref_release(ref);
+	}
 
 	/* discard all guest mapping */
 	_tlbil_all();
 
-	kfree(vcpu_e500->shadow_refs[1]);
-	kfree(vcpu_e500->shadow_refs[0]);
 	kfree(vcpu_e500->guest_tlb[1]);
 	kfree(vcpu_e500->guest_tlb[0]);
 }
+
+int kvmppc_e500_mmu_init(void)
+{
+	host_tlb0_entry_num = mfspr(SPRN_TLB0CFG) & 0xFFF;
+	host_tlb0_assoc = (mfspr(SPRN_TLB0CFG) >> 24) & 0xFF;
+	for (; host_tlb0_assoc >> (host_tlb0_assoc_bit + 1); host_tlb0_assoc_bit++);
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index d28e301..851e523 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -55,6 +55,7 @@ extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
 extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+extern int kvmppc_e500_mmu_init(void);
 
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
-- 
1.6.4



^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
       [not found]     ` <1283938806-2981-2-git-send-email-yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
@ 2010-09-08 16:06         ` Hollis Blanchard
  0 siblings, 0 replies; 46+ messages in thread
From: Hollis Blanchard @ 2010-09-08 16:06 UTC (permalink / raw)
  To: Liu Yu
  Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA,
	agraf-l3A5Bk7waGM

On 09/08/2010 02:40 AM, Liu Yu wrote:
> It is unnecessary to keep a shadow TLB.
> First, the shadow TLB keeps fixed values, which makes things inflexible.
> Second, removing the shadow TLB saves a lot of memory.
>
> This patch removes the shadow TLB and calculates the shadow TLB entry value
> before we write it to hardware.
>
> Also we use the new struct tlbe_ref to trace the relation
> between guest TLB entries and pages.

Did you look at the performance impact?

Back in the day, we did essentially the same thing on 440. However, 
rather than discard the whole TLB when context switching away from the 
host (to be demand-faulted when the guest is resumed), we found a 
noticeable performance improvement by preserving a shadow TLB across 
context switches. We only use it in the vcpu_put/vcpu_load path.

Of course, our TLB was much smaller (64 entries), so the use model may 
not be the same at all (e.g. it takes longer to restore a full guest TLB 
working set, but maybe it's not really possible to use all 1024 TLB0 
entries in one timeslice anyways).
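
For reference, that path amounted to something like this (an
illustrative sketch only; save_shadow_tlb()/restore_shadow_tlb() are
invented names, not the actual 440 code):

	/* vcpu_put: snapshot the hardware entries, then give the TLB
	 * back to the host. */
	save_shadow_tlb(vcpu);
	_tlbil_all();

	/* vcpu_load: replay the snapshot instead of demand-faulting
	 * the whole working set again. */
	restore_shadow_tlb(vcpu);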

--
Hollis Blanchard
Mentor Graphics, Embedded Systems Division

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 0/2] kvm/e500v2: MMU optimization
       [not found] ` <1283938806-2981-1-git-send-email-yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
@ 2010-09-08 16:07     ` Hollis Blanchard
  2010-09-08 16:07     ` Hollis Blanchard
  1 sibling, 0 replies; 46+ messages in thread
From: Hollis Blanchard @ 2010-09-08 16:07 UTC (permalink / raw)
  To: Liu Yu
  Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA,
	agraf-l3A5Bk7waGM

On 09/08/2010 02:40 AM, Liu Yu wrote:
> The patchset aims at mapping guest TLB1 to host TLB0.
> And it includes:
> [PATCH 1/2] kvm/e500v2: Remove shadow tlb
> [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>
> The reason we need patch 1 is that it makes things simple and flexible.
> Applying patch 1 alone also makes kvm work.

I've always thought the best long-term "optimization" on these cores is 
to share in the host PID allocation (i.e. __init_new_context()). This 
way, the TID in guest mappings would not overlap the TID in host 
mappings, and guest mappings could be demand-faulted rather than swapped 
wholesale. To do that, you would need to track the host PID in KVM data 
structures, I guess in the tlbe_ref structure.
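
A minimal sketch of that tracking (the host_pid field and its use here
are hypothetical; __init_new_context() is the existing host allocator
mentioned above, assumed to hand back an unused context id):

	struct tlbe_ref {
		struct page *page;
		struct tlbe *gtlbe;
		unsigned long host_pid;	/* invented: PID shared with the host */
	};

	/* at shadow-fault time: */
	if (!ref->host_pid)
		ref->host_pid = __init_new_context();
	stlbe.mas1 |= MAS1_TID(ref->host_pid);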

--
Hollis Blanchard
Mentor Graphics, Embedded Systems Division

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 0/2] kvm/e500v2: MMU optimization
       [not found]     ` <4C87B4AB.7010009-nmGgyN9QBj3QT0dZR+AlfA@public.gmane.org>
@ 2010-09-09 11:03         ` Liu Yu-B13201
  0 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-09 11:03 UTC (permalink / raw)
  To: Hollis Blanchard
  Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA,
	agraf-l3A5Bk7waGM

 

> -----Original Message-----
> From: kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org 
> [mailto:kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Hollis Blanchard
> Sent: Thursday, September 09, 2010 12:07 AM
> To: Liu Yu-B13201
> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; agraf-l3A5Bk7waGM@public.gmane.org
> Subject: Re: [PATCH 0/2] kvm/e500v2: MMU optimization
> 
> On 09/08/2010 02:40 AM, Liu Yu wrote:
> > The patchset aims at mapping guest TLB1 to host TLB0.
> > And it includes:
> > [PATCH 1/2] kvm/e500v2: Remove shadow tlb
> > [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
> >
> > The reason we need patch 1 is that it makes things
> > simple and flexible.
> > Applying patch 1 alone also makes kvm work.
> 
> I've always thought the best long-term "optimization" on 
> these cores is 
> to share in the host PID allocation (i.e. __init_new_context()). This 
> way, the TID in guest mappings would not overlap the TID in host 
> mappings, and guest mappings could be demand-faulted rather 
> than swapped 
> wholesale. To do that, you would need to track the host PID 
> in KVM data 
> structures, I guess in the tlbe_ref structure.
> 

Hi Hollis,

Guest uses AS=1 and host uses AS=0,
so even if the guest uses the same TID as the host, they're in different spaces.

Then why does the guest need to care about the host TID?


Thanks,
Yu

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
  2010-09-08 16:06         ` Hollis Blanchard
@ 2010-09-09 11:16           ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-09 11:16 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: kvm, kvm-ppc, agraf

 

> -----Original Message-----
> From: Hollis Blanchard [mailto:hollis_blanchard@mentor.com] 
> Sent: Thursday, September 09, 2010 12:07 AM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org; agraf@suse.de
> Subject: Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
> 
> On 09/08/2010 02:40 AM, Liu Yu wrote:
> > It is unnecessary to keep a shadow TLB.
> > First, the shadow TLB keeps fixed values, which makes
> > things inflexible.
> > Second, removing the shadow TLB saves a lot of memory.
> >
> > This patch removes the shadow TLB and calculates the shadow TLB entry value
> > before we write it to hardware.
> >
> > Also we use the new struct tlbe_ref to trace the relation
> > between guest TLB entries and pages.
> 
> Did you look at the performance impact?
> 
> Back in the day, we did essentially the same thing on 440. However, 
> rather than discard the whole TLB when context switching away 
> from the 
> host (to be demand-faulted when the guest is resumed), we found a 
> noticeable performance improvement by preserving a shadow TLB across 
> context switches. We only use it in the vcpu_put/vcpu_load path.
> 
> Of course, our TLB was much smaller (64 entries), so the use 
> model may 
> not be the same at all (e.g. it takes longer to restore a 
> full guest TLB 
> working set, but maybe it's not really possible to use all 1024 TLB0 
> entries in one timeslice anyways).
> 

Yes, it's hard to resume TLB0. We only resumed TLB1 in the previous code.
But TLB1 is even smaller (13 free entries) than on 440,
so it still has little chance of getting hits;
thus the resumption is useless.

And we plan to use shadow PIDs to minimize TLB invalidation.
Then we don't need to invalidate the TLB when the guest is scheduled out,
so we don't need to resume TLB entries.

But this method requires dynamic TIDs in the shadow TLB,
so we wouldn't want to keep the shadow TLB anyway.
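
For illustration, the idea is roughly the following (a sketch only;
get_shadow_tid() is an invented helper):

	/* Translate the guest TID to a dynamically allocated shadow
	 * TID when the shadow entry is built, instead of copying it
	 * 1:1 from the guest entry. */
	u32 stid = get_shadow_tid(vcpu_e500, get_tlb_tid(gtlbe));

	stlbe.mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
		| MAS1_TID(stid) | MAS1_TS | MAS1_VALID;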

Thanks,
Yu


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 0/2] kvm/e500v2: MMU optimization
       [not found]         ` <A9833F0F7901024DA08417AA5A9887D91BF248-bKEhWGtIRUJ4Lp7cDGe+DVjVikpgYyvb5NbjCUgZEJk@public.gmane.org>
@ 2010-09-09 16:23             ` Hollis Blanchard
  0 siblings, 0 replies; 46+ messages in thread
From: Hollis Blanchard @ 2010-09-09 16:23 UTC (permalink / raw)
  To: Liu Yu-B13201
  Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA,
	agraf-l3A5Bk7waGM

On 09/09/2010 04:03 AM, Liu Yu-B13201 wrote:
>> -----Original Message-----
>> From: kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> [mailto:kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Hollis Blanchard
>> Sent: Thursday, September 09, 2010 12:07 AM
>> To: Liu Yu-B13201
>> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; agraf-l3A5Bk7waGM@public.gmane.org
>> Subject: Re: [PATCH 0/2] kvm/e500v2: MMU optimization
>>
>> On 09/08/2010 02:40 AM, Liu Yu wrote:
>>      
>>> The patchset aims at mapping guest TLB1 to host TLB0.
>>> And it includes:
>>> [PATCH 1/2] kvm/e500v2: Remove shadow tlb
>>> [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>>>
>>> The reason we need patch 1 is that it makes things
>>> simple and flexible.
>>> Applying patch 1 alone also makes kvm work.
>>>        
>> I've always thought the best long-term "optimization" on
>> these cores is
>> to share in the host PID allocation (i.e. __init_new_context()). This
>> way, the TID in guest mappings would not overlap the TID in host
>> mappings, and guest mappings could be demand-faulted rather
>> than swapped
>> wholesale. To do that, you would need to track the host PID
>> in KVM data
>> structures, I guess in the tlbe_ref structure.
>>
>>      
> Hi Hollis,
>
> Guest uses AS=1 and host uses AS=0,
> so even if the guest uses the same TID as the host, they're in different spaces.
>
> Then why does the guest need to care about the host TID?
>
You're absolutely right, but this makes a couple of key assumptions:
1. The guest doesn't try to use AS=1. This is already false in Linux, 
because the udbg code uses an AS=1 mapping for the UART, but this can be 
configured out (with a small loss in functionality). In non-Linux guests 
the AS=0 restriction could be onerous.
2. A Book E MMU. If we participate in the host "MMU context" allocation, 
the guest -> host address space code could be generalized to include 
e.g. an e600 guest on an e500 host, or vice versa.

So you're right that "optimization" is not the right word; this would be 
more of a functionality and design improvement. (In fact, I suppose it 
could reduce performance on Book E, since AS=1 space actually *is* 
unused by the host. I think it would be worth finding out.)

Hollis Blanchard
Mentor Graphics, Embedded Systems Division

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
       [not found]           ` <A9833F0F7901024DA08417AA5A9887D91BF24A-bKEhWGtIRUJ4Lp7cDGe+DVjVikpgYyvb5NbjCUgZEJk@public.gmane.org>
@ 2010-09-09 18:13               ` Hollis Blanchard
  0 siblings, 0 replies; 46+ messages in thread
From: Hollis Blanchard @ 2010-09-09 18:13 UTC (permalink / raw)
  To: Liu Yu-B13201
  Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA,
	agraf-l3A5Bk7waGM

On 09/09/2010 04:16 AM, Liu Yu-B13201 wrote:
> Yes, it's hard to resume TLB0. We only resumed TLB1 in the previous code.
> But TLB1 is even smaller (13 free entries) than on 440,
> so it still has little chance of getting hits;
> thus the resumption is useless.
>    
The only reason hits are unlikely in TLB1 is because you still don't 
have large page support in the host. Once you have that, you can use 
TLB1 for large guest mappings, and it will become extremely likely that 
you get hits in TLB1. This is true even if the guest wants 256MB but the 
host supports only e.g. 16MB large pages, and must split the guest 
mapping into multiple large host pages.
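
Roughly like this (the sizes and the mapping helper are illustrative
only):

	unsigned long host_sz = 16UL << 20;	/* host large page: 16MB */
	unsigned long guest_sz = 256UL << 20;	/* guest mapping: 256MB */
	unsigned long off;

	/* Split one large guest mapping into guest_sz/host_sz large
	 * host pages, each backed by its own TLB1 entry. */
	for (off = 0; off < guest_sz; off += host_sz)
		map_large_host_page(gvaddr + off,	/* invented helper */
				    gfn + (off >> PAGE_SHIFT));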

When will you have hugetlbfs for e500? That's going to make such a 
dramatic difference, I'm not sure it's worth investing time in 
optimizing the MMU code until then.

Hollis Blanchard
Mentor Graphics, Embedded Systems Division

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
       [not found]       ` <1283938806-2981-3-git-send-email-yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
@ 2010-09-09 23:24           ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-09 23:24 UTC (permalink / raw)
  To: Liu Yu; +Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA


On 08.09.2010, at 11:40, Liu Yu wrote:

> Currently guest TLB1 is mapped to host TLB1.
> As the host kernel only provides 4K non-contiguous pages,
> we have to break guest large mappings into 4K shadow mappings.
> These 4K shadow mappings are then mapped into host TLB1 on the fly.
> As host TLB1 only has 13 free entries, there are serious tlb misses.
> 
> Since e500v2 has a large number of TLB0 entries,
> it should help to map those 4K shadow mappings to host TLB0.
> To achieve this, we need to unlink the guest tlb and the host tlb,
> so that guest TLB1 mappings can route to any host TLB0 entries freely.
> 
> Pages/mappings are treated the same way as host tlb entries.
> This patch removes the link between pages and guest tlb entries to do the unlink,
> and keeps host_tlb0_ref in each vcpu to trace pages.
> Then it's easy to map guest TLB1 to host TLB0.
> 
> In a guest ramdisk boot test (the guest mainly uses TLB1),
> with this patch the tlb miss count goes down 90%.
> 
> Signed-off-by: Liu Yu <yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
> ---
> arch/powerpc/include/asm/kvm_e500.h |    7 +-
> arch/powerpc/kvm/e500.c             |    4 +
> arch/powerpc/kvm/e500_tlb.c         |  280 ++++++++++++-----------------------
> arch/powerpc/kvm/e500_tlb.h         |    1 +
> 4 files changed, 104 insertions(+), 188 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
> index cb785f9..16c0ed0 100644
> --- a/arch/powerpc/include/asm/kvm_e500.h
> +++ b/arch/powerpc/include/asm/kvm_e500.h
> @@ -37,13 +37,10 @@ struct tlbe_ref {
> struct kvmppc_vcpu_e500 {
> 	/* Unmodified copy of the guest's TLB. */
> 	struct tlbe *guest_tlb[E500_TLB_NUM];
> -	/* TLB that's actually used when the guest is running. */
> -	struct tlbe *shadow_tlb[E500_TLB_NUM];
> -	/* Pages which are referenced in the shadow TLB. */
> -	struct tlbe_ref *shadow_refs[E500_TLB_NUM];
> +	/* Pages which are referenced in host TLB. */
> +	struct tlbe_ref *host_tlb0_ref;
> 
> 	unsigned int guest_tlb_size[E500_TLB_NUM];
> -	unsigned int shadow_tlb_size[E500_TLB_NUM];
> 	unsigned int guest_tlb_nv[E500_TLB_NUM];
> 
> 	u32 host_pid[E500_PID_NUM];
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index e8a00b0..14af6d7 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -146,6 +146,10 @@ static int __init kvmppc_e500_init(void)
> 	if (r)
> 		return r;
> 
> +	r = kvmppc_e500_mmu_init();
> +	if (r)
> +		return r;
> +
> 	/* copy extra E500 exception handlers */
> 	ivor[0] = mfspr(SPRN_IVOR32);
> 	ivor[1] = mfspr(SPRN_IVOR33);
> diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
> index 0b657af..a6c2320 100644
> --- a/arch/powerpc/kvm/e500_tlb.c
> +++ b/arch/powerpc/kvm/e500_tlb.c
> @@ -25,9 +25,15 @@
> #include "e500_tlb.h"
> #include "trace.h"
> 
> -#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
> +static unsigned int host_tlb0_entry_num;
> +static unsigned int host_tlb0_assoc;
> +static unsigned int host_tlb0_assoc_bit;

bits.

> 
> -static unsigned int tlb1_entry_num;
> +static inline unsigned int get_tlb0_entry_offset(u32 eaddr, u32 esel)
> +{
> +	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
> +			(esel & (host_tlb0_assoc - 1)));
> +}
> 
> void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
> {
> @@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
> 	return victim;
> }
> 
> -static inline unsigned int tlb1_max_shadow_size(void)
> -{
> -	return tlb1_entry_num - tlbcam_index;
> -}
> -
> static inline int tlbe_is_writable(struct tlbe *tlbe)
> {
> 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
> @@ -100,7 +101,7 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> /*
>  * writing shadow tlb entry to host TLB
>  */
> -static inline void __write_host_tlbe(struct tlbe *stlbe)
> +static inline void __host_tlbe_write(struct tlbe *stlbe)
> {
> 	mtspr(SPRN_MAS1, stlbe->mas1);
> 	mtspr(SPRN_MAS2, stlbe->mas2);
> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
> 	__asm__ __volatile__ ("tlbwe\n" : : );
> }
> 
> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int tlbsel, int esel, struct tlbe *stlbe)
> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
> +                                   u32 gvaddr, struct tlbe *stlbe)
> {
> -	local_irq_disable();
> -	if (tlbsel == 0) {
> -		__write_host_tlbe(stlbe);
> -	} else {
> -		unsigned register mas0;
> +	unsigned register mas0;
> 
> -		mas0 = mfspr(SPRN_MAS0);
> +	local_irq_disable();

Do you have to disable interrupts for a tlb write? If you get preempted before the write, the entry you overwrite could be different. But you don't protect against that either way. And if you get preempted afterwards, you could lose the entry. But since you enable interrupts beyond that, that could happen either way too.

So what's the reason for the disable here?

> 
> -		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
> -		__write_host_tlbe(stlbe);
> +	mas0 = mfspr(SPRN_MAS0);
> +	__host_tlbe_write(stlbe);
> 
> -		mtspr(SPRN_MAS0, mas0);
> -	}
> 	local_irq_enable();
> -	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
> +
> +	trace_kvm_stlb_write(mas0, stlbe->mas1, stlbe->mas2,
> 			stlbe->mas3, stlbe->mas7);
> +
> +	return (mas0 & 0x0FFF0000) >> 16;
> }
> 
> void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
> @@ -172,41 +170,17 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
> 	return -1;
> }
> 
> -static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int stlbsel, int sesel)
> +static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
> {
> -	struct tlbe_ref *ref;
> -	struct page *page;
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -	page = ref->page;
> -
> -	if (page) {
> -		ref->page = NULL;
> +	struct page *pg = ref->page;
> 
> -		if (get_tlb_v(ref->gtlbe)) {
> -			if (tlbe_is_writable(ref->gtlbe))
> -				kvm_release_page_dirty(page);
> -			else
> -				kvm_release_page_clean(page);
> -		}
> +	if (pg) {
> +		if (get_tlb_v(ref->gtlbe) && tlbe_is_writable(ref->gtlbe))
> +			kvm_release_page_dirty(pg);
> +		else
> +			kvm_release_page_clean(pg);
> 	}
> -}
> 
> -static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int esel)
> -{
> -	struct tlbe stlbe;
> -	unsigned int i;
> -
> -	stlbe.mas1 = 0;
> -	for (i = 0; i < KVM_E500_TLB1_SIZE; i++) {
> -		struct tlbe_ref *ref =
> -			&vcpu_e500->shadow_refs[1][i];
> -
> -		if (ref->gtlbe == &vcpu_e500->guest_tlb[1][esel])
> -			write_host_tlbe(vcpu_e500, 1, i, &stlbe);
> -	}
> }
> 
> static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
> @@ -236,77 +210,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
> 	vcpu_e500->mas7 = 0;
> }
> 
> -static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		struct tlbe *gtlbe, struct tlbe_ref *ref,
> -		u64 gvaddr, struct tlbe *stlbe)
> -{
> -	hpa_t hpaddr = page_to_phys(ref->page);
> -
> -	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
> -	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
> -		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
> -	stlbe->mas2 = (gvaddr & MAS2_EPN)
> -		| e500_shadow_mas2_attrib(gtlbe->mas2,
> -				vcpu_e500->vcpu.arch.msr & MSR_PR);
> -	stlbe->mas3 = (hpaddr & MAS3_RPN)
> -		| e500_shadow_mas3_attrib(gtlbe->mas3,
> -				vcpu_e500->vcpu.arch.msr & MSR_PR);
> -	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
> -}
> -
> -static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -	gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel)
> -{
> -	struct page *new_page;
> -	struct tlbe_ref *ref;
> -
> -	/* Get reference to new page. */
> -	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
> -	if (is_error_page(new_page)) {
> -		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
> -		kvm_release_page_clean(new_page);
> -		return;
> -	}
> -
> -	/* Drop reference to old page. */
> -	kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel);
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -	ref->page = new_page;
> -	ref->gtlbe = gtlbe;
> -}
> -
> -/* XXX only map the one-one case, for now use TLB0 */
> -static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel)
> -{
> -	struct tlbe *gtlbe;
> -
> -	gtlbe = &vcpu_e500->guest_tlb[0][esel];
> -
> -	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
> -			gtlbe, 0, esel);
> -
> -	return esel;
> -}
> -
> -/* Caller must ensure that the specified guest TLB entry is safe to insert into
> - * the shadow TLB. */
> -/* XXX for both one-one and one-to-many , for now use TLB1 */
> -static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
> -{
> -	unsigned int victim;
> -
> -	victim = vcpu_e500->guest_tlb_nv[1]++;
> -
> -	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
> -		vcpu_e500->guest_tlb_nv[1] = 0;
> -
> -	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim);
> -
> -	return victim;
> -}
> -
> /* Invalidate all guest kernel mappings when enter usermode,
>  * so that when they fault back in they will get the
>  * proper permission bits. */
> @@ -445,6 +348,52 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
> 	return EMULATE_DONE;
> }
> 
> +static int kvmppc_e500_mmu_map_tlb0(struct kvmppc_vcpu_e500 *vcpu_e500,
> +                                    struct tlbe *gtlbe,
> +				    gfn_t gfn,
> +				    u64 gvaddr)
> +{
> +	struct page *new_page;
> +	struct tlbe_ref *ref;
> +	u32 esel;
> +	hpa_t hpaddr;
> +	struct tlbe stlbe;
> +
> +	/* Find related page. */
> +	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
> +	if (is_error_page(new_page)) {
> +		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", gfn);
> +		kvm_release_page_clean(new_page);
> +		return -1;
> +	}
> +
> +	hpaddr = page_to_phys(new_page);
> +
> +	/* Setup the shadow tlb entry */
> +	/* Force TS=1 IPROT=0 TSIZE=4KB for all mappings go to TLB0. */
> +	stlbe.mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
> +		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
> +	stlbe.mas2 = (gvaddr & MAS2_EPN)
> +		| e500_shadow_mas2_attrib(gtlbe->mas2,
> +				vcpu_e500->vcpu.arch.msr & MSR_PR);
> +	stlbe.mas3 = (hpaddr & MAS3_RPN)
> +		| e500_shadow_mas3_attrib(gtlbe->mas3,
> +				vcpu_e500->vcpu.arch.msr & MSR_PR);
> +	stlbe.mas7 = (hpaddr >> 32) & MAS7_RPN;
> +
> +	esel = host_tlb0_write(vcpu_e500, gvaddr, &stlbe);
> +
> +	ref = &vcpu_e500->host_tlb0_ref[get_tlb0_entry_offset(gvaddr, esel)];
> +
> +	/* Release old page and setup new page */
> +	kvmppc_e500_ref_release(ref);
> +
> +	ref->page = new_page;
> +	ref->gtlbe = gtlbe;
> +
> +	return 0;
> +}
> +
> int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> @@ -457,7 +406,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
> 
> 	if (get_tlb_v(gtlbe) && tlbsel == 1)
> -		kvmppc_e500_tlb1_invalidate(vcpu_e500, esel);
> +		_tlbil_all();
> 
> 	gtlbe->mas1 = vcpu_e500->mas1;
> 	gtlbe->mas2 = vcpu_e500->mas2;
> @@ -469,27 +418,15 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> 
> 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
> 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
> -		struct tlbe stlbe;
> -		int stlbsel, sesel;
> -		struct tlbe_ref *ref;
> -		u64 eaddr;
> -
> 		switch (tlbsel) {
> 		case 0:
> 			/* TLB0 */
> 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
> 			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
> 
> -			eaddr =  get_tlb_eaddr(gtlbe);
> -			stlbsel = 0;
> -			sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel);
> -
> -			ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -
> -			kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe,
> -					        ref, eaddr, &stlbe);
> -			write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
> -
> +			kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe,
> +                                    get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
> +                                    get_tlb_eaddr(gtlbe));
> 			break;
> 
> 		case 1:
> @@ -552,37 +489,15 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
> 			unsigned int index)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe_ref *ref;
> -	struct tlbe *gtlbe, stlbe;
> +	struct tlbe *gtlbe;
> 	int tlbsel = tlbsel_of(index);
> 	int esel = esel_of(index);
> -	int stlbsel, sesel;
> +	gfn_t gfn;
> 
> 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
> +	gfn = gpaddr >> PAGE_SHIFT;
> 
> -	switch (tlbsel) {
> -	case 0:
> -		stlbsel = 0;
> -		sesel = esel;
> -		break;
> -
> -	case 1: {
> -		gfn_t gfn = gpaddr >> PAGE_SHIFT;
> -
> -		stlbsel = 1;
> -		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
> -		break;
> -	}
> -
> -	default:
> -		BUG();
> -		break;
> -	}
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -
> -	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
> -	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
> +	kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe, gfn, eaddr);
> }
> 
> int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
> @@ -621,8 +536,6 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
> 
> int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> {
> -	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
> -
> 	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
> 	vcpu_e500->guest_tlb[0] =
> 		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
> @@ -635,16 +548,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> 	if (vcpu_e500->guest_tlb[1] == NULL)
> 		goto err_out_guest0;
> 
> -	vcpu_e500->shadow_refs[0] = (struct tlbe_ref *)
> -		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
> -	if (vcpu_e500->shadow_refs[0] == NULL)
> +	vcpu_e500->host_tlb0_ref = kzalloc(sizeof(struct tlbe_ref) * host_tlb0_entry_num, GFP_KERNEL);
> +	if (vcpu_e500->host_tlb0_ref == NULL)
> 		goto err_out_guest1;
> 
> -	vcpu_e500->shadow_refs[1] = (struct tlbe_ref *)
> -		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
> -	if (vcpu_e500->shadow_refs[1] == NULL)
> -		goto err_out_ref0;
> -
> 	/* Init TLB configuration register */
> 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
> 	vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
> @@ -653,8 +560,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> 
> 	return 0;
> 
> -err_out_ref0:
> -	kfree(vcpu_e500->shadow_refs[0]);
> err_out_guest1:
> 	kfree(vcpu_e500->guest_tlb[1]);
> err_out_guest0:
> @@ -665,18 +570,27 @@ err_out:
> 
> void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
> {
> -	int stlbsel, i;
> +	int i;
> 
> 	/* release all pages */
> -	for (stlbsel = 0; stlbsel < 2; stlbsel++)
> -		for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++)
> -			kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i);
> +	for (i = 0; i < host_tlb0_entry_num; i++) {
> +		struct tlbe_ref *ref = &vcpu_e500->host_tlb0_ref[i];
> +
> +		kvmppc_e500_ref_release(ref);
> +	}
> 
> 	/* discard all guest mapping */
> 	_tlbil_all();
> 
> -	kfree(vcpu_e500->shadow_refs[1]);
> -	kfree(vcpu_e500->shadow_refs[0]);
> 	kfree(vcpu_e500->guest_tlb[1]);
> 	kfree(vcpu_e500->guest_tlb[0]);
> }
> +
> +int kvmppc_e500_mmu_init(void)
> +{
> +	host_tlb0_entry_num = mfspr(SPRN_TLB0CFG) & 0xFFF;
> +	host_tlb0_assoc = (mfspr(SPRN_TLB0CFG) >> 24) & 0xFF;
> +	for (; host_tlb0_assoc >> (host_tlb0_assoc_bit + 1); host_tlb0_assoc_bit++);

Ugh. While loop please.
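
For instance (equivalent, untested):

	host_tlb0_assoc_bit = 0;
	while (host_tlb0_assoc >> (host_tlb0_assoc_bit + 1))
		host_tlb0_assoc_bit++;

or simply host_tlb0_assoc_bit = ilog2(host_tlb0_assoc) from
<linux/log2.h>, since the associativity should be a power of two.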

The rest looks good from what I can tell - not being a BookE MMU expert that is.
I'm still worried about the userspace switch which results in a complete MMU flush. Couldn't we just allocate a new PID for that?
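
Hand-waving sketch of what I mean (the field and the helper are made
up):

	/* Instead of _tlbil_all() when the guest enters usermode, retag
	 * new shadow entries with a fresh shadow PID and let the stale
	 * guest kernel mappings age out of TLB0. */
	vcpu_e500->shadow_pid = e500_alloc_shadow_pid(vcpu_e500);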


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
@ 2010-09-09 23:24           ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-09 23:24 UTC (permalink / raw)
  To: Liu Yu; +Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA


On 08.09.2010, at 11:40, Liu Yu wrote:

> Current guest TLB1 is mapped to host TLB1.
> As host kernel only provides 4K uncontinuous pages,
> we have to break guest large mapping into 4K shadow mappings.
> These 4K shadow mappings are then mapped into host TLB1 on fly.
> As host TLB1 only has 13 free entries, there's serious tlb miss.
> 
> Since e500v2 has a big number of TLB0 entries,
> it should be help to map those 4K shadow mappings to host TLB0.
> To achieve this, we need to unlink guest tlb and host tlb,
> So that guest TLB1 mappings can route to any host TLB0 entries freely.
> 
> Pages/mappings are considerred in the same kind as host tlb entry.
> This patch remove the link between pages and guest tlb entry to do the unlink.
> And keep host_tlb0_ref in each vcpu to trace pages.
> Then it's easy to map guest TLB1 to host TLB0.
> 
> In guest ramdisk boot test(guest mainly uses TLB1),
> with this patch, the tlb miss number get down 90%.
> 
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> ---
> arch/powerpc/include/asm/kvm_e500.h |    7 +-
> arch/powerpc/kvm/e500.c             |    4 +
> arch/powerpc/kvm/e500_tlb.c         |  280 ++++++++++++-----------------------
> arch/powerpc/kvm/e500_tlb.h         |    1 +
> 4 files changed, 104 insertions(+), 188 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
> index cb785f9..16c0ed0 100644
> --- a/arch/powerpc/include/asm/kvm_e500.h
> +++ b/arch/powerpc/include/asm/kvm_e500.h
> @@ -37,13 +37,10 @@ struct tlbe_ref {
> struct kvmppc_vcpu_e500 {
> 	/* Unmodified copy of the guest's TLB. */
> 	struct tlbe *guest_tlb[E500_TLB_NUM];
> -	/* TLB that's actually used when the guest is running. */
> -	struct tlbe *shadow_tlb[E500_TLB_NUM];
> -	/* Pages which are referenced in the shadow TLB. */
> -	struct tlbe_ref *shadow_refs[E500_TLB_NUM];
> +	/* Pages which are referenced in host TLB. */
> +	struct tlbe_ref *host_tlb0_ref;
> 
> 	unsigned int guest_tlb_size[E500_TLB_NUM];
> -	unsigned int shadow_tlb_size[E500_TLB_NUM];
> 	unsigned int guest_tlb_nv[E500_TLB_NUM];
> 
> 	u32 host_pid[E500_PID_NUM];
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index e8a00b0..14af6d7 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -146,6 +146,10 @@ static int __init kvmppc_e500_init(void)
> 	if (r)
> 		return r;
> 
> +	r = kvmppc_e500_mmu_init();
> +	if (r)
> +		return r;
> +
> 	/* copy extra E500 exception handlers */
> 	ivor[0] = mfspr(SPRN_IVOR32);
> 	ivor[1] = mfspr(SPRN_IVOR33);
> diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
> index 0b657af..a6c2320 100644
> --- a/arch/powerpc/kvm/e500_tlb.c
> +++ b/arch/powerpc/kvm/e500_tlb.c
> @@ -25,9 +25,15 @@
> #include "e500_tlb.h"
> #include "trace.h"
> 
> -#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
> +static unsigned int host_tlb0_entry_num;
> +static unsigned int host_tlb0_assoc;
> +static unsigned int host_tlb0_assoc_bit;

bits.

> 
> -static unsigned int tlb1_entry_num;
> +static inline unsigned int get_tlb0_entry_offset(u32 eaddr, u32 esel)
> +{
> +	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
> +			(esel & (host_tlb0_assoc - 1)));
> +}
> 
> void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
> {
> @@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
> 	return victim;
> }
> 
> -static inline unsigned int tlb1_max_shadow_size(void)
> -{
> -	return tlb1_entry_num - tlbcam_index;
> -}
> -
> static inline int tlbe_is_writable(struct tlbe *tlbe)
> {
> 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
> @@ -100,7 +101,7 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> /*
>  * writing shadow tlb entry to host TLB
>  */
> -static inline void __write_host_tlbe(struct tlbe *stlbe)
> +static inline void __host_tlbe_write(struct tlbe *stlbe)
> {
> 	mtspr(SPRN_MAS1, stlbe->mas1);
> 	mtspr(SPRN_MAS2, stlbe->mas2);
> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
> 	__asm__ __volatile__ ("tlbwe\n" : : );
> }
> 
> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int tlbsel, int esel, struct tlbe *stlbe)
> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
> +                                   u32 gvaddr, struct tlbe *stlbe)
> {
> -	local_irq_disable();
> -	if (tlbsel = 0) {
> -		__write_host_tlbe(stlbe);
> -	} else {
> -		unsigned register mas0;
> +	unsigned register mas0;
> 
> -		mas0 = mfspr(SPRN_MAS0);
> +	local_irq_disable();

Do you have to disable interrupts for a tlb write? If you get preempted before the write, the entry you overwrite could be different. But you don't protect against that either way. And if you get preempted afterwards, you could lose the entry. But since you enable interrupts beyond that, that could happen either way too.

So what's the reason for the disable here?

> 
> -		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
> -		__write_host_tlbe(stlbe);
> +	mas0 = mfspr(SPRN_MAS0);
> +	__host_tlbe_write(stlbe);
> 
> -		mtspr(SPRN_MAS0, mas0);
> -	}
> 	local_irq_enable();
> -	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
> +
> +	trace_kvm_stlb_write(mas0, stlbe->mas1, stlbe->mas2,
> 			stlbe->mas3, stlbe->mas7);
> +
> +	return (mas0 & 0x0FFF0000) >> 16;
> }
> 
> void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
> @@ -172,41 +170,17 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
> 	return -1;
> }
> 
> -static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int stlbsel, int sesel)
> +static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
> {
> -	struct tlbe_ref *ref;
> -	struct page *page;
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -	page = ref->page;
> -
> -	if (page) {
> -		ref->page = NULL;
> +	struct page *pg = ref->page;
> 
> -		if (get_tlb_v(ref->gtlbe)) {
> -			if (tlbe_is_writable(ref->gtlbe))
> -				kvm_release_page_dirty(page);
> -			else
> -				kvm_release_page_clean(page);
> -		}
> +	if (pg) {
> +		if (get_tlb_v(ref->gtlbe) && tlbe_is_writable(ref->gtlbe))
> +			kvm_release_page_dirty(pg);
> +		else
> +			kvm_release_page_clean(pg);
> 	}
> -}
> 
> -static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		int esel)
> -{
> -	struct tlbe stlbe;
> -	unsigned int i;
> -
> -	stlbe.mas1 = 0;
> -	for (i = 0; i < KVM_E500_TLB1_SIZE; i++) {
> -		struct tlbe_ref *ref > -			&vcpu_e500->shadow_refs[1][i];
> -
> -		if (ref->gtlbe = &vcpu_e500->guest_tlb[1][esel])
> -			write_host_tlbe(vcpu_e500, 1, i, &stlbe);
> -	}
> }
> 
> static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
> @@ -236,77 +210,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
> 	vcpu_e500->mas7 = 0;
> }
> 
> -static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		struct tlbe *gtlbe, struct tlbe_ref *ref,
> -		u64 gvaddr, struct tlbe *stlbe)
> -{
> -	hpa_t hpaddr = page_to_phys(ref->page);
> -
> -	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
> -	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
> -		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
> -	stlbe->mas2 = (gvaddr & MAS2_EPN)
> -		| e500_shadow_mas2_attrib(gtlbe->mas2,
> -				vcpu_e500->vcpu.arch.msr & MSR_PR);
> -	stlbe->mas3 = (hpaddr & MAS3_RPN)
> -		| e500_shadow_mas3_attrib(gtlbe->mas3,
> -				vcpu_e500->vcpu.arch.msr & MSR_PR);
> -	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
> -}
> -
> -static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -	gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel)
> -{
> -	struct page *new_page;
> -	struct tlbe_ref *ref;
> -
> -	/* Get reference to new page. */
> -	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
> -	if (is_error_page(new_page)) {
> -		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
> -		kvm_release_page_clean(new_page);
> -		return;
> -	}
> -
> -	/* Drop reference to old page. */
> -	kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel);
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -	ref->page = new_page;
> -	ref->gtlbe = gtlbe;
> -}
> -
> -/* XXX only map the one-one case, for now use TLB0 */
> -static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel)
> -{
> -	struct tlbe *gtlbe;
> -
> -	gtlbe = &vcpu_e500->guest_tlb[0][esel];
> -
> -	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
> -			gtlbe, 0, esel);
> -
> -	return esel;
> -}
> -
> -/* Caller must ensure that the specified guest TLB entry is safe to insert into
> - * the shadow TLB. */
> -/* XXX for both one-one and one-to-many , for now use TLB1 */
> -static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> -		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
> -{
> -	unsigned int victim;
> -
> -	victim = vcpu_e500->guest_tlb_nv[1]++;
> -
> -	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
> -		vcpu_e500->guest_tlb_nv[1] = 0;
> -
> -	kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim);
> -
> -	return victim;
> -}
> -
> /* Invalidate all guest kernel mappings when enter usermode,
>  * so that when they fault back in they will get the
>  * proper permission bits. */
> @@ -445,6 +348,52 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
> 	return EMULATE_DONE;
> }
> 
> +static int kvmppc_e500_mmu_map_tlb0(struct kvmppc_vcpu_e500 *vcpu_e500,
> +                                    struct tlbe *gtlbe,
> +				    gfn_t gfn,
> +				    u64 gvaddr)
> +{
> +	struct page *new_page;
> +	struct tlbe_ref *ref;
> +	u32 esel;
> +	hpa_t hpaddr;
> +	struct tlbe stlbe;
> +
> +	/* Find related page. */
> +	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
> +	if (is_error_page(new_page)) {
> +		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", gfn);
> +		kvm_release_page_clean(new_page);
> +		return -1;
> +	}
> +
> +	hpaddr = page_to_phys(new_page);
> +
> +	/* Setup the shadow tlb entry */
> +	/* Force TS=1 IPROT=0 TSIZE=4KB for all mappings go to TLB0. */
> +	stlbe.mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
> +		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
> +	stlbe.mas2 = (gvaddr & MAS2_EPN)
> +		| e500_shadow_mas2_attrib(gtlbe->mas2,
> +				vcpu_e500->vcpu.arch.msr & MSR_PR);
> +	stlbe.mas3 = (hpaddr & MAS3_RPN)
> +		| e500_shadow_mas3_attrib(gtlbe->mas3,
> +				vcpu_e500->vcpu.arch.msr & MSR_PR);
> +	stlbe.mas7 = (hpaddr >> 32) & MAS7_RPN;
> +
> +	esel = host_tlb0_write(vcpu_e500, gvaddr, &stlbe);
> +
> +	ref = &vcpu_e500->host_tlb0_ref[get_tlb0_entry_offset(gvaddr, esel)];
> +
> +	/* Release old page and setup new page */
> +	kvmppc_e500_ref_release(ref);
> +
> +	ref->page = new_page;
> +	ref->gtlbe = gtlbe;
> +
> +	return 0;
> +}
> +
> int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> @@ -457,7 +406,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
> 
> 	if (get_tlb_v(gtlbe) && tlbsel == 1)
> -		kvmppc_e500_tlb1_invalidate(vcpu_e500, esel);
> +		_tlbil_all();
> 
> 	gtlbe->mas1 = vcpu_e500->mas1;
> 	gtlbe->mas2 = vcpu_e500->mas2;
> @@ -469,27 +418,15 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
> 
> 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
> 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
> -		struct tlbe stlbe;
> -		int stlbsel, sesel;
> -		struct tlbe_ref *ref;
> -		u64 eaddr;
> -
> 		switch (tlbsel) {
> 		case 0:
> 			/* TLB0 */
> 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
> 			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
> 
> -			eaddr =  get_tlb_eaddr(gtlbe);
> -			stlbsel = 0;
> -			sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel);
> -
> -			ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -
> -			kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe,
> -					        ref, eaddr, &stlbe);
> -			write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
> -
> +			kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe,
> +                                    get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
> +                                    get_tlb_eaddr(gtlbe));
> 			break;
> 
> 		case 1:
> @@ -552,37 +489,15 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
> 			unsigned int index)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> -	struct tlbe_ref *ref;
> -	struct tlbe *gtlbe, stlbe;
> +	struct tlbe *gtlbe;
> 	int tlbsel = tlbsel_of(index);
> 	int esel = esel_of(index);
> -	int stlbsel, sesel;
> +	gfn_t gfn;
> 
> 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
> +	gfn = gpaddr >> PAGE_SHIFT;
> 
> -	switch (tlbsel) {
> -	case 0:
> -		stlbsel = 0;
> -		sesel = esel;
> -		break;
> -
> -	case 1: {
> -		gfn_t gfn = gpaddr >> PAGE_SHIFT;
> -
> -		stlbsel = 1;
> -		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
> -		break;
> -	}
> -
> -	default:
> -		BUG();
> -		break;
> -	}
> -
> -	ref = &vcpu_e500->shadow_refs[stlbsel][sesel];
> -
> -	kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe);
> -	write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
> +	kvmppc_e500_mmu_map_tlb0(vcpu_e500, gtlbe, gfn, eaddr);
> }
> 
> int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
> @@ -621,8 +536,6 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
> 
> int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> {
> -	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
> -
> 	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
> 	vcpu_e500->guest_tlb[0] =
> 		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
> @@ -635,16 +548,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> 	if (vcpu_e500->guest_tlb[1] == NULL)
> 		goto err_out_guest0;
> 
> -	vcpu_e500->shadow_refs[0] = (struct tlbe_ref *)
> -		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
> -	if (vcpu_e500->shadow_refs[0] == NULL)
> +	vcpu_e500->host_tlb0_ref = kzalloc(sizeof(struct tlbe_ref) * host_tlb0_entry_num, GFP_KERNEL);
> +	if (vcpu_e500->host_tlb0_ref == NULL)
> 		goto err_out_guest1;
> 
> -	vcpu_e500->shadow_refs[1] = (struct tlbe_ref *)
> -		kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
> -	if (vcpu_e500->shadow_refs[1] = NULL)
> -		goto err_out_ref0;
> -
> 	/* Init TLB configuration register */
> 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
> 	vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
> @@ -653,8 +560,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
> 
> 	return 0;
> 
> -err_out_ref0:
> -	kfree(vcpu_e500->shadow_refs[0]);
> err_out_guest1:
> 	kfree(vcpu_e500->guest_tlb[1]);
> err_out_guest0:
> @@ -665,18 +570,27 @@ err_out:
> 
> void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
> {
> -	int stlbsel, i;
> +	int i;
> 
> 	/* release all pages */
> -	for (stlbsel = 0; stlbsel < 2; stlbsel++)
> -		for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++)
> -			kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i);
> +	for (i = 0; i < host_tlb0_entry_num; i++) {
> +		struct tlbe_ref *ref = &vcpu_e500->host_tlb0_ref[i];
> +
> +		kvmppc_e500_ref_release(ref);
> +	}
> 
> 	/* discard all guest mapping */
> 	_tlbil_all();
> 
> -	kfree(vcpu_e500->shadow_refs[1]);
> -	kfree(vcpu_e500->shadow_refs[0]);
> 	kfree(vcpu_e500->guest_tlb[1]);
> 	kfree(vcpu_e500->guest_tlb[0]);
> }
> +
> +int kvmppc_e500_mmu_init(void)
> +{
> +	host_tlb0_entry_num = mfspr(SPRN_TLB0CFG) & 0xFFF;
> +	host_tlb0_assoc = (mfspr(SPRN_TLB0CFG) >> 24) & 0xFF;
> +	for (; host_tlb0_assoc >> (host_tlb0_assoc_bit + 1); host_tlb0_assoc_bit++);

Ugh. While loop please.
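
Something like this, presumably - a sketch with the same semantics as the for loop above, not the committed fix:

	/* Compute log2 of the TLB0 associativity read from TLB0CFG. */
	host_tlb0_assoc_bit = 0;
	while (host_tlb0_assoc >> (host_tlb0_assoc_bit + 1))
		host_tlb0_assoc_bit++;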

The rest looks good from what I can tell - not being a BookE MMU expert, that is.
I'm still worried about the userspace switch, which results in a complete MMU flush. Couldn't we just allocate a new PID for that?
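
To make that concrete, a rough sketch of the idea - entirely hypothetical, the user_pid field and the helper are invented here, not taken from this series:

	/* Instead of _tlbil_all() on every guest kernel->user transition,
	 * give guest kernel and guest user mappings distinct host PIDs and
	 * just switch SPRN_PID on the transition. */
	static void kvmppc_e500_switch_to_user(struct kvmppc_vcpu_e500 *vcpu_e500)
	{
		/* user_pid would be allocated once from the host PID space. */
		mtspr(SPRN_PID, vcpu_e500->user_pid);
	}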


Alex


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
       [not found]               ` <4C8923D2.5070308-nmGgyN9QBj3QT0dZR+AlfA@public.gmane.org>
@ 2010-09-09 23:26                   ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-09 23:26 UTC (permalink / raw)
  To: Hollis Blanchard
  Cc: Liu Yu-B13201, kvm-u79uwXL29TY76Z2rM5mHXA,
	kvm-ppc-u79uwXL29TY76Z2rM5mHXA


On 09.09.2010, at 20:13, Hollis Blanchard wrote:

> On 09/09/2010 04:16 AM, Liu Yu-B13201 wrote:
>> Yes, it's hard to resume TLB0. We only resume TLB1 in the previous code.
>> But TLB1 is even smaller (13 free entries) than on 440,
>> so it still has little chance of getting hits;
>> thus the resumption is useless.
>>   
> The only reason hits are unlikely in TLB1 is because you still don't have large page support in the host. Once you have that, you can use TLB1 for large guest mappings, and it will become extremely likely that you get hits in TLB1. This is true even if the guest wants 256MB but the host supports only e.g. 16MB large pages, and must split the guest mapping into multiple large host pages.
> 
> When will you have hugetlbfs for e500? That's going to make such a dramatic difference, I'm not sure it's worth investing time in optimizing the MMU code until then.

I'm not sure I agree. Sure, huge pages give another big win, but the state as is should at least be fast enough for prototyping.


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
       [not found]                   ` <C1EBC75A-91B0-49D9-8AD5-571C6BAF22B7-l3A5Bk7waGM@public.gmane.org>
@ 2010-09-09 23:39                       ` Hollis Blanchard
  0 siblings, 0 replies; 46+ messages in thread
From: Hollis Blanchard @ 2010-09-09 23:39 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Liu Yu-B13201, kvm-u79uwXL29TY76Z2rM5mHXA,
	kvm-ppc-u79uwXL29TY76Z2rM5mHXA

On 09/09/2010 04:26 PM, Alexander Graf wrote:
> On 09.09.2010, at 20:13, Hollis Blanchard wrote:
>    
>> On 09/09/2010 04:16 AM, Liu Yu-B13201 wrote:
>>      
>>> Yes, it's hard to resume TLB0. We only resume TLB1 in the previous code.
>>> But TLB1 is even smaller (13 free entries) than on 440,
>>> so it still has little chance of getting hits;
>>> thus the resumption is useless.
>>>
>>>        
>> The only reason hits are unlikely in TLB1 is because you still don't have large page support in the host. Once you have that, you can use TLB1 for large guest mappings, and it will become extremely likely that you get hits in TLB1. This is true even if the guest wants 256MB but the host supports only e.g. 16MB large pages, and must split the guest mapping into multiple large host pages.
>>
>> When will you have hugetlbfs for e500? That's going to make such a dramatic difference, I'm not sure it's worth investing time in optimizing the MMU code until then.
>>      
> I'm not sure I agree. Sure, huge pages give another big win, but the state as is should at least be fast enough for prototyping.
>    
Sure, and it sounds like you can prototype with it already. My point is 
that, in your 80-20 rule of optimization, the 20% is going to change 
radically once large page support is in place.

Remember that the guest kernel is mapped with just a couple large pages. 
During guest Linux boot on 440, I think about half the boot time is 
spent TLB thrashing in the initcalls. Using TLB0 can ameliorate that for 
now, but why bother, since it doesn't help you towards the real solution?

I'm not saying this shouldn't be committed, if that's how you 
interpreted my comments, but in my opinion there are more useful things 
to do than continuing to optimize a path that is going to disappear in 
the future. Once you *do* have hugetlbfs in the host, you're not going 
to want to use TLB0 for guest TLB1 mappings any more anyways.

Hollis Blanchard
Mentor Graphics, Embedded Systems Division

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb
  2010-09-09 23:39                       ` Hollis Blanchard
@ 2010-09-09 23:42                         ` Alexander Graf
  -1 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-09 23:42 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: Liu Yu-B13201, kvm, kvm-ppc


On 10.09.2010, at 01:39, Hollis Blanchard wrote:

> On 09/09/2010 04:26 PM, Alexander Graf wrote:
>> On 09.09.2010, at 20:13, Hollis Blanchard wrote:
>>   
>>> On 09/09/2010 04:16 AM, Liu Yu-B13201 wrote:
>>>     
>>>> Yes, it's hard to resume TLB0. We only resume TLB1 in the previous code.
>>>> But TLB1 is even smaller (13 free entries) than on 440,
>>>> so it still has little chance of getting hits;
>>>> thus the resumption is useless.
>>>> 
>>>>       
>>> The only reason hits are unlikely in TLB1 is because you still don't have large page support in the host. Once you have that, you can use TLB1 for large guest mappings, and it will become extremely likely that you get hits in TLB1. This is true even if the guest wants 256MB but the host supports only e.g. 16MB large pages, and must split the guest mapping into multiple large host pages.
>>> 
>>> When will you have hugetlbfs for e500? That's going to make such a dramatic difference, I'm not sure it's worth investing time in optimizing the MMU code until then.
>>>     
>> I'm not sure I agree. Sure, huge pages give another big win, but the state as is should at least be fast enough for prototyping.
>>   
> Sure, and it sounds like you can prototype with it already. My point is that, in your 80-20 rule of optimization, the 20% is going to change radically once large page support is in place.
> 
> Remember that the guest kernel is mapped with just a couple large pages. During guest Linux boot on 440, I think about half the boot time is spent TLB thrashing in the initcalls. Using TLB0 can ameliorate that for now, but why bother, since it doesn't help you towards the real solution?
> 
> I'm not saying this shouldn't be committed, if that's how you interpreted my comments, but in my opinion there are more useful things to do than continuing to optimize a path that is going to disappear in the future. Once you *do* have hugetlbfs in the host, you're not going to want to use TLB0 for guest TLB1 mappings any more anyways.

That depends on the use cases. As long as there are no transparent huge pages available, not using hugetlbfs gives you a lot of benefit:

  - ksm
  - swapping
  - lazy allocation

So while I agree that supporting huge pages is crucial to high performance kvm, I'm not convinced it's the only path to optimize for. Look at x86 - few people actually use hugetlbfs there.


Alex


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 0/2] kvm/e500v2: MMU optimization
  2010-09-09 16:23             ` Hollis Blanchard
@ 2010-09-10  2:59               ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-10  2:59 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: kvm, kvm-ppc, agraf

 

> -----Original Message-----
> From: Hollis Blanchard [mailto:hollis_blanchard@mentor.com] 
> Sent: Friday, September 10, 2010 12:23 AM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org; agraf@suse.de
> Subject: Re: [PATCH 0/2] kvm/e500v2: MMU optimization
> 
> >>
> >>      
> > Hi Hollis,
> >
> > Guest uses AS=1 and host uses AS=0,
> > so even guest uses the same TID with host, they're in 
> different space.
> >
> > Then why guest needs to care about host TID?
> >
> >    
> You're absolutely right, but this makes a couple key assumptions:
> 1. The guest doesn't try to use AS=1. This is already false in Linux, 
> because the udbg code uses an AS=1 mapping for the UART, but 
> this can be 
> configured out (with a small loss in functionality). In 
> non-Linux guests 
> the AS=0 restriction could be onerous.

We could map (guest AS, guest TID) to (shadow TID),
so that we still don't need to bother the host.
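
As a rough illustration of that mapping - all names below are invented for the example, not from any posted patch:

	/* Hypothetical table: one shadow TID per (guest AS, guest TID)
	 * pair, so guest AS=1 mappings get their own shadow TID and
	 * never collide with what the host uses for AS=0. */
	struct shadow_tid_map {
		u8 shadow_tid[2][256];	/* indexed by [guest AS][guest TID] */
	};

	static u8 get_shadow_tid(struct shadow_tid_map *map, int as, u8 gtid)
	{
		/* A real implementation would allocate a free host TID on
		 * first use; this sketch just looks the value up. */
		return map->shadow_tid[as & 1][gtid];
	}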

> 2. A Book E MMU. If we participate in the host "MMU context" 
> allocation, 
> the guest -> host address space code could be generalized to include 
> e.g. an e600 guest on an e500 host, or vice versa.
> 

Hmm.. Not sure it's a real requirement.


Thanks,
Yu


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
       [not found]           ` <E759E1E3-7457-41B7-B36E-75FA42E107B1-l3A5Bk7waGM@public.gmane.org>
@ 2010-09-15  9:29               ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-15  9:29 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Liu Yu, kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA

Liu, ping?


On 10.09.2010, at 01:24, Alexander Graf wrote:

> 
> On 08.09.2010, at 11:40, Liu Yu wrote:
> 
>> Currently, guest TLB1 is mapped to host TLB1.
>> As the host kernel only provides non-contiguous 4K pages,
>> we have to break a large guest mapping into 4K shadow mappings.
>> These 4K shadow mappings are then mapped into host TLB1 on the fly.
>> As host TLB1 only has 13 free entries, there are serious TLB misses.
>> 
>> Since e500v2 has a large number of TLB0 entries,
>> it should help to map those 4K shadow mappings to host TLB0.
>> To achieve this, we need to unlink the guest TLB and the host TLB,
>> so that guest TLB1 mappings can route to any host TLB0 entries freely.
>> 
>> Pages/mappings are considered the same kind of thing as host TLB entries.
>> This patch removes the link between pages and the guest TLB entry to do the unlink,
>> and keeps host_tlb0_ref in each vcpu to trace pages.
>> Then it's easy to map guest TLB1 to host TLB0.
>> 
>> In a guest ramdisk boot test (the guest mainly uses TLB1),
>> with this patch the number of TLB misses drops by 90%.
>> 
>> Signed-off-by: Liu Yu <yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
>> ---
>> arch/powerpc/include/asm/kvm_e500.h |    7 +-
>> arch/powerpc/kvm/e500.c             |    4 +
>> arch/powerpc/kvm/e500_tlb.c         |  280 ++++++++++++-----------------------
>> arch/powerpc/kvm/e500_tlb.h         |    1 +
>> 4 files changed, 104 insertions(+), 188 deletions(-)

[snip]


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
  2010-09-09 23:24           ` Alexander Graf
@ 2010-09-16 11:35             ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-16 11:35 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm, kvm-ppc

 

> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de] 
> Sent: Friday, September 10, 2010 7:24 AM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
> 
> 
> On 08.09.2010, at 11:40, Liu Yu wrote:
> 
> > Currently, guest TLB1 is mapped to host TLB1.
> > As the host kernel only provides non-contiguous 4K pages,
> > we have to break a large guest mapping into 4K shadow mappings.
> > These 4K shadow mappings are then mapped into host TLB1 on the fly.
> > As host TLB1 only has 13 free entries, there are serious TLB misses.
> > 
> > Since e500v2 has a large number of TLB0 entries,
> > it should help to map those 4K shadow mappings to host TLB0.
> > To achieve this, we need to unlink the guest TLB and the host TLB,
> > so that guest TLB1 mappings can route to any host TLB0 entries freely.
> > 
> > Pages/mappings are considered the same kind of thing as host TLB entries.
> > This patch removes the link between pages and the guest TLB entry to do the unlink,
> > and keeps host_tlb0_ref in each vcpu to trace pages.
> > Then it's easy to map guest TLB1 to host TLB0.
> > 
> > In a guest ramdisk boot test (the guest mainly uses TLB1),
> > with this patch the number of TLB misses drops by 90%.
> > 
> > Signed-off-by: Liu Yu <yu.liu@freescale.com>
> > ---
> > arch/powerpc/include/asm/kvm_e500.h |    7 +-
> > arch/powerpc/kvm/e500.c             |    4 +
> > arch/powerpc/kvm/e500_tlb.c         |  280 
> ++++++++++++-----------------------
> > arch/powerpc/kvm/e500_tlb.h         |    1 +
> > 4 files changed, 104 insertions(+), 188 deletions(-)
> > 

> 
> > 
> > -static unsigned int tlb1_entry_num;
> > +static inline unsigned int get_tlb0_entry_offset(u32 
> eaddr, u32 esel)
> > +{
> > +	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
> > +			(esel & (host_tlb0_assoc - 1)));
> > +}
> > 
> > void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
> > {
> > @@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
> > 	return victim;
> > }
> > 
> > -static inline unsigned int tlb1_max_shadow_size(void)
> > -{
> > -	return tlb1_entry_num - tlbcam_index;
> > -}
> > -
> > static inline int tlbe_is_writable(struct tlbe *tlbe)
> > {
> > 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
> > @@ -100,7 +101,7 @@ static inline u32 
> e500_shadow_mas2_attrib(u32 mas2, int usermode)
> > /*
> >  * writing shadow tlb entry to host TLB
> >  */
> > -static inline void __write_host_tlbe(struct tlbe *stlbe)
> > +static inline void __host_tlbe_write(struct tlbe *stlbe)
> > {
> > 	mtspr(SPRN_MAS1, stlbe->mas1);
> > 	mtspr(SPRN_MAS2, stlbe->mas2);
> > @@ -109,25 +110,22 @@ static inline void 
> __write_host_tlbe(struct tlbe *stlbe)
> > 	__asm__ __volatile__ ("tlbwe\n" : : );
> > }
> > 
> > -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
> *vcpu_e500,
> > -		int tlbsel, int esel, struct tlbe *stlbe)
> > +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 
> *vcpu_e500,
> > +                                   u32 gvaddr, struct tlbe *stlbe)
> > {
> > -	local_irq_disable();
> > -	if (tlbsel == 0) {
> > -		__write_host_tlbe(stlbe);
> > -	} else {
> > -		unsigned register mas0;
> > +	unsigned register mas0;
> > 
> > -		mas0 = mfspr(SPRN_MAS0);
> > +	local_irq_disable();
> 
> Do you have to disable interrupts for a tlb write? If you get 
> preempted before the write, the entry you overwrite could be 
> different. But you don't protect against that either way. And 
> if you get preempted afterwards, you could lose the entry. 
> But since you enable interrupts beyond that, that could 
> happen either way too.
> 
> So what's the reason for the disable here?
> 

Hello Alex,

Doesn't local_irq_disable() also disable preempt?
The reason to disable interrupts is that it's possible to take TLB
misses in interrupt service routines.
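
The window in question is the MAS update sequence in the helper quoted above - MAS3/MAS7 are added here only to complete the sketch:

	mtspr(SPRN_MAS1, stlbe->mas1);	/* an interrupt taken here may    */
	mtspr(SPRN_MAS2, stlbe->mas2);	/* itself miss in the TLB, and    */
	mtspr(SPRN_MAS3, stlbe->mas3);	/* the miss handler clobbers the  */
	mtspr(SPRN_MAS7, stlbe->mas7);	/* MAS registers, so the tlbwe    */
	__asm__ __volatile__ ("tlbwe\n" : : );	/* would write a mixed-up entry. */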


Thanks,
Yu


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
       [not found]             ` <A9833F0F7901024DA08417AA5A9887D921DD39-bKEhWGtIRUJ4Lp7cDGe+DVjVikpgYyvb5NbjCUgZEJk@public.gmane.org>
@ 2010-09-16 11:44                 ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-16 11:44 UTC (permalink / raw)
  To: Liu Yu-B13201; +Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA

Liu Yu-B13201 wrote:
>  
>
>   
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf-l3A5Bk7waGM@public.gmane.org] 
>> Sent: Friday, September 10, 2010 7:24 AM
>> To: Liu Yu-B13201
>> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>>
>>
>> On 08.09.2010, at 11:40, Liu Yu wrote:
>>
>>     
>>> Currently, guest TLB1 is mapped to host TLB1.
>>> As the host kernel only provides non-contiguous 4K pages,
>>> we have to break a large guest mapping into 4K shadow mappings.
>>> These 4K shadow mappings are then mapped into host TLB1 on the fly.
>>> As host TLB1 only has 13 free entries, there are serious TLB misses.
>>>
>>> Since e500v2 has a large number of TLB0 entries,
>>> it should help to map those 4K shadow mappings to host TLB0.
>>> To achieve this, we need to unlink the guest TLB and the host TLB,
>>> so that guest TLB1 mappings can route to any host TLB0 entries freely.
>>>
>>> Pages/mappings are considered the same kind of thing as host TLB entries.
>>> This patch removes the link between pages and the guest TLB entry to do the unlink,
>>> and keeps host_tlb0_ref in each vcpu to trace pages.
>>> Then it's easy to map guest TLB1 to host TLB0.
>>>
>>> In a guest ramdisk boot test (the guest mainly uses TLB1),
>>> with this patch the number of TLB misses drops by 90%.
>>>
>>> Signed-off-by: Liu Yu <yu.liu-KZfg59tc24xl57MIdRCFDg@public.gmane.org>
>>> ---
>>> arch/powerpc/include/asm/kvm_e500.h |    7 +-
>>> arch/powerpc/kvm/e500.c             |    4 +
>>> arch/powerpc/kvm/e500_tlb.c         |  280 
>>>       
>> ++++++++++++-----------------------
>>     
>>> arch/powerpc/kvm/e500_tlb.h         |    1 +
>>> 4 files changed, 104 insertions(+), 188 deletions(-)
>>>
>>>       
>
>   
>>> -static unsigned int tlb1_entry_num;
>>> +static inline unsigned int get_tlb0_entry_offset(u32 
>>>       
>> eaddr, u32 esel)
>>     
>>> +{
>>> +	return ((eaddr & 0x7F000) >> (12 - host_tlb0_assoc_bit) |
>>> +			(esel & (host_tlb0_assoc - 1)));
>>> +}
>>>
>>> void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
>>> {
>>> @@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
>>> 	return victim;
>>> }
>>>
>>> -static inline unsigned int tlb1_max_shadow_size(void)
>>> -{
>>> -	return tlb1_entry_num - tlbcam_index;
>>> -}
>>> -
>>> static inline int tlbe_is_writable(struct tlbe *tlbe)
>>> {
>>> 	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
>>> @@ -100,7 +101,7 @@ static inline u32 
>>>       
>> e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>     
>>> /*
>>>  * writing shadow tlb entry to host TLB
>>>  */
>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
>>> {
>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
>>> @@ -109,25 +110,22 @@ static inline void 
>>>       
>> __write_host_tlbe(struct tlbe *stlbe)
>>     
>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
>>> }
>>>
>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
>>>       
>> *vcpu_e500,
>>     
>>> -		int tlbsel, int esel, struct tlbe *stlbe)
>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 
>>>       
>> *vcpu_e500,
>>     
>>> +                                   u32 gvaddr, struct tlbe *stlbe)
>>> {
>>> -	local_irq_disable();
>>> -	if (tlbsel == 0) {
>>> -		__write_host_tlbe(stlbe);
>>> -	} else {
>>> -		unsigned register mas0;
>>> +	unsigned register mas0;
>>>
>>> -		mas0 = mfspr(SPRN_MAS0);
>>> +	local_irq_disable();
>>>       
>> Do you have to disable interrupts for a tlb write? If you get 
>> preempted before the write, the entry you overwrite could be 
>> different. But you don't protect against that either way. And 
>> if you get preempted afterwards, you could lose the entry. 
>> But since you enable interrupts beyond that, that could 
>> happen either way too.
>>
>> So what's the reason for the disable here?
>>
>>     
>
> Hello Alex,
>
> Doesn't local_irq_disable() also disable preempt?
> The reason to disable interrupts is that it's possible to take TLB
> misses in interrupt service routines.
>   

Yes, local_irq_disable disables preempt. That's exactly what I was
referring to :).
I don't understand the statement about the service routines. A TLB miss
still arrives with local_irq_disable.


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
  2010-09-16 11:44                 ` Alexander Graf
@ 2010-09-17  8:47                   ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-17  8:47 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm, kvm-ppc

 

> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org 
> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Thursday, September 16, 2010 7:44 PM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
> 
> >>     
> >>> /*
> >>>  * writing shadow tlb entry to host TLB
> >>>  */
> >>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
> >>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
> >>> {
> >>> 	mtspr(SPRN_MAS1, stlbe->mas1);
> >>> 	mtspr(SPRN_MAS2, stlbe->mas2);
> >>> @@ -109,25 +110,22 @@ static inline void 
> >>>       
> >> __write_host_tlbe(struct tlbe *stlbe)
> >>     
> >>> 	__asm__ __volatile__ ("tlbwe\n" : : );
> >>> }
> >>>
> >>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
> >>>       
> >> *vcpu_e500,
> >>     
> >>> -		int tlbsel, int esel, struct tlbe *stlbe)
> >>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 
> >>>       
> >> *vcpu_e500,
> >>     
> >>> +                                   u32 gvaddr, struct 
> tlbe *stlbe)
> >>> {
> >>> -	local_irq_disable();
> >>> -	if (tlbsel == 0) {
> >>> -		__write_host_tlbe(stlbe);
> >>> -	} else {
> >>> -		unsigned register mas0;
> >>> +	unsigned register mas0;
> >>>
> >>> -		mas0 = mfspr(SPRN_MAS0);
> >>> +	local_irq_disable();
> >>>       
> >> Do you have to disable interrupts for a tlb write? If you get 
> >> preempted before the write, the entry you overwrite could be 
> >> different. But you don't protect against that either way. And 
> >> if you get preempted afterwards, you could lose the entry. 
> >> But since you enable interrupts beyond that, that could 
> >> happen either way too.
> >>
> >> So what's the reason for the disable here?
> >>
> >>     
> >
> > Hello Alex,
> >
> > Doesn't local_irq_disable() also disable preempt?
> > The reason to disable interrupts is that it's possible to take TLB
> > misses in interrupt service routines.
> >   
> 
> Yes, local_irq_disable disables preempt. That's exactly what I was
> referring to :).
> I don't understand the statement about the service routines. A TLB miss
> still arrives with local_irq_disable.
> 
> 

local_irq_disable is used here to make sure we update the MAS registers atomically.
If we get preempted while updating part of them,
we may write a wrong TLB entry to hardware.

And local_irq_disable blocks external and decrementer interrupts.
Although it's unlikely to take a TLB miss in the external or decrementer
interrupt handlers of the current kernel, the possibility is still there.


Thanks,
Yu


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
       [not found]                   ` <A9833F0F7901024DA08417AA5A9887D921DE82-bKEhWGtIRUJ4Lp7cDGe+DVjVikpgYyvb5NbjCUgZEJk@public.gmane.org>
@ 2010-09-17 10:19                       ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-17 10:19 UTC (permalink / raw)
  To: Liu Yu-B13201; +Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA


On 17.09.2010, at 10:47, Liu Yu-B13201 wrote:

> 
> 
>> -----Original Message-----
>> From: kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org 
>> [mailto:kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Alexander Graf
>> Sent: Thursday, September 16, 2010 7:44 PM
>> To: Liu Yu-B13201
>> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>> 
>>>> 
>>>>> /*
>>>>> * writing shadow tlb entry to host TLB
>>>>> */
>>>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
>>>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
>>>>> {
>>>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
>>>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
>>>>> @@ -109,25 +110,22 @@ static inline void 
>>>>> 
>>>> __write_host_tlbe(struct tlbe *stlbe)
>>>> 
>>>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
>>>>> }
>>>>> 
>>>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
>>>>> 
>>>> *vcpu_e500,
>>>> 
>>>>> -		int tlbsel, int esel, struct tlbe *stlbe)
>>>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 
>>>>> 
>>>> *vcpu_e500,
>>>> 
>>>>> +                                   u32 gvaddr, struct 
>> tlbe *stlbe)
>>>>> {
>>>>> -	local_irq_disable();
>>>>> -	if (tlbsel == 0) {
>>>>> -		__write_host_tlbe(stlbe);
>>>>> -	} else {
>>>>> -		unsigned register mas0;
>>>>> +	unsigned register mas0;
>>>>> 
>>>>> -		mas0 = mfspr(SPRN_MAS0);
>>>>> +	local_irq_disable();
>>>>> 
>>>> Do you have to disable interrupts for a tlb write? If you get 
>>>> preempted before the write, the entry you overwrite could be 
>>>> different. But you don't protect against that either way. And 
>>>> if you get preempted afterwards, you could lose the entry. 
>>>> But since you enable interrupts beyond that, that could 
>>>> happen either way too.
>>>> 
>>>> So what's the reason for the disable here?
>>>> 
>>>> 
>>> 
>>> Hello Alex,
>>> 
>>> Doesn't local_irq_disable() also disable preempt?
>>> The reason to disable interrupts is that it's possible to take TLB
>>> misses in interrupt service routines.
>>> 
>> 
>> Yes, local_irq_disable disables preempt. That's exactly what I was
>> referring to :).
>> I don't understand the statement about the service routines. A TLB miss
>> still arrives with local_irq_disable.
>> 
>> 
> 
> local_irq_disable is used here to make sure we update the MAS registers atomically.
> If we get preempted while updating part of them,
> we may write a wrong TLB entry to hardware.

I see, makes sense. Doing the mfspr of MAS0 in an irq-disabled section doesn't make sense to me though. Also, wouldn't it be better to do the irq disabling inside of __host_tlbe_write using irqsave, not the explicit enable/disable?
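
For reference, a sketch of that irqsave variant, based on the helper quoted above (untested, just to illustrate the shape):

	/* Keep the MAS updates and the tlbwe atomic inside the helper
	 * itself; callers then need no explicit disable/enable pair. */
	static inline void __host_tlbe_write(struct tlbe *stlbe)
	{
		unsigned long flags;

		local_irq_save(flags);
		mtspr(SPRN_MAS1, stlbe->mas1);
		mtspr(SPRN_MAS2, stlbe->mas2);
		mtspr(SPRN_MAS3, stlbe->mas3);
		mtspr(SPRN_MAS7, stlbe->mas7);
		__asm__ __volatile__ ("tlbwe\n" : : );
		local_irq_restore(flags);
	}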

> And local_irq_disable blocks external and decrementer interrupts.
> Although it's unlikely to take a TLB miss in the external or decrementer
> interrupt handlers of the current kernel, the possibility is still there.

Yup :).


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
  2010-09-17 10:19                       ` Alexander Graf
@ 2010-09-17 11:28                         ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-17 11:28 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm, kvm-ppc

 

> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de] 
> Sent: Friday, September 17, 2010 6:20 PM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
> 
> 
> On 17.09.2010, at 10:47, Liu Yu-B13201 wrote:
> 
> > 
> > 
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org 
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Thursday, September 16, 2010 7:44 PM
> >> To: Liu Yu-B13201
> >> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
> host TLB0
> >> 
> >>>> 
> >>>>> /*
> >>>>> * writing shadow tlb entry to host TLB
> >>>>> */
> >>>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
> >>>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
> >>>>> {
> >>>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
> >>>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
> >>>>> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
> >>>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
> >>>>> }
> >>>>> 
> >>>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> >>>>> -		int tlbsel, int esel, struct tlbe *stlbe)
> >>>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
> >>>>> +                                   u32 gvaddr, struct tlbe *stlbe)
> >>>>> {
> >>>>> -	local_irq_disable();
> >>>>> -	if (tlbsel == 0) {
> >>>>> -		__write_host_tlbe(stlbe);
> >>>>> -	} else {
> >>>>> -		unsigned register mas0;
> >>>>> +	unsigned register mas0;
> >>>>> 
> >>>>> -		mas0 = mfspr(SPRN_MAS0);
> >>>>> +	local_irq_disable();
> >>>>> 
> >>>> Do you have to disable interrupts for a tlb write? If you get 
> >>>> preempted before the write, the entry you overwrite could be 
> >>>> different. But you don't protect against that either way. And 
> >>>> if you get preempted afterwards, you could lose the entry. 
> >>>> But since you enable interrupts beyond that, that could 
> >>>> happen either way too.
> >>>> 
> >>>> So what's the reason for the disable here?
> >>>> 
> >>>> 
> >>> 
> >>> Hello Alex,
> >>> 
> >>> Doesn't local_irq_disable() also disable preempt?
> >>> The reason to disable interrupts is that it's possible to have tlb
> >>> misses in an interrupt service routine.
> >>> 
> >> 
> >> Yes, local_irq_disable disables preempt. That's exactly what I was
> >> referring to :).
> >> I don't understand the statement about the service routine. A tlb miss
> >> still arrives with local_irq_disable.
> >> 
> >> 
> > 
> > Using local_irq_disable here is to make sure we update masN atomically.
> > If we get preempted when we have updated only part of masN,
> > then we may write a wrong TLB entry to hardware.
> 
> I see, makes sense. Doing the mfmsr of MAS0 in an irq 
> disabled section doesn't make sense to me though. 

Hardware chooses the position and places the TLB entry by itself.
I need to get the position from MAS0,
so that I know exactly which old entry is overridden.

> Also, wouldn't it be better to do the irq disabling inside of 
> __host_tlbe_write using irqsave, not the explicit enable/disable?
> 

Oh? What benefit can we get from this?


Yu
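
To illustrate the flow Yu describes, a rough sketch under assumptions (the
victim slot is taken from MAS0 as set up by hardware; the parameters only
mirror the signature in the quoted diff, this is not the actual patch):

static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
				  u32 gvaddr, struct tlbe *stlbe)
{
	unsigned register mas0;

	local_irq_disable();
	/* Hardware has loaded MAS0 with the TLB0 slot it is going to
	 * replace; read it before tlbwe so we know exactly which old
	 * entry gets overwritten. */
	mas0 = mfspr(SPRN_MAS0);
	__host_tlbe_write(stlbe);
	local_irq_enable();

	return mas0;
}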


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
       [not found]                         ` <A9833F0F7901024DA08417AA5A9887D921DE99-bKEhWGtIRUJ4Lp7cDGe+DVjVikpgYyvb5NbjCUgZEJk@public.gmane.org>
@ 2010-09-17 11:34                             ` Alexander Graf
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-17 11:34 UTC (permalink / raw)
  To: Liu Yu-B13201; +Cc: kvm-u79uwXL29TY76Z2rM5mHXA, kvm-ppc-u79uwXL29TY76Z2rM5mHXA


On 17.09.2010, at 13:28, Liu Yu-B13201 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf-l3A5Bk7waGM@public.gmane.org] 
>> Sent: Friday, September 17, 2010 6:20 PM
>> To: Liu Yu-B13201
>> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>> 
>> 
>> On 17.09.2010, at 10:47, Liu Yu-B13201 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org 
>>>> [mailto:kvm-ppc-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Alexander Graf
>>>> Sent: Thursday, September 16, 2010 7:44 PM
>>>> To: Liu Yu-B13201
>>>> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; kvm-ppc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>>>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
>> host TLB0
>>>> 
>>>>>> 
>>>>>>> /*
>>>>>>> * writing shadow tlb entry to host TLB
>>>>>>> */
>>>>>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
>>>>>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
>>>>>>> {
>>>>>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
>>>>>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
>>>>>>> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
>>>>>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
>>>>>>> }
>>>>>>> 
>>>>>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
>>>>>>> -		int tlbsel, int esel, struct tlbe *stlbe)
>>>>>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
>>>>>>> +                                   u32 gvaddr, struct tlbe *stlbe)
>>>>>>> {
>>>>>>> -	local_irq_disable();
>>>>>>> -	if (tlbsel == 0) {
>>>>>>> -		__write_host_tlbe(stlbe);
>>>>>>> -	} else {
>>>>>>> -		unsigned register mas0;
>>>>>>> +	unsigned register mas0;
>>>>>>> 
>>>>>>> -		mas0 = mfspr(SPRN_MAS0);
>>>>>>> +	local_irq_disable();
>>>>>>> 
>>>>>> Do you have to disable interrupts for a tlb write? If you get 
>>>>>> preempted before the write, the entry you overwrite could be 
>>>>>> different. But you don't protect against that either way. And 
>>>>>> if you get preempted afterwards, you could lose the entry. 
>>>>>> But since you enable interrupts beyond that, that could 
>>>>>> happen either way too.
>>>>>> 
>>>>>> So what's the reason for the disable here?
>>>>>> 
>>>>>> 
>>>>> 
>>>>> Hello Alex,
>>>>> 
>>>>> Doesn't local_irq_disable() also disable preempt?
>>>>> The reason to disable interrupts is that it's possible to have tlb
>>>>> misses in an interrupt service routine.
>>>>> 
>>>> 
>>>> Yes, local_irq_disable disables preempt. That's exactly what I was
>>>> referring to :).
>>>> I don't understand the statement about the service routine. A tlb miss
>>>> still arrives with local_irq_disable.
>>>> 
>>>> 
>>> 
>>> Using local_irq_disable here is to make sure we update masN atomically.
>>> If we get preempted when we have updated only part of masN,
>>> then we may write a wrong TLB entry to hardware.
>> 
>> I see, makes sense. Doing the mfmsr of MAS0 in an irq 
>> disabled section doesn't make sense to me though. 
> 
> Hardware chooses the position and places the TLB entry by itself.
> I need to get the position from MAS0,
> so that I know exactly which old entry is overridden.

Huh? But you do the mfmsr before the tlb write? Or does MAS0 contain the "next" tlb pointer?

> 
>> Also, wouldn't it be better to do the irq disabling inside of 
>> __host_tlbe_write using irqsave, not the explicit enable/disable?
>> 
> 
> Oh? What benefit can we get from this?

Cleaner structure. If writing a tlb entry needs to happen atomically, the function you call should be atomic. In your case that's reasonably simple to do. Also, you could return MAS0 in that function. That would make things a lot more obvious. While you're at it, a comment explaining what the function does doesn't hurt either :)


Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
  2010-09-17 11:34                             ` Alexander Graf
@ 2010-09-17 12:33                               ` Liu Yu-B13201
  -1 siblings, 0 replies; 46+ messages in thread
From: Liu Yu-B13201 @ 2010-09-17 12:33 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm, kvm-ppc

 

> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de] 
> Sent: Friday, September 17, 2010 7:34 PM
> To: Liu Yu-B13201
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
> 
> 
> On 17.09.2010, at 13:28, Liu Yu-B13201 wrote:
> 
> > 
> > 
> >> -----Original Message-----
> >> From: Alexander Graf [mailto:agraf@suse.de] 
> >> Sent: Friday, September 17, 2010 6:20 PM
> >> To: Liu Yu-B13201
> >> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
> host TLB0
> >> 
> >> 
> >> On 17.09.2010, at 10:47, Liu Yu-B13201 wrote:
> >> 
> >>> 
> >>> 
> >>>> -----Original Message-----
> >>>> From: kvm-ppc-owner@vger.kernel.org 
> >>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of 
> Alexander Graf
> >>>> Sent: Thursday, September 16, 2010 7:44 PM
> >>>> To: Liu Yu-B13201
> >>>> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
> >>>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
> >> host TLB0
> >>>> 
> >>>>>> 
> >>>>>>> /*
> >>>>>>> * writing shadow tlb entry to host TLB
> >>>>>>> */
> >>>>>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
> >>>>>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
> >>>>>>> {
> >>>>>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
> >>>>>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
> >>>>>>> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
> >>>>>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
> >>>>>>> }
> >>>>>>> 
> >>>>>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
> >>>>>>> -		int tlbsel, int esel, struct tlbe *stlbe)
> >>>>>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
> >>>>>>> +                                   u32 gvaddr, struct tlbe *stlbe)
> >>>>>>> {
> >>>>>>> -	local_irq_disable();
> >>>>>>> -	if (tlbsel == 0) {
> >>>>>>> -		__write_host_tlbe(stlbe);
> >>>>>>> -	} else {
> >>>>>>> -		unsigned register mas0;
> >>>>>>> +	unsigned register mas0;
> >>>>>>> 
> >>>>>>> -		mas0 = mfspr(SPRN_MAS0);
> >>>>>>> +	local_irq_disable();
> >>>>>>> 
> >>>>>> Do you have to disable interrupts for a tlb write? If you get 
> >>>>>> preempted before the write, the entry you overwrite could be 
> >>>>>> different. But you don't protect against that either way. And 
> >>>>>> if you get preempted afterwards, you could lose the entry. 
> >>>>>> But since you enable interrupts beyond that, that could 
> >>>>>> happen either way too.
> >>>>>> 
> >>>>>> So what's the reason for the disable here?
> >>>>>> 
> >>>>>> 
> >>>>> 
> >>>>> Hello Alex,
> >>>>> 
> >>>>> Doesn't local_irq_disable() also disable preempt?
> >>>>> The reason to disable interrupts is that it's possible to have tlb
> >>>>> misses in an interrupt service routine.
> >>>>> 
> >>>> 
> >>>> Yes, local_irq_disable disables preempt. That's exactly 
> what I was
> >>>> referring to :).
> >>>> I don't understand the statement about the service routine. A tlb miss
> >>>> still arrives with local_irq_disable.
> >>>> 
> >>>> 
> >>> 
> >>> Using local_irq_disable here is to make sure we update masN atomically.
> >>> If we get preempted when we have updated only part of masN,
> >>> then we may write a wrong TLB entry to hardware.
> >> 
> >> I see, makes sense. Doing the mfmsr of MAS0 in an irq 
> >> disabled section doesn't make sense to me though. 
> > 
> > Hardware chooses the position and places the TLB entry by itself.
> > I need to get the position from MAS0,
> > so that I know exactly which old entry is overridden.
> 
> Huh? But you do the mfmsr before the tlb write? Or does MAS0 
> contain the "next" tlb pointer?

There's no mfmsr.
MAS0 contains the position we are going to write the entry to.

> 
> > 
> >> Also, wouldn't it be better to do the irq disabling inside of 
> >> __host_tlbe_write using irqsave, not the explicit enable/disable?
> >> 
> > 
> > Oh? What benefit can we get from this?
> 
> Cleaner structure. If writing a tlb entry needs to happen 
> atomically, the function you call should be atomic. In your 
> case that's reasonably simple to do. Also, you could return 
> MAS0 in that function. That would make things a lot more 
> obvious. While you're at it, a comment explaining what the 
> function does doesn't hurt either :)
> 

Sounds reasonable. :)
But why use irqsave?


Yu


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
  2010-09-17 12:33                               ` Liu Yu-B13201
@ 2010-09-17 12:34                                 ` Alexander Graf
  -1 siblings, 0 replies; 46+ messages in thread
From: Alexander Graf @ 2010-09-17 12:34 UTC (permalink / raw)
  To: Liu Yu-B13201; +Cc: kvm, kvm-ppc


On 17.09.2010, at 14:33, Liu Yu-B13201 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de] 
>> Sent: Friday, September 17, 2010 7:34 PM
>> To: Liu Yu-B13201
>> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0
>> 
>> 
>> On 17.09.2010, at 13:28, Liu Yu-B13201 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de] 
>>>> Sent: Friday, September 17, 2010 6:20 PM
>>>> To: Liu Yu-B13201
>>>> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
>>>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
>> host TLB0
>>>> 
>>>> 
>>>> On 17.09.2010, at 10:47, Liu Yu-B13201 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org 
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of 
>> Alexander Graf
>>>>>> Sent: Thursday, September 16, 2010 7:44 PM
>>>>>> To: Liu Yu-B13201
>>>>>> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org
>>>>>> Subject: Re: [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to 
>>>> host TLB0
>>>>>> 
>>>>>>>> 
>>>>>>>>> /*
>>>>>>>>> * writing shadow tlb entry to host TLB
>>>>>>>>> */
>>>>>>>>> -static inline void __write_host_tlbe(struct tlbe *stlbe)
>>>>>>>>> +static inline void __host_tlbe_write(struct tlbe *stlbe)
>>>>>>>>> {
>>>>>>>>> 	mtspr(SPRN_MAS1, stlbe->mas1);
>>>>>>>>> 	mtspr(SPRN_MAS2, stlbe->mas2);
>>>>>>>>> @@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
>>>>>>>>> 	__asm__ __volatile__ ("tlbwe\n" : : );
>>>>>>>>> }
>>>>>>>>> 
>>>>>>>>> -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
>>>>>>>>> -		int tlbsel, int esel, struct tlbe *stlbe)
>>>>>>>>> +static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
>>>>>>>>> +                                   u32 gvaddr, struct tlbe *stlbe)
>>>>>>>>> {
>>>>>>>>> -	local_irq_disable();
>>>>>>>>> -	if (tlbsel == 0) {
>>>>>>>>> -		__write_host_tlbe(stlbe);
>>>>>>>>> -	} else {
>>>>>>>>> -		unsigned register mas0;
>>>>>>>>> +	unsigned register mas0;
>>>>>>>>> 
>>>>>>>>> -		mas0 = mfspr(SPRN_MAS0);
>>>>>>>>> +	local_irq_disable();
>>>>>>>>> 
>>>>>>>> Do you have to disable interrupts for a tlb write? If you get 
>>>>>>>> preempted before the write, the entry you overwrite could be 
>>>>>>>> different. But you don't protect against that either way. And 
>>>>>>>> if you get preempted afterwards, you could lose the entry. 
>>>>>>>> But since you enable interrupts beyond that, that could 
>>>>>>>> happen either way too.
>>>>>>>> 
>>>>>>>> So what's the reason for the disable here?
>>>>>>>> 
>>>>>>>> 
>>>>>>> 
>>>>>>> Hello Alex,
>>>>>>> 
>>>>>>> Doesn't local_irq_disable() also disable preempt?
>>>>>>> The reason to disable interrupts is that it's possible to have tlb
>>>>>>> misses in an interrupt service routine.
>>>>>>> 
>>>>>> 
>>>>>> Yes, local_irq_disable disables preempt. That's exactly 
>> what I was
>>>>>> referring to :).
>>>>>> I don't understand the statement about the service routine. A tlb miss
>>>>>> still arrives with local_irq_disable.
>>>>>> 
>>>>>> 
>>>>> 
>>>>> Using local_irq_disable here is to make sure we update masN atomically.
>>>>> If we get preempted when we have updated only part of masN,
>>>>> then we may write a wrong TLB entry to hardware.
>>>> 
>>>> I see, makes sense. Doing the mfmsr of MAS0 in an irq 
>>>> disabled section doesn't make sense to me though. 
>>> 
>>> Hardware chooses the position and places the TLB entry by itself.
>>> I need to get the position from MAS0,
>>> so that I know exactly which old entry is overridden.
>> 
>> Huh? But you do the mfmsr before the tlb write? Or does MAS0 
>> contain the "next" tlb pointer?
> 
> There's no mfmsr.
> MAS0 contains the position we are going to write the entry to.
> 
>> 
>>> 
>>>> Also, wouldn't it be better to do the irq disabling inside of 
>>>> __host_tlbe_write using irqsave, not the explicit enable/disable?
>>>> 
>>> 
>>> Oh? What benefit can we get from this?
>> 
>> Cleaner structure. If writing a tlb entry needs to happen 
>> atomically, the function you call should be atomic. In your 
>> case that's reasonably simple to do. Also, you could return 
>> MAS0 in that function. That would make things a lot more 
>> obvious. While you're at it, a comment explaining what the 
>> function does doesn't hurt either :)
>> 
> 
> Sounds reasonable. :)
> But why use irqsave?

So the same function can be used from an irq disabled and an irq enabled context. Explicit irq_disable/irq_enable calls don't stack well ;)

Alex
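
Putting the thread's conclusion together, a hedged sketch of what such a
self-contained helper could look like (the exact names and the MAS3/MAS7
stores are assumptions, not the final patch):

/*
 * Atomically stage a shadow entry in the MAS registers and commit it
 * with tlbwe. Returns MAS0 so the caller can tell which hardware entry
 * was replaced. local_irq_save/local_irq_restore make the function
 * safe to call whether or not the caller already disabled interrupts.
 */
static inline u32 __host_tlbe_write(struct tlbe *stlbe)
{
	unsigned long flags;
	u32 mas0;

	local_irq_save(flags);
	mas0 = mfspr(SPRN_MAS0);
	mtspr(SPRN_MAS1, stlbe->mas1);
	mtspr(SPRN_MAS2, stlbe->mas2);
	mtspr(SPRN_MAS3, stlbe->mas3);
	mtspr(SPRN_MAS7, stlbe->mas7);
	__asm__ __volatile__ ("tlbwe\n" : : );
	local_irq_restore(flags);

	return mas0;
}

The caller can then record the returned MAS0 value in its own bookkeeping,
so it knows which host entry now shadows which guest mapping.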


^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread

Thread overview: 46+ messages
2010-09-08  9:40 [PATCH 0/2] kvm/e500v2: MMU optimization Liu Yu
2010-09-08  9:40   ` [PATCH 1/2] kvm/e500v2: Remove shadow tlb Liu Yu
2010-09-08  9:40     ` [PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0 Liu Yu
2010-09-09 23:24         ` Alexander Graf
2010-09-15  9:29             ` Alexander Graf
2010-09-16 11:35           ` Liu Yu-B13201
2010-09-16 11:44               ` Alexander Graf
2010-09-17  8:47                 ` Liu Yu-B13201
2010-09-17 10:19                     ` Alexander Graf
2010-09-17 11:28                       ` Liu Yu-B13201
2010-09-17 11:34                           ` Alexander Graf
2010-09-17 12:33                             ` Liu Yu-B13201
2010-09-17 12:34                               ` Alexander Graf
2010-09-08 16:06       ` [PATCH 1/2] kvm/e500v2: Remove shadow tlb Hollis Blanchard
2010-09-09 11:16         ` Liu Yu-B13201
2010-09-09 18:13             ` Hollis Blanchard
2010-09-09 23:26                 ` Alexander Graf
2010-09-09 23:39                     ` Hollis Blanchard
2010-09-09 23:42                       ` Alexander Graf
2010-09-08 16:07   ` [PATCH 0/2] kvm/e500v2: MMU optimization Hollis Blanchard
2010-09-09 11:03       ` Liu Yu-B13201
2010-09-09 16:23           ` Hollis Blanchard
2010-09-10  2:59             ` Liu Yu-B13201
