* [PATCH 1/6] KVM MMU: remove unused struct
@ 2010-04-12  7:59 Xiao Guangrong
  2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
                   ` (4 more replies)
  0 siblings, 5 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  7:59 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Remove 'struct kvm_unsync_walk' since it is no longer used

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |    5 -----
 1 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b44380b..a23ca75 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -172,11 +172,6 @@ struct kvm_shadow_walk_iterator {
 	     shadow_walk_okay(&(_walker));			\
 	     shadow_walk_next(&(_walker)))
 
-
-struct kvm_unsync_walk {
-	int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
-};
-
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
 
 static struct kmem_cache *pte_chain_cache;
-- 
1.6.1.2



* [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
@ 2010-04-12  8:01 ` Xiao Guangrong
  2010-04-12  8:24   ` Avi Kivity
  2010-04-12 17:10   ` Marcelo Tosatti
  2010-04-12  8:02 ` [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync Xiao Guangrong
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:01 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

- calculate the number of zapped pages properly in mmu_zap_unsync_children()
- calculate the number of freed pages properly in kvm_mmu_change_mmu_pages()
- restart the list walk if any child pages were zapped

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a23ca75..8f4f781 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 		for_each_sp(pages, sp, parents, i) {
 			kvm_mmu_zap_page(kvm, sp);
 			mmu_pages_clear_parents(&parents);
+			zapped++;
 		}
-		zapped += pages.nr;
 		kvm_mmu_pages_init(parent, &parents, &pages);
 	}
 
@@ -1540,7 +1540,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 
 			page = container_of(kvm->arch.active_mmu_pages.prev,
 					    struct kvm_mmu_page, link);
-			kvm_mmu_zap_page(kvm, page);
+			used_pages -= kvm_mmu_zap_page(kvm, page);
 			used_pages--;
 		}
 		kvm->arch.n_free_mmu_pages = 0;
@@ -1589,7 +1589,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 		    && !sp->role.invalid) {
 			pgprintk("%s: zap %lx %x\n",
 				 __func__, gfn, sp->role.word);
-			kvm_mmu_zap_page(kvm, sp);
+			if (kvm_mmu_zap_page(kvm, sp))
+				nn = bucket->first;
 		}
 	}
 }
-- 
1.6.1.2




* [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
  2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
@ 2010-04-12  8:02 ` Xiao Guangrong
  2010-04-12  8:32   ` Avi Kivity
  2010-04-12 17:12   ` Marcelo Tosatti
  2010-04-12  8:03 ` [PATCH 4/6] KVM MMU: optimize for writing cr4 Xiao Guangrong
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:02 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

- 'vcpu' is not used while marking parents unsync, so remove it
- if a page is already marked unsync, there is no need to walk its parents

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   69 +++++++++++++++++----------------------------------
 1 files changed, 23 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8f4f781..5154d70 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -172,7 +172,7 @@ struct kvm_shadow_walk_iterator {
 	     shadow_walk_okay(&(_walker));			\
 	     shadow_walk_next(&(_walker)))
 
-typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
+typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
 
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
@@ -1000,74 +1000,51 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 }
 
 
-static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			    mmu_parent_walk_fn fn)
+static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
 {
 	struct kvm_pte_chain *pte_chain;
 	struct hlist_node *node;
 	struct kvm_mmu_page *parent_sp;
 	int i;
 
-	if (!sp->multimapped && sp->parent_pte) {
+	if (!sp->parent_pte)
+		return;
+
+	if (!sp->multimapped) {
 		parent_sp = page_header(__pa(sp->parent_pte));
-		fn(vcpu, parent_sp);
-		mmu_parent_walk(vcpu, parent_sp, fn);
+		if (fn(parent_sp, sp->parent_pte))
+			mmu_parent_walk(parent_sp, fn);
 		return;
 	}
+
 	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
 		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
-			if (!pte_chain->parent_ptes[i])
+			u64 *spte = pte_chain->parent_ptes[i];
+			if (!spte)
 				break;
-			parent_sp = page_header(__pa(pte_chain->parent_ptes[i]));
-			fn(vcpu, parent_sp);
-			mmu_parent_walk(vcpu, parent_sp, fn);
+			parent_sp = page_header(__pa(spte));
+			if (fn(parent_sp, spte))
+				mmu_parent_walk(parent_sp, fn);
 		}
 }
 
-static void kvm_mmu_update_unsync_bitmap(u64 *spte)
+static int mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
 {
 	unsigned int index;
-	struct kvm_mmu_page *sp = page_header(__pa(spte));
 
 	index = spte - sp->spt;
-	if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
-		sp->unsync_children++;
-	WARN_ON(!sp->unsync_children);
-}
-
-static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
-{
-	struct kvm_pte_chain *pte_chain;
-	struct hlist_node *node;
-	int i;
-
-	if (!sp->parent_pte)
-		return;
-
-	if (!sp->multimapped) {
-		kvm_mmu_update_unsync_bitmap(sp->parent_pte);
-		return;
-	}
+	if (__test_and_set_bit(index, sp->unsync_child_bitmap))
+		return 0;
 
-	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
-		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
-			if (!pte_chain->parent_ptes[i])
-				break;
-			kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]);
-		}
-}
+	if (sp->unsync_children++)
+		return 0;
 
-static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-	kvm_mmu_update_parents_unsync(sp);
 	return 1;
 }
 
-static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu,
-					struct kvm_mmu_page *sp)
+static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
 {
-	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
-	kvm_mmu_update_parents_unsync(sp);
+	mmu_parent_walk(sp, mark_unsync);
 }
 
 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
@@ -1344,7 +1321,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			if (sp->unsync_children) {
 				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
-				kvm_mmu_mark_parents_unsync(vcpu, sp);
+				kvm_mmu_mark_parents_unsync(sp);
 			}
 			trace_kvm_mmu_get_page(sp, false);
 			return sp;
@@ -1756,7 +1733,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
 
-	kvm_mmu_mark_parents_unsync(vcpu, sp);
+	kvm_mmu_mark_parents_unsync(sp);
 
 	mmu_convert_notrap(sp);
 	return 0;
-- 
1.6.1.2




* [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
  2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
  2010-04-12  8:02 ` [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync Xiao Guangrong
@ 2010-04-12  8:03 ` Xiao Guangrong
  2010-04-12  8:34   ` Avi Kivity
  2010-04-12  8:05 ` [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size Xiao Guangrong
  2010-04-12  8:06 ` [PATCH 6/6] KVM MMU: optimize synchronization shadow pages Xiao Guangrong
  4 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:03 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Usually, the OS toggles the CR4.PGE bit to flush all global pages; in this
case, there is no need to reset the MMU, just flush the TLB

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/x86.c |    9 +++++++++
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fd5c3d3..2aaa6fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -463,6 +463,15 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
+	if (cr4 == old_cr4)
+		return;
+
+	if ((cr4 ^ old_cr4) == X86_CR4_PGE) {
+		kvm_mmu_sync_roots(vcpu);
+		kvm_mmu_flush_tlb(vcpu);
+		return;
+	}
+
 	if (cr4 & CR4_RESERVED_BITS) {
 		kvm_inject_gp(vcpu, 0);
 		return;
-- 
1.6.1.2




* [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size
  2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
                   ` (2 preceding siblings ...)
  2010-04-12  8:03 ` [PATCH 4/6] KVM MMU: optimize for writing cr4 Xiao Guangrong
@ 2010-04-12  8:05 ` Xiao Guangrong
  2010-04-12  8:36   ` Avi Kivity
  2010-04-12  8:06 ` [PATCH 6/6] KVM MMU: optimize synchronization shadow pages Xiao Guangrong
  4 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

'multimapped' and 'unsync' in 'struct kvm_mmu_page' are just indication
fields; we can use flag bits instead of them

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    5 ++-
 arch/x86/kvm/mmu.c              |   65 ++++++++++++++++++++++++++++-----------
 arch/x86/kvm/mmutrace.h         |    7 ++--
 arch/x86/kvm/paging_tmpl.h      |    2 +-
 4 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0c49c88..d463bc6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -202,9 +202,10 @@ struct kvm_mmu_page {
 	 * in this shadow page.
 	 */
 	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
-	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
-	bool unsync;
+	#define MMU_PAGE_MULTIMAPPED 0x1        /* More than one parent_pte? */
+	#define MMU_PAGE_UNSYNC 0x2
+	unsigned int flags;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5154d70..18eceb2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -266,6 +266,36 @@ static int is_last_spte(u64 pte, int level)
 	return 0;
 }
 
+static bool mmu_page_is_multimapped(struct kvm_mmu_page *sp)
+{
+	return !!(sp->flags & MMU_PAGE_MULTIMAPPED);
+}
+
+static void mmu_page_mark_multimapped(struct kvm_mmu_page *sp)
+{
+	sp->flags |= MMU_PAGE_MULTIMAPPED;
+}
+
+static void mmu_page_clear_multimapped(struct kvm_mmu_page *sp)
+{
+	sp->flags &= ~MMU_PAGE_MULTIMAPPED;
+}
+
+static bool mmu_page_is_unsync(struct kvm_mmu_page *sp)
+{
+	return !!(sp->flags & MMU_PAGE_UNSYNC);
+}
+
+static void mmu_page_mark_unsync(struct kvm_mmu_page *sp)
+{
+	sp->flags |= MMU_PAGE_UNSYNC;
+}
+
+static void mmu_page_clear_unsync(struct kvm_mmu_page *sp)
+{
+	sp->flags &= ~MMU_PAGE_UNSYNC;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 {
 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -918,7 +948,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
-	sp->multimapped = 0;
+	sp->flags = 0;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
@@ -933,14 +963,14 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
 
 	if (!parent_pte)
 		return;
-	if (!sp->multimapped) {
+	if (!mmu_page_is_multimapped(sp)) {
 		u64 *old = sp->parent_pte;
 
 		if (!old) {
 			sp->parent_pte = parent_pte;
 			return;
 		}
-		sp->multimapped = 1;
+		mmu_page_mark_multimapped(sp);
 		pte_chain = mmu_alloc_pte_chain(vcpu);
 		INIT_HLIST_HEAD(&sp->parent_ptes);
 		hlist_add_head(&pte_chain->link, &sp->parent_ptes);
@@ -968,7 +998,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 	struct hlist_node *node;
 	int i;
 
-	if (!sp->multimapped) {
+	if (!mmu_page_is_multimapped(sp)) {
 		BUG_ON(sp->parent_pte != parent_pte);
 		sp->parent_pte = NULL;
 		return;
@@ -990,7 +1020,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 				hlist_del(&pte_chain->link);
 				mmu_free_pte_chain(pte_chain);
 				if (hlist_empty(&sp->parent_ptes)) {
-					sp->multimapped = 0;
+					mmu_page_clear_multimapped(sp);
 					sp->parent_pte = NULL;
 				}
 			}
@@ -1010,7 +1040,7 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
 	if (!sp->parent_pte)
 		return;
 
-	if (!sp->multimapped) {
+	if (!mmu_page_is_multimapped(sp)) {
 		parent_sp = page_header(__pa(sp->parent_pte));
 		if (fn(parent_sp, sp->parent_pte))
 			mmu_parent_walk(parent_sp, fn);
@@ -1086,7 +1116,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 {
 	int i;
 
-	if (sp->unsync)
+	if (mmu_page_is_unsync(sp))
 		for (i=0; i < pvec->nr; i++)
 			if (pvec->page[i].sp == sp)
 				return 0;
@@ -1122,7 +1152,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 					return ret;
 			}
 
-			if (child->unsync) {
+			if (mmu_page_is_unsync(child)) {
 				nr_unsync_leaf++;
 				if (mmu_pages_add(pvec, child, i))
 					return -ENOSPC;
@@ -1168,8 +1198,8 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	WARN_ON(!sp->unsync);
-	sp->unsync = 0;
+	WARN_ON(!mmu_page_is_unsync(sp));
+	mmu_page_clear_unsync(sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1311,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
 		if (sp->gfn == gfn) {
-			if (sp->unsync)
+			if (mmu_page_is_unsync(sp))
 				if (kvm_sync_page(vcpu, sp))
 					continue;
 
@@ -1427,8 +1457,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
 
-	while (sp->multimapped || sp->parent_pte) {
-		if (!sp->multimapped)
+	while (mmu_page_is_multimapped(sp) || sp->parent_pte) {
+		if (!mmu_page_is_multimapped(sp))
 			parent_pte = sp->parent_pte;
 		else {
 			struct kvm_pte_chain *chain;
@@ -1480,7 +1510,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
-	if (sp->unsync)
+	if (mmu_page_is_unsync(sp))
 		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
@@ -1731,8 +1761,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			return 1;
 	}
 	++vcpu->kvm->stat.mmu_unsync;
-	sp->unsync = 1;
-
+	mmu_page_mark_unsync(sp);
 	kvm_mmu_mark_parents_unsync(sp);
 
 	mmu_convert_notrap(sp);
@@ -1748,7 +1777,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	if (shadow) {
 		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
 			return 1;
-		if (shadow->unsync)
+		 if (mmu_page_is_unsync(shadow))
 			return 0;
 		if (can_unsync && oos_shadow)
 			return kvm_unsync_page(vcpu, shadow);
@@ -3373,7 +3402,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
 		if (sp->role.direct)
 			continue;
-		if (sp->unsync)
+		if (mmu_page_is_unsync(sp))
 			continue;
 
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 1fe956a..63a7d9d 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -11,13 +11,13 @@
 	__field(__u64, gfn) \
 	__field(__u32, role) \
 	__field(__u32, root_count) \
-	__field(__u32, unsync)
+	__field(__u32, flags)
 
 #define KVM_MMU_PAGE_ASSIGN(sp)			     \
 	__entry->gfn = sp->gfn;			     \
 	__entry->role = sp->role.word;		     \
 	__entry->root_count = sp->root_count;        \
-	__entry->unsync = sp->unsync;
+	__entry->flags = sp->flags;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
 	const char *ret = p->buffer + p->len;				\
@@ -38,7 +38,8 @@
 			 role.cr4_pge ? "" : "!",			\
 			 role.nxe ? "" : "!",				\
 			 __entry->root_count,				\
-			 __entry->unsync ? "unsync" : "sync", 0);	\
+			 __entry->flags & MMU_PAGE_UNSYNC ?		\
+						"unsync" : "sync", 0);	\
 	ret;								\
 		})
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d9dea28..f6de555 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,7 +263,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
 		if (!is_present_gpte(gpte)) {
-			if (page->unsync)
+			if (mmu_page_is_unsync(page))
 				new_spte = shadow_trap_nonpresent_pte;
 			else
 				new_spte = shadow_notrap_nonpresent_pte;
-- 
1.6.1.2





* [PATCH 6/6] KVM MMU: optimize synchronization shadow pages
  2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
                   ` (3 preceding siblings ...)
  2010-04-12  8:05 ` [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size Xiao Guangrong
@ 2010-04-12  8:06 ` Xiao Guangrong
  2010-04-12  8:43   ` Avi Kivity
  4 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:06 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

- chain all unsync shadow pages so we can fetch them quickly
- flush the local/remote TLB only after all shadow pages are synced

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/mmu.c              |   82 ++++++++++++++++++---------------------
 2 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d463bc6..ae543fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -207,6 +207,7 @@ struct kvm_mmu_page {
 	#define MMU_PAGE_UNSYNC 0x2
 	unsigned int flags;
 	unsigned int unsync_children;
+	struct list_head unsync_link;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 18eceb2..fcb6299 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -177,6 +177,8 @@ typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
+static struct list_head unsync_mmu_page_list =
+			LIST_HEAD_INIT(unsync_mmu_page_list);
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
@@ -950,6 +952,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->flags = 0;
 	sp->parent_pte = parent_pte;
+	INIT_LIST_HEAD(&sp->unsync_link);
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
 }
@@ -1200,12 +1203,14 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!mmu_page_is_unsync(sp));
 	mmu_page_clear_unsync(sp);
+	list_del(&sp->unsync_link);
 	--kvm->stat.mmu_unsync;
 }
 
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			   bool *flush_local_tlb, bool *flush_remote_tlb)
 {
 	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1214,17 +1219,31 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 	trace_kvm_mmu_sync_page(sp);
 	if (rmap_write_protect(vcpu->kvm, sp->gfn))
-		kvm_flush_remote_tlbs(vcpu->kvm);
+		*flush_remote_tlb = true;
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		return 1;
 	}
 
-	kvm_mmu_flush_tlb(vcpu);
+	*flush_local_tlb = true;
 	return 0;
 }
 
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	bool flush_local_tlb = false, flush_remote_tlb = false;
+	int ret;
+
+	ret = __kvm_sync_page(vcpu, sp, &flush_local_tlb, &flush_remote_tlb);
+	if (flush_local_tlb)
+		kvm_mmu_flush_tlb(vcpu);
+	if (flush_remote_tlb)
+		kvm_flush_remote_tlbs(vcpu->kvm);
+
+       return ret;
+}
+
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
 	unsigned int idx[PT64_ROOT_LEVEL-1];
@@ -1284,31 +1303,24 @@ static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
 	pvec->nr = 0;
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
-			      struct kvm_mmu_page *parent)
+static void mmu_sync_pages(struct kvm_vcpu *vcpu)
 {
-	int i;
-	struct kvm_mmu_page *sp;
-	struct mmu_page_path parents;
-	struct kvm_mmu_pages pages;
-
-	kvm_mmu_pages_init(parent, &parents, &pages);
-	while (mmu_unsync_walk(parent, &pages)) {
-		int protected = 0;
-
-		for_each_sp(pages, sp, parents, i)
-			protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+	struct list_head *p, *next;
+	bool flush_local_tlb = false, flush_remote_tlb = false;
 
-		if (protected)
-			kvm_flush_remote_tlbs(vcpu->kvm);
+	if (list_empty(&unsync_mmu_page_list))
+		return;
 
-		for_each_sp(pages, sp, parents, i) {
-			kvm_sync_page(vcpu, sp);
-			mmu_pages_clear_parents(&parents);
-		}
-		cond_resched_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_pages_init(parent, &parents, &pages);
+	list_for_each_safe(p, next, &unsync_mmu_page_list) {
+		struct kvm_mmu_page *sp;
+		sp = list_entry(p, struct kvm_mmu_page, unsync_link);
+		__kvm_sync_page(vcpu, sp, &flush_local_tlb, &flush_remote_tlb);
 	}
+
+	if (flush_local_tlb)
+		kvm_mmu_flush_tlb(vcpu);
+	if (flush_remote_tlb)
+		kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1762,6 +1774,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	}
 	++vcpu->kvm->stat.mmu_unsync;
 	mmu_page_mark_unsync(sp);
+	list_add(&sp->unsync_link, &unsync_mmu_page_list);
 	kvm_mmu_mark_parents_unsync(sp);
 
 	mmu_convert_notrap(sp);
@@ -2121,26 +2134,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
-	int i;
-	struct kvm_mmu_page *sp;
-
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return;
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
-		sp = page_header(root);
-		mmu_sync_children(vcpu, sp);
-		return;
-	}
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
-
-		if (root && VALID_PAGE(root)) {
-			root &= PT64_BASE_ADDR_MASK;
-			sp = page_header(root);
-			mmu_sync_children(vcpu, sp);
-		}
-	}
+	mmu_sync_pages(vcpu);
 }
 
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
-- 
1.6.1.2




* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
@ 2010-04-12  8:24   ` Avi Kivity
  2010-04-12  8:53     ` Xiao Guangrong
  2010-04-12 17:10   ` Marcelo Tosatti
  1 sibling, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  8:24 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:01 AM, Xiao Guangrong wrote:
> - calculate zapped page number properly in mmu_zap_unsync_children()
> - calculate freeed page number properly kvm_mmu_change_mmu_pages()
> - restart list walking if have children page zapped
>
> Signed-off-by: Xiao Guangrong<xiaoguangrong@cn.fujitsu.com>
> ---
>   arch/x86/kvm/mmu.c |    7 ++++---
>   1 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a23ca75..8f4f781 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>   		for_each_sp(pages, sp, parents, i) {
>   			kvm_mmu_zap_page(kvm, sp);
>   			mmu_pages_clear_parents(&parents);
> +			zapped++;
>   		}
> -		zapped += pages.nr;
>   		kvm_mmu_pages_init(parent,&parents,&pages);
>   	}
>    

This looks correct; I don't understand how it worked in the first place.
Marcelo?

> @@ -1540,7 +1540,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
>
>   			page = container_of(kvm->arch.active_mmu_pages.prev,
>   					    struct kvm_mmu_page, link);
> -			kvm_mmu_zap_page(kvm, page);
> +			used_pages -= kvm_mmu_zap_page(kvm, page);
>   			used_pages--;
>   		}
>    

This too.  Wow.

>   		kvm->arch.n_free_mmu_pages = 0;
> @@ -1589,7 +1589,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
>   		&&  !sp->role.invalid) {
>   			pgprintk("%s: zap %lx %x\n",
>   				 __func__, gfn, sp->role.word);
> -			kvm_mmu_zap_page(kvm, sp);
> +			if (kvm_mmu_zap_page(kvm, sp))
> +				nn = bucket->first;
>   		}
>   	}
>    

I don't understand why this is needed.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-12  8:02 ` [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync Xiao Guangrong
@ 2010-04-12  8:32   ` Avi Kivity
  2010-04-12  8:55     ` Xiao Guangrong
  2010-04-12 17:12   ` Marcelo Tosatti
  1 sibling, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  8:32 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:02 AM, Xiao Guangrong wrote:
> - 'vcpu' is not used while mark parent unsync, so remove it
> - if it has alread marked unsync, no need to walk it's parent
>
>    

Please separate these two changes.

The optimization looks good.  Perhaps it can be done even nicer using 
mutually recursive functions (mmu_parent_walk calls mmu_parent_walk_fn 
which calls mmu_parent_walk on the parent), but let's not change too 
many things at a time.
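
For illustration, one way such a mutually recursive version might look, reusing
the helpers introduced by this patch (a rough sketch, untested, not part of the
posted series; here mark_unsync() recurses into the walk itself instead of
returning a flag for mmu_parent_walk() to act on):

static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte);

static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
{
        struct kvm_pte_chain *pte_chain;
        struct hlist_node *node;
        int i;

        if (!sp->parent_pte)
                return;

        if (!sp->multimapped) {
                mark_unsync(page_header(__pa(sp->parent_pte)), sp->parent_pte);
                return;
        }

        hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
                        u64 *spte = pte_chain->parent_ptes[i];

                        if (!spte)
                                break;
                        mark_unsync(page_header(__pa(spte)), spte);
                }
}

/* Recurses back into kvm_mmu_mark_parents_unsync() for the parent page. */
static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
{
        unsigned int index = spte - sp->spt;

        if (__test_and_set_bit(index, sp->unsync_child_bitmap))
                return;
        if (sp->unsync_children++)
                return;
        kvm_mmu_mark_parents_unsync(sp);
}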

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-12  8:03 ` [PATCH 4/6] KVM MMU: optimize for writing cr4 Xiao Guangrong
@ 2010-04-12  8:34   ` Avi Kivity
  2010-04-12 10:42     ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  8:34 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:03 AM, Xiao Guangrong wrote:
> Usually, OS changes CR4.PGE bit to flush all global page, under this
> case, no need reset mmu and just flush tlb
>
> Signed-off-by: Xiao Guangrong<xiaoguangrong@cn.fujitsu.com>
> ---
>   arch/x86/kvm/x86.c |    9 +++++++++
>   1 files changed, 9 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fd5c3d3..2aaa6fb 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -463,6 +463,15 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   	unsigned long old_cr4 = kvm_read_cr4(vcpu);
>   	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
>
> +	if (cr4 == old_cr4)
> +		return;
> +
> +	if ((cr4 ^ old_cr4) == X86_CR4_PGE) {
> +		kvm_mmu_sync_roots(vcpu);
> +		kvm_mmu_flush_tlb(vcpu);
> +		return;
> +	}
> +
>   	if (cr4&  CR4_RESERVED_BITS) {
>   		kvm_inject_gp(vcpu, 0);
>   		return;
>    

Later we have:

>         kvm_x86_ops->set_cr4(vcpu, cr4);
>         vcpu->arch.cr4 = cr4;
>         vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && 
> !tdp_enabled;

All of which depend on cr4.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size
  2010-04-12  8:05 ` [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size Xiao Guangrong
@ 2010-04-12  8:36   ` Avi Kivity
  2010-04-12 11:11     ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  8:36 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:05 AM, Xiao Guangrong wrote:
> 'multimapped' and 'unsync' in 'struct kvm_mmu_page' are just indication
> field, we can use flag bits instand of them
>
> @@ -202,9 +202,10 @@ struct kvm_mmu_page {
>   	 * in this shadow page.
>   	 */
>   	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> -	int multimapped;         /* More than one parent_pte? */
>   	int root_count;          /* Currently serving as active root */
> -	bool unsync;
> +	#define MMU_PAGE_MULTIMAPPED 0x1        /* More than one parent_pte? */
> +	#define MMU_PAGE_UNSYNC 0x2
> +	unsigned int flags;
>   	unsigned int unsync_children;
>   	union {
>    

Please just use bool for multimapped.  If we grow more than 4 flags, we 
can use 'bool flag : 1;' to reduce space while retaining readability.
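
Concretely, that would look something like this (a condensed, hypothetical
fragment, not the real struct layout):

struct kvm_mmu_page {
        /* ... */
        int root_count;          /* Currently serving as active root */
        bool multimapped : 1;    /* More than one parent_pte? */
        bool unsync : 1;
        unsigned int unsync_children;
        /* ... */
};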

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 6/6] KVM MMU: optimize synchronization shadow pages
  2010-04-12  8:06 ` [PATCH 6/6] KVM MMU: optimize synchronization shadow pages Xiao Guangrong
@ 2010-04-12  8:43   ` Avi Kivity
  2010-04-12 11:14     ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  8:43 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:06 AM, Xiao Guangrong wrote:
> - chain all unsync shadow pages then we can fetch them quickly
> - flush local/remote tlb after all shadow page synced
>
> Signed-off-by: Xiao Guangrong<xiaoguangrong@cn.fujitsu.com>
> ---
>   arch/x86/include/asm/kvm_host.h |    1 +
>   arch/x86/kvm/mmu.c              |   82 ++++++++++++++++++---------------------
>   2 files changed, 39 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d463bc6..ae543fb 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -207,6 +207,7 @@ struct kvm_mmu_page {
>   	#define MMU_PAGE_UNSYNC 0x2
>   	unsigned int flags;
>   	unsigned int unsync_children;
> +	struct list_head unsync_link;
>   	union {
>   		u64 *parent_pte;               /* !multimapped */
>   		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 18eceb2..fcb6299 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -177,6 +177,8 @@ typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
>   static struct kmem_cache *pte_chain_cache;
>   static struct kmem_cache *rmap_desc_cache;
>   static struct kmem_cache *mmu_page_header_cache;
> +static struct list_head unsync_mmu_page_list =
> +			LIST_HEAD_INIT(unsync_mmu_page_list);
>
>    

Does this really need to be global?  Should be per guest.
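
A per-guest version would roughly mean hanging the list off struct kvm_arch and
using it from kvm_unsync_page(), e.g. (a sketch only; the field name is
hypothetical and the init/teardown is omitted):

/* arch/x86/include/asm/kvm_host.h */
struct kvm_arch {
        /* ... */
        struct list_head unsync_mmu_pages;
};

/* in kvm_unsync_page(): */
list_add(&sp->unsync_link, &vcpu->kvm->arch.unsync_mmu_pages);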

>   static u64 __read_mostly shadow_trap_nonpresent_pte;
>   static u64 __read_mostly shadow_notrap_nonpresent_pte;
> @@ -950,6 +952,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>   	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
>   	sp->flags = 0;
>   	sp->parent_pte = parent_pte;
> +	INIT_LIST_HEAD(&sp->unsync_link);
>   	--vcpu->kvm->arch.n_free_mmu_pages;
>   	return sp;
>   }
> @@ -1200,12 +1203,14 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>   {
>   	WARN_ON(!mmu_page_is_unsync(sp));
>   	mmu_page_clear_unsync(sp);
> +	list_del(&sp->unsync_link);
>   	--kvm->stat.mmu_unsync;
>   }
>    

Locking?

> -		if (protected)
> -			kvm_flush_remote_tlbs(vcpu->kvm);
> +	if (list_empty(&unsync_mmu_page_list))
> +		return;
>    

Will this ever happen?  In my experience we usually have a ton of 
unsyncs lying around.

> +	list_for_each_safe(p, next,&unsync_mmu_page_list) {
> +		struct kvm_mmu_page *sp;
> +		sp = list_entry(p, struct kvm_mmu_page, unsync_link);
> +		__kvm_sync_page(vcpu, sp,&flush_local_tlb,&flush_remote_tlb);
>   	}
>    

That's counter to the point of unsync.  We only want to sync the minimum 
number of pages - all pages reachable by the new root.  The others we 
want to keep writeable to reduce our fault count.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  8:24   ` Avi Kivity
@ 2010-04-12  8:53     ` Xiao Guangrong
  2010-04-12  9:08       ` Avi Kivity
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:53 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:

> 
>>           kvm->arch.n_free_mmu_pages = 0;
>> @@ -1589,7 +1589,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t
>> gfn)
>>           &&  !sp->role.invalid) {
>>               pgprintk("%s: zap %lx %x\n",
>>                    __func__, gfn, sp->role.word);
>> -            kvm_mmu_zap_page(kvm, sp);
>> +            if (kvm_mmu_zap_page(kvm, sp))
>> +                nn = bucket->first;
>>           }
>>       }
>>    
> 
> I don't understand why this is needed.

Here is the code segment in mmu_unshadow():

|hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
|		if (sp->gfn == gfn && !sp->role.direct
|		    && !sp->role.invalid) {
|			pgprintk("%s: zap %lx %x\n",
|				 __func__, gfn, sp->role.word);
|			kvm_mmu_zap_page(kvm, sp);
|		}
|	}

in the loop, if nn is zapped, hlist_for_each_entry_safe() will access nn and
cause a crash. This case is checked for in other functions, such as
kvm_mmu_zap_all() and kvm_mmu_unprotect_page()...

Thanks,
Xiao



* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-12  8:32   ` Avi Kivity
@ 2010-04-12  8:55     ` Xiao Guangrong
  0 siblings, 0 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  8:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:
> On 04/12/2010 11:02 AM, Xiao Guangrong wrote:
>> - 'vcpu' is not used while mark parent unsync, so remove it
>> - if it has alread marked unsync, no need to walk it's parent
>>
>>    
> 
> Please separate these two changes.
> 
> The optimization looks good.  Perhaps it can be done even nicer using
> mutually recursive functions (mmu_parent_walk calls mmu_parent_walk_fn
> which calls mmu_parent_walk on the parent), but let's not change too
> many things at a time.

OK, I'll separate it in the next version

Thanks,
Xiao


* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  8:53     ` Xiao Guangrong
@ 2010-04-12  9:08       ` Avi Kivity
  2010-04-12  9:22         ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12  9:08 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 11:53 AM, Xiao Guangrong wrote:
>>
>>>            kvm->arch.n_free_mmu_pages = 0;
>>> @@ -1589,7 +1589,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t
>>> gfn)
>>>            &&   !sp->role.invalid) {
>>>                pgprintk("%s: zap %lx %x\n",
>>>                     __func__, gfn, sp->role.word);
>>> -            kvm_mmu_zap_page(kvm, sp);
>>> +            if (kvm_mmu_zap_page(kvm, sp))
>>> +                nn = bucket->first;
>>>            }
>>>        }
>>>
>>>        
>> I don't understand why this is needed.
>>      
> There is the code segment in mmu_unshadow():
>
> |hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
> |		if (sp->gfn == gfn&&  !sp->role.direct
> |		&&  !sp->role.invalid) {
> |			pgprintk("%s: zap %lx %x\n",
> |				 __func__, gfn, sp->role.word);
> |			kvm_mmu_zap_page(kvm, sp);
> |		}
> |	}
>
> in the loop, if nn is zapped, hlist_for_each_entry_safe() access nn will
> cause crash. and it's checked in other functions, such as kvm_mmu_zap_all(),
> kvm_mmu_unprotect_page()...
>    

hlist_for_each_entry_safe() is supposed to be safe against removal of 
the element that is pointed to by the iteration cursor.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  9:08       ` Avi Kivity
@ 2010-04-12  9:22         ` Xiao Guangrong
  2010-04-12 10:25           ` Avi Kivity
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12  9:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Hi Avi,

Avi Kivity wrote:

> hlist_for_each_entry_safe() is supposed to be be safe against removal of
> the element that is pointed to by the iteration cursor.

If the next node has been destroyed, hlist_for_each_entry_safe() is unsafe.

Here is hlist_for_each_entry_safe()'s code:

|#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
|	for (pos = (head)->first;					 \
|	     pos && ({ n = pos->next; 1; }) && 				 \
|		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
|	     pos = n)

if n is destroyed: after 'pos = n', the loop evaluates 'n = pos->next',
so it accesses the freed n again, which is unsafe/illegal for us.

Xiao


* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  9:22         ` Xiao Guangrong
@ 2010-04-12 10:25           ` Avi Kivity
  2010-04-12 12:22             ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12 10:25 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 12:22 PM, Xiao Guangrong wrote:
> Hi Avi,
>
> Avi Kivity wrote:
>
>    
>> hlist_for_each_entry_safe() is supposed to be be safe against removal of
>> the element that is pointed to by the iteration cursor.
>>      
> If we destroyed the next point, hlist_for_each_entry_safe() is unsafe.
>
> List hlist_for_each_entry_safe()'s code:
>
> |#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
> |	for (pos = (head)->first;					 \
> |	     pos&&  ({ n = pos->next; 1; })&&  				\
> |		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
> |	     pos = n)
>
> if n is destroyed:
> 'pos = n, n = pos->next'
> then it access n again, it's unsafe/illegal for us.
>    

But kvm_mmu_zap_page() will only destroy sp == tpos == pos; n points at 
pos->next already, so it's safe.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-12  8:34   ` Avi Kivity
@ 2010-04-12 10:42     ` Xiao Guangrong
  2010-04-12 11:22       ` Avi Kivity
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12 10:42 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Hi Avi,

Thanks for your comments.

Avi Kivity wrote:

> Later we have:
> 
>>         kvm_x86_ops->set_cr4(vcpu, cr4);
>>         vcpu->arch.cr4 = cr4;
>>         vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) &&
>> !tdp_enabled;
> 
> All of which depend on cr4.

Oh, destroy_kvm_mmu() does not really destroy cr3 and we can reload it later
from the shadow page cache, so maybe this patch is unnecessary.

But I have another question here: why do we need to encode 'cr4 & X86_CR4_PGE'
into base_role.cr4_pge? Why do we need to allocate different shadow pages for
global and non-global pages?

As far as I know, global pages are not pinned in the TLB, and the x86 CPU may
also flush them from the TLB, so maybe we do not need to treat global pages
specially... Am I missing something? :-(

Xiao



* Re: [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size
  2010-04-12  8:36   ` Avi Kivity
@ 2010-04-12 11:11     ` Xiao Guangrong
  0 siblings, 0 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12 11:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:
> On 04/12/2010 11:05 AM, Xiao Guangrong wrote:
>> 'multimapped' and 'unsync' in 'struct kvm_mmu_page' are just indication
>> field, we can use flag bits instand of them
>>
>> @@ -202,9 +202,10 @@ struct kvm_mmu_page {
>>        * in this shadow page.
>>        */
>>       DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS +
>> KVM_PRIVATE_MEM_SLOTS);
>> -    int multimapped;         /* More than one parent_pte? */
>>       int root_count;          /* Currently serving as active root */
>> -    bool unsync;
>> +    #define MMU_PAGE_MULTIMAPPED 0x1        /* More than one
>> parent_pte? */
>> +    #define MMU_PAGE_UNSYNC 0x2
>> +    unsigned int flags;
>>       unsigned int unsync_children;
>>       union {
>>    
> 
> Please just use bool for multimapped.  If we grow more than 4 flags, we
> can use 'bool flag : 1;' to reduce space while retaining readability.
> 

Yeah, good idea, I'll fix it in the next version

Thanks,
Xiao


* Re: [PATCH 6/6] KVM MMU: optimize synchronization shadow pages
  2010-04-12  8:43   ` Avi Kivity
@ 2010-04-12 11:14     ` Xiao Guangrong
  0 siblings, 0 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12 11:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:
> On 04/12/2010 11:06 AM, Xiao Guangrong wrote:
>> - chain all unsync shadow pages then we can fetch them quickly
>> - flush local/remote tlb after all shadow page synced
>>
>> Signed-off-by: Xiao Guangrong<xiaoguangrong@cn.fujitsu.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |    1 +
>>   arch/x86/kvm/mmu.c              |   82
>> ++++++++++++++++++---------------------
>>   2 files changed, 39 insertions(+), 44 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h
>> index d463bc6..ae543fb 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -207,6 +207,7 @@ struct kvm_mmu_page {
>>       #define MMU_PAGE_UNSYNC 0x2
>>       unsigned int flags;
>>       unsigned int unsync_children;
>> +    struct list_head unsync_link;
>>       union {
>>           u64 *parent_pte;               /* !multimapped */
>>           struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 18eceb2..fcb6299 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -177,6 +177,8 @@ typedef int (*mmu_parent_walk_fn) (struct
>> kvm_mmu_page *sp, u64 *spte);
>>   static struct kmem_cache *pte_chain_cache;
>>   static struct kmem_cache *rmap_desc_cache;
>>   static struct kmem_cache *mmu_page_header_cache;
>> +static struct list_head unsync_mmu_page_list =
>> +            LIST_HEAD_INIT(unsync_mmu_page_list);
>>
>>    
> 
> Does this really need to be global?  Should be per guest.
> 

Ah... this patch needs more cooking, I'll improve it... thanks for pointing it out.

Xiao


* Re: [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-12 10:42     ` Xiao Guangrong
@ 2010-04-12 11:22       ` Avi Kivity
  2010-04-13  3:07         ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Avi Kivity @ 2010-04-12 11:22 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 01:42 PM, Xiao Guangrong wrote:
> Hi Avi,
>
> Thanks for your comments.
>
> Avi Kivity wrote:
>
>    
>> Later we have:
>>
>>      
>>>          kvm_x86_ops->set_cr4(vcpu, cr4);
>>>          vcpu->arch.cr4 = cr4;
>>>          vcpu->arch.mmu.base_role.cr4_pge = (cr4&  X86_CR4_PGE)&&
>>> !tdp_enabled;
>>>        
>> All of which depend on cr4.
>>      
> Oh, destroy_kvm_mmu() is not really destroyed cr3 and we can reload it later
> form shadow page cache, so, maybe this patch is unnecessary.
>
> But, i have a another question here, why we need encode 'cr4&  X86_CR4_PGE' into
> base_role.cr4_gpe? Why we need allocation different shadow page for global page
> and no-global page?
>    

See 6364a3918cb.  It was reverted later due to a problem with the 
implementation.  I'm not sure whether I want to fix the bug and restore 
that patch, or to drop it altogether and give the guest ownership of 
cr4.pge.  See cr4_guest_owned_bits (currently only used on ept).

> As i know, global page is not static in TLB, and x86 cpu also may flush them form TLB,
> maybe we no need treat global page specially... Am i miss something? :-(
>    

You can't read reverted patches? :)

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12 10:25           ` Avi Kivity
@ 2010-04-12 12:22             ` Xiao Guangrong
  2010-04-12 12:49               ` Avi Kivity
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-12 12:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:
> On 04/12/2010 12:22 PM, Xiao Guangrong wrote:
>> Hi Avi,
>>
>> Avi Kivity wrote:

> But kvm_mmu_zap_page() will only destroy sp == tpos == pos; n points at
> pos->next already, so it's safe.
> 

kvm_mmu_zap_page(sp) not only zaps sp but also zaps all of sp's unsync child
pages; if n happens to be an unsync child of sp on the same hlist, just behind
sp, it will crash. :-)


* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12 12:22             ` Xiao Guangrong
@ 2010-04-12 12:49               ` Avi Kivity
  0 siblings, 0 replies; 36+ messages in thread
From: Avi Kivity @ 2010-04-12 12:49 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/12/2010 03:22 PM, Xiao Guangrong wrote:
>
>> But kvm_mmu_zap_page() will only destroy sp == tpos == pos; n points at
>> pos->next already, so it's safe.
>>
>>      
> kvm_mmu_zap_page(sp) not only zaps sp but also zaps all sp's unsync children
> pages, if n is just sp's unsyc child, just at the same hlist and just behind sp,
> it will crash. :-)
>    

Ouch.  I see now, thanks for explaining.

One way to fix it is to make kvm_mmu_zap_page() only zap the page it is 
given, and use sp->role.invalid on its children.  But it's better to fix 
it now quickly and do the more involved fixes later.

Just change the assignment to a 'goto restart;' please, I don't like 
playing with list_for_each internals.
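
For reference, the shape being asked for would be roughly (a sketch built from
the loop quoted earlier in this thread, untested):

static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
        unsigned index;
        struct hlist_head *bucket;
        struct kvm_mmu_page *sp;
        struct hlist_node *node, *nn;

        index = kvm_page_table_hashfn(gfn);
        bucket = &kvm->arch.mmu_page_hash[index];
restart:
        hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
                if (sp->gfn == gfn && !sp->role.direct
                    && !sp->role.invalid) {
                        pgprintk("%s: zap %lx %x\n",
                                 __func__, gfn, sp->role.word);
                        /* restart the walk if the zap removed other entries */
                        if (kvm_mmu_zap_page(kvm, sp))
                                goto restart;
                }
        }
}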

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
  2010-04-12  8:24   ` Avi Kivity
@ 2010-04-12 17:10   ` Marcelo Tosatti
  2010-04-13  1:34     ` Xiao Guangrong
  1 sibling, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-12 17:10 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Mon, Apr 12, 2010 at 04:01:09PM +0800, Xiao Guangrong wrote:
> - calculate zapped page number properly in mmu_zap_unsync_children()
> - calculate freeed page number properly kvm_mmu_change_mmu_pages()
> - restart list walking if have children page zapped
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/kvm/mmu.c |    7 ++++---
>  1 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a23ca75..8f4f781 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>  		for_each_sp(pages, sp, parents, i) {
>  			kvm_mmu_zap_page(kvm, sp);
>  			mmu_pages_clear_parents(&parents);
> +			zapped++;
>  		}
> -		zapped += pages.nr;
>  		kvm_mmu_pages_init(parent, &parents, &pages);
>  	}

Don't see why this is needed? The for_each_sp loop uses pvec.nr.

> @@ -1540,7 +1540,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
>  
>  			page = container_of(kvm->arch.active_mmu_pages.prev,
>  					    struct kvm_mmu_page, link);
> -			kvm_mmu_zap_page(kvm, page);
> +			used_pages -= kvm_mmu_zap_page(kvm, page);
>  			used_pages--;
>  		}
>  		kvm->arch.n_free_mmu_pages = 0;

Oops.

> @@ -1589,7 +1589,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
>  		    && !sp->role.invalid) {
>  			pgprintk("%s: zap %lx %x\n",
>  				 __func__, gfn, sp->role.word);
> -			kvm_mmu_zap_page(kvm, sp);
> +			if (kvm_mmu_zap_page(kvm, sp))
> +				nn = bucket->first;

Oops 2.


* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-12  8:02 ` [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync Xiao Guangrong
  2010-04-12  8:32   ` Avi Kivity
@ 2010-04-12 17:12   ` Marcelo Tosatti
  2010-04-13  1:53     ` Xiao Guangrong
  1 sibling, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-12 17:12 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Mon, Apr 12, 2010 at 04:02:24PM +0800, Xiao Guangrong wrote:
> - 'vcpu' is not used while mark parent unsync, so remove it
> - if it has alread marked unsync, no need to walk it's parent
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>

Xiao,

Did you actually see this codepath as being performance sensitive? 

I'd prefer to not touch it.



* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-12 17:10   ` Marcelo Tosatti
@ 2010-04-13  1:34     ` Xiao Guangrong
  2010-04-13 14:59       ` Marcelo Tosatti
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-13  1:34 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Marcelo Tosatti wrote:

>> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  		for_each_sp(pages, sp, parents, i) {
>>  			kvm_mmu_zap_page(kvm, sp);
>>  			mmu_pages_clear_parents(&parents);
>> +			zapped++;
>>  		}
>> -		zapped += pages.nr;
>>  		kvm_mmu_pages_init(parent, &parents, &pages);
>>  	}
> 
> Don't see why this is needed? The for_each_sp loop uses pvec.nr.

I think mmu_zap_unsync_children() should return the number of zapped pages so we
can adjust the number of free pages in kvm_mmu_change_mmu_pages(), but pages.nr not
only includes the unsync/zapped pages but also includes their parents.

Thanks,
Xiao


* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-12 17:12   ` Marcelo Tosatti
@ 2010-04-13  1:53     ` Xiao Guangrong
  2010-04-13 11:58       ` Avi Kivity
  2010-04-13 15:01       ` Marcelo Tosatti
  0 siblings, 2 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-13  1:53 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Marcelo Tosatti wrote:

> Xiao,
> 
> Did you actually see this codepath as being performance sensitive? 

Actually, I have not run benchmarks to compare the performance before and after
this patch.

> 
> I'd prefer to not touch it.

This patch avoids walking all the parents, and I think that overhead is really unnecessary.
Are there other tricks in this codepath that I have not noticed? :-)

Thanks,
Xiao


* Re: [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-12 11:22       ` Avi Kivity
@ 2010-04-13  3:07         ` Xiao Guangrong
  2010-04-13  6:42           ` Avi Kivity
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-13  3:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML



Avi Kivity wrote:

> See 6364a3918cb.  It was reverted later due to a problem with the
> implementation.  I'm not sure whether I want to fix the bug and restore
> that patch, or to drop it altogether and give the guest ownership of
> cr4.pge.  See cr4_guest_owned_bits (currently only used on ept).
> 

Oh, I see, thanks very much.

>> As i know, global page is not static in TLB, and x86 cpu also may
>> flush them form TLB,
>> maybe we no need treat global page specially... Am i miss something? :-(
>>    
> 
> You can't read reverted patches? :)

I usually use 'git blame' to look into the source and did not notice reverted
patches; I'll pay more attention to those.

The code below still confuses me:

| vcpu->arch.mmu.base_role.cr4_pge = (cr4&  X86_CR4_PGE)&&!tdp_enabled; 

And i found the commit 87778d60ee:

|    KVM: MMU: Segregate mmu pages created with different cr4.pge settings
|    
|    Don't allow a vcpu with cr4.pge cleared to use a shadow page created with
|    cr4.pge set; this might cause a cr3 switch not to sync ptes that have the
|    global bit set (the global bit has no effect if !cr4.pge).
|    
|    This can only occur on smp with different cr4.pge settings for different
|    vcpus (since a cr4 change will resync the shadow ptes), but there's no
|    cost to being correct here.

In the current code, a cr3 switch will sync all unsync shadow pages (regardless
of whether they are global or not) and this issue no longer exists, so do we
also need to revert this patch?

Xiao



* Re: [PATCH 4/6] KVM MMU: optimize for writing cr4
  2010-04-13  3:07         ` Xiao Guangrong
@ 2010-04-13  6:42           ` Avi Kivity
  0 siblings, 0 replies; 36+ messages in thread
From: Avi Kivity @ 2010-04-13  6:42 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/13/2010 06:07 AM, Xiao Guangrong wrote:
>
> And i found the commit 87778d60ee:
>
> |    KVM: MMU: Segregate mmu pages created with different cr4.pge settings
> |
> |    Don't allow a vcpu with cr4.pge cleared to use a shadow page created with
> |    cr4.pge set; this might cause a cr3 switch not to sync ptes that have the
> |    global bit set (the global bit has no effect if !cr4.pge).
> |
> |    This can only occur on smp with different cr4.pge settings for different
> |    vcpus (since a cr4 change will resync the shadow ptes), but there's no
> |    cost to being correct here.
>
> In current code, cr3 switch will sync all unsync shadow pages(regardless it's
> global or not) and this issue not live now, so, do we need also revert this
> patch?
>    

One path is to revert this patch.  The other is to restore the 
optimization that relies on it.  I'm not sure which is best.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.



* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-13  1:53     ` Xiao Guangrong
@ 2010-04-13 11:58       ` Avi Kivity
  2010-04-13 15:01       ` Marcelo Tosatti
  1 sibling, 0 replies; 36+ messages in thread
From: Avi Kivity @ 2010-04-13 11:58 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, KVM list, LKML

On 04/13/2010 04:53 AM, Xiao Guangrong wrote:
>
>> I'd prefer to not touch it.
>>      
> This patch avoids walking all parents, and I think that overhead is really unnecessary.
> Or are there other tricks in this codepath that I have not noticed? :-)
>    

There is also the advantage of code size reduction.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-13  1:34     ` Xiao Guangrong
@ 2010-04-13 14:59       ` Marcelo Tosatti
  2010-04-14  2:14         ` Xiao Guangrong
  0 siblings, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-13 14:59 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Tue, Apr 13, 2010 at 09:34:14AM +0800, Xiao Guangrong wrote:
> 
> 
> Marcelo Tosatti wrote:
> 
> >> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
> >>  		for_each_sp(pages, sp, parents, i) {
> >>  			kvm_mmu_zap_page(kvm, sp);
> >>  			mmu_pages_clear_parents(&parents);
> >> +			zapped++;
> >>  		}
> >> -		zapped += pages.nr;
> >>  		kvm_mmu_pages_init(parent, &parents, &pages);
> >>  	}
> > 
> > Don't see why this is needed? The for_each_sp loop uses pvec.nr.
> 
> I think mmu_zap_unsync_children() should return the number of zapped pages so that we
> can adjust the number of free pages in kvm_mmu_change_mmu_pages(), but pages.nr not
> only includes the unsync/zapped pages but also includes their parents.

Oh, I see. I think it's safer to check for list_empty than to rely on
proper accounting there, like __kvm_mmu_free_some_pages does.



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-13  1:53     ` Xiao Guangrong
  2010-04-13 11:58       ` Avi Kivity
@ 2010-04-13 15:01       ` Marcelo Tosatti
  2010-04-14  3:23         ` Xiao Guangrong
  1 sibling, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-13 15:01 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Tue, Apr 13, 2010 at 09:53:07AM +0800, Xiao Guangrong wrote:
> 
> 
> Marcelo Tosatti wrote:
> 
> > Xiao,
> > 
> > Did you actually see this codepath as being performance sensitive? 
> 
> Actually, I have not run benchmarks to compare the performance before and
> after this patch.
> 
> > 
> > I'd prefer to not touch it.
> 
> This patch avoids walking all parents, and I think that overhead is really unnecessary.
> Or are there other tricks in this codepath that I have not noticed? :-)

My point is that there is no point in optimizing something unless it is
performance sensitive. And as I recall, mmu_unsync_walk was much more
performance sensitive than parent walking. Actually, gfn_to_memslot
seems more important since it is also noticeable on EPT/NPT hosts.
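
For reference, the memslot lookup of that era was essentially a linear scan
over the slot array, which is why it is visible even on EPT/NPT hosts. A rough
sketch with approximate field names, illustrative only:

static struct kvm_memory_slot *gfn_to_memslot_sketch(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_memslots *slots = kvm->memslots;

	/* O(nr_slots) walk on every lookup that reaches this path. */
	for (i = 0; i < slots->nmemslots; ++i) {
		struct kvm_memory_slot *slot = &slots->memslots[i];

		if (gfn >= slot->base_gfn &&
		    gfn < slot->base_gfn + slot->npages)
			return slot;
	}
	return NULL;
}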


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-13 14:59       ` Marcelo Tosatti
@ 2010-04-14  2:14         ` Xiao Guangrong
  2010-04-14 16:31           ` Marcelo Tosatti
  0 siblings, 1 reply; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-14  2:14 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Marcelo Tosatti wrote:
> On Tue, Apr 13, 2010 at 09:34:14AM +0800, Xiao Guangrong wrote:
>>
>> Marcelo Tosatti wrote:
>>
>>>> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>>>  		for_each_sp(pages, sp, parents, i) {
>>>>  			kvm_mmu_zap_page(kvm, sp);
>>>>  			mmu_pages_clear_parents(&parents);
>>>> +			zapped++;
>>>>  		}
>>>> -		zapped += pages.nr;
>>>>  		kvm_mmu_pages_init(parent, &parents, &pages);
>>>>  	}
>>> Don't see why this is needed? The for_each_sp loop uses pvec.nr.
>> I think mmu_zap_unsync_children() should return the number of zapped pages so that we
>> can adjust the number of free pages in kvm_mmu_change_mmu_pages(), but pages.nr not
>> only includes the unsync/zapped pages but also includes their parents.
> 
> Oh, I see. I think it's safer to check for list_empty than to rely on
> proper accounting there, like __kvm_mmu_free_some_pages does.

Do you mean that we should add a WARN_ON(list_empty()) check in kvm_mmu_zap_page()?

Thanks,
Xiao

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-13 15:01       ` Marcelo Tosatti
@ 2010-04-14  3:23         ` Xiao Guangrong
  2010-04-14  3:58           ` Xiao Guangrong
  2010-04-14 16:35           ` Marcelo Tosatti
  0 siblings, 2 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-14  3:23 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Marcelo Tosatti wrote:

>>> I'd prefer to not touch it.
>> This patch avoids walking all parents, and I think that overhead is really unnecessary.
>> Or are there other tricks in this codepath that I have not noticed? :-)
> 
> My point is that there is no point in optimizing something unless it is
> performance sensitive.

Hi Marcelo,

I think optimizing means not only 'performance' but also 'smaller code' (maybe 'cleanup'
is more suitable) and 'simpler logic' (doing less work). I'm not sure whether this patch
noticeably improves system performance, but it streamlines the code logic, reduces code
size, and does not harm other code or system performance, right? :-)

Actually, the original code has a bug; see this code segment in mmu_parent_walk():

|	if (!sp->multimapped && sp->parent_pte) {
|		......
|		return;
|	}
|	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
|		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
|			......
|		}

So, if sp->parent_pte == NULL, it's unsafe...

> And as I recall, mmu_unsync_walk was much more
> performance sensitive than parent walking. Actually, gfn_to_memslot
> seems more important since it is also noticeable on EPT/NPT hosts.

Yeah, I also noticed these, and I'm looking into that code.

Thanks,
Xiao

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-14  3:23         ` Xiao Guangrong
@ 2010-04-14  3:58           ` Xiao Guangrong
  2010-04-14 16:35           ` Marcelo Tosatti
  1 sibling, 0 replies; 36+ messages in thread
From: Xiao Guangrong @ 2010-04-14  3:58 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Xiao Guangrong wrote:
> 
> Actually, the original code has a bug; see this code segment in mmu_parent_walk():
> 
> |	if (!sp->multimapped && sp->parent_pte) {
> |		......
> |		return;
> |	}
> |	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
> |		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
> |			......
> |		}
> 
> So, if sp->parent_pte == NULL, it's unsafe...

Marcelo, please ignore this; it is not a bug, just my mistake, sorry...

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path
  2010-04-14  2:14         ` Xiao Guangrong
@ 2010-04-14 16:31           ` Marcelo Tosatti
  0 siblings, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-14 16:31 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Wed, Apr 14, 2010 at 10:14:29AM +0800, Xiao Guangrong wrote:
> 
> 
> Marcelo Tosatti wrote:
> > On Tue, Apr 13, 2010 at 09:34:14AM +0800, Xiao Guangrong wrote:
> >>
> >> Marcelo Tosatti wrote:
> >>
> >>>> @@ -1483,8 +1483,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
> >>>>  		for_each_sp(pages, sp, parents, i) {
> >>>>  			kvm_mmu_zap_page(kvm, sp);
> >>>>  			mmu_pages_clear_parents(&parents);
> >>>> +			zapped++;
> >>>>  		}
> >>>> -		zapped += pages.nr;
> >>>>  		kvm_mmu_pages_init(parent, &parents, &pages);
> >>>>  	}
> >>> Don't see why this is needed? The for_each_sp loop uses pvec.nr.
> >> I think mmu_zap_unsync_children() should return the number of zapped pages so that we
> >> can adjust the number of free pages in kvm_mmu_change_mmu_pages(), but pages.nr not
> >> only includes the unsync/zapped pages but also includes their parents.
> > 
> > Oh, I see. I think it's safer to check for list_empty than to rely on
> > proper accounting there, like __kvm_mmu_free_some_pages does.
> 
> Do you mean that we should add a WARN_ON(list_empty()) check in kvm_mmu_zap_page()?

Just break out of the loop if
list_empty(&vcpu->kvm->arch.active_mmu_pages).
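
Roughly this shape in kvm_mmu_change_mmu_pages() -- a minimal sketch combining
the hunk from this series with the list_empty() guard, illustrative only, not a
tested replacement:

	while (used_pages > kvm_nr_mmu_pages) {
		struct kvm_mmu_page *page;

		/* Stop when nothing is left to zap instead of trusting the count. */
		if (list_empty(&kvm->arch.active_mmu_pages))
			break;

		page = container_of(kvm->arch.active_mmu_pages.prev,
				    struct kvm_mmu_page, link);
		used_pages -= kvm_mmu_zap_page(kvm, page);
		used_pages--;
	}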


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync
  2010-04-14  3:23         ` Xiao Guangrong
  2010-04-14  3:58           ` Xiao Guangrong
@ 2010-04-14 16:35           ` Marcelo Tosatti
  1 sibling, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2010-04-14 16:35 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Wed, Apr 14, 2010 at 11:23:38AM +0800, Xiao Guangrong wrote:
> 
> 
> Marcelo Tosatti wrote:
> 
> >>> I'd prefer to not touch it.
> >> This patch avoids walking all parents, and I think that overhead is really unnecessary.
> >> Or are there other tricks in this codepath that I have not noticed? :-)
> > 
> > My point is that there is no point in optimizing something unless it is
> > performance sensitive.
> 
> Hi Marcelo,
> 
> I think optimizing means not only 'performance' but also 'smaller code' (maybe 'cleanup'
> is more suitable) and 'simpler logic' (doing less work). I'm not sure whether this patch
> noticeably improves system performance, but it streamlines the code logic, reduces code
> size, and does not harm other code or system performance, right? :-)

Right, but this walking code is already compact and stable. Removing the
unused variables/definitions is fine, but I'd prefer not to change
the logic just for the sake of code reduction.

> Actually, the original code has a bug; see this code segment in mmu_parent_walk():
> 
> |	if (!sp->multimapped && sp->parent_pte) {
> |		......
> |		return;
> |	}
> |	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
> |		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
> |			......
> |		}
> 
> So, if sp->parent_pte == NULL, it's unsafe...
> 
> > And as I recall, mmu_unsync_walk was much more
> > performance sensitive than parent walking. Actually, gfn_to_memslot
> > seems more important since it is also noticeable on EPT/NPT hosts.
> 
> Yeah, I also noticed these, and I'm looking into that code.

Great.


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2010-04-14 16:59 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-12  7:59 [PATCH 1/6] KVM MMU: remove unused struct Xiao Guangrong
2010-04-12  8:01 ` [PATCH 2/6] KVM MMU: fix kvm_mmu_zap_page() and its calling path Xiao Guangrong
2010-04-12  8:24   ` Avi Kivity
2010-04-12  8:53     ` Xiao Guangrong
2010-04-12  9:08       ` Avi Kivity
2010-04-12  9:22         ` Xiao Guangrong
2010-04-12 10:25           ` Avi Kivity
2010-04-12 12:22             ` Xiao Guangrong
2010-04-12 12:49               ` Avi Kivity
2010-04-12 17:10   ` Marcelo Tosatti
2010-04-13  1:34     ` Xiao Guangrong
2010-04-13 14:59       ` Marcelo Tosatti
2010-04-14  2:14         ` Xiao Guangrong
2010-04-14 16:31           ` Marcelo Tosatti
2010-04-12  8:02 ` [PATCH 3/6] KVM MMU: optimize/cleanup for marking parent unsync Xiao Guangrong
2010-04-12  8:32   ` Avi Kivity
2010-04-12  8:55     ` Xiao Guangrong
2010-04-12 17:12   ` Marcelo Tosatti
2010-04-13  1:53     ` Xiao Guangrong
2010-04-13 11:58       ` Avi Kivity
2010-04-13 15:01       ` Marcelo Tosatti
2010-04-14  3:23         ` Xiao Guangrong
2010-04-14  3:58           ` Xiao Guangrong
2010-04-14 16:35           ` Marcelo Tosatti
2010-04-12  8:03 ` [PATCH 4/6] KVM MMU: optimize for writing cr4 Xiao Guangrong
2010-04-12  8:34   ` Avi Kivity
2010-04-12 10:42     ` Xiao Guangrong
2010-04-12 11:22       ` Avi Kivity
2010-04-13  3:07         ` Xiao Guangrong
2010-04-13  6:42           ` Avi Kivity
2010-04-12  8:05 ` [PATCH 5/6] KVM MMU: reduce kvm_mmu_page size Xiao Guangrong
2010-04-12  8:36   ` Avi Kivity
2010-04-12 11:11     ` Xiao Guangrong
2010-04-12  8:06 ` [PATCH 6/6] KVM MMU: optimize synchronization shadow pages Xiao Guangrong
2010-04-12  8:43   ` Avi Kivity
2010-04-12 11:14     ` Xiao Guangrong
