* [PATCH v3 0/5] KVM: MMU: unify and cleanup the code of walking pte list
From: Xiao Guangrong @ 2013-02-05  8:52 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Gleb Natapov, LKML, KVM

The current code has two ways to walk a pte_list: one is pte_list_walk, the
other is rmap_get_first/rmap_get_next, and both implement the same logic.
This patchset unifies the code and makes it tidier.
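
For background, both walkers interpret the same encoding: a pte_list is a
single spte pointer when its low bit is clear, or a tagged pointer to a chain
of pte_list_desc when the low bit is set. A minimal user-space sketch of that
encoding and of a walk over it (illustrative only; PTE_LIST_EXT's value and
the struct layout are simplified, not the kernel's definitions):

#include <stdio.h>
#include <stdint.h>

#define PTE_LIST_EXT 4	/* illustrative size, not necessarily the kernel's */

struct pte_list_desc {
	uint64_t *sptes[PTE_LIST_EXT];	/* packed from index 0 */
	struct pte_list_desc *more;	/* next descriptor in the chain */
};

/* Visit every spte linked on a pte_list, whichever encoding is in use. */
static void walk_pte_list(unsigned long pte_list, void (*fn)(uint64_t *))
{
	struct pte_list_desc *desc;
	int i;

	if (!pte_list)
		return;				/* empty list */

	if (!(pte_list & 1)) {
		fn((uint64_t *)pte_list);	/* single spte stored directly */
		return;
	}

	desc = (struct pte_list_desc *)(pte_list & ~1ul);
	while (desc) {
		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; i++)
			fn(desc->sptes[i]);
		desc = desc->more;
	}
}

static void print_spte(uint64_t *sptep)
{
	printf("spte %p = %#llx\n", (void *)sptep, (unsigned long long)*sptep);
}

int main(void)
{
	uint64_t a = 0x111, b = 0x222;
	struct pte_list_desc d = { .sptes = { &a, &b }, .more = NULL };
	unsigned long single = (unsigned long)&a;	/* low bit clear */
	unsigned long multi = (unsigned long)&d | 1ul;	/* low bit set */

	walk_pte_list(single, print_spte);
	walk_pte_list(multi, print_spte);
	return 0;
}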

Patch 1: KVM: MMU: introduce mmu_spte_establish, which eliminates the
difference between walking the parent pte list and the rmap, preparing for
the later patches.

Patch 2: KVM: MMU: clarify the logic in kvm_set_pte_rmapp, which prepares for
the next patch; no logic is changed.

Patch 3: KVM: MMU: unify the code of walking pte list, which merges the two
walking implementations into one.

Patch 4: KVM: MMU: fix spte assertion, which fixes a minor bug and removes
duplicate code.

Patch 5: KVM: MMU: fast drop all spte on the pte_list, which optimizes
dropping all sptes on an rmap and removes the "goto restart" pattern
introduced by Patch 3.

Marcelo, Gleb, please apply them after applying the patchset of
[PATCH v3 0/3] KVM: MMU: simple cleanups.

Changelog:
v3:
- address Gleb's comments, remove the remaining "goto restart" in
  kvm_set_pte_rmapp
- improve the changelog

Thanks!



* [PATCH v3 1/5] KVM: MMU: introduce mmu_spte_establish
From: Xiao Guangrong @ 2013-02-05  8:53 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, Gleb Natapov, LKML, KVM

There is a small difference between walking the parent pte list and walking
the rmap: every spte on an rmap must be present, but that is not true for the
parent pte list; in kvm_mmu_alloc_page, we always link the parent list before
setting the spte to present.

This patch introduces mmu_spte_establish, which sets the spte before linking
it into the parent list. That eliminates the difference and makes it possible
to unify the code that walks a pte list.
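
In other words, after this change every entry on a parent pte list, like
every entry on an rmap, points at a present spte, so one walker can assert
presence for both. A toy stand-alone illustration of the ordering (made-up
helpers, not the kernel API):

#include <stdio.h>
#include <stdint.h>

#define PRESENT 1ULL

static uint64_t spte;			/* the spte being installed */
static uint64_t *parent_list[1];	/* simplified one-entry parent list */
static int linked;

static void link_to_parent_list(uint64_t *sptep) { parent_list[linked++] = sptep; }

static void check_list(const char *when)
{
	for (int i = 0; i < linked; i++)
		printf("%s: entry %d present=%d\n", when, i,
		       (int)(*parent_list[i] & PRESENT));
}

int main(void)
{
	/* Old order: link first, then make the spte present - a walker
	 * running in between sees a non-present spte on the list. */
	link_to_parent_list(&spte);
	check_list("old order, before spte is set");
	spte = PRESENT;

	/* New order (mmu_spte_establish-style): make the spte present
	 * first, then link it, so list entries always point at present
	 * sptes. */
	spte = 0; linked = 0;
	spte = PRESENT;
	link_to_parent_list(&spte);
	check_list("new order");
	return 0;
}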

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c         |   81 ++++++++++++++++++++++---------------------
 arch/x86/kvm/paging_tmpl.h |   16 ++++-----
 2 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8041454..68d4d5f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1482,9 +1482,6 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
 static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
 				    struct kvm_mmu_page *sp, u64 *parent_pte)
 {
-	if (!parent_pte)
-		return;
-
 	pte_list_add(vcpu, parent_pte, &sp->parent_ptes);
 }

@@ -1502,7 +1499,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
 }

 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
-					       u64 *parent_pte, int direct)
+					       int direct)
 {
 	struct kvm_mmu_page *sp;
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
@@ -1512,7 +1509,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	sp->parent_ptes = 0;
-	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	return sp;
 }
@@ -1845,8 +1841,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gva_t gaddr,
 					     unsigned level,
 					     int direct,
-					     unsigned access,
-					     u64 *parent_pte)
+					     unsigned access)
 {
 	union kvm_mmu_page_role role;
 	unsigned quadrant;
@@ -1876,19 +1871,15 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
 			break;

-		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
-		if (sp->unsync_children) {
+		if (sp->unsync_children)
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-			kvm_mmu_mark_parents_unsync(sp);
-		} else if (sp->unsync)
-			kvm_mmu_mark_parents_unsync(sp);

 		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
 	++vcpu->kvm->stat.mmu_cache_miss;
-	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
+	sp = kvm_mmu_alloc_page(vcpu, direct);
 	if (!sp)
 		return sp;
 	sp->gfn = gfn;
@@ -1908,6 +1899,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	return sp;
 }

+static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
+{
+	u64 spte;
+
+	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+
+	mmu_spte_set(sptep, spte);
+}
+
+static struct kvm_mmu_page *
+mmu_spte_establish(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, gva_t gaddr,
+		   unsigned level, int direct, unsigned access)
+{
+	struct kvm_mmu_page *sp;
+
+	WARN_ON(is_shadow_present_pte(*spte));
+
+	sp = kvm_mmu_get_page(vcpu, gfn, gaddr, level, direct, access);
+
+	link_shadow_page(spte, sp);
+	mmu_page_add_parent_pte(vcpu, sp, spte);
+
+	if (sp->unsync_children || sp->unsync)
+		kvm_mmu_mark_parents_unsync(sp);
+
+	return sp;
+}
+
 static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 			     struct kvm_vcpu *vcpu, u64 addr)
 {
@@ -1957,16 +1977,6 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	return __shadow_walk_next(iterator, *iterator->sptep);
 }

-static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
-{
-	u64 spte;
-
-	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
-	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
-
-	mmu_spte_set(sptep, spte);
-}
-
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 				   unsigned direct_access)
 {
@@ -2023,11 +2033,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 		mmu_page_zap_pte(kvm, sp, sp->spt + i);
 }

-static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
-{
-	mmu_page_remove_parent_pte(sp, parent_pte);
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *sptep;
@@ -2582,9 +2587,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			bool prefault)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	struct kvm_mmu_page *sp;
 	int emulate = 0;
-	gfn_t pseudo_gfn;

 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
@@ -2602,12 +2605,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			u64 base_addr = iterator.addr;

 			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
-			pseudo_gfn = base_addr >> PAGE_SHIFT;
-			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
-					      iterator.level - 1,
-					      1, ACC_ALL, iterator.sptep);
+			mmu_spte_establish(vcpu, iterator.sptep,
+					   gpa_to_gfn(base_addr),
+					   iterator.addr, iterator.level - 1,
+					   1, ACC_ALL);

-			link_shadow_page(iterator.sptep, sp);
 		}
 	}
 	return emulate;
@@ -2926,7 +2928,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		kvm_mmu_free_some_pages(vcpu);
 		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
-				      1, ACC_ALL, NULL);
+				      1, ACC_ALL);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@@ -2939,8 +2941,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			kvm_mmu_free_some_pages(vcpu);
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 					      i << 30,
-					      PT32_ROOT_LEVEL, 1, ACC_ALL,
-					      NULL);
+					      PT32_ROOT_LEVEL, 1, ACC_ALL);
 			root = __pa(sp->spt);
 			++sp->root_count;
 			spin_unlock(&vcpu->kvm->mmu_lock);
@@ -2977,7 +2978,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		kvm_mmu_free_some_pages(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
-				      0, ACC_ALL, NULL);
+				      0, ACC_ALL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3012,7 +3013,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		kvm_mmu_free_some_pages(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, 0,
-				      ACC_ALL, NULL);
+				      ACC_ALL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 105dd5b..3605ff7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -434,8 +434,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		sp = NULL;
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
-			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-					      false, access, it.sptep);
+			sp = mmu_spte_establish(vcpu, it.sptep, table_gfn,
+						addr, it.level-1, false,
+						access);
 		}

 		/*
@@ -444,9 +445,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		 */
 		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
 			goto out_gpte_changed;
-
-		if (sp)
-			link_shadow_page(it.sptep, sp);
 	}

 	for (;
@@ -464,9 +462,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,

 		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);

-		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
-				      true, direct_access, it.sptep);
-		link_shadow_page(it.sptep, sp);
+		mmu_spte_establish(vcpu, it.sptep, direct_gfn, addr,
+				   it.level-1, true, direct_access);
 	}

 	clear_sp_write_flooding_count(it.sptep);
@@ -478,7 +475,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,

 out_gpte_changed:
 	if (sp)
-		kvm_mmu_put_page(sp, it.sptep);
+		drop_parent_pte(sp, it.sptep);
+
 	kvm_release_pfn_clean(pfn);
 	return 0;
 }
-- 
1.7.7.6



* [PATCH v3 2/5] KVM: MMU: clarify the logic in kvm_set_pte_rmapp
From: Xiao Guangrong @ 2013-02-05  8:53 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, Gleb Natapov, LKML, KVM

In kvm_set_pte_rmapp, if the new mapping is writable, we need to remove all
sptes pointing to that page; otherwise we only need to adjust the sptes to
point to the new page. This patch clarifies the logic and makes the later
patch cleaner.

[ Impact: no logic changed ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c |   17 ++++++++---------
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 68d4d5f..a0dc0d7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1225,16 +1225,16 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	WARN_ON(pte_huge(*ptep));
 	new_pfn = pte_pfn(*ptep);

-	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
-		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+	if (pte_write(*ptep))
+		need_flush = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+	else
+		for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+			BUG_ON(!is_shadow_present_pte(*sptep));
+			rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n",
+				    sptep, *sptep);

-		need_flush = 1;
+			need_flush = 1;

-		if (pte_write(*ptep)) {
-			drop_spte(kvm, sptep);
-			sptep = rmap_get_first(*rmapp, &iter);
-		} else {
 			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
 			new_spte |= (u64)new_pfn << PAGE_SHIFT;

@@ -1246,7 +1246,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			mmu_spte_set(sptep, new_spte);
 			sptep = rmap_get_next(&iter);
 		}
-	}

 	if (need_flush)
 		kvm_flush_remote_tlbs(kvm);
-- 
1.7.7.6



* [PATCH v3 3/5] KVM: MMU: unify the code of walking pte list
From: Xiao Guangrong @ 2013-02-05  8:54 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, Gleb Natapov, LKML, KVM

The current code has two ways to walk a pte_list: one is pte_list_walk, the
other is rmap_get_first/rmap_get_next, and both implement the same logic.

This patch introduces for_each_spte_in_pte_list to unify them.

[ Impact: no logic changed, most of the change is function/struct rename ]
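
The converted walkers keep one existing subtlety: the iterator's position
cannot be trusted once an spte is dropped from the list, because removal
re-packs the descriptors; that is why kvm_unmap_rmapp and
kvm_mmu_unlink_parents restart the walk after each drop. A small stand-alone
sketch of the hazard (a toy integer list, not kernel code):

#include <stdio.h>

/* Remove list[i] by moving the last entry into its slot and shrinking the
 * list - roughly how pte_list_remove compacts a descriptor. */
static void remove_at(int *list, int *len, int i)
{
	list[i] = list[*len - 1];
	(*len)--;
}

int main(void)
{
	int list[] = { 1, 2, 3, 4 };
	int len = 4;

	/* A naive walk that drops the current entry and then advances:
	 * removal moves a later entry into a slot the walk will not look
	 * at again, so entries 3 and 4 are never visited.  The walk must
	 * restart after every drop (or, as patch 5 later does, clear
	 * everything and destroy the whole list in one go). */
	for (int i = 0; i < len; i++) {
		printf("visiting %d\n", list[i]);
		remove_at(list, &len, i);
	}
	return 0;
}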

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c       |  178 ++++++++++++++++++++++------------------------
 arch/x86/kvm/mmu_audit.c |    5 +-
 2 files changed, 86 insertions(+), 97 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a0dc0d7..2291ea3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -945,26 +945,75 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
 	}
 }

-typedef void (*pte_list_walk_fn) (u64 *spte);
-static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * pte_list.  All fields are private and not assumed to be used outside.
+ */
+struct pte_list_iterator {
+	/* private fields */
+	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
+	int pos;			/* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function.  This should also be used after
+ * removing/dropping sptes from the pte_list link because in such cases the
+ * information in the itererator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *pte_list_get_first(unsigned long pte_list,
+			       struct pte_list_iterator *iter)
 {
-	struct pte_list_desc *desc;
-	int i;
+	if (!pte_list)
+		return NULL;

-	if (!*pte_list)
-		return;
+	if (!(pte_list & 1)) {
+		iter->desc = NULL;
+		return (u64 *)pte_list;
+	}
+
+	iter->desc = (struct pte_list_desc *)(pte_list & ~1ul);
+	iter->pos = 0;
+	return iter->desc->sptes[iter->pos];
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after pte_list_get_next().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *pte_list_get_next(struct pte_list_iterator *iter)
+{
+	if (iter->desc) {
+		if (iter->pos < PTE_LIST_EXT - 1) {
+			u64 *sptep;
+
+			++iter->pos;
+			sptep = iter->desc->sptes[iter->pos];
+			if (sptep)
+				return sptep;
+		}

-	if (!(*pte_list & 1))
-		return fn((u64 *)*pte_list);
+		iter->desc = iter->desc->more;

-	desc = (struct pte_list_desc *)(*pte_list & ~1ul);
-	while (desc) {
-		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
-			fn(desc->sptes[i]);
-		desc = desc->more;
+		if (iter->desc) {
+			iter->pos = 0;
+			/* desc->sptes[0] cannot be NULL */
+			return iter->desc->sptes[iter->pos];
+		}
 	}
+
+	return NULL;
 }

+#define for_each_spte_in_pte_list(pte_list, iter, spte)		\
+	   for (spte = pte_list_get_first(pte_list, &(iter));	\
+	      spte != NULL; spte = pte_list_get_next(&(iter)))
+
+#define for_each_spte_in_rmap(rmap, iter, spte)			\
+	   for_each_spte_in_pte_list(rmap, iter, spte)
+
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 				    struct kvm_memory_slot *slot)
 {
@@ -1016,67 +1065,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pte_list_remove(spte, rmapp);
 }

-/*
- * Used by the following functions to iterate through the sptes linked by a
- * rmap.  All fields are private and not assumed to be used outside.
- */
-struct rmap_iterator {
-	/* private fields */
-	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
-	int pos;			/* index of the sptep */
-};
-
-/*
- * Iteration must be started by this function.  This should also be used after
- * removing/dropping sptes from the rmap link because in such cases the
- * information in the itererator may not be valid.
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
-{
-	if (!rmap)
-		return NULL;
-
-	if (!(rmap & 1)) {
-		iter->desc = NULL;
-		return (u64 *)rmap;
-	}
-
-	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
-	iter->pos = 0;
-	return iter->desc->sptes[iter->pos];
-}
-
-/*
- * Must be used with a valid iterator: e.g. after rmap_get_first().
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_next(struct rmap_iterator *iter)
-{
-	if (iter->desc) {
-		if (iter->pos < PTE_LIST_EXT - 1) {
-			u64 *sptep;
-
-			++iter->pos;
-			sptep = iter->desc->sptes[iter->pos];
-			if (sptep)
-				return sptep;
-		}
-
-		iter->desc = iter->desc->more;
-
-		if (iter->desc) {
-			iter->pos = 0;
-			/* desc->sptes[0] cannot be NULL */
-			return iter->desc->sptes[iter->pos];
-		}
-	}
-
-	return NULL;
-}
-
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
 	if (mmu_spte_clear_track_bits(sptep))
@@ -1137,14 +1125,13 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 				 bool pt_protect)
 {
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;
 	bool flush = false;

-	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+	for_each_spte_in_rmap(*rmapp, iter, sptep) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));

 		flush |= spte_write_protect(kvm, sptep, pt_protect);
-		sptep = rmap_get_next(&iter);
 	}

 	return flush;
@@ -1198,15 +1185,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;
 	int need_tlb_flush = 0;

-	while ((sptep = rmap_get_first(*rmapp, &iter))) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
-
+restart:
+	for_each_spte_in_rmap(*rmapp, iter, sptep) {
 		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
+		goto restart;
 	}

 	return need_tlb_flush;
@@ -1216,7 +1202,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			     struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;
 	int need_flush = 0;
 	u64 new_spte;
 	pte_t *ptep = (pte_t *)data;
@@ -1228,7 +1214,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (pte_write(*ptep))
 		need_flush = kvm_unmap_rmapp(kvm, rmapp, slot, data);
 	else
-		for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		for_each_spte_in_rmap(*rmapp, iter, sptep) {
 			BUG_ON(!is_shadow_present_pte(*sptep));
 			rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n",
 				    sptep, *sptep);
@@ -1244,7 +1230,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,

 			mmu_spte_clear_track_bits(sptep);
 			mmu_spte_set(sptep, new_spte);
-			sptep = rmap_get_next(&iter);
 		}

 	if (need_flush)
@@ -1335,7 +1320,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
-	struct rmap_iterator uninitialized_var(iter);
+	struct pte_list_iterator uninitialized_var(iter);
 	int young = 0;

 	/*
@@ -1351,8 +1336,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		goto out;
 	}

-	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
-	     sptep = rmap_get_next(&iter)) {
+	for_each_spte_in_rmap(*rmapp, iter, sptep) {
 		BUG_ON(!is_shadow_present_pte(*sptep));

 		if (*sptep & shadow_accessed_mask) {
@@ -1371,7 +1355,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			      struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;
 	int young = 0;

 	/*
@@ -1382,8 +1366,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		goto out;

-	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
-	     sptep = rmap_get_next(&iter)) {
+	for_each_spte_in_rmap(*rmapp, iter, sptep) {
 		BUG_ON(!is_shadow_present_pte(*sptep));

 		if (*sptep & shadow_accessed_mask) {
@@ -1515,7 +1498,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 static void mark_unsync(u64 *spte);
 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
 {
-	pte_list_walk(&sp->parent_ptes, mark_unsync);
+	struct pte_list_iterator iter;
+	u64 *spte;
+
+	for_each_spte_in_pte_list(sp->parent_ptes, iter, spte)
+		mark_unsync(spte);
 }

 static void mark_unsync(u64 *spte)
@@ -2035,10 +2022,13 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;

-	while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+restart:
+	for_each_spte_in_pte_list(sp->parent_ptes, iter, sptep) {
 		drop_parent_pte(sp, sptep);
+		goto restart;
+	}
 }

 static int mmu_zap_unsync_children(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index daff69e..a08d384 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -190,15 +190,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	unsigned long *rmapp;
 	u64 *sptep;
-	struct rmap_iterator iter;
+	struct pte_list_iterator iter;

 	if (sp->role.direct || sp->unsync || sp->role.invalid)
 		return;

 	rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);

-	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
-	     sptep = rmap_get_next(&iter)) {
+	for_each_spte_in_rmap(*rmapp, iter, sptep) {
 		if (is_writable_pte(*sptep))
 			audit_printk(kvm, "shadow page has writable "
 				     "mappings: gfn %llx role %x\n",
-- 
1.7.7.6



* [PATCH v3 4/5] KVM: MMU: fix spte assertion
From: Xiao Guangrong @ 2013-02-05  8:54 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, Gleb Natapov, LKML, KVM

The PT_PRESENT_MASK bit is not enough to tell whether an spte has already
been mapped into a pte-list, because mmio sptes also set this bit. Use
is_shadow_present_pte instead to fix it.

Also, this patch moves many assertions to a common place to clean up the
code.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c |   20 ++++++--------------
 1 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2291ea3..58f813a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1009,7 +1009,9 @@ static u64 *pte_list_get_next(struct pte_list_iterator *iter)

 #define for_each_spte_in_pte_list(pte_list, iter, spte)		\
 	   for (spte = pte_list_get_first(pte_list, &(iter));	\
-	      spte != NULL; spte = pte_list_get_next(&(iter)))
+	      spte != NULL &&					\
+	      ({WARN_ON(!is_shadow_present_pte(*(spte))); 1; });\
+		   spte = pte_list_get_next(&iter))

 #define for_each_spte_in_rmap(rmap, iter, spte)			\
 	   for_each_spte_in_pte_list(rmap, iter, spte)
@@ -1128,11 +1130,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 	struct pte_list_iterator iter;
 	bool flush = false;

-	for_each_spte_in_rmap(*rmapp, iter, sptep) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-
+	for_each_spte_in_rmap(*rmapp, iter, sptep)
 		flush |= spte_write_protect(kvm, sptep, pt_protect);
-	}

 	return flush;
 }
@@ -1215,7 +1214,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		need_flush = kvm_unmap_rmapp(kvm, rmapp, slot, data);
 	else
 		for_each_spte_in_rmap(*rmapp, iter, sptep) {
-			BUG_ON(!is_shadow_present_pte(*sptep));
 			rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n",
 				    sptep, *sptep);

@@ -1336,15 +1334,12 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		goto out;
 	}

-	for_each_spte_in_rmap(*rmapp, iter, sptep) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
-
+	for_each_spte_in_rmap(*rmapp, iter, sptep)
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
 			clear_bit((ffs(shadow_accessed_mask) - 1),
 				 (unsigned long *)sptep);
 		}
-	}
 out:
 	/* @data has hva passed to kvm_age_hva(). */
 	trace_kvm_age_page(data, slot, young);
@@ -1366,14 +1361,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		goto out;

-	for_each_spte_in_rmap(*rmapp, iter, sptep) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
-
+	for_each_spte_in_rmap(*rmapp, iter, sptep)
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
 			break;
 		}
-	}
 out:
 	return young;
 }
-- 
1.7.7.6



* [PATCH v3 5/5] KVM: MMU: fast drop all spte on the pte_list
From: Xiao Guangrong @ 2013-02-05  8:55 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, Gleb Natapov, LKML, KVM

If a shadow page is being zapped or a host page is going to be freed, kvm
drops all the reverse mappings on the shadow page or the gfn. Currently, it
drops the reverse mappings one by one: it deletes the first reverse mapping,
then moves other reverse mappings around in the descriptor tables. When the
last descriptor table becomes empty, it is freed.

This works well if there are only a few reverse mappings, but some pte_lists
are very long; in my tracing I saw gfns with more than 200 sptes on their
pte-list (1G of guest memory on softmmu). Optimizing the drop of such long
pte-lists is worthwhile; at least it helps memslot deletion and ksm/thp page
merging.

This patch introduces a better way to handle this case: it walks all the
reverse mappings and clears them, then frees all the descriptor tables
together.
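
A rough, self-contained model of the new bulk path (hypothetical user-space
types; the real code operates on sptes and pte_list_desc in mmu.c): clear
every tracked entry in one pass, then free the descriptor chain without
re-packing it after each removal.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define DESC_EXT 4			/* illustrative, like PTE_LIST_EXT */

struct desc {
	uint64_t *sptes[DESC_EXT];
	struct desc *more;
};

/* Model of pte_list_destroy: zero the list head, then free the whole
 * descriptor chain in one sweep instead of deleting entries one by one. */
static void list_destroy(unsigned long *list)
{
	unsigned long value = *list;
	struct desc *d, *next;

	*list = 0;
	if (!(value & 1))
		return;			/* single spte: nothing to free */

	for (d = (struct desc *)(value & ~1ul); d; d = next) {
		next = d->more;
		free(d);
	}
}

int main(void)
{
	uint64_t a = 1, b = 2;
	struct desc *d = calloc(1, sizeof(*d));
	unsigned long list;
	int i;

	d->sptes[0] = &a;
	d->sptes[1] = &b;
	list = (unsigned long)d | 1ul;

	/* The caller first clears every spte it tracks... */
	for (i = 0; i < DESC_EXT && d->sptes[i]; i++)
		*d->sptes[i] = 0;
	/* ...then tears down the bookkeeping in one go. */
	list_destroy(&list);
	printf("list = %#lx, a = %llu, b = %llu\n",
	       list, (unsigned long long)a, (unsigned long long)b);
	return 0;
}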

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c |   36 +++++++++++++++++++++++++++---------
 1 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 58f813a..aa7a887 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -945,6 +945,25 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
 	}
 }

+static void pte_list_destroy(unsigned long *pte_list)
+{
+	struct pte_list_desc *desc;
+	unsigned long list_value = *pte_list;
+
+	*pte_list = 0;
+
+	if (!(list_value & 1))
+		return;
+
+	desc = (struct pte_list_desc *)(list_value & ~1ul);
+	while (desc) {
+		struct pte_list_desc *next_desc = desc->more;
+
+		mmu_free_pte_list_desc(desc);
+		desc = next_desc;
+	}
+}
+
 /*
  * Used by the following functions to iterate through the sptes linked by a
  * pte_list.  All fields are private and not assumed to be used outside.
@@ -1183,17 +1202,17 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   struct kvm_memory_slot *slot, unsigned long data)
 {
-	u64 *sptep;
 	struct pte_list_iterator iter;
+	u64 *sptep;
 	int need_tlb_flush = 0;

-restart:
 	for_each_spte_in_rmap(*rmapp, iter, sptep) {
-		drop_spte(kvm, sptep);
+		mmu_spte_clear_track_bits(sptep);
 		need_tlb_flush = 1;
-		goto restart;
 	}

+	pte_list_destroy(rmapp);
+
 	return need_tlb_flush;
 }

@@ -2016,11 +2035,10 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 	u64 *sptep;
 	struct pte_list_iterator iter;

-restart:
-	for_each_spte_in_pte_list(sp->parent_ptes, iter, sptep) {
-		drop_parent_pte(sp, sptep);
-		goto restart;
-	}
+	for_each_spte_in_pte_list(sp->parent_ptes, iter, sptep)
+		mmu_spte_clear_no_track(sptep);
+
+	pte_list_destroy(&sp->parent_ptes);
 }

 static int mmu_zap_unsync_children(struct kvm *kvm,
-- 
1.7.7.6



* Re: [PATCH v3 0/5] KVM: MMU: unify and cleanup the code of walking pte list
From: Gleb Natapov @ 2013-02-05 13:04 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, LKML, KVM

On Tue, Feb 05, 2013 at 04:52:35PM +0800, Xiao Guangrong wrote:
> The current code has two ways to walk a pte_list: one is pte_list_walk, the
> other is rmap_get_first/rmap_get_next, and both implement the same logic.
> This patchset unifies the code and makes it tidier.
> 
> Patch 1: KVM: MMU: introduce mmu_spte_establish, which eliminates the
> difference between walking the parent pte list and the rmap, preparing for
> the later patches.
> 
> Patch 2: KVM: MMU: clarify the logic in kvm_set_pte_rmapp, which prepares for
> the next patch; no logic is changed.
> 
> Patch 3: KVM: MMU: unify the code of walking pte list, which merges the two
> walking implementations into one.
> 
> Patch 4: KVM: MMU: fix spte assertion, which fixes a minor bug and removes
> duplicate code.
> 
> Patch 5: KVM: MMU: fast drop all spte on the pte_list, which optimizes
> dropping all sptes on an rmap and removes the "goto restart" pattern
> introduced by Patch 3.
> 
> Marcelo, Gleb, please apply them after applying the patchset of
> [PATCH v3 0/3] KVM: MMU: simple cleanups.
> 
> Changelog:
> v3:
> - address Gleb's comments, remove the remaining "goto restart" in
>   kvm_set_pte_rmapp
> - improve the changelog
> 
Reviewed-by: Gleb Natapov <gleb@redhat.com>

--
			Gleb.


* Re: [PATCH v3 1/5] KVM: MMU: introduce mmu_spte_establish
From: Marcelo Tosatti @ 2013-02-08 21:35 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Gleb Natapov, LKML, KVM

On Tue, Feb 05, 2013 at 04:53:19PM +0800, Xiao Guangrong wrote:
> There is a small difference between walking the parent pte list and walking
> the rmap: every spte on an rmap must be present, but that is not true for
> the parent pte list; in kvm_mmu_alloc_page, we always link the parent list
> before setting the spte to present.
> 
> This patch introduces mmu_spte_establish, which sets the spte before linking
> it into the parent list. That eliminates the difference and makes it
> possible to unify the code that walks a pte list.
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c         |   81 ++++++++++++++++++++++---------------------
>  arch/x86/kvm/paging_tmpl.h |   16 ++++-----
>  2 files changed, 48 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 8041454..68d4d5f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1482,9 +1482,6 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
>  static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
>  				    struct kvm_mmu_page *sp, u64 *parent_pte)
>  {
> -	if (!parent_pte)
> -		return;
> -
>  	pte_list_add(vcpu, parent_pte, &sp->parent_ptes);
>  }
> 
> @@ -1502,7 +1499,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
>  }
> 
>  static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
> -					       u64 *parent_pte, int direct)
> +					       int direct)
>  {
>  	struct kvm_mmu_page *sp;
>  	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
> @@ -1512,7 +1509,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>  	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>  	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
>  	sp->parent_ptes = 0;
> -	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
>  	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
>  	return sp;
>  }
> @@ -1845,8 +1841,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  					     gva_t gaddr,
>  					     unsigned level,
>  					     int direct,
> -					     unsigned access,
> -					     u64 *parent_pte)
> +					     unsigned access)
>  {
>  	union kvm_mmu_page_role role;
>  	unsigned quadrant;
> @@ -1876,19 +1871,15 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  		if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
>  			break;
> 
> -		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
> -		if (sp->unsync_children) {
> +		if (sp->unsync_children)
>  			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
> -			kvm_mmu_mark_parents_unsync(sp);
> -		} else if (sp->unsync)
> -			kvm_mmu_mark_parents_unsync(sp);
> 
>  		__clear_sp_write_flooding_count(sp);
>  		trace_kvm_mmu_get_page(sp, false);
>  		return sp;
>  	}
>  	++vcpu->kvm->stat.mmu_cache_miss;
> -	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
> +	sp = kvm_mmu_alloc_page(vcpu, direct);
>  	if (!sp)
>  		return sp;
>  	sp->gfn = gfn;
> @@ -1908,6 +1899,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  	return sp;
>  }
> 
> +static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
> +{
> +	u64 spte;
> +
> +	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
> +	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
> +
> +	mmu_spte_set(sptep, spte);
> +}
> +
> +static struct kvm_mmu_page *
> +mmu_spte_establish(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, gva_t gaddr,
> +		   unsigned level, int direct, unsigned access)
> +{
> +	struct kvm_mmu_page *sp;
> +
> +	WARN_ON(is_shadow_present_pte(*spte));
> +
> +	sp = kvm_mmu_get_page(vcpu, gfn, gaddr, level, direct, access);
> +
> +	link_shadow_page(spte, sp);
> +	mmu_page_add_parent_pte(vcpu, sp, spte);
> +
> +	if (sp->unsync_children || sp->unsync)
> +		kvm_mmu_mark_parents_unsync(sp);
> +
> +	return sp;
> +}
> +
>  static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
>  			     struct kvm_vcpu *vcpu, u64 addr)
>  {
> @@ -1957,16 +1977,6 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
>  	return __shadow_walk_next(iterator, *iterator->sptep);
>  }
> 
> -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
> -{
> -	u64 spte;
> -
> -	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
> -	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
> -
> -	mmu_spte_set(sptep, spte);
> -}
> -
>  static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  				   unsigned direct_access)
>  {
> @@ -2023,11 +2033,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
>  		mmu_page_zap_pte(kvm, sp, sp->spt + i);
>  }
> 
> -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
> -{
> -	mmu_page_remove_parent_pte(sp, parent_pte);
> -}
> -
>  static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	u64 *sptep;
> @@ -2582,9 +2587,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
>  			bool prefault)
>  {
>  	struct kvm_shadow_walk_iterator iterator;
> -	struct kvm_mmu_page *sp;
>  	int emulate = 0;
> -	gfn_t pseudo_gfn;
> 
>  	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
>  		if (iterator.level == level) {
> @@ -2602,12 +2605,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
>  			u64 base_addr = iterator.addr;
> 
>  			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
> -			pseudo_gfn = base_addr >> PAGE_SHIFT;
> -			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
> -					      iterator.level - 1,
> -					      1, ACC_ALL, iterator.sptep);
> +			mmu_spte_establish(vcpu, iterator.sptep,
> +					   gpa_to_gfn(base_addr),
> +					   iterator.addr, iterator.level - 1,
> +					   1, ACC_ALL);
> 
> -			link_shadow_page(iterator.sptep, sp);
>  		}
>  	}
>  	return emulate;
> @@ -2926,7 +2928,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
>  		spin_lock(&vcpu->kvm->mmu_lock);
>  		kvm_mmu_free_some_pages(vcpu);
>  		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
> -				      1, ACC_ALL, NULL);
> +				      1, ACC_ALL);
>  		++sp->root_count;
>  		spin_unlock(&vcpu->kvm->mmu_lock);
>  		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
> @@ -2939,8 +2941,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
>  			kvm_mmu_free_some_pages(vcpu);
>  			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
>  					      i << 30,
> -					      PT32_ROOT_LEVEL, 1, ACC_ALL,
> -					      NULL);
> +					      PT32_ROOT_LEVEL, 1, ACC_ALL);
>  			root = __pa(sp->spt);
>  			++sp->root_count;
>  			spin_unlock(&vcpu->kvm->mmu_lock);
> @@ -2977,7 +2978,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		spin_lock(&vcpu->kvm->mmu_lock);
>  		kvm_mmu_free_some_pages(vcpu);
>  		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
> -				      0, ACC_ALL, NULL);
> +				      0, ACC_ALL);
>  		root = __pa(sp->spt);
>  		++sp->root_count;
>  		spin_unlock(&vcpu->kvm->mmu_lock);
> @@ -3012,7 +3013,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		kvm_mmu_free_some_pages(vcpu);
>  		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
>  				      PT32_ROOT_LEVEL, 0,
> -				      ACC_ALL, NULL);
> +				      ACC_ALL);
>  		root = __pa(sp->spt);
>  		++sp->root_count;
>  		spin_unlock(&vcpu->kvm->mmu_lock);
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 105dd5b..3605ff7 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -434,8 +434,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  		sp = NULL;
>  		if (!is_shadow_present_pte(*it.sptep)) {
>  			table_gfn = gw->table_gfn[it.level - 2];
> -			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
> -					      false, access, it.sptep);
> +			sp = mmu_spte_establish(vcpu, it.sptep, table_gfn,
> +						addr, it.level-1, false,
> +						access);
>  		}
> 
>  		/*
> @@ -444,9 +445,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  		 */
>  		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
>  			goto out_gpte_changed;
> -
> -		if (sp)
> -			link_shadow_page(it.sptep, sp);
>  	}
> 
>  	for (;
> @@ -464,9 +462,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
> 
>  		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
> 
> -		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
> -				      true, direct_access, it.sptep);
> -		link_shadow_page(it.sptep, sp);
> +		mmu_spte_establish(vcpu, it.sptep, direct_gfn, addr,
> +				   it.level-1, true, direct_access);
>  	}
> 
>  	clear_sp_write_flooding_count(it.sptep);
> @@ -478,7 +475,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
> 
>  out_gpte_changed:
>  	if (sp)
> -		kvm_mmu_put_page(sp, it.sptep);
> +		drop_parent_pte(sp, it.sptep);
> +

Unclear why a TLB flush is not necessary here (the entry could have been
added to a remote CPU's TLB since it has been linked).


* Re: [PATCH v3 5/5] KVM: MMU: fast drop all spte on the pte_list
From: Marcelo Tosatti @ 2013-02-08 21:48 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Gleb Natapov, LKML, KVM

On Tue, Feb 05, 2013 at 04:55:37PM +0800, Xiao Guangrong wrote:
> If a shadow page is being zapped or a host page is going to be freed, kvm
> drops all the reverse mappings on the shadow page or the gfn. Currently, it
> drops the reverse mappings one by one: it deletes the first reverse mapping,
> then moves other reverse mappings around in the descriptor tables. When the
> last descriptor table becomes empty, it is freed.
> 
> This works well if there are only a few reverse mappings, but some pte_lists
> are very long; in my tracing I saw gfns with more than 200 sptes on their
> pte-list (1G of guest memory on softmmu). Optimizing the drop of such long
> pte-lists is worthwhile; at least it helps memslot deletion and ksm/thp
> page merging.
> 
> This patch introduces a better way to handle this case: it walks all the
> reverse mappings and clears them, then frees all the descriptor tables
> together.
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c |   36 +++++++++++++++++++++++++++---------
>  1 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 58f813a..aa7a887 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -945,6 +945,25 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
>  	}
>  }
> 
> +static void pte_list_destroy(unsigned long *pte_list)
> +{
> +	struct pte_list_desc *desc;
> +	unsigned long list_value = *pte_list;
> +
> +	*pte_list = 0;
> +
> +	if (!(list_value & 1))
> +		return;
> +
> +	desc = (struct pte_list_desc *)(list_value & ~1ul);
> +	while (desc) {
> +		struct pte_list_desc *next_desc = desc->more;
> +
> +		mmu_free_pte_list_desc(desc);
> +		desc = next_desc;
> +	}
> +}
> +
>  /*
>   * Used by the following functions to iterate through the sptes linked by a
>   * pte_list.  All fields are private and not assumed to be used outside.
> @@ -1183,17 +1202,17 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
>  static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  			   struct kvm_memory_slot *slot, unsigned long data)
>  {
> -	u64 *sptep;
>  	struct pte_list_iterator iter;
> +	u64 *sptep;
>  	int need_tlb_flush = 0;
> 
> -restart:
>  	for_each_spte_in_rmap(*rmapp, iter, sptep) {
> -		drop_spte(kvm, sptep);
> +		mmu_spte_clear_track_bits(sptep);
>  		need_tlb_flush = 1;
> -		goto restart;
>  	}
> 
> +	pte_list_destroy(rmapp);
> +
>  	return need_tlb_flush;
>  }
> 
> @@ -2016,11 +2035,10 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
>  	u64 *sptep;
>  	struct pte_list_iterator iter;
> 
> -restart:
> -	for_each_spte_in_pte_list(sp->parent_ptes, iter, sptep) {
> -		drop_parent_pte(sp, sptep);
> -		goto restart;
> -	}
> +	for_each_spte_in_pte_list(sp->parent_ptes, iter, sptep)
> +		mmu_spte_clear_no_track(sptep);
> +
> +	pte_list_destroy(&sp->parent_ptes);
>  }

Do we lose the crash information of pte_list_remove? 
It has been shown to be useful in several cases.


