linux-kernel.vger.kernel.org archive mirror
* [PATCH 1/4] KVM: MMU: rename 'sp->root_count' to 'sp->active_count'
@ 2010-09-20 14:18 Xiao Guangrong
  2010-09-20 14:19 ` [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock' Xiao Guangrong
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-20 14:18 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

Currently, sp->root_count is only used by root sps. A later patch will also increase
it to keep unsync sps alive while they are outside 'kvm->mmu_lock''s protection, so
rename the field to match its new use.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    7 ++++++-
 arch/x86/kvm/mmu.c              |   20 ++++++++++----------
 arch/x86/kvm/mmutrace.h         |    8 ++++----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8c5779d..55abc76 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -206,7 +206,12 @@ struct kvm_mmu_page {
 	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	bool multimapped;         /* More than one parent_pte? */
 	bool unsync;
-	int root_count;          /* Currently serving as active root */
+	/*
+	 * If active_count > 0, this page must not be freed immediately:
+	 * it is in use either as an active root or as an unsync page
+	 * that is currently outside kvm->mmu_lock's protection.
+	 */
+	int active_count;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3ce56bf..839852d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1683,7 +1683,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 		unaccount_shadowed(kvm, sp->gfn);
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm, sp);
-	if (!sp->root_count) {
+	if (!sp->active_count) {
 		/* Count self */
 		ret++;
 		list_move(&sp->link, invalid_list);
@@ -1709,7 +1709,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
-		WARN_ON(!sp->role.invalid || sp->root_count);
+		WARN_ON(!sp->role.invalid || sp->active_count);
 		kvm_mmu_free_page(kvm, sp);
 	} while (!list_empty(invalid_list));
 
@@ -2326,8 +2326,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 		sp = page_header(root);
-		--sp->root_count;
-		if (!sp->root_count && sp->role.invalid) {
+		--sp->active_count;
+		if (!sp->active_count && sp->role.invalid) {
 			kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
 			kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 		}
@@ -2341,8 +2341,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		if (root) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = page_header(root);
-			--sp->root_count;
-			if (!sp->root_count && sp->role.invalid)
+			--sp->active_count;
+			if (!sp->active_count && sp->role.invalid)
 				kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 							 &invalid_list);
 		}
@@ -2375,7 +2375,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		kvm_mmu_free_some_pages(vcpu);
 		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
 				      1, ACC_ALL, NULL);
-		++sp->root_count;
+		++sp->active_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
 	} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
@@ -2389,7 +2389,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 					      PT32_ROOT_LEVEL, 1, ACC_ALL,
 					      NULL);
 			root = __pa(sp->spt);
-			++sp->root_count;
+			++sp->active_count;
 			spin_unlock(&vcpu->kvm->mmu_lock);
 			vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
 			vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
@@ -2426,7 +2426,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
 				      0, ACC_ALL, NULL);
 		root = __pa(sp->spt);
-		++sp->root_count;
+		++sp->active_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu.root_hpa = root;
 		return 0;
@@ -2461,7 +2461,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 				      PT32_ROOT_LEVEL, 0,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
-		++sp->root_count;
+		++sp->active_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 
 		vcpu->arch.mmu.pae_root[i] = root | pm_mask;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index b60b4fd..70c8bfd 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -10,13 +10,13 @@
 #define KVM_MMU_PAGE_FIELDS \
 	__field(__u64, gfn) \
 	__field(__u32, role) \
-	__field(__u32, root_count) \
+	__field(__u32, active_count) \
 	__field(bool, unsync)
 
 #define KVM_MMU_PAGE_ASSIGN(sp)			     \
 	__entry->gfn = sp->gfn;			     \
 	__entry->role = sp->role.word;		     \
-	__entry->root_count = sp->root_count;        \
+	__entry->active_count = sp->active_count;        \
 	__entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
@@ -29,7 +29,7 @@
 	role.word = __entry->role;					\
 									\
 	trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s"		\
-			 " %snxe root %u %s%c",				\
+			 " %snxe active %u %s%c",			\
 			 __entry->gfn, role.level,			\
 			 role.cr4_pae ? " pae" : "",			\
 			 role.quadrant,					\
@@ -37,7 +37,7 @@
 			 access_str[role.access],			\
 			 role.invalid ? " invalid" : "",		\
 			 role.nxe ? "" : "!",				\
-			 __entry->root_count,				\
+			 __entry->active_count,				\
 			 __entry->unsync ? "unsync" : "sync", 0);	\
 	ret;								\
 		})
-- 
1.7.0.4

* [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock'
  2010-09-20 14:18 [PATCH 1/4] KVM: MMU: rename 'sp->root_count' to 'sp->active_count' Xiao Guangrong
@ 2010-09-20 14:19 ` Xiao Guangrong
  2010-09-20 15:19   ` Avi Kivity
  2010-09-20 14:20 ` [PATCH 3/4] KVM: MMU: move reserved bits check to FNAME(update_pte) Xiao Guangrong
  2010-09-20 14:21 ` [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write() Xiao Guangrong
  2 siblings, 1 reply; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-20 14:19 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

This allows an unsync sp to be kept alive while it is outside mmu_lock's protection;
later we can use kvm_mmu_free_page() to free it once sp->active_count drops to zero.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   16 ++++++++++------
 1 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 839852d..4b7af3f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -996,7 +996,6 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
 	hlist_del(&sp->hash_link);
-	list_del(&sp->link);
 	__free_page(virt_to_page(sp->spt));
 	if (!sp->role.direct)
 		__free_page(virt_to_page(sp->gfns));
@@ -1681,9 +1680,8 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	kvm_mmu_unlink_parents(kvm, sp);
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
-	if (sp->unsync)
-		kvm_unlink_unsync_page(kvm, sp);
-	if (!sp->active_count) {
+
+	if (!sp->active_count || sp->unsync) {
 		/* Count self */
 		ret++;
 		list_move(&sp->link, invalid_list);
@@ -1692,6 +1690,8 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 		kvm_reload_remote_mmus(kvm);
 	}
 
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
 	sp->role.invalid = 1;
 	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
@@ -1709,8 +1709,12 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
-		WARN_ON(!sp->role.invalid || sp->active_count);
-		kvm_mmu_free_page(kvm, sp);
+		WARN_ON(!sp->role.invalid);
+		list_del(&sp->link);
+		if (sp->active_count)
+			WARN_ON(!sp->unsync);
+		else
+			kvm_mmu_free_page(kvm, sp);
 	} while (!list_empty(invalid_list));
 
 }
-- 
1.7.0.4


* [PATCH 3/4] KVM: MMU: move reserved bits check to FNAME(update_pte)
  2010-09-20 14:18 [PATCH 1/4] KVM: MMU: rename 'sp->root_count' to 'sp->active_count' Xiao Guangrong
  2010-09-20 14:19 ` [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock' Xiao Guangrong
@ 2010-09-20 14:20 ` Xiao Guangrong
  2010-09-20 14:21 ` [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write() Xiao Guangrong
  2 siblings, 0 replies; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-20 14:20 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

Move the reserved bits check into FNAME(update_pte) so that the invlpg path can use it.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> 
---
 arch/x86/kvm/mmu.c         |    3 ---
 arch/x86/kvm/paging_tmpl.h |    4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0ccb67f..9d7da39 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3003,9 +3003,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 		return;
         }
 
-	if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
-		return;
-
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (!sp->role.cr4_pae)
 		paging32_update_pte(vcpu, sp, spte, new);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ab9a594..e540118 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -311,6 +311,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	u64 new_spte;
 
 	gpte = *(const pt_element_t *)pte;
+
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+		return;
+
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
 		if (!is_present_gpte(gpte)) {
 			if (sp->unsync)
-- 
1.7.0.4


* [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write()
  2010-09-20 14:18 [PATCH 1/4] KVM: MMU: rename 'sp->root_count' to 'sp->active_count' Xiao Guangrong
  2010-09-20 14:19 ` [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock' Xiao Guangrong
  2010-09-20 14:20 ` [PATCH 3/4] KVM: MMU: move reserved bits check to FNAME(update_pte) Xiao Guangrong
@ 2010-09-20 14:21 ` Xiao Guangrong
  2010-09-20 15:24   ` Avi Kivity
  2 siblings, 1 reply; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-20 14:21 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

A gfn may have many shadow pages. When one sp needs to be synced, we
write-protect sp->gfn and sync that sp, but the other shadow pages for
the gfn are left unsync.

So, when a page fault happens on the gfn, do not touch the unsync pages;
they are only updated at invlpg/TLB-flush time.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/mmu.c              |   25 ++++++++++++++++---------
 arch/x86/kvm/paging_tmpl.h      |   34 ++++++++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55abc76..b685ecf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -420,7 +420,7 @@ struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
-	atomic_t invlpg_counter;
+	unsigned int invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4b7af3f..0ccb67f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2677,6 +2677,10 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+					  u64 gpte);
+static void mmu_release_page_from_pte_write(struct kvm_vcpu *vcpu);
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -3063,6 +3067,14 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	vcpu->arch.update_pte.pfn = pfn;
 }
 
+static void mmu_release_page_from_pte_write(struct kvm_vcpu *vcpu)
+{
+	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+		vcpu->arch.update_pte.pfn = bad_pfn;
+	}
+}
+
 static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
@@ -3095,15 +3107,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
-	int invlpg_counter;
 	bool remote_flush, local_flush, zap_page;
 
 	zap_page = remote_flush = local_flush = false;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
-
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode.  This is nearly always true
@@ -3136,8 +3145,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
-		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
@@ -3157,6 +3164,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
+		if (sp->unsync)
+			continue;
+
 		pte_size = sp->role.cr4_pae ? 8 : 4;
 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
 		misaligned |= bytes < 4;
@@ -3216,10 +3226,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
-		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
-		vcpu->arch.update_pte.pfn = bad_pfn;
-	}
+	mmu_release_page_from_pte_write(vcpu);
 }
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2bdd843..ab9a594 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -609,11 +609,13 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *sp = NULL;
+	unsigned int invlpg_counter;
 	gpa_t pte_gpa = -1;
 	int level;
-	u64 *sptep;
+	u64 gentry, *sptep = NULL;
 	int need_flush = 0;
+	bool prefetch = true;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
@@ -643,6 +645,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 				need_flush = 1;
 			} else
 				__set_spte(sptep, shadow_trap_nonpresent_pte);
+			sp->active_count++;
 			break;
 		}
 
@@ -653,16 +656,35 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 
-	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+	invlpg_counter = ++vcpu->kvm->arch.invlpg_counter;
 
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	if (pte_gpa == -1)
 		return;
 
-	if (mmu_topup_memory_caches(vcpu))
-		return;
-	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
+	if (mmu_topup_memory_caches(vcpu) ||
+	  kvm_read_guest(vcpu->kvm, pte_gpa, &gentry, sizeof(pt_element_t)))
+		prefetch = false;
+	else
+		mmu_guess_page_from_pte_write(vcpu, pte_gpa, gentry);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	sp->active_count--;
+	if (sp->role.invalid) {
+		if (!sp->active_count)
+			kvm_mmu_free_page(vcpu->kvm, sp);
+		goto unlock_exit;
+	}
+
+	if (prefetch && vcpu->kvm->arch.invlpg_counter == invlpg_counter) {
+		++vcpu->kvm->stat.mmu_pte_updated;
+		FNAME(update_pte)(vcpu, sp, sptep, &gentry);
+	}
+
+unlock_exit:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	mmu_release_page_from_pte_write(vcpu);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
-- 
1.7.0.4


* Re: [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock'
  2010-09-20 14:19 ` [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock' Xiao Guangrong
@ 2010-09-20 15:19   ` Avi Kivity
  2010-09-23  3:05     ` Xiao Guangrong
  0 siblings, 1 reply; 10+ messages in thread
From: Avi Kivity @ 2010-09-20 15:19 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, LKML, KVM

  On 09/20/2010 04:19 PM, Xiao Guangrong wrote:
> It allows keep unsync sp alive while it's out of the protection, later we can use
> kvm_mmu_free_page() to free it if !sp->active_count

Don't understand.  Of course unsync pages exist outside mmu_lock...?


>   
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 839852d..4b7af3f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -996,7 +996,6 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>   {
>   	ASSERT(is_empty_shadow_page(sp->spt));
>   	hlist_del(&sp->hash_link);
> -	list_del(&sp->link);
>   	__free_page(virt_to_page(sp->spt));
>   	if (!sp->role.direct)
>   		__free_page(virt_to_page(sp->gfns));
> @@ -1681,9 +1680,8 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>   	kvm_mmu_unlink_parents(kvm, sp);
>   	if (!sp->role.invalid&&  !sp->role.direct)
>   		unaccount_shadowed(kvm, sp->gfn);
> -	if (sp->unsync)
> -		kvm_unlink_unsync_page(kvm, sp);
> -	if (!sp->active_count) {
> +
> +	if (!sp->active_count || sp->unsync) {
>   		/* Count self */
>   		ret++;
>   		list_move(&sp->link, invalid_list);

How can you drop an active unsync page?

I'm missing something here.

> @@ -1692,6 +1690,8 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>   		kvm_reload_remote_mmus(kvm);
>   	}
>
> +	if (sp->unsync)
> +		kvm_unlink_unsync_page(kvm, sp);
>   	sp->role.invalid = 1;
>   	kvm_mmu_reset_last_pte_updated(kvm);
>   	return ret;
> @@ -1709,8 +1709,12 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
>
>   	do {
>   		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
> -		WARN_ON(!sp->role.invalid || sp->active_count);
> -		kvm_mmu_free_page(kvm, sp);
> +		WARN_ON(!sp->role.invalid);
> +		list_del(&sp->link);
> +		if (sp->active_count)
> +			WARN_ON(!sp->unsync);
> +		else
> +			kvm_mmu_free_page(kvm, sp);
>   	} while (!list_empty(invalid_list));
>
>   }


-- 
error compiling committee.c: too many arguments to function


* Re: [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write()
  2010-09-20 14:21 ` [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write() Xiao Guangrong
@ 2010-09-20 15:24   ` Avi Kivity
  2010-09-23  2:59     ` Xiao Guangrong
  0 siblings, 1 reply; 10+ messages in thread
From: Avi Kivity @ 2010-09-20 15:24 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, LKML, KVM

  On 09/20/2010 04:21 PM, Xiao Guangrong wrote:
> Gfn may have many shadow pages, when one sp need be synced, we write
> protected sp->gfn and sync this sp but we keep other shadow pages
> asynchronous
>
> So, while gfn happen page fault, let it not touches unsync page, the unsync
> page only updated at invlpg/flush TLB time
>
> @@ -3157,6 +3164,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>
>   	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
>   	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
> +		if (sp->unsync)
> +			continue;
> +
>

Not sure this is a win.  If a gpte is updated from having p=0 to p=1 (or 
permissions upgraded), we may not have an invlpg to sync the spte, since 
the hardware doesn't require it.  With this change, we may get an extra #PF.

-- 
error compiling committee.c: too many arguments to function


* Re: [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write()
  2010-09-20 15:24   ` Avi Kivity
@ 2010-09-23  2:59     ` Xiao Guangrong
  2010-09-26 13:09       ` Avi Kivity
  0 siblings, 1 reply; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-23  2:59 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM


On 09/20/2010 11:24 PM, Avi Kivity wrote:
>  On 09/20/2010 04:21 PM, Xiao Guangrong wrote:
>> Gfn may have many shadow pages, when one sp need be synced, we write
>> protected sp->gfn and sync this sp but we keep other shadow pages
>> asynchronous
>>
>> So, while gfn happen page fault, let it not touches unsync page, the
>> unsync
>> page only updated at invlpg/flush TLB time
>>
>> @@ -3157,6 +3164,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
>> gpa_t gpa,
>>
>>       mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
>>       for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
>> +        if (sp->unsync)
>> +            continue;
>> +
>>
> 
> Not sure this is a win.  If a gpte is updated from having p=0 to p=1 (or
> permissions upgraded), we may not have an invlpg to sync the spte, since
> the hardware doesn't require it.  With this change, we may get an extra
> #PF.
> 

Avi,

Thanks for your review. I think this case is not too bad, because:

1: It only impacts the local vcpu: when permissions are increased there is
   no need to send an IPI to flush the remote vcpus' TLBs, so even if we
   update the unsync spte in the kvm_mmu_pte_write() path, the #PF can
   still occur on other vcpus.

2: If the unsync sp updated in kvm_mmu_pte_write() is not currently in use
   by the vcpu, it will be synced automatically after it is loaded.

3: If the sp is in use, updating it in kvm_mmu_pte_write() avoids the extra
   #PF, but that requires two (or more) sps with the same gfn to be mapped
   in the same page table with different states (unsync/sync), which I
   think is infrequent. And even if we update it, we cannot be sure it
   will be accessed later.

So I think it is better to update the unsync sp lazily, when it is used or
at flush time. What is your opinion? :-)

* Re: [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock'
  2010-09-20 15:19   ` Avi Kivity
@ 2010-09-23  3:05     ` Xiao Guangrong
  2010-09-26 13:02       ` Avi Kivity
  0 siblings, 1 reply; 10+ messages in thread
From: Xiao Guangrong @ 2010-09-23  3:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

On 09/20/2010 11:19 PM, Avi Kivity wrote:
>  On 09/20/2010 04:19 PM, Xiao Guangrong wrote:
>> It allows keep unsync sp alive while it's out of the protection, later
>> we can use
>> kvm_mmu_free_page() to free it if !sp->active_count
> 
> Don't understand.  Of course unsync pages exist outside mmu_lock...?

Yes.

Sorry, I didn't describe it clearly in the changelog.

> 
> 
>>   diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 839852d..4b7af3f 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -996,7 +996,6 @@ static void kvm_mmu_free_page(struct kvm *kvm,
>> struct kvm_mmu_page *sp)
>>   {
>>       ASSERT(is_empty_shadow_page(sp->spt));
>>       hlist_del(&sp->hash_link);
>> -    list_del(&sp->link);
>>       __free_page(virt_to_page(sp->spt));
>>       if (!sp->role.direct)
>>           __free_page(virt_to_page(sp->gfns));
>> @@ -1681,9 +1680,8 @@ static int kvm_mmu_prepare_zap_page(struct kvm
>> *kvm, struct kvm_mmu_page *sp,
>>       kvm_mmu_unlink_parents(kvm, sp);
>>       if (!sp->role.invalid&&  !sp->role.direct)
>>           unaccount_shadowed(kvm, sp->gfn);
>> -    if (sp->unsync)
>> -        kvm_unlink_unsync_page(kvm, sp);
>> -    if (!sp->active_count) {
>> +
>> +    if (!sp->active_count || sp->unsync) {
>>           /* Count self */
>>           ret++;
>>           list_move(&sp->link, invalid_list);
> 
> How can you drop an active unsync page?
> 
> I'm missing something here.
> 

Umm, this feature is used like this:

hold mmu_lock
increase sp->active_count
release mmu_lock
......
hold mmu_lock
decrease sp->active_count
if (!sp->active_count && sp->invalid)
	kvm_mmu_free_page(sp);
......
release mmu_lock


* Re: [PATCH 2/4] KVM: MMU: support unsync sp out of the protection of 'mmu_lock'
  2010-09-23  3:05     ` Xiao Guangrong
@ 2010-09-26 13:02       ` Avi Kivity
  0 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2010-09-26 13:02 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, LKML, KVM

  On 09/23/2010 05:05 AM, Xiao Guangrong wrote:
> >
> >  How can you drop an active unsync page?
> >
> >  I'm missing something here.
> >
>
> Umm, this feature is used like this:
>
> hold mmu_lock
> increase sp->active_count
> release mmu_lock
> ......
> hold mmu_lock
> decrease sp->active_count
> if (!sp->active_count && sp->invalid)
> 	kvm_mmu_free_page(sp);
> ......
> release mmu_lock
>
>

Well, the implementation is confusing.  Maybe we should have 
mmu_ref_sp() and mmu_drop_sp() wrappers to manage the reference counts 
and call mmu_free_page() automatically.
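
For illustration, a minimal sketch of what such helpers could look like --
not part of the posted series; it assumes the caller holds kvm->mmu_lock
and uses the active_count field introduced in patch 1:

static void mmu_ref_sp(struct kvm_mmu_page *sp)
{
	/* pin the sp so it survives a later mmu_lock drop */
	sp->active_count++;
}

static void mmu_drop_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	/*
	 * drop the pin; free the sp if this was the last reference and
	 * the page was zapped (marked invalid) while it was unlocked
	 */
	if (!--sp->active_count && sp->role.invalid)
		kvm_mmu_free_page(kvm, sp);
}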

-- 
error compiling committee.c: too many arguments to function


* Re: [PATCH 4/4] KVM: MMU: Don't touch unsync sp in kvm_mmu_pte_write()
  2010-09-23  2:59     ` Xiao Guangrong
@ 2010-09-26 13:09       ` Avi Kivity
  0 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2010-09-26 13:09 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Marcelo Tosatti, LKML, KVM

  On 09/23/2010 04:59 AM, Xiao Guangrong wrote:
> On 09/20/2010 11:24 PM, Avi Kivity wrote:
> >   On 09/20/2010 04:21 PM, Xiao Guangrong wrote:
> >>  Gfn may have many shadow pages, when one sp need be synced, we write
> >>  protected sp->gfn and sync this sp but we keep other shadow pages
> >>  asynchronous
> >>
> >>  So, while gfn happen page fault, let it not touches unsync page, the
> >>  unsync
> >>  page only updated at invlpg/flush TLB time
> >>
> >>  @@ -3157,6 +3164,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
> >>  gpa_t gpa,
> >>
> >>        mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
> >>        for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
> >>  +        if (sp->unsync)
> >>  +            continue;
> >>  +
> >>
> >
> >  Not sure this is a win.  If a gpte is updated from having p=0 to p=1 (or
> >  permissions upgraded), we may not have an invlpg to sync the spte, since
> >  the hardware doesn't require it.  With this change, we may get an extra
> >  #PF.
> >
>
> Avi,
>
> Thanks for your review, i think this case is not too bad since:
>
> 1: This case only impacts local vcpu since if permissions is increased, it's
>     no need send IPT to flush remote vcpu's tlb, so even if we update unsync
>     spte in kvm_mmu_pte_write() path, the #PF still occur on other vcpus.

IIRC, the cpu will re-validate the tlb entry from the page tables before 
issuing a fault, so we won't see a spurious fault.  Not 100% sure.

For !P -> P, there won't be a tlb entry, so 100% there won't be a 
spurious fault.

> 2: If the unsync sp which is updated in kvm_mmu_pte_write() is not using by the
>     vcpu, it will sync automatically after it's loaded.

True, and this is a likely case.

> 3: If the sp is using, update this sp in kvm_mmu_pte_write() will avoid extra #PF,
>     in this case, two(or more) sps have the same gfn, there are mapped in the same
>     page table and with different kinds(unsync/sync), i thinks this case is infrequency.
>     And even we updated it, we can not sure it can be accessed latter,

If it's infrequent, then why do we optimize it?

> So, i think it's better lazily update unsync sp until it's used or the flush time,
> your opinion? :-)
>

Any performance numbers?

To me it seems that saving a possible exit is worth the extra computation.

-- 
error compiling committee.c: too many arguments to function

