* [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page
@ 2010-10-27  9:01 Xiao Guangrong
  2010-10-27  9:02 ` [PATCH 2/8] KVM: cleanup async_pf tracepoints Xiao Guangrong
                   ` (7 more replies)
  0 siblings, 8 replies; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:01 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

Tracing 'async' and '*pfn' is useless, since 'async' is always true
and '*pfn' is always 'fault_pfn'.

We can trace 'gva' and 'gfn' instead, which helps us to see the
life-cycle of an async_pf.
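
For example (values made up, ftrace task/timestamp prefix trimmed), the
life-cycle of one async_pf then shows up in the trace as something like:

	kvm_try_async_get_page: gva = 0xffff88001230d000, gfn = 0x7c05a
	kvm_async_pf_not_present: token 0x12 gva 0xffff88001230d000 not present
	kvm_async_pf_ready: token 0x12 gva 0xffff88001230d000 ready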

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c         |    2 +-
 include/trace/events/kvm.h |   12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7ab734d..7449803 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2618,7 +2618,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
 	put_page(pfn_to_page(*pfn));
 
 	if (!no_apf && can_do_async_pf(vcpu)) {
-		trace_kvm_try_async_get_page(async, *pfn);
+		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 			trace_kvm_async_pf_doublefault(gva, gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 9c2cc6a..30063c6 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -188,18 +188,20 @@ TRACE_EVENT(kvm_age_page,
 #ifdef CONFIG_KVM_ASYNC_PF
 TRACE_EVENT(
 	kvm_try_async_get_page,
-	TP_PROTO(bool async, u64 pfn),
-	TP_ARGS(async, pfn),
+	TP_PROTO(u64 gva, u64 gfn),
+	TP_ARGS(gva, gfn),
 
 	TP_STRUCT__entry(
-		__field(__u64, pfn)
+		__field(u64, gva)
+		__field(u64, gfn)
 		),
 
 	TP_fast_assign(
-		__entry->pfn = (!async) ? pfn : (u64)-1;
+		__entry->gva = gva;
+		__entry->gfn = gfn;
 		),
 
-	TP_printk("pfn %#llx", __entry->pfn)
+	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
 );
 
 TRACE_EVENT(
-- 
1.7.0.4


* [PATCH 2/8] KVM: cleanup async_pf tracepoints
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
@ 2010-10-27  9:02 ` Xiao Guangrong
  2010-10-27 10:10   ` Gleb Natapov
  2010-10-27  9:03 ` [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot Xiao Guangrong
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:02 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

Use 'DECLARE_EVENT_CLASS' to cleanup async_pf tracepoints

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 include/trace/events/kvm.h |   76 ++++++++++++++++++++-----------------------
 1 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 30063c6..7bec396 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -186,59 +186,71 @@ TRACE_EVENT(kvm_age_page,
 );
 
 #ifdef CONFIG_KVM_ASYNC_PF
-TRACE_EVENT(
-	kvm_try_async_get_page,
+DECLARE_EVENT_CLASS(kvm_async_get_page_class,
+
 	TP_PROTO(u64 gva, u64 gfn),
+
 	TP_ARGS(gva, gfn),
 
 	TP_STRUCT__entry(
-		__field(u64, gva)
+		__field(__u64, gva)
 		__field(u64, gfn)
-		),
+	),
 
 	TP_fast_assign(
 		__entry->gva = gva;
 		__entry->gfn = gfn;
-		),
+	),
 
 	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
 );
 
-TRACE_EVENT(
-	kvm_async_pf_not_present,
+DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
+
+	TP_PROTO(u64 gva, u64 gfn),
+
+	TP_ARGS(gva, gfn)
+);
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+
+	TP_PROTO(u64 gva, u64 gfn),
+
+	TP_ARGS(gva, gfn)
+);
+
+DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
+
 	TP_PROTO(u64 token, u64 gva),
+
 	TP_ARGS(token, gva),
 
 	TP_STRUCT__entry(
 		__field(__u64, token)
 		__field(__u64, gva)
-		),
+	),
 
 	TP_fast_assign(
 		__entry->token = token;
 		__entry->gva = gva;
-		),
+	),
+
+	TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
 
-	TP_printk("token %#llx gva %#llx not present", __entry->token,
-		  __entry->gva)
 );
 
-TRACE_EVENT(
-	kvm_async_pf_ready,
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
+
 	TP_PROTO(u64 token, u64 gva),
-	TP_ARGS(token, gva),
 
-	TP_STRUCT__entry(
-		__field(__u64, token)
-		__field(__u64, gva)
-		),
+	TP_ARGS(token, gva)
+);
 
-	TP_fast_assign(
-		__entry->token = token;
-		__entry->gva = gva;
-		),
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
+
+	TP_PROTO(u64 token, u64 gva),
 
-	TP_printk("token %#llx gva %#llx ready", __entry->token, __entry->gva)
+	TP_ARGS(token, gva)
 );
 
 TRACE_EVENT(
@@ -262,24 +274,6 @@ TRACE_EVENT(
 		  __entry->address, __entry->pfn)
 );
 
-TRACE_EVENT(
-	kvm_async_pf_doublefault,
-	TP_PROTO(u64 gva, u64 gfn),
-	TP_ARGS(gva, gfn),
-
-	TP_STRUCT__entry(
-		__field(u64, gva)
-		__field(u64, gfn)
-		),
-
-	TP_fast_assign(
-		__entry->gva = gva;
-		__entry->gfn = gfn;
-		),
-
-	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
-);
-
 #endif
 
 #endif /* _TRACE_KVM_MAIN_H */
-- 
1.7.0.4



* [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
  2010-10-27  9:02 ` [PATCH 2/8] KVM: cleanup async_pf tracepoints Xiao Guangrong
@ 2010-10-27  9:03 ` Xiao Guangrong
  2010-10-27 10:29   ` Gleb Natapov
  2010-10-27  9:04 ` [PATCH 4/8] KVM: avoid unnecessary wait for a async pf Xiao Guangrong
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:03 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

The 'gfn' is not recorded if the next slot is empty
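
For reference, vcpu->arch.apf.gfns[] is an open-addressed hash table that
appears to use ~0 as the free-slot marker, so the probe is meant to stop
either on a match or on the first free slot.  A stand-alone sketch of such
a lookup (simplified stand-ins for the kernel helpers, kernel-style types
assumed, not the real code):

	#define NSLOTS 64	/* stands in for roundup_pow_of_two(ASYNC_PF_PER_VCPU) */

	static u32 gfn_slot(u64 gfns[NSLOTS], u64 gfn)
	{
		u32 key = (u32)gfn & (NSLOTS - 1);	/* simplified hash */
		int i;

		/* keep probing while the slot holds some other, non-free gfn */
		for (i = 0; i < NSLOTS && gfns[key] != gfn && gfns[key] != ~0ULL; i++)
			key = (key + 1) & (NSLOTS - 1);	/* linear probe */

		/* slot of 'gfn', or the first free slot if it is absent */
		return key;
	}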

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/x86.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7876ec8..16f42ff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6207,8 +6207,8 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
 	u32 key = kvm_async_pf_hash_fn(gfn);
 
 	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
-		     (vcpu->arch.apf.gfns[key] != gfn ||
-		      vcpu->arch.apf.gfns[key] == ~0); i++)
+		     (vcpu->arch.apf.gfns[key] != gfn &&
+		      vcpu->arch.apf.gfns[key] != ~0); i++)
 		key = kvm_async_pf_next_probe(key);
 
 	return key;
-- 
1.7.0.4



* [PATCH 4/8] KVM: avoid unnecessary wait for a async pf
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
  2010-10-27  9:02 ` [PATCH 2/8] KVM: cleanup async_pf tracepoints Xiao Guangrong
  2010-10-27  9:03 ` [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot Xiao Guangrong
@ 2010-10-27  9:04 ` Xiao Guangrong
  2010-10-27 10:42   ` Gleb Natapov
  2010-10-27  9:05 ` [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete Xiao Guangrong
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:04 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

In the current code, async pf completion is checked outside of the wait context,
like this:

if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		    !vcpu->arch.apf.halted)
			r = vcpu_enter_guest(vcpu);
		else {
			......
			kvm_vcpu_block(vcpu)
			 ^- waiting until 'async_pf.done' is not empty
}
	
kvm_check_async_pf_completion(vcpu)
 ^- delete list from async_pf.done

So, if we check async pf completion first, it can be blocked at
kvm_vcpu_block

Fixed by moving the check into the wait context.
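
With the check inside the wait loop, the flow becomes roughly:

	kvm_vcpu_block(vcpu)
		for (;;) {
			prepare_to_wait(...)
			if (kvm_arch_vcpu_runnable(vcpu) ||
			    kvm_check_async_pf_completion(vcpu))
			     ^- a completed async_pf now breaks the wait directly
				break
			schedule()
		}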

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/x86.c       |    3 ---
 include/linux/kvm_host.h |    1 -
 virt/kvm/async_pf.c      |    6 ++++--
 virt/kvm/async_pf.h      |    6 ++++++
 virt/kvm/kvm_main.c      |    3 ++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16f42ff..0b2c420 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5321,8 +5321,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.request_irq_exits;
 		}
-		
-		kvm_check_async_pf_completion(vcpu);
 
 		if (signal_pending(current)) {
 			r = -EINTR;
@@ -6108,7 +6106,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted)
-		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
 		|| vcpu->arch.nmi_pending ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ee4314e..0c1b7c5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -90,7 +90,6 @@ struct kvm_async_pf {
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
-void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
 int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		       struct kvm_arch_async_pf *arch);
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 60df9e0..e213ca4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -120,13 +120,13 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 	vcpu->async_pf.queued = 0;
 }
 
-void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 {
 	struct kvm_async_pf *work;
 
 	if (list_empty_careful(&vcpu->async_pf.done) ||
 	    !kvm_arch_can_inject_async_page_present(vcpu))
-		return;
+		return false;
 
 	spin_lock(&vcpu->async_pf.lock);
 	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
@@ -142,6 +142,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	if (work->page)
 		put_page(work->page);
 	kmem_cache_free(async_pf_cache, work);
+
+	return true;
 }
 
 int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
index e7ef644..e21533c 100644
--- a/virt/kvm/async_pf.h
+++ b/virt/kvm/async_pf.h
@@ -27,10 +27,16 @@
 int kvm_async_pf_init(void);
 void kvm_async_pf_deinit(void);
 void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
+bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
+
 #else
 #define kvm_async_pf_init() (0)
 #define kvm_async_pf_deinit() do{}while(0)
 #define kvm_async_pf_vcpu_init(C) do{}while(0)
+static inline bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
 #endif
 
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 88d869e..d9aed28 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1347,7 +1347,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_arch_vcpu_runnable(vcpu)) {
+		if (kvm_arch_vcpu_runnable(vcpu) ||
+		     kvm_check_async_pf_completion(vcpu)) {
 			kvm_make_request(KVM_REQ_UNHALT, vcpu);
 			break;
 		}
-- 
1.7.0.4



* [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
                   ` (2 preceding siblings ...)
  2010-10-27  9:04 ` [PATCH 4/8] KVM: avoid unnecessary wait for a async pf Xiao Guangrong
@ 2010-10-27  9:05 ` Xiao Guangrong
  2010-10-27 10:44   ` Gleb Natapov
  2010-10-27  9:07 ` [PATCH 6/8] KVM: simply wakeup async pf Xiao Guangrong
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

Don't make a KVM_REQ_UNHALT request after async pf is completed since it
can break guest's 'halt' instruction.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |    5 +++++
 virt/kvm/async_pf.c             |    1 +
 virt/kvm/kvm_main.c             |    7 +++++--
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1be0058..d01677b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -822,6 +822,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
 			       struct kvm_async_pf *work);
+void kvm_arch_async_pf_completion(struct kvm_vcpu *vcpu);
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
 extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b2c420..c0e7ad0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6280,6 +6280,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 	}
 }
 
+void kvm_arch_async_pf_completion(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.apf.halted = false;
+}
+
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 {
 	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index e213ca4..5307a32 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -142,6 +142,7 @@ bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	if (work->page)
 		put_page(work->page);
 	kmem_cache_free(async_pf_cache, work);
+	kvm_arch_async_pf_completion(vcpu);
 
 	return true;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d9aed28..23a8b06 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1347,11 +1347,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_arch_vcpu_runnable(vcpu) ||
-		     kvm_check_async_pf_completion(vcpu)) {
+		if (kvm_arch_vcpu_runnable(vcpu)) {
 			kvm_make_request(KVM_REQ_UNHALT, vcpu);
 			break;
 		}
+
+		if (kvm_check_async_pf_completion(vcpu))
+			break;
+
 		if (kvm_cpu_has_pending_timer(vcpu))
 			break;
 		if (signal_pending(current))
-- 
1.7.0.4



* [PATCH 6/8] KVM: simply wakeup async pf
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
                   ` (3 preceding siblings ...)
  2010-10-27  9:05 ` [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete Xiao Guangrong
@ 2010-10-27  9:07 ` Xiao Guangrong
  2010-10-27 10:50   ` Gleb Natapov
  2010-10-27  9:09 ` [PATCH 7/8] KVM: make async_pf work queue lockless Xiao Guangrong
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

The current way is to queue a completed async pf with:
	async_pf.page = bad_page
	async_pf.arch.gfn = 0

It has two problems when kvm_check_async_pf_completion handles this
async_pf:
- since !async_pf.page, it can retry a pseudo #PF
- it can delete gfn 0 from vcpu->arch.apf.gfns[]

Actually, we can simply record this wakeup request and let
kvm_check_async_pf_completion simply break the wait

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/async_pf.c      |   21 ++++++---------------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0c1b7c5..d91add9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -131,6 +131,7 @@ struct kvm_vcpu {
 		struct list_head queue;
 		struct list_head done;
 		spinlock_t lock;
+		bool wakeup;
 	} async_pf;
 #endif
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 5307a32..0d1f6c4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -124,6 +124,11 @@ bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 {
 	struct kvm_async_pf *work;
 
+	if (vcpu->async_pf.wakeup) {
+		vcpu->async_pf.wakeup = false;
+		return true;
+	}
+
 	if (list_empty_careful(&vcpu->async_pf.done) ||
 	    !kvm_arch_can_inject_async_page_present(vcpu))
 		return false;
@@ -197,20 +202,6 @@ retry_sync:
 
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
 {
-	struct kvm_async_pf *work;
-
-	if (!list_empty(&vcpu->async_pf.done))
-		return 0;
-
-	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
-	if (!work)
-		return -ENOMEM;
-
-	work->page = bad_page;
-	get_page(bad_page);
-	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
-
-	list_add_tail(&work->link, &vcpu->async_pf.done);
-	vcpu->async_pf.queued++;
+	vcpu->async_pf.wakeup = true;
 	return 0;
 }
-- 
1.7.0.4



* [PATCH 7/8] KVM: make async_pf work queue lockless
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
                   ` (4 preceding siblings ...)
  2010-10-27  9:07 ` [PATCH 6/8] KVM: simply wakeup async pf Xiao Guangrong
@ 2010-10-27  9:09 ` Xiao Guangrong
  2010-10-27 11:41   ` Gleb Natapov
  2010-10-27  9:10 ` [PATCH 8/8] KVM: add debugfs file to show the number of async pf Xiao Guangrong
  2010-10-27  9:59 ` [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Gleb Natapov
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:09 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

The number of async_pfs is very small, since only a pending interrupt can
let the vcpu re-enter guest mode.

During my test (Host 4 CPU + 4G, Guest 4 VCPU + 6G), there were no
more than 10 requests in the system.

So, we need only increase the completion counter in the work queue
context, and walk the vcpu->async_pf.queue list to get all completed
async_pfs.
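
Roughly, this turns it into a simple producer/consumer scheme:

	work queue context (producer):
		apf->page = page;
		apf->done = true;
		atomic_inc(&vcpu->async_pf.done);

	vcpu context (consumer):
		if (atomic_read(&vcpu->async_pf.done))
			walk vcpu->async_pf.queue and, for each entry with
			work->done set: inject it, free it, and
			atomic_dec(&vcpu->async_pf.done);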

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 include/linux/kvm_host.h |    4 +--
 virt/kvm/async_pf.c      |   50 +++++++++++++++++++--------------------------
 2 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d91add9..33c03c3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -78,7 +78,6 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 #ifdef CONFIG_KVM_ASYNC_PF
 struct kvm_async_pf {
 	struct work_struct work;
-	struct list_head link;
 	struct list_head queue;
 	struct kvm_vcpu *vcpu;
 	struct mm_struct *mm;
@@ -127,10 +126,9 @@ struct kvm_vcpu {
 
 #ifdef CONFIG_KVM_ASYNC_PF
 	struct {
+		atomic_t done;
 		u32 queued;
 		struct list_head queue;
-		struct list_head done;
-		spinlock_t lock;
 		bool wakeup;
 	} async_pf;
 #endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 0d1f6c4..f10de1e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -49,9 +49,7 @@ void kvm_async_pf_deinit(void)
 
 void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	INIT_LIST_HEAD(&vcpu->async_pf.done);
 	INIT_LIST_HEAD(&vcpu->async_pf.queue);
-	spin_lock_init(&vcpu->async_pf.lock);
 }
 
 static void async_pf_execute(struct work_struct *work)
@@ -72,11 +70,9 @@ static void async_pf_execute(struct work_struct *work)
 	up_read(&mm->mmap_sem);
 	unuse_mm(mm);
 
-	spin_lock(&vcpu->async_pf.lock);
-	list_add_tail(&apf->link, &vcpu->async_pf.done);
 	apf->page = page;
 	apf->done = true;
-	spin_unlock(&vcpu->async_pf.lock);
+	atomic_inc(&vcpu->async_pf.done);
 
 	/*
 	 * apf may be freed by kvm_check_async_pf_completion() after
@@ -101,52 +97,48 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 				   typeof(*work), queue);
 		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) /* work was canceled */
-			kmem_cache_free(async_pf_cache, work);
-	}
-
-	spin_lock(&vcpu->async_pf.lock);
-	while (!list_empty(&vcpu->async_pf.done)) {
-		struct kvm_async_pf *work =
-			list_entry(vcpu->async_pf.done.next,
-				   typeof(*work), link);
-		list_del(&work->link);
 		if (work->page)
 			put_page(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
-	spin_unlock(&vcpu->async_pf.lock);
 
 	vcpu->async_pf.queued = 0;
+	atomic_set(&vcpu->async_pf.done, 0);
 }
 
 bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 {
 	struct kvm_async_pf *work;
+	struct list_head *pos, *temp;
 
 	if (vcpu->async_pf.wakeup) {
 		vcpu->async_pf.wakeup = false;
 		return true;
 	}
 
-	if (list_empty_careful(&vcpu->async_pf.done) ||
+	if (!atomic_read(&vcpu->async_pf.done) ||
 	    !kvm_arch_can_inject_async_page_present(vcpu))
 		return false;
 
-	spin_lock(&vcpu->async_pf.lock);
-	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
-	list_del(&work->link);
-	spin_unlock(&vcpu->async_pf.lock);
+	list_for_each_safe(pos, temp, &vcpu->async_pf.queue) {
+		work = list_entry(pos, typeof(*work), queue);
+		if (!work->done)
+			continue;
 
-	if (work->page)
-		kvm_arch_async_page_ready(vcpu, work);
-	kvm_arch_async_page_present(vcpu, work);
+		if (work->page) {
+			kvm_arch_async_page_ready(vcpu, work);
+			put_page(work->page);
+		}
+
+		kvm_arch_async_page_present(vcpu, work);
+
+		list_del(&work->queue);
+		vcpu->async_pf.queued--;
+		kmem_cache_free(async_pf_cache, work);
+		if (atomic_dec_and_test(&vcpu->async_pf.done))
+			break;
+	}
 
-	list_del(&work->queue);
-	vcpu->async_pf.queued--;
-	if (work->page)
-		put_page(work->page);
-	kmem_cache_free(async_pf_cache, work);
 	kvm_arch_async_pf_completion(vcpu);
 
 	return true;
-- 
1.7.0.4



* [PATCH 8/8] KVM: add debugfs file to show the number of async pf
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
                   ` (5 preceding siblings ...)
  2010-10-27  9:09 ` [PATCH 7/8] KVM: make async_pf work queue lockless Xiao Guangrong
@ 2010-10-27  9:10 ` Xiao Guangrong
  2010-10-27 10:58   ` Gleb Natapov
  2010-10-27  9:59 ` [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Gleb Natapov
  7 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-27  9:10 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, LKML, KVM

It can help us to see the state of async pf

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/x86.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c0e7ad0..2dd8059 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -88,6 +88,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VCPU_ASYNC_PF_STAT offsetof(struct kvm_vcpu, async_pf.queued), \
+							KVM_STAT_VCPU
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
@@ -141,6 +143,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
 	{ "nmi_injections", VCPU_STAT(nmi_injections) },
+	{ "async_pf_queued", VCPU_ASYNC_PF_STAT},
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
-- 
1.7.0.4



* Re: [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page
  2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
                   ` (6 preceding siblings ...)
  2010-10-27  9:10 ` [PATCH 8/8] KVM: add debugfs file to show the number of async pf Xiao Guangrong
@ 2010-10-27  9:59 ` Gleb Natapov
  7 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27  9:59 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:01:49PM +0800, Xiao Guangrong wrote:
> Tracing 'async' and '*pfn' is useless, since 'async' is always true
> and '*pfn' is always 'fault_pfn'.
> 
> We can trace 'gva' and 'gfn' instead, which helps us to see the
> life-cycle of an async_pf.
> 
Makes sense. This is a leftover from an earlier patch iteration where this
trace function was called when async==false too.
 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/kvm/mmu.c         |    2 +-
>  include/trace/events/kvm.h |   12 +++++++-----
>  2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 7ab734d..7449803 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2618,7 +2618,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
>  	put_page(pfn_to_page(*pfn));
>  
>  	if (!no_apf && can_do_async_pf(vcpu)) {
> -		trace_kvm_try_async_get_page(async, *pfn);
> +		trace_kvm_try_async_get_page(gva, gfn);
>  		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
>  			trace_kvm_async_pf_doublefault(gva, gfn);
>  			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
> index 9c2cc6a..30063c6 100644
> --- a/include/trace/events/kvm.h
> +++ b/include/trace/events/kvm.h
> @@ -188,18 +188,20 @@ TRACE_EVENT(kvm_age_page,
>  #ifdef CONFIG_KVM_ASYNC_PF
>  TRACE_EVENT(
>  	kvm_try_async_get_page,
> -	TP_PROTO(bool async, u64 pfn),
> -	TP_ARGS(async, pfn),
> +	TP_PROTO(u64 gva, u64 gfn),
> +	TP_ARGS(gva, gfn),
>  
>  	TP_STRUCT__entry(
> -		__field(__u64, pfn)
> +		__field(u64, gva)
> +		__field(u64, gfn)
>  		),
>  
>  	TP_fast_assign(
> -		__entry->pfn = (!async) ? pfn : (u64)-1;
> +		__entry->gva = gva;
> +		__entry->gfn = gfn;
>  		),
>  
> -	TP_printk("pfn %#llx", __entry->pfn)
> +	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
>  );
>  
>  TRACE_EVENT(
> -- 
> 1.7.0.4

--
			Gleb.


* Re: [PATCH 2/8] KVM: cleanup async_pf tracepoints
  2010-10-27  9:02 ` [PATCH 2/8] KVM: cleanup async_pf tracepoints Xiao Guangrong
@ 2010-10-27 10:10   ` Gleb Natapov
  0 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:10 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:02:50PM +0800, Xiao Guangrong wrote:
> Use 'DECLARE_EVENT_CLASS' to cleanup async_pf tracepoints
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Nice

Acked-by: Gleb Natapov <gleb@redhat.com>

> ---
>  include/trace/events/kvm.h |   76 ++++++++++++++++++++-----------------------
>  1 files changed, 35 insertions(+), 41 deletions(-)
> 
> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
> index 30063c6..7bec396 100644
> --- a/include/trace/events/kvm.h
> +++ b/include/trace/events/kvm.h
> @@ -186,59 +186,71 @@ TRACE_EVENT(kvm_age_page,
>  );
>  
>  #ifdef CONFIG_KVM_ASYNC_PF
> -TRACE_EVENT(
> -	kvm_try_async_get_page,
> +DECLARE_EVENT_CLASS(kvm_async_get_page_class,
> +
>  	TP_PROTO(u64 gva, u64 gfn),
> +
>  	TP_ARGS(gva, gfn),
>  
>  	TP_STRUCT__entry(
> -		__field(u64, gva)
> +		__field(__u64, gva)
>  		__field(u64, gfn)
> -		),
> +	),
>  
>  	TP_fast_assign(
>  		__entry->gva = gva;
>  		__entry->gfn = gfn;
> -		),
> +	),
>  
>  	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
>  );
>  
> -TRACE_EVENT(
> -	kvm_async_pf_not_present,
> +DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
> +
> +	TP_PROTO(u64 gva, u64 gfn),
> +
> +	TP_ARGS(gva, gfn)
> +);
> +
> +DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
> +
> +	TP_PROTO(u64 gva, u64 gfn),
> +
> +	TP_ARGS(gva, gfn)
> +);
> +
> +DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
> +
>  	TP_PROTO(u64 token, u64 gva),
> +
>  	TP_ARGS(token, gva),
>  
>  	TP_STRUCT__entry(
>  		__field(__u64, token)
>  		__field(__u64, gva)
> -		),
> +	),
>  
>  	TP_fast_assign(
>  		__entry->token = token;
>  		__entry->gva = gva;
> -		),
> +	),
> +
> +	TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
>  
> -	TP_printk("token %#llx gva %#llx not present", __entry->token,
> -		  __entry->gva)
>  );
>  
> -TRACE_EVENT(
> -	kvm_async_pf_ready,
> +DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
> +
>  	TP_PROTO(u64 token, u64 gva),
> -	TP_ARGS(token, gva),
>  
> -	TP_STRUCT__entry(
> -		__field(__u64, token)
> -		__field(__u64, gva)
> -		),
> +	TP_ARGS(token, gva)
> +);
>  
> -	TP_fast_assign(
> -		__entry->token = token;
> -		__entry->gva = gva;
> -		),
> +DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
> +
> +	TP_PROTO(u64 token, u64 gva),
>  
> -	TP_printk("token %#llx gva %#llx ready", __entry->token, __entry->gva)
> +	TP_ARGS(token, gva)
>  );
>  
>  TRACE_EVENT(
> @@ -262,24 +274,6 @@ TRACE_EVENT(
>  		  __entry->address, __entry->pfn)
>  );
>  
> -TRACE_EVENT(
> -	kvm_async_pf_doublefault,
> -	TP_PROTO(u64 gva, u64 gfn),
> -	TP_ARGS(gva, gfn),
> -
> -	TP_STRUCT__entry(
> -		__field(u64, gva)
> -		__field(u64, gfn)
> -		),
> -
> -	TP_fast_assign(
> -		__entry->gva = gva;
> -		__entry->gfn = gfn;
> -		),
> -
> -	TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
> -);
> -
>  #endif
>  
>  #endif /* _TRACE_KVM_MAIN_H */
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot
  2010-10-27  9:03 ` [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot Xiao Guangrong
@ 2010-10-27 10:29   ` Gleb Natapov
  0 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:29 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:03:33PM +0800, Xiao Guangrong wrote:
> The 'gfn' is not recorded if the next slot is empty
> 
This function is not used for gfn recording, only to search for a recorded
gfn, but without the fix it walks over all entries if the gfn is not in the
hash.

> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Gleb Natapov <gleb@redhat.com>

> ---
>  arch/x86/kvm/x86.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7876ec8..16f42ff 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6207,8 +6207,8 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
>  	u32 key = kvm_async_pf_hash_fn(gfn);
>  
>  	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
> -		     (vcpu->arch.apf.gfns[key] != gfn ||
> -		      vcpu->arch.apf.gfns[key] == ~0); i++)
> +		     (vcpu->arch.apf.gfns[key] != gfn &&
> +		      vcpu->arch.apf.gfns[key] != ~0); i++)
>  		key = kvm_async_pf_next_probe(key);
>  
>  	return key;
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 4/8] KVM: avoid unnecessary wait for a async pf
  2010-10-27  9:04 ` [PATCH 4/8] KVM: avoid unnecessary wait for a async pf Xiao Guangrong
@ 2010-10-27 10:42   ` Gleb Natapov
  2010-10-28  7:06     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:42 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:04:58PM +0800, Xiao Guangrong wrote:
> In the current code, async pf completion is checked outside of the wait context,
> like this:
> 
> if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> 		    !vcpu->arch.apf.halted)
> 			r = vcpu_enter_guest(vcpu);
> 		else {
> 			......
> 			kvm_vcpu_block(vcpu)
> 			 ^- waiting until 'async_pf.done' is not empty
> }
> 	
> kvm_check_async_pf_completion(vcpu)
>  ^- delete list from async_pf.done
> 
> So, if we check async pf completion first, it can be blocked at
> kvm_vcpu_block
> 
Correct, but it can be fixed by adding vcpu->arch.apf.halted = false; to
kvm_arch_async_page_present(), no?
Adding kvm_check_async_pf_completion() to arch independent kvm_vcpu_block()
constrains how other archs may implement async pf support IMO.
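
I.e., roughly (a sketch of that alternative, not the complete function):

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	/* existing injection of the 'page present' event stays as is */

	/* let the vcpu loop leave the apf-halted state again */
	vcpu->arch.apf.halted = false;
}
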
 
> Fixed by moving the check into the wait context.
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/kvm/x86.c       |    3 ---
>  include/linux/kvm_host.h |    1 -
>  virt/kvm/async_pf.c      |    6 ++++--
>  virt/kvm/async_pf.h      |    6 ++++++
>  virt/kvm/kvm_main.c      |    3 ++-
>  5 files changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 16f42ff..0b2c420 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5321,8 +5321,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>  			vcpu->run->exit_reason = KVM_EXIT_INTR;
>  			++vcpu->stat.request_irq_exits;
>  		}
> -		
> -		kvm_check_async_pf_completion(vcpu);
>  
>  		if (signal_pending(current)) {
>  			r = -EINTR;
> @@ -6108,7 +6106,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
>  	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
>  		!vcpu->arch.apf.halted)
> -		|| !list_empty_careful(&vcpu->async_pf.done)
>  		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
>  		|| vcpu->arch.nmi_pending ||
>  		(kvm_arch_interrupt_allowed(vcpu) &&
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ee4314e..0c1b7c5 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -90,7 +90,6 @@ struct kvm_async_pf {
>  };
>  
>  void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
> -void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
>  int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
>  		       struct kvm_arch_async_pf *arch);
>  int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 60df9e0..e213ca4 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -120,13 +120,13 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
>  	vcpu->async_pf.queued = 0;
>  }
>  
> -void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
> +bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_async_pf *work;
>  
>  	if (list_empty_careful(&vcpu->async_pf.done) ||
>  	    !kvm_arch_can_inject_async_page_present(vcpu))
> -		return;
> +		return false;
>  
>  	spin_lock(&vcpu->async_pf.lock);
>  	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
> @@ -142,6 +142,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
>  	if (work->page)
>  		put_page(work->page);
>  	kmem_cache_free(async_pf_cache, work);
> +
> +	return true;
>  }
>  
>  int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
> diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
> index e7ef644..e21533c 100644
> --- a/virt/kvm/async_pf.h
> +++ b/virt/kvm/async_pf.h
> @@ -27,10 +27,16 @@
>  int kvm_async_pf_init(void);
>  void kvm_async_pf_deinit(void);
>  void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
> +bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
> +
>  #else
>  #define kvm_async_pf_init() (0)
>  #define kvm_async_pf_deinit() do{}while(0)
>  #define kvm_async_pf_vcpu_init(C) do{}while(0)
> +static inline bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
> +{
> +	return false;
> +}
>  #endif
>  
>  #endif
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 88d869e..d9aed28 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1347,7 +1347,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  	for (;;) {
>  		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
>  
> -		if (kvm_arch_vcpu_runnable(vcpu)) {
> +		if (kvm_arch_vcpu_runnable(vcpu) ||
> +		     kvm_check_async_pf_completion(vcpu)) {
>  			kvm_make_request(KVM_REQ_UNHALT, vcpu);
>  			break;
>  		}
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete
  2010-10-27  9:05 ` [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete Xiao Guangrong
@ 2010-10-27 10:44   ` Gleb Natapov
  2010-10-28  7:35     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:44 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:05:57PM +0800, Xiao Guangrong wrote:
> Don't make a KVM_REQ_UNHALT request after async pf is completed since it
> can break guest's 'halt' instruction.
> 
Why is it a problem? CPU may be unhalted by different events so OS
shouldn't depend on it.

> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/x86.c              |    5 +++++
>  virt/kvm/async_pf.c             |    1 +
>  virt/kvm/kvm_main.c             |    7 +++++--
>  4 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1be0058..d01677b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -822,6 +822,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
>  				 struct kvm_async_pf *work);
>  void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
>  			       struct kvm_async_pf *work);
> +void kvm_arch_async_pf_completion(struct kvm_vcpu *vcpu);
>  bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
>  extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0b2c420..c0e7ad0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6280,6 +6280,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> +void kvm_arch_async_pf_completion(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.apf.halted = false;
> +}
> +
>  bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
>  {
>  	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index e213ca4..5307a32 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -142,6 +142,7 @@ bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
>  	if (work->page)
>  		put_page(work->page);
>  	kmem_cache_free(async_pf_cache, work);
> +	kvm_arch_async_pf_completion(vcpu);
>  
>  	return true;
>  }
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d9aed28..23a8b06 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1347,11 +1347,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  	for (;;) {
>  		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
>  
> -		if (kvm_arch_vcpu_runnable(vcpu) ||
> -		     kvm_check_async_pf_completion(vcpu)) {
> +		if (kvm_arch_vcpu_runnable(vcpu)) {
>  			kvm_make_request(KVM_REQ_UNHALT, vcpu);
>  			break;
>  		}
> +
> +		if (kvm_check_async_pf_completion(vcpu))
> +			break;
> +
>  		if (kvm_cpu_has_pending_timer(vcpu))
>  			break;
>  		if (signal_pending(current))
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 6/8] KVM: simply wakeup async pf
  2010-10-27  9:07 ` [PATCH 6/8] KVM: simply wakeup async pf Xiao Guangrong
@ 2010-10-27 10:50   ` Gleb Natapov
  2010-10-28  7:59     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:50 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:07:32PM +0800, Xiao Guangrong wrote:
> The current way is to queue a completed async pf with:
> 	async_pf.page = bad_page
> 	async_pf.arch.gfn = 0
> 
> It has two problems when kvm_check_async_pf_completion handles this
> async_pf:
> - since !async_pf.page, it can retry a pseudo #PF
kvm_arch_async_page_ready checks for is_error_page()

> - it can delete gfn 0 from vcpu->arch.apf.gfns[]
kvm_arch_async_page_present() checks for is_error_page() too and,
in case of PV guest, injects special token if it is true. 

After your patch special token will not be injected and migration will
not work.

> Actually, we can simply record this wakeup request and let
> kvm_check_async_pf_completion simply break the wait
> 
Maybe the wakeup_all function naming is misleading. It means wake up all PV
guest processes by sending a broadcast async pf notification. It is not
about waking the host vcpu thread.

> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  include/linux/kvm_host.h |    1 +
>  virt/kvm/async_pf.c      |   21 ++++++---------------
>  2 files changed, 7 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0c1b7c5..d91add9 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -131,6 +131,7 @@ struct kvm_vcpu {
>  		struct list_head queue;
>  		struct list_head done;
>  		spinlock_t lock;
> +		bool wakeup;
>  	} async_pf;
>  #endif
>  
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 5307a32..0d1f6c4 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -124,6 +124,11 @@ bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_async_pf *work;
>  
> +	if (vcpu->async_pf.wakeup) {
> +		vcpu->async_pf.wakeup = false;
> +		return true;
> +	}
> +
>  	if (list_empty_careful(&vcpu->async_pf.done) ||
>  	    !kvm_arch_can_inject_async_page_present(vcpu))
>  		return false;
> @@ -197,20 +202,6 @@ retry_sync:
>  
>  int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_async_pf *work;
> -
> -	if (!list_empty(&vcpu->async_pf.done))
> -		return 0;
> -
> -	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
> -	if (!work)
> -		return -ENOMEM;
> -
> -	work->page = bad_page;
> -	get_page(bad_page);
> -	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
> -
> -	list_add_tail(&work->link, &vcpu->async_pf.done);
> -	vcpu->async_pf.queued++;
> +	vcpu->async_pf.wakeup = true;
>  	return 0;
>  }
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 8/8] KVM: add debugfs file to show the number of async pf
  2010-10-27  9:10 ` [PATCH 8/8] KVM: add debugfs file to show the number of async pf Xiao Guangrong
@ 2010-10-27 10:58   ` Gleb Natapov
  2010-10-28  9:09     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 10:58 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:10:51PM +0800, Xiao Guangrong wrote:
> It can help us to see the state of async pf
> 
I have a patch to add three async pf statistics:
apf_not_present
apf_present
apf_doublefault

But Avi now wants to deprecate the debugfs interface completely and move
towards ftrace, so I had to drop it.

> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/kvm/x86.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c0e7ad0..2dd8059 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -88,6 +88,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
>  
>  #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
>  #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
> +#define VCPU_ASYNC_PF_STAT offsetof(struct kvm_vcpu, async_pf.queued), \
> +							KVM_STAT_VCPU
>  
>  static void update_cr8_intercept(struct kvm_vcpu *vcpu);
>  static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
> @@ -141,6 +143,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
>  	{ "irq_injections", VCPU_STAT(irq_injections) },
>  	{ "nmi_injections", VCPU_STAT(nmi_injections) },
> +	{ "async_pf_queued", VCPU_ASYNC_PF_STAT},
>  	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
>  	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
>  	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 7/8] KVM: make async_pf work queue lockless
  2010-10-27  9:09 ` [PATCH 7/8] KVM: make async_pf work queue lockless Xiao Guangrong
@ 2010-10-27 11:41   ` Gleb Natapov
  2010-10-28  9:08     ` Xiao Guangrong
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2010-10-27 11:41 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Wed, Oct 27, 2010 at 05:09:41PM +0800, Xiao Guangrong wrote:
> The number of async_pfs is very small, since only a pending interrupt can
> let the vcpu re-enter guest mode.
> 
> During my test (Host 4 CPU + 4G, Guest 4 VCPU + 6G), there were no
> more than 10 requests in the system.
> 
> So, we need only increase the completion counter in the work queue
> context, and walk the vcpu->async_pf.queue list to get all completed
> async_pfs.
> 
That depends on the load. I used memory cgroups to create very big
memory pressure and I saw hundreds of apfs per second. We shouldn't
optimize for very low numbers. With vcpu->async_pf.queue having more
than one element I am not sure your patch is beneficial.

> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  include/linux/kvm_host.h |    4 +--
>  virt/kvm/async_pf.c      |   50 +++++++++++++++++++--------------------------
>  2 files changed, 22 insertions(+), 32 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d91add9..33c03c3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -78,7 +78,6 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
>  #ifdef CONFIG_KVM_ASYNC_PF
>  struct kvm_async_pf {
>  	struct work_struct work;
> -	struct list_head link;
>  	struct list_head queue;
>  	struct kvm_vcpu *vcpu;
>  	struct mm_struct *mm;
> @@ -127,10 +126,9 @@ struct kvm_vcpu {
>  
>  #ifdef CONFIG_KVM_ASYNC_PF
>  	struct {
> +		atomic_t done;
>  		u32 queued;
>  		struct list_head queue;
> -		struct list_head done;
> -		spinlock_t lock;
>  		bool wakeup;
>  	} async_pf;
>  #endif
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 0d1f6c4..f10de1e 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -49,9 +49,7 @@ void kvm_async_pf_deinit(void)
>  
>  void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
>  {
> -	INIT_LIST_HEAD(&vcpu->async_pf.done);
>  	INIT_LIST_HEAD(&vcpu->async_pf.queue);
> -	spin_lock_init(&vcpu->async_pf.lock);
>  }
>  
>  static void async_pf_execute(struct work_struct *work)
> @@ -72,11 +70,9 @@ static void async_pf_execute(struct work_struct *work)
>  	up_read(&mm->mmap_sem);
>  	unuse_mm(mm);
>  
> -	spin_lock(&vcpu->async_pf.lock);
> -	list_add_tail(&apf->link, &vcpu->async_pf.done);
>  	apf->page = page;
>  	apf->done = true;
> -	spin_unlock(&vcpu->async_pf.lock);
> +	atomic_inc(&vcpu->async_pf.done);
>  
>  	/*
>  	 * apf may be freed by kvm_check_async_pf_completion() after
> @@ -101,52 +97,48 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
>  				   typeof(*work), queue);
>  		cancel_work_sync(&work->work);
>  		list_del(&work->queue);
> -		if (!work->done) /* work was canceled */
> -			kmem_cache_free(async_pf_cache, work);
> -	}
> -
> -	spin_lock(&vcpu->async_pf.lock);
> -	while (!list_empty(&vcpu->async_pf.done)) {
> -		struct kvm_async_pf *work =
> -			list_entry(vcpu->async_pf.done.next,
> -				   typeof(*work), link);
> -		list_del(&work->link);
>  		if (work->page)
>  			put_page(work->page);
>  		kmem_cache_free(async_pf_cache, work);
>  	}
> -	spin_unlock(&vcpu->async_pf.lock);
>  
>  	vcpu->async_pf.queued = 0;
> +	atomic_set(&vcpu->async_pf.done, 0);
>  }
>  
>  bool kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_async_pf *work;
> +	struct list_head *pos, *temp;
>  
>  	if (vcpu->async_pf.wakeup) {
>  		vcpu->async_pf.wakeup = false;
>  		return true;
>  	}
>  
> -	if (list_empty_careful(&vcpu->async_pf.done) ||
> +	if (!atomic_read(&vcpu->async_pf.done) ||
>  	    !kvm_arch_can_inject_async_page_present(vcpu))
>  		return false;
>  
> -	spin_lock(&vcpu->async_pf.lock);
> -	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
> -	list_del(&work->link);
> -	spin_unlock(&vcpu->async_pf.lock);
> +	list_for_each_safe(pos, temp, &vcpu->async_pf.queue) {
> +		work = list_entry(pos, typeof(*work), queue);
> +		if (!work->done)
> +			continue;
>  
> -	if (work->page)
> -		kvm_arch_async_page_ready(vcpu, work);
> -	kvm_arch_async_page_present(vcpu, work);
> +		if (work->page) {
> +			kvm_arch_async_page_ready(vcpu, work);
> +			put_page(work->page);
> +		}
> +
> +		kvm_arch_async_page_present(vcpu, work);
> +
> +		list_del(&work->queue);
> +		vcpu->async_pf.queued--;
> +		kmem_cache_free(async_pf_cache, work);
> +		if (atomic_dec_and_test(&vcpu->async_pf.done))
> +			break;
You should do atomic_dec() and always break. We cannot inject two apfs during
one vcpu entry.
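
I.e., something like:

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kmem_cache_free(async_pf_cache, work);
		atomic_dec(&vcpu->async_pf.done);
		break;	/* present at most one apf per guest entry */
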

> +	}
>  
> -	list_del(&work->queue);
> -	vcpu->async_pf.queued--;
> -	if (work->page)
> -		put_page(work->page);
> -	kmem_cache_free(async_pf_cache, work);
>  	kvm_arch_async_pf_completion(vcpu);
>  
>  	return true;
> -- 
> 1.7.0.4
> 

--
			Gleb.


* Re: [PATCH 4/8] KVM: avoid unnecessary wait for a async pf
  2010-10-27 10:42   ` Gleb Natapov
@ 2010-10-28  7:06     ` Xiao Guangrong
  0 siblings, 0 replies; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-28  7:06 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On 10/27/2010 06:42 PM, Gleb Natapov wrote:
> On Wed, Oct 27, 2010 at 05:04:58PM +0800, Xiao Guangrong wrote:
>> In current code, it checks async pf completion out of the wait context,
>> like this:
>>
>> if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
>> 		    !vcpu->arch.apf.halted)
>> 			r = vcpu_enter_guest(vcpu);
>> 		else {
>> 			......
>> 			kvm_vcpu_block(vcpu)
>> 			 ^- waiting until 'async_pf.done' is not empty
>> }
>> 	
>> kvm_check_async_pf_completion(vcpu)
>>  ^- delete list from async_pf.done
>>
>> So, if we check async pf completion first, it can be blocked at
>> kvm_vcpu_block
>>
> Correct, but it can be fixed by adding vcpu->arch.apf.halted = false; to
> kvm_arch_async_page_present(), no?
> Adding kvm_check_async_pf_completion() to arch independent kvm_vcpu_block()
> constrains how other archs may implement async pf support IMO.
>  

Um, I think it's reasonable, will fix it to address your comment.


* Re: [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete
  2010-10-27 10:44   ` Gleb Natapov
@ 2010-10-28  7:35     ` Xiao Guangrong
  2010-10-28  7:41       ` Gleb Natapov
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-28  7:35 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On 10/27/2010 06:44 PM, Gleb Natapov wrote:
> On Wed, Oct 27, 2010 at 05:05:57PM +0800, Xiao Guangrong wrote:
>> Don't make a KVM_REQ_UNHALT request after async pf is completed since it
>> can break guest's 'halt' instruction.
>>
> Why is it a problem? CPU may be unhalted by different events so OS
> shouldn't depend on it.
> 

We don't know how the guest OS handles it after the HLT instruction completes.
According to the x86 spec, only NMI/INTR/RESET/INIT/SMI can break the halt state;
it violates the hardware behavior if we allow other events to break this
state. Your opinion? :-)


* Re: [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete
  2010-10-28  7:35     ` Xiao Guangrong
@ 2010-10-28  7:41       ` Gleb Natapov
  0 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2010-10-28  7:41 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Thu, Oct 28, 2010 at 03:35:13PM +0800, Xiao Guangrong wrote:
> On 10/27/2010 06:44 PM, Gleb Natapov wrote:
> > On Wed, Oct 27, 2010 at 05:05:57PM +0800, Xiao Guangrong wrote:
> >> Don't make a KVM_REQ_UNHALT request after async pf is completed since it
> >> can break guest's 'halt' instruction.
> >>
> > Why is it a problem? CPU may be unhalted by different events so OS
> > shouldn't depend on it.
> > 
> 
> We don't know how the guest OS handles it after the HLT instruction completes.
> According to the x86 spec, only NMI/INTR/RESET/INIT/SMI can break the halt state;
> it violates the hardware behavior if we allow other events to break this
> state. Your opinion? :-)
I agree in principle, but since SMI (which is completely out of guest OS
control) can cause CPU to exit halt, in practice OS can't rely on CPU to
be unhalted only by events controlled by OS itself. In the past we had a
bug where any timer event unhalted the vcpu even when the timer interrupt was masked.
The only practical problem it caused was that vcpu that executed cli;
1: hlt; jmp 1b sequence still consumed host cpu time. That said I am not
against fixing it if the fix is easy. Your current fix though relies on
patch 4 that I have a problem with.

--
			Gleb.


* Re: [PATCH 6/8] KVM: simply wakeup async pf
  2010-10-27 10:50   ` Gleb Natapov
@ 2010-10-28  7:59     ` Xiao Guangrong
  0 siblings, 0 replies; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-28  7:59 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On 10/27/2010 06:50 PM, Gleb Natapov wrote:
> On Wed, Oct 27, 2010 at 05:07:32PM +0800, Xiao Guangrong wrote:
>> The current way is to queue a completed async pf with:
>> 	async_pf.page = bad_page
>> 	async_pf.arch.gfn = 0
>>
>> It has two problems when kvm_check_async_pf_completion handles this
>> async_pf:
>> - since !async_pf.page, it can retry a pseudo #PF
> kvm_arch_async_page_ready checks for is_error_page()
> 
>> - it can delete gfn 0 from vcpu->arch.apf.gfns[]
> kvm_arch_async_page_present() checks for is_error_page() too and,
> in case of PV guest, injects special token if it is true. 
> 

Ah, sorry for my stupid questions.

> After your patch special token will not be injected and migration will
> not work.
> 
>> Actually, we can simply record this wakeup request and let
>> kvm_check_async_pf_completion simply break the wait
>>
> Maybe the wakeup_all function naming is misleading. It means wake up all PV
> guest processes by sending a broadcast async pf notification. It is not
> about waking the host vcpu thread.
> 

I don't know the KVM PV way well; I'll dig into it. Please ignore this patch,
thanks.
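
For reference, the error-page checks mentioned above could be sketched roughly
like this (simplified and illustrative, not the exact upstream code):

	/* Sketch: the 'ready' hook skips the retry when the page is the error
	 * page, and the 'present' hook turns it into a broadcast wakeup token. */
	void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
				       struct kvm_async_pf *work)
	{
		if (is_error_page(work->page))	/* bad_page: nothing to retry */
			return;
		/* ... otherwise replay the fault that went asynchronous ... */
	}

	void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
					 struct kvm_async_pf *work)
	{
		if (is_error_page(work->page))
			work->arch.token = ~0;	/* broadcast wakeup to the guest */
		/* ... inject the 'page ready' event if PV async pf is enabled ... */
	}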

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 7/8] KVM: make async_pf work queue lockless
  2010-10-27 11:41   ` Gleb Natapov
@ 2010-10-28  9:08     ` Xiao Guangrong
  2010-10-28  9:14       ` Gleb Natapov
  0 siblings, 1 reply; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-28  9:08 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On 10/27/2010 07:41 PM, Gleb Natapov wrote:
> On Wed, Oct 27, 2010 at 05:09:41PM +0800, Xiao Guangrong wrote:
>> The number of async_pfs is very small, since only a pending interrupt can
>> let the vcpu re-enter guest mode.
>>
>> During my test (Host: 4 CPUs + 4G, Guest: 4 VCPUs + 6G), there were no
>> more than 10 requests in the system.
>>
>> So we can just increase the completion counter in the work queue
>> context, and walk the vcpu->async_pf.queue list to get all completed
>> async_pfs.
>>
> That depends on the load. I used memory cgroups to create very heavy
> memory pressure and I saw hundreds of apfs per second. We shouldn't
> optimize for very low numbers. With vcpu->async_pf.queue having more
> than one element, I am not sure your patch is beneficial.
> 

Maybe we need a new lock-free way to record the completed apfs; I'll reproduce
your test environment and improve it.
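
The kind of lock-free recording being hinted at might look something like this
on the work-queue side (purely a sketch: the 'done' flag on the work item is a
hypothetical addition, and the atomic completion counter assumes the structures
from this patch):

	/* Work-queue side: publish the completed item and bump the counter
	 * without taking the spinlock; the vcpu thread later walks its own
	 * vcpu->async_pf.queue to collect completed items. */
	static void async_pf_execute(struct work_struct *work)
	{
		struct kvm_async_pf *apf =
			container_of(work, struct kvm_async_pf, work);

		/* ... fault the page in on behalf of the guest ... */

		smp_wmb();				/* publish results first */
		apf->done = true;			/* hypothetical flag */
		atomic_inc(&apf->vcpu->async_pf.done);	/* completion counter */
		kvm_vcpu_kick(apf->vcpu);		/* let the vcpu notice */
	}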

>> +
>> +		list_del(&work->queue);
>> +		vcpu->async_pf.queued--;
>> +		kmem_cache_free(async_pf_cache, work);
>> +		if (atomic_dec_and_test(&vcpu->async_pf.done))
>> +			break;
> You should do atomic_dec() and always break. We cannot inject two apfs during
> one vcpu entry.
> 

Sorry, I'm a little confused.

Why should 'atomic_dec_and_test(&vcpu->async_pf.done)' always break? async_pf.done is used to
record the completed apfs, and many apfs may be completed when the vcpu enters guest mode (it
means vcpu->async_pf.done > 1).

Look at the current code:

void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	......
	spin_lock(&vcpu->async_pf.lock);
	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
	list_del(&work->link);
	spin_unlock(&vcpu->async_pf.lock);
	......
}

You only handle one completed apf here, so why would we inject more than one at once? Am I missing something? :-(


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 8/8] KVM: add debugfs file to show the number of async pf
  2010-10-27 10:58   ` Gleb Natapov
@ 2010-10-28  9:09     ` Xiao Guangrong
  0 siblings, 0 replies; 23+ messages in thread
From: Xiao Guangrong @ 2010-10-28  9:09 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On 10/27/2010 06:58 PM, Gleb Natapov wrote:
> On Wed, Oct 27, 2010 at 05:10:51PM +0800, Xiao Guangrong wrote:
>> It can help us to see the state of async pf
>>
> I have a patch to add three async pf statistics:
> apf_not_present
> apf_present
> apf_doublefault
> 
> But Avi now wants to deprecate the debugfs interface completely and move
> towards ftrace, so I had to drop it.
> 

OK, let's ignore this patch, thanks :-) 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 7/8] KVM: make async_pf work queue lockless
  2010-10-28  9:08     ` Xiao Guangrong
@ 2010-10-28  9:14       ` Gleb Natapov
  0 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2010-10-28  9:14 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, Marcelo Tosatti, LKML, KVM

On Thu, Oct 28, 2010 at 05:08:58PM +0800, Xiao Guangrong wrote:
> On 10/27/2010 07:41 PM, Gleb Natapov wrote:
> > On Wed, Oct 27, 2010 at 05:09:41PM +0800, Xiao Guangrong wrote:
> >> The number of async_pfs is very small, since only a pending interrupt can
> >> let the vcpu re-enter guest mode.
> >>
> >> During my test (Host: 4 CPUs + 4G, Guest: 4 VCPUs + 6G), there were no
> >> more than 10 requests in the system.
> >>
> >> So we can just increase the completion counter in the work queue
> >> context, and walk the vcpu->async_pf.queue list to get all completed
> >> async_pfs.
> >>
> > That depends on the load. I used memory cgroups to create very heavy
> > memory pressure and I saw hundreds of apfs per second. We shouldn't
> > optimize for very low numbers. With vcpu->async_pf.queue having more
> > than one element, I am not sure your patch is beneficial.
> > 
> 
> Maybe we need a new lock-free way to record the completed apfs; I'll reproduce
> your test environment and improve it.
> 
That is always welcome :)

> >> +
> >> +		list_del(&work->queue);
> >> +		vcpu->async_pf.queued--;
> >> +		kmem_cache_free(async_pf_cache, work);
> >> +		if (atomic_dec_and_test(&vcpu->async_pf.done))
> >> +			break;
> > You should do atomic_dec() and always break. We cannot inject two apfs during
> > one vcpu entry.
> > 
> 
> Sorry, I'm a little confused.
> 
> Why should 'atomic_dec_and_test(&vcpu->async_pf.done)' always break? async_pf.done is used to
In your code it does not, but it should (at least if the guest is PV, read
below).

> record the completed apfs, and many apfs may be completed when the vcpu enters guest mode (it
> means vcpu->async_pf.done > 1).
> 
Correct, but only one apf should be handled on each vcpu entry in the case
of a PV guest. Look at kvm_arch_async_page_present(vcpu, work); it is called
in a loop in your code. If vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED
is non-zero, it injects an exception into the guest. You can't inject more
than one exception on each guest entry. If the guest is not PV, you are
correct that we can loop here until vcpu->async_pf.done == 0.
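
As a rough illustration of that constraint, the drain loop could be shaped
roughly like this (a hypothetical sketch built on the structures from this
patch series, not the exact upstream code; the per-work 'done' flag is made up,
and in generic code the arch field check would sit behind an arch helper):

	/* Walk the vcpu's own queue, free completed items, but stop after the
	 * first 'page ready' injection when the guest has PV async pf enabled. */
	void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
	{
		struct kvm_async_pf *work, *tmp;

		if (!atomic_read(&vcpu->async_pf.done))
			return;

		list_for_each_entry_safe(work, tmp, &vcpu->async_pf.queue, queue) {
			if (!work->done)		/* hypothetical flag */
				continue;

			kvm_arch_async_page_ready(vcpu, work);
			kvm_arch_async_page_present(vcpu, work);

			list_del(&work->queue);
			vcpu->async_pf.queued--;
			kmem_cache_free(async_pf_cache, work);
			atomic_dec(&vcpu->async_pf.done);

			/* A PV guest can take only one injected event per entry. */
			if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)
				break;
		}
	}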

> Look at the current code:
> 
> void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
> {
> 	......
> 	spin_lock(&vcpu->async_pf.lock);
> 	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
> 	list_del(&work->link);
> 	spin_unlock(&vcpu->async_pf.lock);
> 	......
> }
> 
> You only handle one completed apf here, so why would we inject more than one at once? Am I missing something? :-(

--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2010-10-28  9:14 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-27  9:01 [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Xiao Guangrong
2010-10-27  9:02 ` [PATCH 2/8] KVM: cleanup aysnc_pf tracepoints Xiao Guangrong
2010-10-27 10:10   ` Gleb Natapov
2010-10-27  9:03 ` [PATCH 3/8] KVM: fix searching async gfn in kvm_async_pf_gfn_slot Xiao Guangrong
2010-10-27 10:29   ` Gleb Natapov
2010-10-27  9:04 ` [PATCH 4/8] KVM: avoid unnecessary wait for a async pf Xiao Guangrong
2010-10-27 10:42   ` Gleb Natapov
2010-10-28  7:06     ` Xiao Guangrong
2010-10-27  9:05 ` [PATCH 5/8] KVM: don't touch vcpu stat after async pf is complete Xiao Guangrong
2010-10-27 10:44   ` Gleb Natapov
2010-10-28  7:35     ` Xiao Guangrong
2010-10-28  7:41       ` Gleb Natapov
2010-10-27  9:07 ` [PATCH 6/8] KVM: simply wakup async pf Xiao Guangrong
2010-10-27 10:50   ` Gleb Natapov
2010-10-28  7:59     ` Xiao Guangrong
2010-10-27  9:09 ` [PATCH 7/8] KVM: make async_pf work queue lockless Xiao Guangrong
2010-10-27 11:41   ` Gleb Natapov
2010-10-28  9:08     ` Xiao Guangrong
2010-10-28  9:14       ` Gleb Natapov
2010-10-27  9:10 ` [PATCH 8/8] KVM: add debugfs file to show the number of async pf Xiao Guangrong
2010-10-27 10:58   ` Gleb Natapov
2010-10-28  9:09     ` Xiao Guangrong
2010-10-27  9:59 ` [PATCH 1/8] KVM: fix tracing kvm_try_async_get_page Gleb Natapov
