* [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
@ 2016-05-19 13:27 Wanpeng Li
  2016-05-19 13:46 ` Christian Borntraeger
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Wanpeng Li @ 2016-05-19 13:27 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Wanpeng Li, Paolo Bonzini, Radim Krčmář,
	David Matlack, Christian Borntraeger

From: Wanpeng Li <wanpeng.li@hotmail.com>

If an emulated lapic timer will fire soon (within 10us, the base of
dynamic halt-polling and the lower end of message-passing workload
latency - TCP_RR's poll time is < 10us), we can treat it as a short
halt and poll until the timer fires. The expiry callback
apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
during the busy poll. This avoids the context-switch overhead and the
latency of waking up the vCPU.

iperf TCP gets a ~6% bandwidth improvement.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: David Matlack <dmatlack@google.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
v1 -> v2:
 * add return statement to non-x86 archs
 * capture never expire case for x86 (hrtimer is not started)

 arch/arm/include/asm/kvm_host.h     |  4 ++++
 arch/arm64/include/asm/kvm_host.h   |  4 ++++
 arch/mips/include/asm/kvm_host.h    |  4 ++++
 arch/powerpc/include/asm/kvm_host.h |  4 ++++
 arch/s390/include/asm/kvm_host.h    |  4 ++++
 arch/x86/kvm/lapic.c                | 11 +++++++++++
 arch/x86/kvm/lapic.h                |  1 +
 arch/x86/kvm/x86.c                  |  5 +++++
 include/linux/kvm_host.h            |  1 +
 virt/kvm/kvm_main.c                 | 14 ++++++++++----
 10 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4cd8732..a5fd858 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d49399d..94e227a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -359,6 +359,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 9a37a10..456bc42 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -813,6 +813,10 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ec35af3..5986c79 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -729,5 +729,9 @@ static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 37b9017..bdb01a1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -696,6 +696,10 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b28..cfeeac3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -256,6 +256,17 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
+u64 apic_get_timer_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct hrtimer *timer = &apic->lapic_timer.timer;
+
+	if (!hrtimer_active(timer))
+		return -1ULL;
+	else
+		return ktime_to_ns(hrtimer_get_remaining(timer));
+}
+
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
 {
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 891c6da..ee4da6c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -212,4 +212,5 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 			const unsigned long *bitmap, u32 bitmap_size);
+u64 apic_get_timer_expire(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a8c7ca3..9b5ad99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7623,6 +7623,11 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 struct static_key kvm_no_apic_vcpu __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
 
+u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return apic_get_timer_expire(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b1fa8f1..14d6c23 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -663,6 +663,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd4ac9d..e4bb30b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -78,6 +78,9 @@ module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
 static unsigned int halt_poll_ns_shrink;
 module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
 
+/* lower-end of message passing workload latency TCP_RR's poll time < 10us */
+static unsigned int halt_poll_ns_base = 10000;
+
 /*
  * Ordering of locks:
  *
@@ -1966,7 +1969,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	grow = READ_ONCE(halt_poll_ns_grow);
 	/* 10us base */
 	if (val == 0 && grow)
-		val = 10000;
+		val = halt_poll_ns_base;
 	else
 		val *= grow;
 
@@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	ktime_t start, cur;
 	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
-	u64 block_ns;
+	u64 block_ns, delta, remaining;
 
+	remaining = kvm_arch_timer_remaining(vcpu);
 	start = cur = ktime_get();
-	if (vcpu->halt_poll_ns) {
-		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+	if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
+		ktime_t stop;
 
+		delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
+		stop = ktime_add_ns(ktime_get(), delta);
 		++vcpu->stat.halt_attempted_poll;
 		do {
 			/*
-- 
1.9.1
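
For context, a rough sketch of the busy-poll loop in the 4.6-era
kvm_vcpu_block() that the truncated hunk above enters (reconstructed,
so illustrative rather than exact): per the commit message, the
KVM_REQ_PENDING_TIMER request set by apic_timer_fn() is what makes
kvm_vcpu_check_block() fail here and end the poll without a reschedule.

	do {
		/*
		 * Returns < 0 once the vCPU becomes runnable, e.g.
		 * when apic_timer_fn() has set KVM_REQ_PENDING_TIMER;
		 * the halt then completes without calling schedule().
		 */
		if (kvm_vcpu_check_block(vcpu) < 0) {
			++vcpu->stat.halt_successful_poll;
			goto out;
		}
		cur = ktime_get();
	} while (single_task_running() && ktime_before(cur, stop));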


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 13:27 [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
@ 2016-05-19 13:46 ` Christian Borntraeger
  2016-05-19 13:57 ` Paolo Bonzini
  2016-05-19 18:01 ` David Matlack
  2 siblings, 0 replies; 19+ messages in thread
From: Christian Borntraeger @ 2016-05-19 13:46 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Paolo Bonzini, Radim Krčmář, David Matlack

On 05/19/2016 03:27 PM, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> If an emulated lapic timer will fire soon (within 10us, the base of
> dynamic halt-polling and the lower end of message-passing workload
> latency - TCP_RR's poll time is < 10us), we can treat it as a short
> halt and poll until the timer fires. The expiry callback
> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
> during the busy poll. This avoids the context-switch overhead and the
> latency of waking up the vCPU.
> 
> iperf TCP gets a ~6% bandwidth improvement.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: David Matlack <dmatlack@google.com>
> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>

Looks good to me. I have not checked what happens when I provide an s390
implementation so I cannot say anything about performance. It looks like
it does not hurt on s390 - which is what I would expect.



[...]


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 13:27 [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
  2016-05-19 13:46 ` Christian Borntraeger
@ 2016-05-19 13:57 ` Paolo Bonzini
  2016-05-19 14:52   ` Christian Borntraeger
  2016-05-24  2:48   ` Wanpeng Li
  2016-05-19 18:01 ` David Matlack
  2 siblings, 2 replies; 19+ messages in thread
From: Paolo Bonzini @ 2016-05-19 13:57 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář,
	David Matlack, Christian Borntraeger



On 19/05/2016 15:27, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> If an emulated lapic timer will fire soon (within 10us, the base of
> dynamic halt-polling and the lower end of message-passing workload
> latency - TCP_RR's poll time is < 10us), we can treat it as a short
> halt and poll until the timer fires. The expiry callback
> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
> during the busy poll. This avoids the context-switch overhead and the
> latency of waking up the vCPU.

Would this work too and be simpler?

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482fb9260..8d42f5304d94 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1964,16 +1964,12 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 
 	old = val = vcpu->halt_poll_ns;
 	grow = READ_ONCE(halt_poll_ns_grow);
-	/* 10us base */
-	if (val == 0 && grow)
-		val = 10000;
-	else
-		val *= grow;
+	val *= grow;
 
 	if (val > halt_poll_ns)
 		val = halt_poll_ns;
 
-	vcpu->halt_poll_ns = val;
+	vcpu->halt_poll_ns = max(10000u, val);
 	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
 
@@ -1988,7 +1984,7 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
 	else
 		val /= shrink;
 
-	vcpu->halt_poll_ns = val;
+	vcpu->halt_poll_ns = max(10000u, val);
 	trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
 }
 

(Plus moving 10000 into a module parameter?)  Can you measure higher CPU
utilization than with your patch?  David, what do you think?
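
A minimal sketch of that variant, reusing the halt_poll_ns_base name the
patch under discussion already introduces but exporting it as a module
parameter (an assumption, not code from this thread):

static unsigned int halt_poll_ns_base = 10000;
module_param(halt_poll_ns_base, uint, S_IRUGO | S_IWUSR);

with grow/shrink then clamping against it:

	vcpu->halt_poll_ns = max(halt_poll_ns_base, val);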

Thanks,

Paolo


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 13:57 ` Paolo Bonzini
@ 2016-05-19 14:52   ` Christian Borntraeger
  2016-05-19 14:56     ` Paolo Bonzini
  2016-05-24  2:48   ` Wanpeng Li
  1 sibling, 1 reply; 19+ messages in thread
From: Christian Borntraeger @ 2016-05-19 14:52 UTC (permalink / raw)
  To: Paolo Bonzini, Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, David Matlack

On 05/19/2016 03:57 PM, Paolo Bonzini wrote:
> 
> 
> On 19/05/2016 15:27, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> If an emulated lapic timer will fire soon (within 10us, the base of
>> dynamic halt-polling and the lower end of message-passing workload
>> latency - TCP_RR's poll time is < 10us), we can treat it as a short
>> halt and poll until the timer fires. The expiry callback
>> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
>> during the busy poll. This avoids the context-switch overhead and the
>> latency of waking up the vCPU.
> 
> Would this work too and be simpler?

Hmm, your patch only fiddles with the grow/shrink logic (which might
be a good idea independently of this change), but the original patch
actually takes into account that a wakeup timer gives us a guaranteed
maximum time - IOW we know exactly what the maximum poll time is.

> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4fd482fb9260..8d42f5304d94 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1964,16 +1964,12 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
> 
>  	old = val = vcpu->halt_poll_ns;
>  	grow = READ_ONCE(halt_poll_ns_grow);
> -	/* 10us base */
> -	if (val == 0 && grow)
> -		val = 10000;
> -	else
> -		val *= grow;
> +	val *= grow;
> 
>  	if (val > halt_poll_ns)
>  		val = halt_poll_ns;
> 
> -	vcpu->halt_poll_ns = val;
> +	vcpu->halt_poll_ns = max(10000u, val);

>  	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
>  }
> 
> @@ -1988,7 +1984,7 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
>  	else
>  		val /= shrink;
> 
> -	vcpu->halt_poll_ns = val;
> +	vcpu->halt_poll_ns = max(10000u, val);

That would prevent halt_poll_ns from going 0, no?


>  	trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
>  }
> 
> 
> (Plus moving 10000 into a module parameter?)  Can you measure higher CPU
> utilization than with your patch?  David, what do you think?
> 
> Thanks,
> 
> Paolo
> 


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 14:52   ` Christian Borntraeger
@ 2016-05-19 14:56     ` Paolo Bonzini
  2016-05-19 15:03       ` Christian Borntraeger
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-05-19 14:56 UTC (permalink / raw)
  To: Christian Borntraeger, Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, David Matlack



On 19/05/2016 16:52, Christian Borntraeger wrote:
>> > Would this work too and be simpler?
> Hmm, your patch only fiddles with the grow/shrink logic (which might
> be a good idea independently of this change), but the original patch
> actually takes into account that a wakeup timer gives us a guaranteed
> maximum time - IOW we know exactly what the maximum poll time is.
> 

Yes, it's different.  The question is whether a 10us poll (40,000 clock
cycles) has an impact even if it's sometimes wrong.

Thanks,

Paolo


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 14:56     ` Paolo Bonzini
@ 2016-05-19 15:03       ` Christian Borntraeger
  2016-05-19 15:06         ` Paolo Bonzini
  0 siblings, 1 reply; 19+ messages in thread
From: Christian Borntraeger @ 2016-05-19 15:03 UTC (permalink / raw)
  To: Paolo Bonzini, Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, David Matlack

On 05/19/2016 04:56 PM, Paolo Bonzini wrote:
> 
> 
> On 19/05/2016 16:52, Christian Borntraeger wrote:
>>>> Would this work too and be simpler?
>> Hmm, your patch only fiddles with the grow/shrink logic (which might
>> be a good idea independently of this change), but the original patch
>> actually takes into account that a wakeup timer gives us a guaranteed
>> maximum time - IOW we know exactly what the maximum poll time is.
>>
> 
> Yes, it's different.  The question is whether a 10us poll (40,000 clock
> cycles) has an impact even if it's sometimes wrong.

Valid question. As I said, this change might be something good independently
of the original patch (it might make it unnecessary, though). On the other
hand, I can handle ~30 guest entry/exit cycles of a simple exit like diag9c.
Needs measurement.


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 15:03       ` Christian Borntraeger
@ 2016-05-19 15:06         ` Paolo Bonzini
  2016-05-19 15:42           ` Christian Borntraeger
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-05-19 15:06 UTC (permalink / raw)
  To: Christian Borntraeger, Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, David Matlack



On 19/05/2016 17:03, Christian Borntraeger wrote:
> > > Would this work too and be simpler?
> > > Hmm, your patch only fiddles with the grow/shrink logic (which might
> > > be a good idea independently of this change), but the original patch
> > > actually takes into account that a wakeup timer gives us a guaranteed
> > > maximum time - IOW we know exactly what the maximum poll time is.
> > 
> > Yes, it's different.  The question is whether a 10us poll (40,000 clock
> > cycles) has an impact even if it's sometimes wrong.
> 
> Valid question. As I said, this change might be something good independently
> of the original patch (it might make it unnecessary, though). On the other
> hand, I can handle ~30 guest entry/exit cycles of a simple exit like diag9c.
> Needs measurement.

Actually I'm okay with the original patch; especially on s390, where the
maximum poll time is small, it may make a bigger difference.  Though I
suppose the timer interrupt is not floating?

Since it's not 4.7 material, I'll wait for your experiments and David's
remarks.

Paolo


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 15:06         ` Paolo Bonzini
@ 2016-05-19 15:42           ` Christian Borntraeger
  0 siblings, 0 replies; 19+ messages in thread
From: Christian Borntraeger @ 2016-05-19 15:42 UTC (permalink / raw)
  To: Paolo Bonzini, Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, David Matlack

On 05/19/2016 05:06 PM, Paolo Bonzini wrote:
> 
> 
> On 19/05/2016 17:03, Christian Borntraeger wrote:
>>>> Would this work too and be simpler?
>>>> Hmm, your patch only fiddles with the grow/shrink logic (which might
>>>> be a good idea independently of this change), but the original patch
>>>> actually takes into account that a wakeup timer gives us a guaranteed
>>>> maximum time - IOW we know exactly what the maximum poll time is.
>>>
>>> Yes, it's different.  The question is whether a 10us poll (40,000 clock
>>> cycles) has an impact even if it's sometimes wrong.
>>
>> Valid question. As I said, this change might be something good independently
>> of the original patch (it might make it unnecessary, though). On the other
>> hand, I can handle ~30 guest entry/exit cycles of a simple exit like diag9c.
>> Needs measurement.
> 
> Actually I'm okay with the original patch, and especially on s390 where
> the maximum poll time is small it may make a bigger difference.  Though
> I suppose the timer interrupt is not floating?

Right, it's cpu-local. So a timer wakeup would be considered valid (if the
timer kicks in before the poll ends - the poll also checks whether the timer
has expired, and maybe the hrtimer is a bit late).
> 
> Since it's not 4.7 material, I'll wait for your experiments and David's
> remarks.

I will try to get both patches scheduled but it might take a while.


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 13:27 [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
  2016-05-19 13:46 ` Christian Borntraeger
  2016-05-19 13:57 ` Paolo Bonzini
@ 2016-05-19 18:01 ` David Matlack
  2016-05-19 18:36   ` David Matlack
  2 siblings, 1 reply; 19+ messages in thread
From: David Matlack @ 2016-05-19 18:01 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On Thu, May 19, 2016 at 6:27 AM, Wanpeng Li <kernellwp@gmail.com> wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
>
> If an emulated lapic timer will fire soon (within 10us, the base of
> dynamic halt-polling and the lower end of message-passing workload
> latency - TCP_RR's poll time is < 10us), we can treat it as a short
> halt and poll until the timer fires. The expiry callback
> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
> during the busy poll. This avoids the context-switch overhead and the
> latency of waking up the vCPU.

If I understand correctly, your patch aims to reduce the latency of
(APIC Timer expires) -> (Guest resumes execution) using halt-polling.
Let me know if I'm misunderstanding.

In general, I don't think it makes sense to poll for timer interrupts.
We know when the timer interrupt is going to arrive. If we care about
the latency of delivering that interrupt to the guest, we should
program the hrtimer to wake us up slightly early, and then deliver the
virtual timer interrupt right on time (I think KVM's TSC Deadline
Timer emulation already does this). I'm curious to know if this scheme
would give the same performance improvement to iperf as your patch.
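
(For reference, a simplified sketch of that mechanism as I remember it
from arch/x86/kvm/lapic.c - the hrtimer fires lapic_timer_advance_ns
early, and just before vmentry the remainder is busy-waited out so the
interrupt lands on time; error handling and statistics elided:)

void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 guest_tsc, tsc_deadline;

	if (!apic->lapic_timer.expired_tscdeadline)
		return;

	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
	apic->lapic_timer.expired_tscdeadline = 0;
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

	/* spin out the advance window we deliberately woke up early by */
	if (guest_tsc < tsc_deadline)
		__delay(tsc_deadline - guest_tsc);
}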

We discussed this a bit on the mailing list before
(https://lkml.org/lkml/2016/3/29/680). I'd like to see halt-polling
and timer interrupts go in the opposite direction: if the next timer
event (from any timer) is less than vcpu->halt_poll_ns, don't poll at
all.
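
Sketched against the names this patch introduces (halt() and
poll_then_halt() are hypothetical helpers, just to show the shape), that
opposite policy would be something like:

	u64 remaining = kvm_arch_timer_remaining(vcpu);

	if (remaining < vcpu->halt_poll_ns) {
		/*
		 * A timer wakeup is guaranteed before polling could
		 * pay off; halt immediately instead of burning CPU.
		 */
		halt(vcpu);		/* hypothetical helper */
	} else {
		poll_then_halt(vcpu);	/* hypothetical helper */
	}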

>
> iperf TCP gets a ~6% bandwidth improvement.

Can you explain why your patch results in this bandwidth improvement?

[...]


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 18:01 ` David Matlack
@ 2016-05-19 18:36   ` David Matlack
  2016-05-20  2:04     ` Yang Zhang
  0 siblings, 1 reply; 19+ messages in thread
From: David Matlack @ 2016-05-19 18:36 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On Thu, May 19, 2016 at 11:01 AM, David Matlack <dmatlack@google.com> wrote:
> On Thu, May 19, 2016 at 6:27 AM, Wanpeng Li <kernellwp@gmail.com> wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> If an emulated lapic timer will fire soon (within 10us, the base of
>> dynamic halt-polling and the lower end of message-passing workload
>> latency - TCP_RR's poll time is < 10us), we can treat it as a short
>> halt and poll until the timer fires. The expiry callback
>> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
>> during the busy poll. This avoids the context-switch overhead and the
>> latency of waking up the vCPU.
>
> If I understand correctly, your patch aims to reduce the latency of
> (APIC Timer expires) -> (Guest resumes execution) using halt-polling.
> Let me know if I'm misunderstanding.
>
> In general, I don't think it makes sense to poll for timer interrupts.
> We know when the timer interrupt is going to arrive. If we care about
> the latency of delivering that interrupt to the guest, we should
> program the hrtimer to wake us up slightly early, and then deliver the
> virtual timer interrupt right on time (I think KVM's TSC Deadline
> Timer emulation already does this).

(It looks like the way to enable this feature is to set the module
parameter lapic_timer_advance_ns and make sure your guest is using the
TSC Deadline timer instead of the APIC Timer.)
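
(If memory serves, the knob is declared in arch/x86/kvm/x86.c roughly as

unsigned int lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);

i.e. it defaults to 0/disabled and can be set at module load time, e.g.
"modprobe kvm lapic_timer_advance_ns=1000".)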

> I'm curious to know if this scheme
> would give the same performance improvement to iperf as your patch.
>
> We discussed this a bit on the mailing list before
> (https://lkml.org/lkml/2016/3/29/680). I'd like to see halt-polling
> and timer interrupts go in the opposite direction: if the next timer
> event (from any timer) is less than vcpu->halt_poll_ns, don't poll at
> all.
>
>>
>> iperf TCP gets a ~6% bandwidth improvement.
>
> Can you explain why your patch results in this bandwidth improvement?
>
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: David Matlack <dmatlack@google.com>
>> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>> v1 -> v2:
>>  * add return statement to non-x86 archs
>>  * capture never expire case for x86 (hrtimer is not started)
>>
>>  arch/arm/include/asm/kvm_host.h     |  4 ++++
>>  arch/arm64/include/asm/kvm_host.h   |  4 ++++
>>  arch/mips/include/asm/kvm_host.h    |  4 ++++
>>  arch/powerpc/include/asm/kvm_host.h |  4 ++++
>>  arch/s390/include/asm/kvm_host.h    |  4 ++++
>>  arch/x86/kvm/lapic.c                | 11 +++++++++++
>>  arch/x86/kvm/lapic.h                |  1 +
>>  arch/x86/kvm/x86.c                  |  5 +++++
>>  include/linux/kvm_host.h            |  1 +
>>  virt/kvm/kvm_main.c                 | 14 ++++++++++----
>>  10 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 4cd8732..a5fd858 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  static inline void kvm_arm_init_debug(void) {}
>>  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index d49399d..94e227a 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -359,6 +359,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  void kvm_arm_init_debug(void);
>>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
>> diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
>> index 9a37a10..456bc42 100644
>> --- a/arch/mips/include/asm/kvm_host.h
>> +++ b/arch/mips/include/asm/kvm_host.h
>> @@ -813,6 +813,10 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>>
>>  #endif /* __MIPS_KVM_HOST_H__ */
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index ec35af3..5986c79 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -729,5 +729,9 @@ static inline void kvm_arch_exit(void) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  #endif /* __POWERPC_KVM_HOST_H__ */
>> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
>> index 37b9017..bdb01a1 100644
>> --- a/arch/s390/include/asm/kvm_host.h
>> +++ b/arch/s390/include/asm/kvm_host.h
>> @@ -696,6 +696,10 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
>>                 struct kvm_memory_slot *slot) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
>>
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index bbb5b28..cfeeac3 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -256,6 +256,17 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
>>         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
>>  }
>>
>> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu)
>> +{
>> +       struct kvm_lapic *apic = vcpu->arch.apic;
>> +       struct hrtimer *timer = &apic->lapic_timer.timer;
>> +
>> +       if (!hrtimer_active(timer))
>> +               return -1ULL;
>> +       else
>> +               return ktime_to_ns(hrtimer_get_remaining(timer));
>> +}
>> +
>>  static inline int apic_lvt_nmi_mode(u32 lvt_val)
>>  {
>>         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index 891c6da..ee4da6c 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -212,4 +212,5 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>>                         struct kvm_vcpu **dest_vcpu);
>>  int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
>>                         const unsigned long *bitmap, u32 bitmap_size);
>> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu);
>>  #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a8c7ca3..9b5ad99 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -7623,6 +7623,11 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
>>  struct static_key kvm_no_apic_vcpu __read_mostly;
>>  EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
>>
>> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return apic_get_timer_expire(vcpu);
>> +}
>> +
>>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>>  {
>>         struct page *page;
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index b1fa8f1..14d6c23 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -663,6 +663,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
>>  void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
>>  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
>>  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
>> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu);
>>
>>  void kvm_flush_remote_tlbs(struct kvm *kvm);
>>  void kvm_reload_remote_mmus(struct kvm *kvm);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index dd4ac9d..e4bb30b 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -78,6 +78,9 @@ module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
>>  static unsigned int halt_poll_ns_shrink;
>>  module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
>>
>> +/* lower-end of message passing workload latency TCP_RR's poll time < 10us */
>> +static unsigned int halt_poll_ns_base = 10000;
>> +
>>  /*
>>   * Ordering of locks:
>>   *
>> @@ -1966,7 +1969,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
>>         grow = READ_ONCE(halt_poll_ns_grow);
>>         /* 10us base */
>>         if (val == 0 && grow)
>> -               val = 10000;
>> +               val = halt_poll_ns_base;
>>         else
>>                 val *= grow;
>>
>> @@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>>         ktime_t start, cur;
>>         DECLARE_SWAITQUEUE(wait);
>>         bool waited = false;
>> -       u64 block_ns;
>> +       u64 block_ns, delta, remaining;
>>
>> +       remaining = kvm_arch_timer_remaining(vcpu);
>>         start = cur = ktime_get();
>> -       if (vcpu->halt_poll_ns) {
>> -               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
>> +       if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
>> +               ktime_t stop;
>>
>> +               delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
>> +               stop = ktime_add_ns(ktime_get(), delta);
>>                 ++vcpu->stat.halt_attempted_poll;
>>                 do {
>>                         /*
>> --
>> 1.9.1
>>


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 18:36   ` David Matlack
@ 2016-05-20  2:04     ` Yang Zhang
  2016-05-20  5:53       ` Wanpeng Li
  2016-05-20 18:37       ` David Matlack
  0 siblings, 2 replies; 19+ messages in thread
From: Yang Zhang @ 2016-05-20  2:04 UTC (permalink / raw)
  To: David Matlack, Wanpeng Li
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On 2016/5/20 2:36, David Matlack wrote:
> On Thu, May 19, 2016 at 11:01 AM, David Matlack <dmatlack@google.com> wrote:
>> On Thu, May 19, 2016 at 6:27 AM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> If an emulated lapic timer will fire soon (within 10us, the base of
>>> dynamic halt-polling and the lower end of message-passing workload
>>> latency - TCP_RR's poll time is < 10us), we can treat it as a short
>>> halt and poll until the timer fires. The expiry callback
>>> apic_timer_fn() sets KVM_REQ_PENDING_TIMER, and this flag is checked
>>> during the busy poll. This avoids the context-switch overhead and the
>>> latency of waking up the vCPU.
>>
>> If I understand correctly, your patch aims to reduce the latency of
>> (APIC Timer expires) -> (Guest resumes execution) using halt-polling.
>> Let me know if I'm misunderstanding.
>>
>> In general, I don't think it makes sense to poll for timer interrupts.
>> We know when the timer interrupt is going to arrive. If we care about
>> the latency of delivering that interrupt to the guest, we should
>> program the hrtimer to wake us up slightly early, and then deliver the
>> virtual timer interrupt right on time (I think KVM's TSC Deadline
>> Timer emulation already does this).
>
> (It looks like the way to enable this feature is to set the module
> parameter lapic_timer_advance_ns and make sure your guest is using the
> TSC Deadline timer instead of the APIC Timer.)

This feature is slightly different from the current advance-expiration
approach. Advance expiration relies on the VCPU running (it polls before
vmentry). But in some cases the timer interrupt may be blocked by another
thread (i.e., the IF bit is clear) and the VCPU cannot be scheduled to run
immediately, so even if the timer is advanced early, the VCPU may still see
the latency. Polling is different: it ensures the VCPU is aware of the
timer expiration before it is scheduled out.

>
>> I'm curious to know if this scheme
>> would give the same performance improvement to iperf as your patch.
>>
>> We discussed this a bit on the mailing list before
>> (https://lkml.org/lkml/2016/3/29/680). I'd like to see halt-polling
>> and timer interrupts go in the opposite direction: if the next timer
>> event (from any timer) is less than vcpu->halt_poll_ns, don't poll at
>> all.
>>
>>>
>>> iperf TCP gets a ~6% bandwidth improvement.
>>
>> Can you explain why your patch results in this bandwidth improvement?

It should be reasonable. I have seen the same improvement with a
context-switch benchmark: the latency is reduced from ~2600ns to ~2300ns
with a similar mechanism (the same idea but a different implementation).

[...]
>>> -               val = 10000;
>>> +               val = halt_poll_ns_base;
>>>         else
>>>                 val *= grow;
>>>
>>> @@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>>>         ktime_t start, cur;
>>>         DECLARE_SWAITQUEUE(wait);
>>>         bool waited = false;
>>> -       u64 block_ns;
>>> +       u64 block_ns, delta, remaining;
>>>
>>> +       remaining = kvm_arch_timer_remaining(vcpu);
>>>         start = cur = ktime_get();
>>> -       if (vcpu->halt_poll_ns) {
>>> -               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
>>> +       if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
>>> +               ktime_t stop;
>>>
>>> +               delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
>>> +               stop = ktime_add_ns(ktime_get(), delta);
>>>                 ++vcpu->stat.halt_attempted_poll;
>>>                 do {
>>>                         /*
>>> --
>>> 1.9.1
>>>


-- 
best regards
yang


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-20  2:04     ` Yang Zhang
@ 2016-05-20  5:53       ` Wanpeng Li
  2016-05-20 18:37       ` David Matlack
  1 sibling, 0 replies; 19+ messages in thread
From: Wanpeng Li @ 2016-05-20  5:53 UTC (permalink / raw)
  To: Yang Zhang
  Cc: David Matlack, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

2016-05-20 10:04 GMT+08:00 Yang Zhang <yang.zhang.wz@gmail.com>:
> On 2016/5/20 2:36, David Matlack wrote:
>>
>> On Thu, May 19, 2016 at 11:01 AM, David Matlack <dmatlack@google.com>
>> wrote:
>>>
>>> On Thu, May 19, 2016 at 6:27 AM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>>>
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> If an emulated lapic timer will fire soon (within 10us, the base of
>>>> dynamic halt-polling, chosen because the lower end of message-passing
>>>> workload latency, e.g. TCP_RR, has a poll time < 10us), we can treat
>>>> the halt as short and poll until the timer fires. The expiry callback
>>>> apic_timer_fn() will set KVM_REQ_PENDING_TIMER, and this flag is
>>>> checked during the busy poll. This avoids the context-switch overhead
>>>> and the latency of waking up the vCPU.
>>>
>>>
>>> If I understand correctly, your patch aims to reduce the latency of
>>> (APIC Timer expires) -> (Guest resumes execution) using halt-polling.
>>> Let me know if I'm misunderstanding.
>>>
>>> In general, I don't think it makes sense to poll for timer interrupts.
>>> We know when the timer interrupt is going to arrive. If we care about
>>> the latency of delivering that interrupt to the guest, we should
>>> program the hrtimer to wake us up slightly early, and then deliver the
>>> virtual timer interrupt right on time (I think KVM's TSC Deadline
>>> Timer emulation already does this).
>>
>>
>> (It looks like the way to enable this feature is to set the module
>> parameter lapic_timer_advance_ns and make sure your guest is using the
>> TSC Deadline timer instead of the APIC Timer.)
>
>
> This feature is slightly different from the current advance-expiration
> approach. Advance expiration relies on the vCPU running (it polls
> before vmentry). But in some cases the timer interrupt may be blocked
> by another thread (i.e., the IF bit is clear) and the vCPU cannot be
> scheduled to run immediately, so even if the timer is advanced, the
> vCPU may still see the latency. Polling is different: it ensures the
> vCPU sees the timer expiration before it is scheduled out.

Great explanation, Yang! I'd like to include this statement in my
patch description.

>
>>
>>> I'm curious to know if this scheme
>>> would give the same performance improvement to iperf as your patch.
>>>
>>> We discussed this a bit on the mailing list before
>>> (https://lkml.org/lkml/2016/3/29/680). I'd like to see halt-polling
>>> and timer interrupts go in the opposite direction: if the next timer
>>> event (from any timer) is less than vcpu->halt_poll_ns, don't poll at
>>> all.
>>>
>>>>
>>>> iperf TCP gets a ~6% bandwidth improvement.
>>>
>>>
>>> Can you explain why your patch results in this bandwidth improvement?
>
>
> It should be reasonable. I have seen the same improvement with a
> context-switch benchmark: the latency is reduced from ~2600ns to
> ~2300ns with a similar mechanism (the same idea but a different
> implementation).

Good to know. ;-)
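
For reference, a rough sketch of the mechanism the commit message
describes (a simplified illustration; the helper name is hypothetical
and this is not the exact kvm_vcpu_block() loop from the patch):

#include <linux/kvm_host.h>
#include <linux/ktime.h>

/*
 * Busy-poll until the poll window closes or the emulated lapic
 * timer's expiry callback, apic_timer_fn(), flags the vCPU via
 * KVM_REQ_PENDING_TIMER.
 */
static bool vcpu_poll_for_timer(struct kvm_vcpu *vcpu, ktime_t stop)
{
	do {
		/* set from hrtimer context when the timer fires */
		if (test_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests))
			return true;	/* fired while polling */
		cpu_relax();
	} while (ktime_before(ktime_get(), stop));

	return false;	/* window elapsed, fall back to blocking */
}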

Regards,
Wanpeng Li


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-20  2:04     ` Yang Zhang
  2016-05-20  5:53       ` Wanpeng Li
@ 2016-05-20 18:37       ` David Matlack
  2016-05-23  1:26         ` Yang Zhang
  1 sibling, 1 reply; 19+ messages in thread
From: David Matlack @ 2016-05-20 18:37 UTC (permalink / raw)
  To: Yang Zhang
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On Thu, May 19, 2016 at 7:04 PM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
> On 2016/5/20 2:36, David Matlack wrote:
>>
>> On Thu, May 19, 2016 at 11:01 AM, David Matlack <dmatlack@google.com>
>> wrote:
>>>
>>> On Thu, May 19, 2016 at 6:27 AM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>>>
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> If an emulated lapic timer will fire soon (within 10us, the base of
>>>> dynamic halt-polling, chosen because the lower end of message-passing
>>>> workload latency, e.g. TCP_RR, has a poll time < 10us), we can treat
>>>> the halt as short and poll until the timer fires. The expiry callback
>>>> apic_timer_fn() will set KVM_REQ_PENDING_TIMER, and this flag is
>>>> checked during the busy poll. This avoids the context-switch overhead
>>>> and the latency of waking up the vCPU.
>>>
>>>
>>> If I understand correctly, your patch aims to reduce the latency of
>>> (APIC Timer expires) -> (Guest resumes execution) using halt-polling.
>>> Let me know if I'm misunderstanding.
>>>
>>> In general, I don't think it makes sense to poll for timer interrupts.
>>> We know when the timer interrupt is going to arrive. If we care about
>>> the latency of delivering that interrupt to the guest, we should
>>> program the hrtimer to wake us up slightly early, and then deliver the
>>> virtual timer interrupt right on time (I think KVM's TSC Deadline
>>> Timer emulation already does this).
>>
>>
>> (It looks like the way to enable this feature is to set the module
>> parameter lapic_timer_advance_ns and make sure your guest is using the
>> TSC Deadline timer instead of the APIC Timer.)
>
>
> This feature is slightly different from the current advance-expiration
> approach. Advance expiration relies on the vCPU running (it polls
> before vmentry). But in some cases the timer interrupt may be blocked
> by another thread (i.e., the IF bit is clear) and the vCPU cannot be
> scheduled to run immediately, so even if the timer is advanced, the
> vCPU may still see the latency. Polling is different: it ensures the
> vCPU sees the timer expiration before it is scheduled out.
>
>>
>>> I'm curious to know if this scheme
>>> would give the same performance improvement to iperf as your patch.
>>>
>>> We discussed this a bit on the mailing list before
>>> (https://lkml.org/lkml/2016/3/29/680). I'd like to see halt-polling
>>> and timer interrupts go in the opposite direction: if the next timer
>>> event (from any timer) is less than vcpu->halt_poll_ns, don't poll at
>>> all.
>>>
>>>>
>>>> iperf TCP gets a ~6% bandwidth improvement.
>>>
>>>
>>> Can you explain why your patch results in this bandwidth improvement?
>
>
> It should be reasonable. I have seen the same improvement with a
> context-switch benchmark: the latency is reduced from ~2600ns to
> ~2300ns with a similar mechanism (the same idea but a different
> implementation).

It's not obvious to me why polling for a timer interrupt would improve
context switch latency. Can you explain a bit more?
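
For reference, the "don't poll at all" direction suggested in the
quoted discussion above could look roughly like the sketch below; the
helper should_poll() is hypothetical, while kvm_arch_timer_remaining()
is the hook introduced by the patch:

/*
 * Skip polling when the next timer event lands inside the poll
 * window: the hrtimer wakeup will arrive anyway, so polling would
 * only burn CPU.
 */
static bool should_poll(struct kvm_vcpu *vcpu)
{
	u64 remaining = kvm_arch_timer_remaining(vcpu);

	return vcpu->halt_poll_ns && remaining >= vcpu->halt_poll_ns;
}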


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-20 18:37       ` David Matlack
@ 2016-05-23  1:26         ` Yang Zhang
  2016-05-23 18:04           ` David Matlack
  0 siblings, 1 reply; 19+ messages in thread
From: Yang Zhang @ 2016-05-23  1:26 UTC (permalink / raw)
  To: David Matlack
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On 2016/5/21 2:37, David Matlack wrote:
> It's not obvious to me why polling for a timer interrupt would improve
> context switch latency. Can you explain a bit more?

We have a workload that uses a high-resolution timer (less than 1ms)
inside the guest and relies on that timer to wake itself up. Sometimes
the timer is expected to fire just after the vCPU blocks on a halt
instruction, but the thread running on the CPU may keep hardware
interrupts disabled for a long time due to disk access. This blocks the
timer interrupt until interrupts are re-enabled.
As an optimization, we let the vCPU poll for a while before scheduling
out if the next timer will arrive soon. The results look good when
running several workloads inside the guest.
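
A minimal guest-side illustration of this kind of workload (an assumed
example, not taken from our actual setup): a thread repeatedly arms a
sub-millisecond timer and sleeps on it, so every wakeup depends on
prompt delivery of the guest timer interrupt.

#include <time.h>

static void wait_short_tick(void)
{
	/* sub-millisecond sleep; each wakeup needs a timely timer irq */
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 };

	clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}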

-- 
best regards
yang


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23  1:26         ` Yang Zhang
@ 2016-05-23 18:04           ` David Matlack
  2016-05-24  1:13             ` Yang Zhang
  0 siblings, 1 reply; 19+ messages in thread
From: David Matlack @ 2016-05-23 18:04 UTC (permalink / raw)
  To: Yang Zhang
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On Sun, May 22, 2016 at 6:26 PM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
> On 2016/5/21 2:37, David Matlack wrote:
>>
>> It's not obvious to me why polling for a timer interrupt would improve
>> context switch latency. Can you explain a bit more?
>
>
> We have a workload that uses a high-resolution timer (less than 1ms)
> inside the guest and relies on that timer to wake itself up. Sometimes
> the timer is expected to fire just after the vCPU blocks on a halt
> instruction, but the thread running on the CPU may keep hardware
> interrupts disabled for a long time due to disk access. This blocks
> the timer interrupt until interrupts are re-enabled.

Does this happen on the idle thread (swapper)? If not, halt-polling
may not help; it only polls if there are no other runnable threads.

> As an optimization, we let the vCPU poll for a while before scheduling
> out if the next timer will arrive soon. The results look good when
> running several workloads inside the guest.

Thanks for the explanation, I appreciate it.


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23 18:04           ` David Matlack
@ 2016-05-24  1:13             ` Yang Zhang
  2016-05-24  1:16               ` David Matlack
  0 siblings, 1 reply; 19+ messages in thread
From: Yang Zhang @ 2016-05-24  1:13 UTC (permalink / raw)
  To: David Matlack
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On 2016/5/24 2:04, David Matlack wrote:
> On Sun, May 22, 2016 at 6:26 PM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
> Does this happen on the idle thread (swapper)? If not, halt-polling
> may not help; it only polls if there are no other runnable threads.

Yes, there is no runnable task inside the guest.

-- 
best regards
yang


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  1:13             ` Yang Zhang
@ 2016-05-24  1:16               ` David Matlack
  2016-05-24  2:55                 ` Yang Zhang
  0 siblings, 1 reply; 19+ messages in thread
From: David Matlack @ 2016-05-24  1:16 UTC (permalink / raw)
  To: Yang Zhang
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On Mon, May 23, 2016 at 6:13 PM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
> On 2016/5/24 2:04, David Matlack wrote:
>> Does this happen on the idle thread (swapper)? If not, halt-polling
>> may not help; it only polls if there are no other runnable threads.
>
>
> Yes, there is no runnable task inside the guest.

Sorry for the confusion, my question was about the host, not the
guest. Halt-polling only polls if there are no other runnable threads
on the host CPU (see the check for single_task_running() in
kvm_vcpu_block()).
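
For reference, the check in question looks roughly like this in
kvm_vcpu_block() around this kernel version (condensed, not the full
function):

	do {
		/*
		 * The poll succeeds once the vCPU becomes runnable,
		 * e.g. when the timer callback sets
		 * KVM_REQ_PENDING_TIMER.
		 */
		if (kvm_vcpu_check_block(vcpu) < 0) {
			++vcpu->stat.halt_successful_poll;
			goto out;
		}
		cur = ktime_get();
	} while (single_task_running() && ktime_before(cur, stop));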

* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-19 13:57 ` Paolo Bonzini
  2016-05-19 14:52   ` Christian Borntraeger
@ 2016-05-24  2:48   ` Wanpeng Li
  1 sibling, 0 replies; 19+ messages in thread
From: Wanpeng Li @ 2016-05-24  2:48 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: linux-kernel, kvm, Wanpeng Li, Radim Krčmář,
	David Matlack, Christian Borntraeger

2016-05-19 21:57 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> On 19/05/2016 15:27, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> If an emulated lapic timer will fire soon (within 10us, the base of
>> dynamic halt-polling, chosen because the lower end of message-passing
>> workload latency, e.g. TCP_RR, has a poll time < 10us), we can treat
>> the halt as short and poll until the timer fires. The expiry callback
>> apic_timer_fn() will set KVM_REQ_PENDING_TIMER, and this flag is
>> checked during the busy poll. This avoids the context-switch overhead
>> and the latency of waking up the vCPU.
>
> Would this work too and be simpler?
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4fd482fb9260..8d42f5304d94 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1964,16 +1964,12 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
>
>         old = val = vcpu->halt_poll_ns;
>         grow = READ_ONCE(halt_poll_ns_grow);
> -       /* 10us base */
> -       if (val == 0 && grow)
> -               val = 10000;
> -       else
> -               val *= grow;
> +       val *= grow;
>
>         if (val > halt_poll_ns)
>                 val = halt_poll_ns;
>
> -       vcpu->halt_poll_ns = val;
> +       vcpu->halt_poll_ns = max(10000u, val);
>         trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
>  }
>
> @@ -1988,7 +1984,7 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
>         else
>                 val /= shrink;
>
> -       vcpu->halt_poll_ns = val;
> +       vcpu->halt_poll_ns = max(10000u, val);
>         trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
>  }
>
>
> (Plus moving 10000 into a module parameter?)  Can you measure higher CPU
> utilization than with your patch?  David, what do you think?

It adds ~6% more time in C0 compared to mine.
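
As for the parenthetical about a module parameter, a sketch of what
that could look like (the name halt_poll_ns_base is borrowed from my
patch; the exact shape is an assumption, not code from this thread):

#include <linux/module.h>

/* 10us floor for the poll window, tunable at runtime */
static unsigned int halt_poll_ns_base = 10000;
module_param(halt_poll_ns_base, uint, S_IRUGO | S_IWUSR);

static void set_halt_poll_ns(struct kvm_vcpu *vcpu, unsigned int val)
{
	/* never let the grown/shrunk window drop below the floor */
	vcpu->halt_poll_ns = max(halt_poll_ns_base, val);
}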

Regards,
Wanpeng Li


* Re: [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  1:16               ` David Matlack
@ 2016-05-24  2:55                 ` Yang Zhang
  0 siblings, 0 replies; 19+ messages in thread
From: Yang Zhang @ 2016-05-24  2:55 UTC (permalink / raw)
  To: David Matlack
  Cc: Wanpeng Li, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger

On 2016/5/24 9:16, David Matlack wrote:
> On Mon, May 23, 2016 at 6:13 PM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
>>> Does this happen on the idle thread (swapper)? If not, halt-polling
>>> may not help; it only polls if there are no other runnable threads.
>>
>>
>> Yes, there is no runnable task inside the guest.
>
> Sorry for the confusion, my question was about the host, not the
> guest. Halt-polling only polls if there are no other runnable threads
> on the host CPU (see the check for single_task_running() in
> kvm_vcpu_block()).

OK, I see your point; I didn't check that before. But I have observed
that the host CPU may wake up immediately after entering power idle,
and another task then takes the CPU until the scheduler signal arrives.
This can delay timer injection by several ms.


-- 
best regards
yang


Thread overview: 19+ messages
2016-05-19 13:27 [PATCH v2] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
2016-05-19 13:46 ` Christian Borntraeger
2016-05-19 13:57 ` Paolo Bonzini
2016-05-19 14:52   ` Christian Borntraeger
2016-05-19 14:56     ` Paolo Bonzini
2016-05-19 15:03       ` Christian Borntraeger
2016-05-19 15:06         ` Paolo Bonzini
2016-05-19 15:42           ` Christian Borntraeger
2016-05-24  2:48   ` Wanpeng Li
2016-05-19 18:01 ` David Matlack
2016-05-19 18:36   ` David Matlack
2016-05-20  2:04     ` Yang Zhang
2016-05-20  5:53       ` Wanpeng Li
2016-05-20 18:37       ` David Matlack
2016-05-23  1:26         ` Yang Zhang
2016-05-23 18:04           ` David Matlack
2016-05-24  1:13             ` Yang Zhang
2016-05-24  1:16               ` David Matlack
2016-05-24  2:55                 ` Yang Zhang
