linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
@ 2016-05-23  0:42 Wanpeng Li
  2016-05-23 18:01 ` David Matlack
  2016-05-27  6:40 ` David Hildenbrand
  0 siblings, 2 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-23  0:42 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Wanpeng Li, Paolo Bonzini, Radim Krčmář,
	David Matlack, Christian Borntraeger, Yang Zhang

From: Wanpeng Li <wanpeng.li@hotmail.com>

If an emulated lapic timer will fire soon(in the scope of 10us the
base of dynamic halt-polling, lower-end of message passing workload
latency TCP_RR's poll time < 10us) we can treat it as a short halt,
and poll to wait it fire, the fire callback apic_timer_fn() will set
KVM_REQ_PENDING_TIMER, and this flag will be check during busy poll.
This can avoid context switch overhead and the latency which we wake
up vCPU.

This feature is slightly different from current advance expiration 
way. Advance expiration rely on the vCPU is running(do polling before 
vmentry). But in some cases, the timer interrupt may be blocked by 
other thread(i.e., IF bit is clear) and vCPU cannot be scheduled to 
run immediately. So even advance the timer early, vCPU may still see 
the latency. But polling is different, it ensures the vCPU to aware 
the timer expiration before schedule out.

iperf TCP get ~6% bandwidth improvement.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: David Matlack <dmatlack@google.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Yang Zhang <yang.zhang.wz@gmail.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
v2 -> v3:
 * add Yang's statement to patch description
v1 -> v2:
 * add return statement to non-x86 archs
 * capture never expire case for x86 (hrtimer is not started)

 arch/arm/include/asm/kvm_host.h     |  4 ++++
 arch/arm64/include/asm/kvm_host.h   |  4 ++++
 arch/mips/include/asm/kvm_host.h    |  4 ++++
 arch/powerpc/include/asm/kvm_host.h |  4 ++++
 arch/s390/include/asm/kvm_host.h    |  4 ++++
 arch/x86/kvm/lapic.c                | 11 +++++++++++
 arch/x86/kvm/lapic.h                |  1 +
 arch/x86/kvm/x86.c                  |  5 +++++
 include/linux/kvm_host.h            |  1 +
 virt/kvm/kvm_main.c                 | 14 ++++++++++----
 10 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4cd8732..a5fd858 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d49399d..94e227a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -359,6 +359,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 9a37a10..456bc42 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -813,6 +813,10 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ec35af3..5986c79 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -729,5 +729,9 @@ static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 37b9017..bdb01a1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -696,6 +696,10 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return -1ULL;
+}
 
 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b28..cfeeac3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -256,6 +256,17 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
+u64 apic_get_timer_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct hrtimer *timer = &apic->lapic_timer.timer;
+
+	if (!hrtimer_active(timer))
+		return -1ULL;
+	else
+		return ktime_to_ns(hrtimer_get_remaining(timer));
+}
+
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
 {
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 891c6da..ee4da6c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -212,4 +212,5 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 			const unsigned long *bitmap, u32 bitmap_size);
+u64 apic_get_timer_expire(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a8c7ca3..9b5ad99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7623,6 +7623,11 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 struct static_key kvm_no_apic_vcpu __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
 
+u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
+{
+	return apic_get_timer_expire(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b1fa8f1..14d6c23 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -663,6 +663,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd4ac9d..e4bb30b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -78,6 +78,9 @@ module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
 static unsigned int halt_poll_ns_shrink;
 module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
 
+/* lower-end of message passing workload latency TCP_RR's poll time < 10us */
+static unsigned int halt_poll_ns_base = 10000;
+
 /*
  * Ordering of locks:
  *
@@ -1966,7 +1969,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	grow = READ_ONCE(halt_poll_ns_grow);
 	/* 10us base */
 	if (val == 0 && grow)
-		val = 10000;
+		val = halt_poll_ns_base;
 	else
 		val *= grow;
 
@@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	ktime_t start, cur;
 	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
-	u64 block_ns;
+	u64 block_ns, delta, remaining;
 
+	remaining = kvm_arch_timer_remaining(vcpu);
 	start = cur = ktime_get();
-	if (vcpu->halt_poll_ns) {
-		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+	if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
+		ktime_t stop;
 
+		delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
+		stop = ktime_add_ns(ktime_get(), delta);
 		++vcpu->stat.halt_attempted_poll;
 		do {
 			/*
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23  0:42 [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
@ 2016-05-23 18:01 ` David Matlack
  2016-05-24  2:19   ` Wanpeng Li
  2016-05-26  8:04   ` Wanpeng Li
  2016-05-27  6:40 ` David Hildenbrand
  1 sibling, 2 replies; 10+ messages in thread
From: David Matlack @ 2016-05-23 18:01 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger, Yang Zhang

On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>

I'm ok with this patch, but I'd like to better understand the target
workloads. What type of workloads do you expect to benefit from this?

>
> If an emulated lapic timer will fire soon(in the scope of 10us the
> base of dynamic halt-polling, lower-end of message passing workload
> latency TCP_RR's poll time < 10us) we can treat it as a short halt,
> and poll to wait it fire, the fire callback apic_timer_fn() will set
> KVM_REQ_PENDING_TIMER, and this flag will be check during busy poll.
> This can avoid context switch overhead and the latency which we wake
> up vCPU.
>
> This feature is slightly different from current advance expiration
> way. Advance expiration rely on the vCPU is running(do polling before
> vmentry). But in some cases, the timer interrupt may be blocked by
> other thread(i.e., IF bit is clear) and vCPU cannot be scheduled to
> run immediately. So even advance the timer early, vCPU may still see
> the latency. But polling is different, it ensures the vCPU to aware
> the timer expiration before schedule out.
>
> iperf TCP get ~6% bandwidth improvement.

I think my question got lost in the previous thread :). Can you
explain why TCP bandwidth improves with this patch?

>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: David Matlack <dmatlack@google.com>
> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
> v2 -> v3:
>  * add Yang's statement to patch description
> v1 -> v2:
>  * add return statement to non-x86 archs
>  * capture never expire case for x86 (hrtimer is not started)
>
>  arch/arm/include/asm/kvm_host.h     |  4 ++++
>  arch/arm64/include/asm/kvm_host.h   |  4 ++++
>  arch/mips/include/asm/kvm_host.h    |  4 ++++
>  arch/powerpc/include/asm/kvm_host.h |  4 ++++
>  arch/s390/include/asm/kvm_host.h    |  4 ++++
>  arch/x86/kvm/lapic.c                | 11 +++++++++++
>  arch/x86/kvm/lapic.h                |  1 +
>  arch/x86/kvm/x86.c                  |  5 +++++
>  include/linux/kvm_host.h            |  1 +
>  virt/kvm/kvm_main.c                 | 14 ++++++++++----
>  10 files changed, 48 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 4cd8732..a5fd858 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return -1ULL;
> +}
>
>  static inline void kvm_arm_init_debug(void) {}
>  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d49399d..94e227a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -359,6 +359,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return -1ULL;
> +}
>
>  void kvm_arm_init_debug(void);
>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
> index 9a37a10..456bc42 100644
> --- a/arch/mips/include/asm/kvm_host.h
> +++ b/arch/mips/include/asm/kvm_host.h
> @@ -813,6 +813,10 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return -1ULL;
> +}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
>  #endif /* __MIPS_KVM_HOST_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index ec35af3..5986c79 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -729,5 +729,9 @@ static inline void kvm_arch_exit(void) {}
>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return -1ULL;
> +}
>
>  #endif /* __POWERPC_KVM_HOST_H__ */
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 37b9017..bdb01a1 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -696,6 +696,10 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
>                 struct kvm_memory_slot *slot) {}
>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return -1ULL;
> +}
>
>  void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index bbb5b28..cfeeac3 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -256,6 +256,17 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
>         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
>  }
>
> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_lapic *apic = vcpu->arch.apic;
> +       struct hrtimer *timer = &apic->lapic_timer.timer;
> +
> +       if (!hrtimer_active(timer))
> +               return -1ULL;
> +       else
> +               return ktime_to_ns(hrtimer_get_remaining(timer));

There's a chance for a time-of-check to time-of-use race here, where
the timer expires after checking if it's active. It seems benign though,
I'd expect hrtimer_get_remaining() to underflow, which is the desired
behavior. In the worse case we halt-poll for up to 10 us, which is ok.
Do you agree?

> +}
> +
>  static inline int apic_lvt_nmi_mode(u32 lvt_val)
>  {
>         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 891c6da..ee4da6c 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -212,4 +212,5 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>                         struct kvm_vcpu **dest_vcpu);
>  int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
>                         const unsigned long *bitmap, u32 bitmap_size);
> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu);
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a8c7ca3..9b5ad99 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7623,6 +7623,11 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
>  struct static_key kvm_no_apic_vcpu __read_mostly;
>  EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
>
> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       return apic_get_timer_expire(vcpu);
> +}
> +
>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  {
>         struct page *page;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b1fa8f1..14d6c23 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -663,6 +663,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
>  void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
>  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
>  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu);
>
>  void kvm_flush_remote_tlbs(struct kvm *kvm);
>  void kvm_reload_remote_mmus(struct kvm *kvm);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index dd4ac9d..e4bb30b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -78,6 +78,9 @@ module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
>  static unsigned int halt_poll_ns_shrink;
>  module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
>
> +/* lower-end of message passing workload latency TCP_RR's poll time < 10us */
> +static unsigned int halt_poll_ns_base = 10000;
> +
>  /*
>   * Ordering of locks:
>   *
> @@ -1966,7 +1969,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
>         grow = READ_ONCE(halt_poll_ns_grow);
>         /* 10us base */
>         if (val == 0 && grow)
> -               val = 10000;
> +               val = halt_poll_ns_base;
>         else
>                 val *= grow;
>
> @@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>         ktime_t start, cur;
>         DECLARE_SWAITQUEUE(wait);
>         bool waited = false;
> -       u64 block_ns;
> +       u64 block_ns, delta, remaining;
>
> +       remaining = kvm_arch_timer_remaining(vcpu);
>         start = cur = ktime_get();
> -       if (vcpu->halt_poll_ns) {
> -               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
> +       if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
> +               ktime_t stop;
>
> +               delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
> +               stop = ktime_add_ns(ktime_get(), delta);
>                 ++vcpu->stat.halt_attempted_poll;
>                 do {
>                         /*
> --
> 1.9.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23 18:01 ` David Matlack
@ 2016-05-24  2:19   ` Wanpeng Li
  2016-05-24  2:25     ` Wanpeng Li
  2016-05-24  7:05     ` Wanpeng Li
  2016-05-26  8:04   ` Wanpeng Li
  1 sibling, 2 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-24  2:19 UTC (permalink / raw)
  To: David Matlack
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger, Yang Zhang

2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>
> I'm ok with this patch, but I'd like to better understand the target
> workloads. What type of workloads do you expect to benefit from this?

dynticks guests I think is one of workloads which can get benefit,
there are lots of upcoming fire timers captured by my feature. Even
during TCP testing. And also the workload of Yang's.

>
>>
>> If an emulated lapic timer will fire soon(in the scope of 10us the
>> base of dynamic halt-polling, lower-end of message passing workload
>> latency TCP_RR's poll time < 10us) we can treat it as a short halt,
>> and poll to wait it fire, the fire callback apic_timer_fn() will set
>> KVM_REQ_PENDING_TIMER, and this flag will be check during busy poll.
>> This can avoid context switch overhead and the latency which we wake
>> up vCPU.
>>
>> This feature is slightly different from current advance expiration
>> way. Advance expiration rely on the vCPU is running(do polling before
>> vmentry). But in some cases, the timer interrupt may be blocked by
>> other thread(i.e., IF bit is clear) and vCPU cannot be scheduled to
>> run immediately. So even advance the timer early, vCPU may still see
>> the latency. But polling is different, it ensures the vCPU to aware
>> the timer expiration before schedule out.
>>
>> iperf TCP get ~6% bandwidth improvement.
>
> I think my question got lost in the previous thread :). Can you
> explain why TCP bandwidth improves with this patch?

Ditto.

>
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: David Matlack <dmatlack@google.com>
>> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
>> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>> v2 -> v3:
>>  * add Yang's statement to patch description
>> v1 -> v2:
>>  * add return statement to non-x86 archs
>>  * capture never expire case for x86 (hrtimer is not started)
>>
>>  arch/arm/include/asm/kvm_host.h     |  4 ++++
>>  arch/arm64/include/asm/kvm_host.h   |  4 ++++
>>  arch/mips/include/asm/kvm_host.h    |  4 ++++
>>  arch/powerpc/include/asm/kvm_host.h |  4 ++++
>>  arch/s390/include/asm/kvm_host.h    |  4 ++++
>>  arch/x86/kvm/lapic.c                | 11 +++++++++++
>>  arch/x86/kvm/lapic.h                |  1 +
>>  arch/x86/kvm/x86.c                  |  5 +++++
>>  include/linux/kvm_host.h            |  1 +
>>  virt/kvm/kvm_main.c                 | 14 ++++++++++----
>>  10 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 4cd8732..a5fd858 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  static inline void kvm_arm_init_debug(void) {}
>>  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index d49399d..94e227a 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -359,6 +359,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  void kvm_arm_init_debug(void);
>>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
>> diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
>> index 9a37a10..456bc42 100644
>> --- a/arch/mips/include/asm/kvm_host.h
>> +++ b/arch/mips/include/asm/kvm_host.h
>> @@ -813,6 +813,10 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>>
>>  #endif /* __MIPS_KVM_HOST_H__ */
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index ec35af3..5986c79 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -729,5 +729,9 @@ static inline void kvm_arch_exit(void) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  #endif /* __POWERPC_KVM_HOST_H__ */
>> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
>> index 37b9017..bdb01a1 100644
>> --- a/arch/s390/include/asm/kvm_host.h
>> +++ b/arch/s390/include/asm/kvm_host.h
>> @@ -696,6 +696,10 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
>>                 struct kvm_memory_slot *slot) {}
>>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return -1ULL;
>> +}
>>
>>  void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
>>
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index bbb5b28..cfeeac3 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -256,6 +256,17 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
>>         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
>>  }
>>
>> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu)
>> +{
>> +       struct kvm_lapic *apic = vcpu->arch.apic;
>> +       struct hrtimer *timer = &apic->lapic_timer.timer;
>> +
>> +       if (!hrtimer_active(timer))
>> +               return -1ULL;
>> +       else
>> +               return ktime_to_ns(hrtimer_get_remaining(timer));
>
> There's a chance for a time-of-check to time-of-use race here, where
> the timer expires after checking if it's active. It seems benign though,
> I'd expect hrtimer_get_remaining() to underflow, which is the desired
> behavior. In the worse case we halt-poll for up to 10 us, which is ok.
> Do you agree?

Good point out. I agreed.

Regards,
Wanpeng Li

>
>> +}
>> +
>>  static inline int apic_lvt_nmi_mode(u32 lvt_val)
>>  {
>>         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index 891c6da..ee4da6c 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -212,4 +212,5 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>>                         struct kvm_vcpu **dest_vcpu);
>>  int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
>>                         const unsigned long *bitmap, u32 bitmap_size);
>> +u64 apic_get_timer_expire(struct kvm_vcpu *vcpu);
>>  #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a8c7ca3..9b5ad99 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -7623,6 +7623,11 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
>>  struct static_key kvm_no_apic_vcpu __read_mostly;
>>  EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
>>
>> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>> +{
>> +       return apic_get_timer_expire(vcpu);
>> +}
>> +
>>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>>  {
>>         struct page *page;
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index b1fa8f1..14d6c23 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -663,6 +663,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
>>  void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
>>  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
>>  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
>> +u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu);
>>
>>  void kvm_flush_remote_tlbs(struct kvm *kvm);
>>  void kvm_reload_remote_mmus(struct kvm *kvm);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index dd4ac9d..e4bb30b 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -78,6 +78,9 @@ module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
>>  static unsigned int halt_poll_ns_shrink;
>>  module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
>>
>> +/* lower-end of message passing workload latency TCP_RR's poll time < 10us */
>> +static unsigned int halt_poll_ns_base = 10000;
>> +
>>  /*
>>   * Ordering of locks:
>>   *
>> @@ -1966,7 +1969,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
>>         grow = READ_ONCE(halt_poll_ns_grow);
>>         /* 10us base */
>>         if (val == 0 && grow)
>> -               val = 10000;
>> +               val = halt_poll_ns_base;
>>         else
>>                 val *= grow;
>>
>> @@ -2014,12 +2017,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>>         ktime_t start, cur;
>>         DECLARE_SWAITQUEUE(wait);
>>         bool waited = false;
>> -       u64 block_ns;
>> +       u64 block_ns, delta, remaining;
>>
>> +       remaining = kvm_arch_timer_remaining(vcpu);
>>         start = cur = ktime_get();
>> -       if (vcpu->halt_poll_ns) {
>> -               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
>> +       if (vcpu->halt_poll_ns || remaining < halt_poll_ns_base) {
>> +               ktime_t stop;
>>
>> +               delta = vcpu->halt_poll_ns ? vcpu->halt_poll_ns : remaining;
>> +               stop = ktime_add_ns(ktime_get(), delta);
>>                 ++vcpu->stat.halt_attempted_poll;
>>                 do {
>>                         /*
>> --
>> 1.9.1
>>



-- 
Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  2:19   ` Wanpeng Li
@ 2016-05-24  2:25     ` Wanpeng Li
  2016-05-24  6:59       ` Christian Borntraeger
  2016-05-24  7:05     ` Wanpeng Li
  1 sibling, 1 reply; 10+ messages in thread
From: Wanpeng Li @ 2016-05-24  2:25 UTC (permalink / raw)
  To: David Matlack
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger, Yang Zhang

2016-05-24 10:19 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
>> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> I'm ok with this patch, but I'd like to better understand the target
>> workloads. What type of workloads do you expect to benefit from this?
>
> dynticks guests I think is one of workloads which can get benefit,
> there are lots of upcoming fire timers captured by my feature. Even
> during TCP testing. And also the workload of Yang's.

Do you think I should add an module parameter to enable/disable it
during module insmod or current patch is fine?

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  2:25     ` Wanpeng Li
@ 2016-05-24  6:59       ` Christian Borntraeger
  2016-05-24  7:10         ` Wanpeng Li
  2016-05-24  7:14         ` Wanpeng Li
  0 siblings, 2 replies; 10+ messages in thread
From: Christian Borntraeger @ 2016-05-24  6:59 UTC (permalink / raw)
  To: Wanpeng Li, David Matlack
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Yang Zhang

On 05/24/2016 04:25 AM, Wanpeng Li wrote:
> 2016-05-24 10:19 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
>> 2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
>>> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> I'm ok with this patch, but I'd like to better understand the target
>>> workloads. What type of workloads do you expect to benefit from this?
>>
>> dynticks guests I think is one of workloads which can get benefit,
>> there are lots of upcoming fire timers captured by my feature. Even
>> during TCP testing. And also the workload of Yang's.
> 
> Do you think I should add an module parameter to enable/disable it
> during module insmod or current patch is fine?

What about getting rid of this hunk

-		val = 10000;
+		val = halt_poll_ns_base;


and then rename "halt_poll_ns_base" into "halt_poll_ns_timer" that
can be changed as module parameter?




I also experimented with an s390 implementation, which seems pretty straightforward.
It is probably something like the following (whitespace damaged due to pcopy/paste)
and needs more testing.

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 38bbc98..a97739d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -682,6 +682,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                 struct kvm_async_pf *work);
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern u64 kvm_s390_timer_remaining(struct kvm_vcpu *vcpu);
 extern char sie_exit;
 
 static inline void kvm_arch_hardware_disable(void) {}
@@ -699,7 +700,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
 {
-       return -1ULL;
+       return kvm_s390_timer_remaining(vcpu);
 }
 
 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5a80af7..5b209a2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -936,6 +936,17 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
        return sltime;
 }
 
+
+u64 kvm_s390_timer_remaining(struct kvm_vcpu *vcpu)
+{
+       u64 result;
+
+       preempt_disable();
+       result = __calculate_sltime(vcpu);
+       preempt_enable();
+       return result;
+}
+
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 {
        u64 sltime;

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  2:19   ` Wanpeng Li
  2016-05-24  2:25     ` Wanpeng Li
@ 2016-05-24  7:05     ` Wanpeng Li
  1 sibling, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-24  7:05 UTC (permalink / raw)
  To: David Matlack
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger, Yang Zhang

2016-05-24 10:19 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
>> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> I'm ok with this patch, but I'd like to better understand the target
>> workloads. What type of workloads do you expect to benefit from this?
>
> dynticks guests I think is one of workloads which can get benefit,
> there are lots of upcoming fire timers captured by my feature. Even
> during TCP testing. And also the workload of Yang's.
>
>>
>>>
>>> If an emulated lapic timer will fire soon(in the scope of 10us the
>>> base of dynamic halt-polling, lower-end of message passing workload
>>> latency TCP_RR's poll time < 10us) we can treat it as a short halt,
>>> and poll to wait it fire, the fire callback apic_timer_fn() will set
>>> KVM_REQ_PENDING_TIMER, and this flag will be check during busy poll.
>>> This can avoid context switch overhead and the latency which we wake
>>> up vCPU.
>>>
>>> This feature is slightly different from current advance expiration
>>> way. Advance expiration rely on the vCPU is running(do polling before
>>> vmentry). But in some cases, the timer interrupt may be blocked by
>>> other thread(i.e., IF bit is clear) and vCPU cannot be scheduled to
>>> run immediately. So even advance the timer early, vCPU may still see
>>> the latency. But polling is different, it ensures the vCPU to aware
>>> the timer expiration before schedule out.
>>>
>>> iperf TCP get ~6% bandwidth improvement.
>>
>> I think my question got lost in the previous thread :). Can you
>> explain why TCP bandwidth improves with this patch?
>

Please forget TCP stuff. I run lmbench ctx switch benchmark:

echo HRTICK > /sys/kernel/debug/sched_features in dynticks guests.

Context switching - times in microseconds - smaller is better
-------------------------------------------------------------------------
Host                 OS  2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
                         ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
--------- ------------- ------ ------ ------ ------ ------ ------- -------
kernel     Linux 4.6.0+ 7.9800   11.0   10.8   14.6 9.4300    13.0
10.2 vanilla
kernel     Linux 4.6.0+   15.3   13.6   10.7   12.5 9.0000    12.8 7.38000 poll

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  6:59       ` Christian Borntraeger
@ 2016-05-24  7:10         ` Wanpeng Li
  2016-05-24  7:14         ` Wanpeng Li
  1 sibling, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-24  7:10 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: David Matlack, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Yang Zhang

2016-05-24 14:59 GMT+08:00 Christian Borntraeger <borntraeger@de.ibm.com>:
> On 05/24/2016 04:25 AM, Wanpeng Li wrote:
>> 2016-05-24 10:19 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
>>> 2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
>>>> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> I'm ok with this patch, but I'd like to better understand the target
>>>> workloads. What type of workloads do you expect to benefit from this?
>>>
>>> dynticks guests I think is one of workloads which can get benefit,
>>> there are lots of upcoming fire timers captured by my feature. Even
>>> during TCP testing. And also the workload of Yang's.
>>
>> Do you think I should add an module parameter to enable/disable it
>> during module insmod or current patch is fine?
>
> What about getting rid of this hunk
>
> -               val = 10000;
> +               val = halt_poll_ns_base;
>
>
> and then rename "halt_poll_ns_base" into "halt_poll_ns_timer" that
> can be changed as module parameter?

Good point,

>
>
> I also experimented with an s390 implementation, which seems pretty straightforward.
> It is probably something like the following (whitespace damaged due to pcopy/paste)
> and needs more testing.
>
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 38bbc98..a97739d 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -682,6 +682,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
>                                  struct kvm_async_pf *work);
>
>  extern int sie64a(struct kvm_s390_sie_block *, u64 *);
> +extern u64 kvm_s390_timer_remaining(struct kvm_vcpu *vcpu);
>  extern char sie_exit;
>
>  static inline void kvm_arch_hardware_disable(void) {}
> @@ -699,7 +700,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>  static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
>  {
> -       return -1ULL;
> +       return kvm_s390_timer_remaining(vcpu);
>  }
>
>  void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 5a80af7..5b209a2 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -936,6 +936,17 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
>         return sltime;
>  }
>
> +
> +u64 kvm_s390_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +       u64 result;
> +
> +       preempt_disable();
> +       result = __calculate_sltime(vcpu);
> +       preempt_enable();
> +       return result;
> +}
> +
>  int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
>  {
>         u64 sltime;
>



-- 
Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-24  6:59       ` Christian Borntraeger
  2016-05-24  7:10         ` Wanpeng Li
@ 2016-05-24  7:14         ` Wanpeng Li
  1 sibling, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-24  7:14 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: David Matlack, linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Yang Zhang

2016-05-24 14:59 GMT+08:00 Christian Borntraeger <borntraeger@de.ibm.com>:
> On 05/24/2016 04:25 AM, Wanpeng Li wrote:
>> 2016-05-24 10:19 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
>>> 2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
>>>> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> I'm ok with this patch, but I'd like to better understand the target
>>>> workloads. What type of workloads do you expect to benefit from this?
>>>
>>> dynticks guests I think is one of workloads which can get benefit,
>>> there are lots of upcoming fire timers captured by my feature. Even
>>> during TCP testing. And also the workload of Yang's.
>>
>> Do you think I should add an module parameter to enable/disable it
>> during module insmod or current patch is fine?
>
> What about getting rid of this hunk
>
> -               val = 10000;
> +               val = halt_poll_ns_base;
>
>
> and then rename "halt_poll_ns_base" into "halt_poll_ns_timer" that
> can be changed as module parameter?

Good idea, actually I remember Paolo mentioned to change this as an
module parameter in another thread.

>
> I also experimented with an s390 implementation, which seems pretty straightforward.
> It is probably something like the following (whitespace damaged due to pcopy/paste)
> and needs more testing.

Great work, Christian. I will send out a new version w/ module parameter.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23 18:01 ` David Matlack
  2016-05-24  2:19   ` Wanpeng Li
@ 2016-05-26  8:04   ` Wanpeng Li
  1 sibling, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2016-05-26  8:04 UTC (permalink / raw)
  To: David Matlack
  Cc: linux-kernel, kvm list, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	Christian Borntraeger, Yang Zhang, Yunhong Jiang

2016-05-24 2:01 GMT+08:00 David Matlack <dmatlack@google.com>:
> On Sun, May 22, 2016 at 5:42 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>
> I'm ok with this patch, but I'd like to better understand the target
> workloads. What type of workloads do you expect to benefit from this?
>

In addition, as Paolo great pointed out
http://www.spinics.net/lists/kvm/msg132983.html, the upcoming vmx
preemption timer also can get benefit, preemption time will just
switch to hrtimer if block, if the remaining expire time is enough
small and there is just a single task in this pCPU, poll will cut the
latency.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon
  2016-05-23  0:42 [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
  2016-05-23 18:01 ` David Matlack
@ 2016-05-27  6:40 ` David Hildenbrand
  1 sibling, 0 replies; 10+ messages in thread
From: David Hildenbrand @ 2016-05-27  6:40 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Wanpeng Li, Paolo Bonzini,
	Radim Krčmář,
	David Matlack, Christian Borntraeger, Yang Zhang


> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 4cd8732..a5fd858 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -284,6 +284,10 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> +static inline u64 kvm_arch_timer_remaining(struct kvm_vcpu *vcpu)
> +{
> +	return -1ULL;

U64_MAX for all applicable cases?

David

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-05-27  6:40 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-23  0:42 [PATCH v3] KVM: halt-polling: poll if emulated lapic timer will fire soon Wanpeng Li
2016-05-23 18:01 ` David Matlack
2016-05-24  2:19   ` Wanpeng Li
2016-05-24  2:25     ` Wanpeng Li
2016-05-24  6:59       ` Christian Borntraeger
2016-05-24  7:10         ` Wanpeng Li
2016-05-24  7:14         ` Wanpeng Li
2016-05-24  7:05     ` Wanpeng Li
2016-05-26  8:04   ` Wanpeng Li
2016-05-27  6:40 ` David Hildenbrand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).