All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] KVM: x86: introduce cancel_hv_tscdeadline
@ 2016-06-30  0:52 Wanpeng Li
  2016-06-30  0:52 ` [PATCH v3 2/2] KVM: x86: fix underflow in TSC deadline calculation Wanpeng Li
  0 siblings, 1 reply; 3+ messages in thread
From: Wanpeng Li @ 2016-06-30  0:52 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Wanpeng Li, Paolo Bonzini, Radim Krčmář, Yunhong Jiang

From: Wanpeng Li <wanpeng.li@hotmail.com>

Introduce cancel_hv_tscdeadline() to encapsulate preemption 
timer cancel stuff.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/kvm/lapic.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fdc05ae..9c20ac1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1349,14 +1349,19 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
+static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+{
+	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+	apic->lapic_timer.hv_timer_in_use = false;
+}
+
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
 	WARN_ON(swait_active(&vcpu->wq));
-	kvm_x86_ops->cancel_hv_timer(vcpu);
-	apic->lapic_timer.hv_timer_in_use = false;
+	cancel_hv_tscdeadline(apic);
 	apic_timer_expired(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
@@ -1376,10 +1381,8 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
 			hrtimer_cancel(&apic->lapic_timer.timer);
 
 			/* In case the sw timer triggered in the window */
-			if (atomic_read(&apic->lapic_timer.pending)) {
-				apic->lapic_timer.hv_timer_in_use = false;
-				kvm_x86_ops->cancel_hv_timer(apic->vcpu);
-			}
+			if (atomic_read(&apic->lapic_timer.pending))
+				cancel_hv_tscdeadline(apic);
 		}
 		trace_kvm_hv_timer_state(vcpu->vcpu_id,
 				apic->lapic_timer.hv_timer_in_use);
@@ -1395,8 +1398,7 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
 	if (!apic->lapic_timer.hv_timer_in_use)
 		return;
 
-	kvm_x86_ops->cancel_hv_timer(vcpu);
-	apic->lapic_timer.hv_timer_in_use = false;
+	cancel_hv_tscdeadline(apic);
 
 	if (atomic_read(&apic->lapic_timer.pending))
 		return;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v3 2/2] KVM: x86: fix underflow in TSC deadline calculation
  2016-06-30  0:52 [PATCH v3 1/2] KVM: x86: introduce cancel_hv_tscdeadline Wanpeng Li
@ 2016-06-30  0:52 ` Wanpeng Li
  2016-06-30 10:21   ` Paolo Bonzini
  0 siblings, 1 reply; 3+ messages in thread
From: Wanpeng Li @ 2016-06-30  0:52 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Wanpeng Li, Paolo Bonzini, Radim Krčmář, Yunhong Jiang

From: Wanpeng Li <wanpeng.li@hotmail.com>

INFO: rcu_sched detected stalls on CPUs/tasks:
 1-...: (11800 GPs behind) idle=45d/140000000000000/0 softirq=0/0 fqs=21663 
 (detected by 0, t=65016 jiffies, g=11500, c=11499, q=719)
Task dump for CPU 1:
qemu-system-x86 R  running task        0  3529   3525 0x00080808
 ffff8802021791a0 ffff880212895040 0000000000000001 00007f1c2c00db40
 ffff8801dd20fcd3 ffffc90002b98000 ffff8801dd20fc88 ffff8801dd20fcf8
 0000000000000286 ffff8801dd2ac538 ffff8801dd20fcc0 ffffffffc06949c9
Call Trace:
? kvm_write_guest_cached+0xb9/0x160 [kvm]
? __delay+0xf/0x20
? wait_lapic_expire+0x14a/0x200 [kvm]
? kvm_arch_vcpu_ioctl_run+0xcbe/0x1b00 [kvm]
? kvm_arch_vcpu_ioctl_run+0xe34/0x1b00 [kvm]
? kvm_vcpu_ioctl+0x2d3/0x7c0 [kvm]
? __fget+0x5/0x210
? do_vfs_ioctl+0x96/0x6a0
? __fget_light+0x2a/0x90
? SyS_ioctl+0x79/0x90
? do_syscall_64+0x7c/0x1e0
? entry_SYSCALL64_slow_path+0x25/0x25

This can be reproduced readily by running a full dynticks guest(since hrtimer 
in guest is heavily used) w/ lapic_timer_advance disabled.

If fail to program hardware preemption timer, we will fallback to hrtimer based 
method, however, a previous programmed preemption timer miss to cancel in this 
scenario which results in one hardware preemption timer and one hrtimer emulated 
tsc deadline timer run simultaneously. So sometimes the target guest deadline 
tsc is earlier than guest tsc, which leads to the computation in vmx_set_hv_timer 
can underflow and cause delta_tsc to be set a huge value, then host soft lockup 
as above. 

This patch fix it by cancelling the previous programmed preemption timer if there 
is once we failed to program the new preemption timer and fallback to hrtimer 
based method.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
v2 -> v3:
 * depends on start_hv_tscdeadline caller to start sw_timer
 * move lapic_timer.pending check in start_hv_tscdeadline
v1 -> v2:
 * abstract the set_hv_timer and cancel_hv_tscdeadline

 arch/x86/kvm/lapic.c | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9c20ac1..b44c587 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1366,27 +1366,35 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
 
+static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+{
+	u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+	if (atomic_read(&apic->lapic_timer.pending) ||
+		kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+		if (apic->lapic_timer.hv_timer_in_use)
+			cancel_hv_tscdeadline(apic);
+	} else {
+		apic->lapic_timer.hv_timer_in_use = true;
+		hrtimer_cancel(&apic->lapic_timer.timer);
+
+		/* In case the sw timer triggered in the window */
+		if (atomic_read(&apic->lapic_timer.pending))
+			cancel_hv_tscdeadline(apic);
+	}
+	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+			apic->lapic_timer.hv_timer_in_use);
+	return kvm_lapic_hv_timer_in_use(apic->vcpu);
+}
+
 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	WARN_ON(apic->lapic_timer.hv_timer_in_use);
 
-	if (apic_lvtt_tscdeadline(apic) &&
-	    !atomic_read(&apic->lapic_timer.pending)) {
-		u64 tscdeadline = apic->lapic_timer.tscdeadline;
-
-		if (!kvm_x86_ops->set_hv_timer(vcpu, tscdeadline)) {
-			apic->lapic_timer.hv_timer_in_use = true;
-			hrtimer_cancel(&apic->lapic_timer.timer);
-
-			/* In case the sw timer triggered in the window */
-			if (atomic_read(&apic->lapic_timer.pending))
-				cancel_hv_tscdeadline(apic);
-		}
-		trace_kvm_hv_timer_state(vcpu->vcpu_id,
-				apic->lapic_timer.hv_timer_in_use);
-	}
+	if (apic_lvtt_tscdeadline(apic))
+		start_hv_tscdeadline(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
 
@@ -1452,18 +1460,9 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
-	} else if (apic_lvtt_tscdeadline(apic)) {
-		/* lapic timer in tsc deadline mode */
-		u64 tscdeadline = apic->lapic_timer.tscdeadline;
-
-		if (kvm_x86_ops->set_hv_timer &&
-		    !kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
-			apic->lapic_timer.hv_timer_in_use = true;
-			trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
-					apic->lapic_timer.hv_timer_in_use);
-		} else
+	} else if (apic_lvtt_tscdeadline(apic))
+		if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
 			start_sw_tscdeadline(apic);
-	}
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3 2/2] KVM: x86: fix underflow in TSC deadline calculation
  2016-06-30  0:52 ` [PATCH v3 2/2] KVM: x86: fix underflow in TSC deadline calculation Wanpeng Li
@ 2016-06-30 10:21   ` Paolo Bonzini
  0 siblings, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2016-06-30 10:21 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Wanpeng Li, Radim Krčmář, Yunhong Jiang



On 30/06/2016 02:52, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> INFO: rcu_sched detected stalls on CPUs/tasks:
>  1-...: (11800 GPs behind) idle=45d/140000000000000/0 softirq=0/0 fqs=21663 
>  (detected by 0, t=65016 jiffies, g=11500, c=11499, q=719)
> Task dump for CPU 1:
> qemu-system-x86 R  running task        0  3529   3525 0x00080808
>  ffff8802021791a0 ffff880212895040 0000000000000001 00007f1c2c00db40
>  ffff8801dd20fcd3 ffffc90002b98000 ffff8801dd20fc88 ffff8801dd20fcf8
>  0000000000000286 ffff8801dd2ac538 ffff8801dd20fcc0 ffffffffc06949c9
> Call Trace:
> ? kvm_write_guest_cached+0xb9/0x160 [kvm]
> ? __delay+0xf/0x20
> ? wait_lapic_expire+0x14a/0x200 [kvm]
> ? kvm_arch_vcpu_ioctl_run+0xcbe/0x1b00 [kvm]
> ? kvm_arch_vcpu_ioctl_run+0xe34/0x1b00 [kvm]
> ? kvm_vcpu_ioctl+0x2d3/0x7c0 [kvm]
> ? __fget+0x5/0x210
> ? do_vfs_ioctl+0x96/0x6a0
> ? __fget_light+0x2a/0x90
> ? SyS_ioctl+0x79/0x90
> ? do_syscall_64+0x7c/0x1e0
> ? entry_SYSCALL64_slow_path+0x25/0x25
> 
> This can be reproduced readily by running a full dynticks guest(since hrtimer 
> in guest is heavily used) w/ lapic_timer_advance disabled.
> 
> If fail to program hardware preemption timer, we will fallback to hrtimer based 
> method, however, a previous programmed preemption timer miss to cancel in this 
> scenario which results in one hardware preemption timer and one hrtimer emulated 
> tsc deadline timer run simultaneously. So sometimes the target guest deadline 
> tsc is earlier than guest tsc, which leads to the computation in vmx_set_hv_timer 
> can underflow and cause delta_tsc to be set a huge value, then host soft lockup 
> as above. 
> 
> This patch fix it by cancelling the previous programmed preemption timer if there 
> is once we failed to program the new preemption timer and fallback to hrtimer 
> based method.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Yunhong Jiang <yunhong.jiang@intel.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
> v2 -> v3:
>  * depends on start_hv_tscdeadline caller to start sw_timer
>  * move lapic_timer.pending check in start_hv_tscdeadline

Thanks, this looks good.  There are only a couple aesthetic changes
I'd like to make:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b44c5879b9b6..22a6474af220 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1384,7 +1384,7 @@ static bool start_hv_tscdeadline(struct kvm_lapic *apic)
        }
        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
                        apic->lapic_timer.hv_timer_in_use);
-       return kvm_lapic_hv_timer_in_use(apic->vcpu);
+       return apic->lapic_timer.hv_timer_in_use;
 }
 
 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
@@ -1460,9 +1460,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
                           apic->lapic_timer.period,
                           ktime_to_ns(ktime_add_ns(now,
                                        apic->lapic_timer.period)));
-       } else if (apic_lvtt_tscdeadline(apic))
+       } else if (apic_lvtt_tscdeadline(apic)) {
                if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
                        start_sw_tscdeadline(apic);
+       }
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)


Paolo

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-06-30 10:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-30  0:52 [PATCH v3 1/2] KVM: x86: introduce cancel_hv_tscdeadline Wanpeng Li
2016-06-30  0:52 ` [PATCH v3 2/2] KVM: x86: fix underflow in TSC deadline calculation Wanpeng Li
2016-06-30 10:21   ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.