All of lore.kernel.org
 help / color / mirror / Atom feed
* KVM: x86: do not execute halted vcpus (resend)
@ 2008-09-08 18:23 Marcelo Tosatti
  2008-09-09 14:15 ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2008-09-08 18:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel


Offline or uninitialized vcpus can be executed if requested to perform
userspace work.

Follow Avi's suggestion to handle halted vcpus in the main loop,
simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
indicate events that promote state from halted to running.

Also standardize vcpu wake sites.

Avi, please confirm that this does not break Windows reboot (which I can't
reproduce).

Signed-off-by: Marcelo Tosatti <mtosatti <at> redhat.com>


diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4cb4430..634132a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,10 +200,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	if (!atomic_inc_and_test(&pt->pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+	if (vcpu0 && waitqueue_active(&vcpu0->wq))
 		wake_up_interruptible(&vcpu0->wq);
-	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
 	pt->scheduled = ktime_to_ns(pt->timer.expires);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index be94f93..fd00f69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -339,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-			kvm_vcpu_kick(vcpu);
-		else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-			if (waitqueue_active(&vcpu->wq))
-				wake_up_interruptible(&vcpu->wq);
-		}
+		kvm_vcpu_kick(vcpu);
 
 		result = (orig_irr == 0);
 		break;
@@ -384,8 +378,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			vcpu->arch.sipi_vector = vector;
 			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-			if (waitqueue_active(&vcpu->wq))
-				wake_up_interruptible(&vcpu->wq);
+			kvm_vcpu_kick(vcpu);
 		}
 		break;
 
@@ -950,10 +943,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 
 	if(!atomic_inc_and_test(&apic->timer.pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-	if (waitqueue_active(q)) {
-		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	if (waitqueue_active(q))
 		wake_up_interruptible(q);
-	}
+
 	if (apic_lvtt_period(apic)) {
 		result = 1;
 		apic->timer.dev.expires = ktime_add_ns(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..bf98d40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2798,11 +2798,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		up_read(&vcpu->kvm->slots_lock);
-		kvm_vcpu_block(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
 		return 1;
 	} else {
 		vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -3097,24 +3092,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
 
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-		pr_debug("vcpu %d received sipi with vector # %x\n",
-		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
-		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
-		if (r)
-			return r;
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-	}
-
-	down_read(&vcpu->kvm->slots_lock);
-	vapic_enter(vcpu);
-
-again:
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			kvm_mmu_unload(vcpu);
@@ -3151,22 +3132,13 @@ again:
 
 	local_irq_disable();
 
-	if (vcpu->requests || need_resched()) {
+	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
 		goto out;
 	}
 
-	if (signal_pending(current)) {
-		local_irq_enable();
-		preempt_enable();
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		++vcpu->stat.signal_exits;
-		goto out;
-	}
-
 	if (vcpu->guest_debug.enabled)
 		kvm_x86_ops->guest_debug_pre(vcpu);
 
@@ -3227,26 +3199,63 @@ again:
 	kvm_lapic_sync_from_vapic(vcpu);
 
 	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+out:
+	return r;
+}
 
-	if (r > 0) {
-		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-			r = -EINTR;
-			kvm_run->exit_reason = KVM_EXIT_INTR;
-			++vcpu->stat.request_irq_exits;
-			goto out;
-		}
-		if (!need_resched())
-			goto again;
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
+		printk("vcpu %d received sipi with vector # %x\n",
+		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
+		kvm_lapic_reset(vcpu);
+		r = kvm_x86_ops->vcpu_reset(vcpu);
+		if (r)
+			return r;
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-out:
-	up_read(&vcpu->kvm->slots_lock);
-	if (r > 0) {
-		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		goto again;
+	down_read(&vcpu->kvm->slots_lock);
+	vapic_enter(vcpu);
+
+	r = 1;
+	while (r > 0) {
+		if (kvm_arch_vcpu_runnable(vcpu))
+			r = vcpu_enter_guest(vcpu, kvm_run);
+		else {
+			up_read(&vcpu->kvm->slots_lock);
+			kvm_vcpu_block(vcpu);
+			down_read(&vcpu->kvm->slots_lock);
+			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+					vcpu->arch.mp_state =
+							KVM_MP_STATE_RUNNABLE;
+			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+				r = -EINTR;
+		}
+
+		if (r > 0) {
+			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.request_irq_exits;
+			}
+			if (signal_pending(current)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.signal_exits;
+			}
+			if (need_resched()) {
+				up_read(&vcpu->kvm->slots_lock);
+				kvm_resched(vcpu);
+				down_read(&vcpu->kvm->slots_lock);
+			}
+		}
 	}
 
+	up_read(&vcpu->kvm->slots_lock);
 	post_kvm_run_save(vcpu, kvm_run);
 
 	vapic_exit(vcpu);
@@ -3266,6 +3275,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		r = -EAGAIN;
 		goto out;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..4b03643 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
 #define KVM_REQ_MMU_RELOAD         3
 #define KVM_REQ_TRIPLE_FAULT       4
 #define KVM_REQ_PENDING_TIMER      5
+#define KVM_REQ_UNHALT             6
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..63e661b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -980,12 +980,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu))
-			break;
-		if (kvm_cpu_has_pending_timer(vcpu))
-			break;
-		if (kvm_arch_vcpu_runnable(vcpu))
+		if (kvm_cpu_has_interrupt(vcpu) ||
+		    kvm_cpu_has_pending_timer(vcpu) ||
+		    kvm_arch_vcpu_runnable(vcpu)) {
+			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
+		}
 		if (signal_pending(current))
 			break;
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
  2008-09-08 18:23 KVM: x86: do not execute halted vcpus (resend) Marcelo Tosatti
@ 2008-09-09 14:15 ` Avi Kivity
  2008-09-10 12:04   ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Avi Kivity @ 2008-09-09 14:15 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm-devel

Marcelo Tosatti wrote:
> Offline or uninitialized vcpu's can be executed if requested to perform
> userspace work. 
>
> Follow Avi's suggestion to handle halted vcpu's in the main loop,
> simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
> indicate events that promote state from halted to running.
>
> Also standardize vcpu wake sites.
>
> Avi, please confirm that this does not break Windows reboot (which I can't
> reproduce).
>   

I couldn't reproduce it either (probably mistested earlier), so I 
applied it.  Sorry about the drops.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
  2008-09-09 14:15 ` Avi Kivity
@ 2008-09-10 12:04   ` Avi Kivity
  2008-09-10 18:37     ` Marcelo Tosatti
  0 siblings, 1 reply; 7+ messages in thread
From: Avi Kivity @ 2008-09-10 12:04 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm-devel

Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Offline or uninitialized vcpu's can be executed if requested to perform
>> userspace work.
>> Follow Avi's suggestion to handle halted vcpu's in the main loop,
>> simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
>> indicate events that promote state from halted to running.
>>
>> Also standardize vcpu wake sites.
>>
>> Avi, please confirm that this does not break Windows reboot (which I 
>> can't
>> reproduce).
>>   
>
> I couldn't reproduce it either (probably mistested earlier), so I 
> applied it.  Sorry about the drops.
>

Actually it does reproduce.  If you reboot twice, the second reboot will 
hang during reset (with cs:ip = f000:fff0).  Plain Windows XP.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
  2008-09-10 12:04   ` Avi Kivity
@ 2008-09-10 18:37     ` Marcelo Tosatti
  2008-09-10 18:42       ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2008-09-10 18:37 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

On Wed, Sep 10, 2008 at 03:04:10PM +0300, Avi Kivity wrote:
> Avi Kivity wrote:
>> Marcelo Tosatti wrote:
>>> Offline or uninitialized vcpu's can be executed if requested to perform
>>> userspace work.
>>> Follow Avi's suggestion to handle halted vcpu's in the main loop,
>>> simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
>>> indicate events that promote state from halted to running.
>>>
>>> Also standardize vcpu wake sites.
>>>
>>> Avi, please confirm that this does not break Windows reboot (which I  
>>> can't
>>> reproduce).
>>>   
>>
>> I couldn't reproduce it either (probably mistested earlier), so I  
>> applied it.  Sorry about the drops.
>>
>
> Actually it does reproduce.  If you reboot twice, the second reboot will  
> hang during reset (with cs:ip = f000:fff0).  Plain Windows XP.

OK, easily reproducible by adding a sleep at the start of
qemu_kvm_system_reset.

The problem is that the vcpu executes HLT between the KBD ioport write
and the actual system reset by the IO thread. By that time there will be
no event that takes it out of STATE_HALTED.

The following fixes it; however, older userspace will remain broken.
The alternative is to special-case f000:fff0 in-kernel. Which is preferred?

diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 346eedf..f3790fa 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -330,7 +330,13 @@ int kvm_set_mpstate(kvm_context_t kvm, int vcpu,
  */
 static inline int kvm_reset_mpstate(kvm_context_t kvm, int vcpu)
 {
-    struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED};
+    struct kvm_mp_state mp_state;
+
+    if (vcpu)
+        mp_state.mp_state = KVM_MP_STATE_UNINITIALIZED;
+    else
+        mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+
     return kvm_set_mpstate(kvm, vcpu, &mp_state);
 }
 #endif
diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c
index 5daedd1..9983e5a 100644
--- a/qemu/qemu-kvm-x86.c
+++ b/qemu/qemu-kvm-x86.c
@@ -673,15 +673,13 @@ int handle_tpr_access(void *opaque, int vcpu,
 void kvm_arch_cpu_reset(CPUState *env)
 {
     kvm_arch_load_regs(env);
-    if (env->cpu_index != 0) {
-	if (kvm_irqchip_in_kernel(kvm_context)) {
+    if (kvm_irqchip_in_kernel(kvm_context)) {
 #ifdef KVM_CAP_MP_STATE
-	    kvm_reset_mpstate(kvm_context, env->cpu_index);
+        kvm_reset_mpstate(kvm_context, env->cpu_index);
 #endif
-	} else {
-	    env->interrupt_request &= ~CPU_INTERRUPT_HARD;
-	    env->halted = 1;
-	    env->exception_index = EXCP_HLT;
-	}
+    } else if (env->cpu_index != 0) {
+        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        env->halted = 1;
+        env->exception_index = EXCP_HLT;
     }
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
  2008-09-10 18:37     ` Marcelo Tosatti
@ 2008-09-10 18:42       ` Avi Kivity
  0 siblings, 0 replies; 7+ messages in thread
From: Avi Kivity @ 2008-09-10 18:42 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm-devel

Marcelo Tosatti wrote:
>> Actually it does reproduce.  If you reboot twice, the second reboot will  
>> hang during reset (with cs:ip = f000:fff0).  Plain Windows XP.
>>     
>
> OK, easily reproducible by adding a sleep at the start of
> qemu_kvm_system_reset.
>
> The problem is that the vcpu HLT's between the KBD ioport write and the
> actual system reset by the IO thread. By then time there will be no
> event that takes it out of STATE_HALTED.
>
> The following fixes it, however older userspace will remain borked.
> Alternative is to special case f000:fff0 in-kernel. What is preferred?
>
>   

Special-case the kvm_set_sregs ioctl with cs:eip f000:fff0 (and cr0.pe=0
for good measure).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
  2008-09-10 19:40 Marcelo Tosatti
@ 2008-09-11  8:07 ` Avi Kivity
  0 siblings, 0 replies; 7+ messages in thread
From: Avi Kivity @ 2008-09-11  8:07 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm-devel

Marcelo Tosatti wrote:
> KVM: x86: unhalt vcpu0 on reset
>
> Since "KVM: x86: do not execute halted vcpus", HLT by vcpu0 before system
> reset by the IO thread will hang the guest.
>
> Mark vcpu as runnable in such case.
>   

Applied, thanks.  I still want userspace fixed, so please resend the
libkvm patch, changelogged and signoffed.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: KVM: x86: do not execute halted vcpus (resend)
@ 2008-09-10 19:40 Marcelo Tosatti
  2008-09-11  8:07 ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2008-09-10 19:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

On Wed, Sep 10, 2008 at 09:42:05PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
> >> Actually it does reproduce.  If you reboot twice, the second reboot will  
> >> hang during reset (with cs:ip = f000:fff0).  Plain Windows XP.
> >>     
> >
> > OK, easily reproducible by adding a sleep at the start of
> > qemu_kvm_system_reset.
> >
> > The problem is that the vcpu HLT's between the KBD ioport write and the
> > actual system reset by the IO thread. By then time there will be no
> > event that takes it out of STATE_HALTED.
> >
> > The following fixes it, however older userspace will remain borked.
> > Alternative is to special case f000:fff0 in-kernel. What is preferred?
> >
> >   
> 
> Special-case the kvm_set_sregs ioctl with cs:eip f000:fff0 (and cr0.pe=0
> for good measure).

KVM: x86: unhalt vcpu0 on reset

Since "KVM: x86: do not execute halted vcpus", HLT by vcpu0 before system
reset by the IO thread will hang the guest.

Mark the vcpu as runnable in such a case.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>


Index: kvm.tip/arch/x86/kvm/x86.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/x86.c
+++ kvm.tip/arch/x86/kvm/x86.c
@@ -3959,6 +3959,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
 	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
 	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
+	if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
+	    !(vcpu->arch.cr0 & X86_CR0_PE))
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
 	vcpu_put(vcpu);
 
 	return 0;


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-09-11  8:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-08 18:23 KVM: x86: do not execute halted vcpus (resend) Marcelo Tosatti
2008-09-09 14:15 ` Avi Kivity
2008-09-10 12:04   ` Avi Kivity
2008-09-10 18:37     ` Marcelo Tosatti
2008-09-10 18:42       ` Avi Kivity
2008-09-10 19:40 Marcelo Tosatti
2008-09-11  8:07 ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.