kvm.vger.kernel.org archive mirror
* [patch 0/8] RFC: in-kernel timer emulation changes
@ 2009-07-06  1:55 Marcelo Tosatti
  2009-07-06  1:55 ` [patch 1/8] KVM: timer interface unification Marcelo Tosatti
                   ` (8 more replies)
  0 siblings, 9 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm

This still needs to survive kvm-autotest; posting early for comments.

It replaces hrtimer-based emulation with ktime_t comparisons on guest
entry. This avoids host load when guests are scheduled out, removes a
spinlock acquisition on entry (i8254.c's inject_lock), and makes
future improvements easier.
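
At a glance, the new entry path looks like this (a sketch distilled
from patch 4; the names match the code there, but this is
illustrative, not the literal diff):

	/* vcpu_enter_guest(), simplified: no per-device hrtimers stay
	 * armed; deadlines are checked against the host timebase here.
	 */
	kvm_inject_pending_timer_irqs(vcpu);	/* inject timers whose deadline passed */
	...
	kvm_vcpu_arm_exit(vcpu);	/* one hrtimer forces an exit at the next deadline */

A halted vcpu sleeps via schedule_hrtimeout() until
kvm_vcpu_next_timer_event(), so nothing fires on the host for a guest
that is scheduled out.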

-- 



* [patch 1/8] KVM: timer interface unification
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-08 13:33   ` Avi Kivity
  2009-07-06  1:55 ` [patch 2/8] KVM: move lapic timer ack to EOI handler Marcelo Tosatti
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-timer-abstract --]
[-- Type: text/plain, Size: 16572 bytes --]

Hide the details of timer emulation behind an interface, and unify
the hrtimer-based implementation.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
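
For orientation, a device model now drives its timer entirely through
the new calls; a minimal usage sketch against the kvm_timer.h
declarations added below (interval_ns is a placeholder value):

	struct kvm_timer t;

	kvm_timer_init(kvm, &t);		/* hrtimer setup, callback wiring */
	kvm_timer_vcpu_bind(&t, vcpu);		/* wake this vcpu on expiry */
	kvm_timer_start(&t, interval_ns, true);	/* periodic, period in ns */

	if (kvm_timer_has_pending(&t))		/* accumulated expirations? */
		kvm_timer_ack(&t);		/* consume one */

	kvm_timer_cancel(&t);			/* stop and clear pending */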

Index: kvm-new/arch/x86/kvm/i8254.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.c
+++ kvm-new/arch/x86/kvm/i8254.c
@@ -113,7 +113,7 @@ static s64 __kpit_elapsed(struct kvm *kv
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+	remaining = kvm_timer_remaining(&ps->pit_timer);
 	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
 	elapsed = mod_64(elapsed, ps->pit_timer.period);
 
@@ -229,7 +229,7 @@ int pit_has_pending_timer(struct kvm_vcp
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
 	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
-		return atomic_read(&pit->pit_state.pit_timer.pending);
+		return kvm_timer_has_pending(&pit->pit_state.pit_timer);
 	return 0;
 }
 
@@ -238,42 +238,17 @@ static void kvm_pit_ack_irq(struct kvm_i
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
 	spin_lock(&ps->inject_lock);
-	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
-		atomic_inc(&ps->pit_timer.pending);
+	kvm_timer_ack(&ps->pit_timer);
 	ps->irq_ack = 1;
 	spin_unlock(&ps->inject_lock);
 }
 
-void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-	struct hrtimer *timer;
-
-	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
-		return;
-
-	timer = &pit->pit_state.pit_timer.timer;
-	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
-}
-
 static void destroy_pit_timer(struct kvm_timer *pt)
 {
 	pr_debug("pit: execute del timer!\n");
-	hrtimer_cancel(&pt->timer);
+	kvm_timer_cancel(pt);
 }
 
-static bool kpit_is_periodic(struct kvm_timer *ktimer)
-{
-	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
-						 pit_timer);
-	return ps->is_periodic;
-}
-
-static struct kvm_timer_ops kpit_ops = {
-	.is_periodic = kpit_is_periodic,
-};
-
 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
 	struct kvm_timer *pt = &ps->pit_timer;
@@ -284,20 +259,10 @@ static void create_pit_timer(struct kvm_
 	pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
 
 	/* TODO The new value only affected after the retriggered */
-	hrtimer_cancel(&pt->timer);
-	pt->period = interval;
-	ps->is_periodic = is_period;
-
-	pt->timer.function = kvm_timer_fn;
-	pt->t_ops = &kpit_ops;
-	pt->kvm = ps->pit->kvm;
-	pt->vcpu = pt->kvm->bsp_vcpu;
+	kvm_timer_cancel(pt);
 
-	atomic_set(&pt->pending, 0);
 	ps->irq_ack = 1;
-
-	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
-		      HRTIMER_MODE_ABS);
+	kvm_timer_start(pt, interval, is_period);
 }
 
 static void pit_load_count(struct kvm *kvm, int channel, u32 val)
@@ -545,7 +510,7 @@ static int speaker_ioport_read(struct kv
 	return 0;
 }
 
-void kvm_pit_reset(struct kvm_pit *pit)
+static void kvm_pit_reset(struct kvm_pit *pit)
 {
 	int i;
 	struct kvm_kpit_channel_state *c;
@@ -559,7 +524,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	}
 	mutex_unlock(&pit->pit_state.lock);
 
-	atomic_set(&pit->pit_state.pit_timer.pending, 0);
+	kvm_timer_reset(&pit->pit_state.pit_timer);
 	pit->pit_state.irq_ack = 1;
 }
 
@@ -568,7 +533,7 @@ static void pit_mask_notifer(struct kvm_
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 
 	if (!mask) {
-		atomic_set(&pit->pit_state.pit_timer.pending, 0);
+		kvm_timer_reset(&pit->pit_state.pit_timer);
 		pit->pit_state.irq_ack = 1;
 	}
 }
@@ -608,8 +573,8 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;
-	hrtimer_init(&pit_state->pit_timer.timer,
-		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	kvm_timer_init(kvm, &pit_state->pit_timer);
+
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
@@ -634,14 +599,11 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 void kvm_free_pit(struct kvm *kvm)
 {
-	struct hrtimer *timer;
-
 	if (kvm->arch.vpit) {
+		kvm_timer_cancel(&kvm->arch.vpit->pit_state.pit_timer);
 		kvm_unregister_irq_mask_notifier(kvm, 0,
 					       &kvm->arch.vpit->mask_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
-		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
-		hrtimer_cancel(timer);
 		kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
 		mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 		kfree(kvm->arch.vpit);
@@ -686,7 +648,7 @@ void kvm_inject_pit_timer_irqs(struct kv
 		 * last one has been acked.
 		 */
 		spin_lock(&ps->inject_lock);
-		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
+		if (kvm_timer_has_pending(&ps->pit_timer) && ps->irq_ack) {
 			ps->irq_ack = 0;
 			inject = 1;
 		}
Index: kvm-new/arch/x86/kvm/kvm_timer.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/kvm_timer.h
+++ kvm-new/arch/x86/kvm/kvm_timer.h
@@ -1,18 +1,25 @@
 
 struct kvm_timer {
 	struct hrtimer timer;
-	s64 period; 				/* unit: ns */
-	atomic_t pending;			/* accumulated triggered timers */
+	s64 period; 			/* unit: ns */
+	atomic_t pending;		/* accumulated triggered timers */
 	bool reinject;
-	struct kvm_timer_ops *t_ops;
+	bool periodic;
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 };
 
-struct kvm_timer_ops {
-        bool (*is_periodic)(struct kvm_timer *);
-};
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer);
+void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic);
+void kvm_timer_cancel(struct kvm_timer *ktimer);
+void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu);
+
+int kvm_timer_has_pending(struct kvm_timer *ktimer);
+void kvm_timer_ack(struct kvm_timer *ktimer);
+void kvm_timer_reset(struct kvm_timer *ktimer);
+
+void kvm_migrate_timer(struct kvm_timer *ktimer);
 
 
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
+ktime_t kvm_timer_remaining(struct kvm_timer *ktimer);
 
Index: kvm-new/arch/x86/kvm/lapic.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/lapic.c
+++ kvm-new/arch/x86/kvm/lapic.c
@@ -485,7 +485,7 @@ static u32 apic_get_tmcct(struct kvm_lap
 	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
+	remaining = kvm_timer_remaining(&apic->lapic_timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 
@@ -601,28 +601,13 @@ static void update_divide_count(struct k
 
 static void start_apic_timer(struct kvm_lapic *apic)
 {
-	ktime_t now = apic->lapic_timer.timer.base->get_time();
-
-	apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+	u64 period = apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->divide_count;
-	atomic_set(&apic->lapic_timer.pending, 0);
 
-	if (!apic->lapic_timer.period)
+	if (!period)
 		return;
 
-	hrtimer_start(&apic->lapic_timer.timer,
-		      ktime_add_ns(now, apic->lapic_timer.period),
-		      HRTIMER_MODE_ABS);
-
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-			   PRIx64 ", "
-			   "timer initial count 0x%x, period %lldns, "
-			   "expire @ 0x%016" PRIx64 ".\n", __func__,
-			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-			   apic_get_reg(apic, APIC_TMICT),
-			   apic->lapic_timer.period,
-			   ktime_to_ns(ktime_add_ns(now,
-					apic->lapic_timer.period)));
+	kvm_timer_start(&apic->lapic_timer, period, apic_lvtt_period(apic));
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -706,8 +691,6 @@ static int apic_mmio_write(struct kvm_io
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
 			}
-			atomic_set(&apic->lapic_timer.pending, 0);
-
 		}
 		break;
 
@@ -738,7 +721,7 @@ static int apic_mmio_write(struct kvm_io
 		break;
 
 	case APIC_TMICT:
-		hrtimer_cancel(&apic->lapic_timer.timer);
+		kvm_timer_cancel(&apic->lapic_timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		return 0;
@@ -763,7 +746,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcp
 	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
+	kvm_timer_cancel(&vcpu->arch.apic->lapic_timer);
 
 	if (vcpu->arch.apic->regs_page)
 		__free_page(vcpu->arch.apic->regs_page);
@@ -834,7 +817,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vc
 	ASSERT(apic != NULL);
 
 	/* Stop the timer in case it's a reset to an active apic */
-	hrtimer_cancel(&apic->lapic_timer.timer);
+	kvm_timer_cancel(&apic->lapic_timer);
 
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
@@ -860,7 +843,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vc
 	}
 	apic->irr_pending = false;
 	update_divide_count(apic);
-	atomic_set(&apic->lapic_timer.pending, 0);
 	if (kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
@@ -889,19 +871,12 @@ int kvm_lapic_enabled(struct kvm_vcpu *v
  *----------------------------------------------------------------------
  */
 
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
-{
-	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
-					      lapic_timer);
-	return apic_lvtt_period(apic);
-}
-
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *lapic = vcpu->arch.apic;
 
 	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return atomic_read(&lapic->lapic_timer.pending);
+		return kvm_timer_has_pending(&lapic->lapic_timer);
 
 	return 0;
 }
@@ -928,10 +903,6 @@ void kvm_apic_nmi_wd_deliver(struct kvm_
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static struct kvm_timer_ops lapic_timer_ops = {
-	.is_periodic = lapic_is_periodic,
-};
-
 static const struct kvm_io_device_ops apic_mmio_ops = {
 	.read     = apic_mmio_read,
 	.write    = apic_mmio_write,
@@ -960,12 +931,8 @@ int kvm_create_lapic(struct kvm_vcpu *vc
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
-	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
-	apic->lapic_timer.timer.function = kvm_timer_fn;
-	apic->lapic_timer.t_ops = &lapic_timer_ops;
-	apic->lapic_timer.kvm = vcpu->kvm;
-	apic->lapic_timer.vcpu = vcpu;
+	kvm_timer_init(vcpu->kvm, &apic->lapic_timer);
+	kvm_timer_vcpu_bind(&apic->lapic_timer, vcpu);
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
@@ -1015,9 +982,9 @@ void kvm_inject_apic_timer_irqs(struct k
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
+	if (apic && kvm_timer_has_pending(&apic->lapic_timer)) {
 		if (kvm_apic_local_deliver(apic, APIC_LVTT))
-			atomic_dec(&apic->lapic_timer.pending);
+			kvm_timer_ack(&apic->lapic_timer);
 	}
 }
 
@@ -1043,24 +1010,11 @@ void kvm_apic_post_state_restore(struct 
 			     MSR_IA32_APICBASE_BASE;
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 	apic_update_ppr(apic);
-	hrtimer_cancel(&apic->lapic_timer.timer);
+	kvm_timer_cancel(&apic->lapic_timer);
 	update_divide_count(apic);
 	start_apic_timer(apic);
 }
 
-void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	struct hrtimer *timer;
-
-	if (!apic)
-		return;
-
-	timer = &apic->lapic_timer.timer;
-	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
-}
-
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
 {
 	u32 data;
Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -23,7 +23,7 @@ static int __kvm_timer_fn(struct kvm_vcp
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
 
-	if (ktimer->t_ops->is_periodic(ktimer)) {
+	if (ktimer->periodic) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		restart_timer = 1;
 	}
@@ -31,7 +31,7 @@ static int __kvm_timer_fn(struct kvm_vcp
 	return restart_timer;
 }
 
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
+static enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
 {
 	int restart_timer;
 	struct kvm_vcpu *vcpu;
@@ -48,3 +48,58 @@ enum hrtimer_restart kvm_timer_fn(struct
 		return HRTIMER_NORESTART;
 }
 
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer)
+{
+	ktimer->kvm = kvm;
+	hrtimer_init(&ktimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ktimer->timer.function = kvm_timer_fn;
+}
+
+void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu)
+{
+	ktimer->vcpu = vcpu;
+}
+
+void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic)
+{
+	hrtimer_cancel(&ktimer->timer);
+	atomic_set(&ktimer->pending, 0);
+	ktimer->periodic = periodic;
+	ktimer->period = interval;
+	hrtimer_start(&ktimer->timer, ktime_add_ns(ktime_get(), interval),
+			HRTIMER_MODE_ABS);
+}
+
+void kvm_timer_cancel(struct kvm_timer *ktimer)
+{
+	hrtimer_cancel(&ktimer->timer);
+	atomic_set(&ktimer->pending, 0);
+}
+
+int kvm_timer_has_pending(struct kvm_timer *ktimer)
+{
+	return atomic_read(&ktimer->pending);
+}
+
+void kvm_timer_ack(struct kvm_timer *ktimer)
+{
+	if (atomic_dec_return(&ktimer->pending) < 0)
+		atomic_inc(&ktimer->pending);
+}
+
+void kvm_timer_reset(struct kvm_timer *ktimer)
+{
+	atomic_set(&ktimer->pending, 0);
+}
+
+void kvm_migrate_timer(struct kvm_timer *ktimer)
+{
+	if (hrtimer_cancel(&ktimer->timer))
+		hrtimer_start_expires(&ktimer->timer, HRTIMER_MODE_ABS);
+}
+
+ktime_t kvm_timer_remaining(struct kvm_timer *ktimer)
+{
+	return hrtimer_expires_remaining(&ktimer->timer);
+}
+
Index: kvm-new/arch/x86/kvm/i8254.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.h
+++ kvm-new/arch/x86/kvm/i8254.h
@@ -22,7 +22,6 @@ struct kvm_kpit_channel_state {
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	struct kvm_timer pit_timer;
-	bool is_periodic;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
@@ -52,6 +51,5 @@ void kvm_inject_pit_timer_irqs(struct kv
 void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
-void kvm_pit_reset(struct kvm_pit *pit);
 
 #endif
Index: kvm-new/arch/x86/kvm/irq.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.c
+++ kvm-new/arch/x86/kvm/irq.c
@@ -94,6 +94,26 @@ void kvm_inject_pending_timer_irqs(struc
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
+static void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (!apic)
+		return;
+
+	kvm_migrate_timer(&apic->lapic_timer);
+}
+
+static void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+
+	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
+		return;
+
+	kvm_migrate_timer(&pit->pit_state.pit_timer);
+}
+
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	__kvm_migrate_apic_timer(vcpu);
Index: kvm-new/arch/x86/kvm/irq.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.h
+++ kvm-new/arch/x86/kvm/irq.h
@@ -94,8 +94,6 @@ void kvm_pic_reset(struct kvm_kpic_state
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
-void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
-void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 int pit_has_pending_timer(struct kvm_vcpu *vcpu);
Index: kvm-new/arch/x86/kvm/x86.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/x86.c
+++ kvm-new/arch/x86/kvm/x86.c
@@ -4565,6 +4565,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int r;
+	struct kvm *kvm = vcpu->kvm;
 
 	/* We do fxsave: this must be aligned. */
 	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
@@ -4578,6 +4579,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu 
 	if (r < 0)
 		goto free_vcpu;
 
+	if (kvm->arch.vpit && kvm_vcpu_is_bsp(vcpu))
+		kvm_timer_vcpu_bind(&kvm->arch.vpit->pit_state.pit_timer, vcpu);
+
 	return 0;
 free_vcpu:
 	kvm_x86_ops->vcpu_free(vcpu);

-- 



* [patch 2/8] KVM: move lapic timer ack to EOI handler
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
  2009-07-06  1:55 ` [patch 1/8] KVM: timer interface unification Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-06  1:55 ` [patch 3/8] KVM: x86: per-vcpu timer list Marcelo Tosatti
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-timer-lapic-ack --]
[-- Type: text/plain, Size: 1269 bytes --]

The lapic timer is currently acked on interrupt injection; move the
ack to the EOI handler, where the guest actually signals completion.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
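
The crux is a single comparison in apic_set_eoi(), shown here as a
sketch of the hunk below with the reasoning spelled out:

	/* EOI of the vector programmed into LVTT means the guest has
	 * finished handling the timer interrupt -- ack the emulated
	 * timer here rather than at injection time.
	 */
	int lvt_vector = apic_get_reg(apic, APIC_LVTT) & APIC_VECTOR_MASK;

	if (vector == lvt_vector)
		kvm_timer_ack(&apic->lapic_timer);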

Index: kvm-new/arch/x86/kvm/lapic.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/lapic.c
+++ kvm-new/arch/x86/kvm/lapic.c
@@ -427,6 +427,7 @@ int kvm_apic_compare_prio(struct kvm_vcp
 static void apic_set_eoi(struct kvm_lapic *apic)
 {
 	int vector = apic_find_highest_isr(apic);
+	int lvt_vector = apic_get_reg(apic, APIC_LVTT) & APIC_VECTOR_MASK;
 	int trigger_mode;
 	/*
 	 * Not every write EOI will has corresponding ISR,
@@ -435,6 +436,9 @@ static void apic_set_eoi(struct kvm_lapi
 	if (vector == -1)
 		return;
 
+	if (vector == lvt_vector)
+		kvm_timer_ack(&apic->lapic_timer);
+
 	apic_clear_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 
@@ -982,10 +986,8 @@ void kvm_inject_apic_timer_irqs(struct k
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && kvm_timer_has_pending(&apic->lapic_timer)) {
-		if (kvm_apic_local_deliver(apic, APIC_LVTT))
-			kvm_timer_ack(&apic->lapic_timer);
-	}
+	if (apic && kvm_timer_has_pending(&apic->lapic_timer))
+		kvm_apic_local_deliver(apic, APIC_LVTT);
 }
 
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)

-- 



* [patch 3/8] KVM: x86: per-vcpu timer list
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
  2009-07-06  1:55 ` [patch 1/8] KVM: timer interface unification Marcelo Tosatti
  2009-07-06  1:55 ` [patch 2/8] KVM: move lapic timer ack to EOI handler Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-06  1:55 ` [patch 4/8] KVM: x86: replace hrtimer based timer emulation Marcelo Tosatti
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-per-vcpu-timer-list --]
[-- Type: text/plain, Size: 1962 bytes --]

Add a per-vcpu list of (emulated) timers.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
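
The list is passive in this patch; the next patch walks it on guest
entry, along these lines (a sketch of the later iteration):

	struct kvm_timer *ktimer;

	/* every emulated timer bound to this vcpu, in one pass */
	list_for_each_entry(ktimer, &vcpu->arch.timers, vcpu_timer)
		/* check deadline, inject, re-arm, ... */;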

Index: kvm-new/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm-new.orig/arch/x86/include/asm/kvm_host.h
+++ kvm-new/arch/x86/include/asm/kvm_host.h
@@ -375,6 +375,8 @@ struct kvm_vcpu_arch {
 	u64 mcg_status;
 	u64 mcg_ctl;
 	u64 *mce_banks;
+
+	struct list_head timers;
 };
 
 struct kvm_mem_alias {
Index: kvm-new/arch/x86/kvm/kvm_timer.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/kvm_timer.h
+++ kvm-new/arch/x86/kvm/kvm_timer.h
@@ -7,6 +7,7 @@ struct kvm_timer {
 	bool periodic;
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
+	struct list_head vcpu_timer;
 };
 
 void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer);
Index: kvm-new/arch/x86/kvm/x86.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/x86.c
+++ kvm-new/arch/x86/kvm/x86.c
@@ -4644,6 +4644,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
+	INIT_LIST_HEAD(&vcpu->arch.timers);
+
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -53,11 +53,13 @@ void kvm_timer_init(struct kvm *kvm, str
 	ktimer->kvm = kvm;
 	hrtimer_init(&ktimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	ktimer->timer.function = kvm_timer_fn;
+	INIT_LIST_HEAD(&ktimer->vcpu_timer);
 }
 
 void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu)
 {
 	ktimer->vcpu = vcpu;
+	list_add(&ktimer->vcpu_timer, &vcpu->arch.timers);
 }
 
 void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic)

-- 



* [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (2 preceding siblings ...)
  2009-07-06  1:55 ` [patch 3/8] KVM: x86: per-vcpu timer list Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-08 12:58   ` Gleb Natapov
  2009-07-08 13:41   ` Avi Kivity
  2009-07-06  1:55 ` [patch 5/8] KVM: timer: honor noreinject tunable Marcelo Tosatti
                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-timer-rework --]
[-- Type: text/plain, Size: 20897 bytes --]

Replace hrtimer-based timer emulation with host timebase (ktime_t)
comparisons on guest entry.

This avoids host load when guests are scheduled out, removes a
spinlock acquisition on entry (i8254.c's inject_lock), and makes
future improvements easier.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
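
The key invariant: a timer's next deadline is pure arithmetic over the
host timebase, so nothing has to run while the guest is descheduled.
For a periodic timer (see periodic_timer_next_event() below), with a
10ms period loaded at t0 and 3 events acked, the next deadline is
t0 + 40ms:

	last_acked_event = ktime_add_ns(ktimer->count_load_time,
					ktimer->acked_events * ktimer->period);
	/* next deadline = one period past the last acked event */
	next_event = ktime_add_ns(last_acked_event, ktimer->period);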

Index: kvm-new/arch/x86/kvm/x86.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/x86.c
+++ kvm-new/arch/x86/kvm/x86.c
@@ -3461,8 +3461,6 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;
 
 	if (vcpu->requests) {
-		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
-			__kvm_migrate_timers(vcpu);
 		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
 			kvm_write_guest_time(vcpu);
 		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
@@ -3482,6 +3480,9 @@ static int vcpu_enter_guest(struct kvm_v
 		}
 	}
 
+
+	kvm_inject_pending_timer_irqs(vcpu);
+
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -3499,6 +3500,8 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;
 	}
 
+	kvm_vcpu_arm_exit(vcpu);
+
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
@@ -3564,6 +3567,8 @@ static int vcpu_enter_guest(struct kvm_v
 
 	preempt_enable();
 
+	kvm_vcpu_cleanup_timer(vcpu);
+
 	down_read(&vcpu->kvm->slots_lock);
 
 	/*
@@ -3627,10 +3632,6 @@ static int __vcpu_run(struct kvm_vcpu *v
 		if (r <= 0)
 			break;
 
-		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-		if (kvm_cpu_has_pending_timer(vcpu))
-			kvm_inject_pending_timer_irqs(vcpu);
-
 		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 			r = -EINTR;
 			kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -4579,6 +4580,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu 
 	if (r < 0)
 		goto free_vcpu;
 
+	kvm_vcpu_init_armed_exit(vcpu);
+
 	if (kvm->arch.vpit && kvm_vcpu_is_bsp(vcpu))
 		kvm_timer_vcpu_bind(&kvm->arch.vpit->pit_state.pit_timer, vcpu);
 
Index: kvm-new/virt/kvm/kvm_main.c
===================================================================
--- kvm-new.orig/virt/kvm/kvm_main.c
+++ kvm-new/virt/kvm/kvm_main.c
@@ -1656,11 +1656,19 @@ void mark_page_dirty(struct kvm *kvm, gf
 	}
 }
 
+#ifndef KVM_ARCH_HAVE_TIMER_EVENT
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu)
+{
+	return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
+	ktime_t expires;
 	DEFINE_WAIT(wait);
 
 	for (;;) {
@@ -1677,8 +1685,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcp
 		if (signal_pending(current))
 			break;
 
+		expires = kvm_vcpu_next_timer_event(vcpu);
 		vcpu_put(vcpu);
-		schedule();
+		schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
 		vcpu_load(vcpu);
 	}
 
Index: kvm-new/arch/x86/kvm/i8254.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.c
+++ kvm-new/arch/x86/kvm/i8254.c
@@ -224,15 +224,6 @@ static void pit_latch_status(struct kvm 
 	}
 }
 
-int pit_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-
-	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
-		return kvm_timer_has_pending(&pit->pit_state.pit_timer);
-	return 0;
-}
-
 static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
@@ -548,6 +539,36 @@ static const struct kvm_io_device_ops sp
 	.write    = speaker_ioport_write,
 };
 
+static void pit_inject(struct kvm_timer *ktimer)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm *kvm = ktimer->kvm;
+
+	mutex_lock(&kvm->irq_lock);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+	mutex_unlock(&kvm->irq_lock);
+
+	/*
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation is simplified, only
+	 * propagating PIT interrupts to all VCPUs when they have set
+	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+	 * VCPU0, and only if its LVT0 is in EXTINT mode.
+	 */
+	if (kvm->arch.vapics_in_nmi_mode > 0)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
+}
+
+struct kvm_timer_ops kpit_ops = {
+	.inject = pit_inject,
+	.name = "pit",
+};
+
 /* Caller must have writers lock on slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
@@ -573,7 +594,7 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;
-	kvm_timer_init(kvm, &pit_state->pit_timer);
+	kvm_timer_init(kvm, &pit_state->pit_timer, &kpit_ops);
 
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
@@ -610,50 +631,3 @@ void kvm_free_pit(struct kvm *kvm)
 	}
 }
 
-static void __inject_pit_timer_intr(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	int i;
-
-	mutex_lock(&kvm->irq_lock);
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->irq_lock);
-
-	/*
-	 * Provides NMI watchdog support via Virtual Wire mode.
-	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
-	 *
-	 * Note: Our Virtual Wire implementation is simplified, only
-	 * propagating PIT interrupts to all VCPUs when they have set
-	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-	 * VCPU0, and only if its LVT0 is in EXTINT mode.
-	 */
-	if (kvm->arch.vapics_in_nmi_mode > 0)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			kvm_apic_nmi_wd_deliver(vcpu);
-}
-
-void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_kpit_state *ps;
-
-	if (vcpu && pit) {
-		int inject = 0;
-		ps = &pit->pit_state;
-
-		/* Try to inject pending interrupts when
-		 * last one has been acked.
-		 */
-		spin_lock(&ps->inject_lock);
-		if (kvm_timer_has_pending(&ps->pit_timer) && ps->irq_ack) {
-			ps->irq_ack = 0;
-			inject = 1;
-		}
-		spin_unlock(&ps->inject_lock);
-		if (inject)
-			__inject_pit_timer_intr(kvm);
-	}
-}
Index: kvm-new/arch/x86/kvm/lapic.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/lapic.c
+++ kvm-new/arch/x86/kvm/lapic.c
@@ -875,16 +875,6 @@ int kvm_lapic_enabled(struct kvm_vcpu *v
  *----------------------------------------------------------------------
  */
 
-int apic_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *lapic = vcpu->arch.apic;
-
-	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return kvm_timer_has_pending(&lapic->lapic_timer);
-
-	return 0;
-}
-
 static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
 	u32 reg = apic_get_reg(apic, lvt_type);
@@ -912,6 +902,20 @@ static const struct kvm_io_device_ops ap
 	.write    = apic_mmio_write,
 };
 
+void inject_lapic_timer(struct kvm_timer *ktimer)
+{
+	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVTT);
+}
+
+struct kvm_timer_ops lapic_timer_ops = {
+	.inject = inject_lapic_timer,
+	.name = "lapic",
+};
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -935,7 +939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
-	kvm_timer_init(vcpu->kvm, &apic->lapic_timer);
+	kvm_timer_init(vcpu->kvm, &apic->lapic_timer, &lapic_timer_ops);
 	kvm_timer_vcpu_bind(&apic->lapic_timer, vcpu);
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
@@ -982,14 +986,6 @@ int kvm_apic_accept_pic_intr(struct kvm_
 	return r;
 }
 
-void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic && kvm_timer_has_pending(&apic->lapic_timer))
-		kvm_apic_local_deliver(apic, APIC_LVTT);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
Index: kvm-new/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm-new.orig/arch/x86/include/asm/kvm_host.h
+++ kvm-new/arch/x86/include/asm/kvm_host.h
@@ -377,6 +377,7 @@ struct kvm_vcpu_arch {
 	u64 *mce_banks;
 
 	struct list_head timers;
+	struct hrtimer exit_timer;
 };
 
 struct kvm_mem_alias {
@@ -800,4 +801,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsig
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
+#define KVM_ARCH_HAVE_TIMER_EVENT
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
Index: kvm-new/arch/x86/kvm/irq.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.c
+++ kvm-new/arch/x86/kvm/irq.c
@@ -26,18 +26,19 @@
 #include "i8254.h"
 #include "x86.h"
 
-/*
- * check if there are pending timer events
- * to be processed.
- */
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	int ret;
+	ktime_t now, expires;
 
-	ret = pit_has_pending_timer(vcpu);
-	ret |= apic_has_pending_timer(vcpu);
+	expires = kvm_vcpu_next_timer_event(vcpu);
+	now = ktime_get();
+	if (expires.tv64 <= now.tv64) {
+		if (kvm_arch_interrupt_allowed(vcpu))
+			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		return 1;
+	}
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
@@ -86,36 +87,3 @@ int kvm_cpu_get_interrupt(struct kvm_vcp
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 
-void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	kvm_inject_apic_timer_irqs(vcpu);
-	kvm_inject_pit_timer_irqs(vcpu);
-	/* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-
-static void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (!apic)
-		return;
-
-	kvm_migrate_timer(&apic->lapic_timer);
-}
-
-static void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-
-	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
-		return;
-
-	kvm_migrate_timer(&pit->pit_state.pit_timer);
-}
-
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-	__kvm_migrate_apic_timer(vcpu);
-	__kvm_migrate_pit_timer(vcpu);
-}
Index: kvm-new/arch/x86/kvm/svm.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/svm.c
+++ kvm-new/arch/x86/kvm/svm.c
@@ -738,7 +738,6 @@ static void svm_vcpu_load(struct kvm_vcp
 		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
-		kvm_migrate_timers(vcpu);
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
Index: kvm-new/arch/x86/kvm/vmx.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/vmx.c
+++ kvm-new/arch/x86/kvm/vmx.c
@@ -703,7 +703,6 @@ static void vmx_vcpu_load(struct kvm_vcp
 
 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
-		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
 		local_irq_disable();
 		list_add(&vmx->local_vcpus_link,
Index: kvm-new/arch/x86/kvm/kvm_timer.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/kvm_timer.h
+++ kvm-new/arch/x86/kvm/kvm_timer.h
@@ -1,26 +1,41 @@
+struct kvm_timer_ops;
 
 struct kvm_timer {
-	struct hrtimer timer;
-	s64 period; 			/* unit: ns */
-	atomic_t pending;		/* accumulated triggered timers */
+	ktime_t count_load_time;
+	ktime_t inject_time;
+	u64 period; 				/* unit: ns */
+	u64 acked_events;
+
+	bool can_inject;
 	bool reinject;
 	bool periodic;
+
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	struct list_head vcpu_timer;
+	struct kvm_timer_ops *ops;
 };
 
-void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer);
+struct kvm_timer_ops {
+	void (*inject)(struct kvm_timer *);
+	char *name;
+};
+
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer,
+		    struct kvm_timer_ops *ops);
+void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu);
 void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic);
 void kvm_timer_cancel(struct kvm_timer *ktimer);
-void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu);
-
 int kvm_timer_has_pending(struct kvm_timer *ktimer);
 void kvm_timer_ack(struct kvm_timer *ktimer);
 void kvm_timer_reset(struct kvm_timer *ktimer);
 
 void kvm_migrate_timer(struct kvm_timer *ktimer);
 
+void kvm_vcpu_init_armed_exit(struct kvm_vcpu *vcpu);
 
-ktime_t kvm_timer_remaining(struct kvm_timer *ktimer);
+void kvm_vcpu_arm_exit(struct kvm_vcpu *vcpu);
+void kvm_vcpu_cleanup_timer(struct kvm_vcpu *vcpu);
 
+ktime_t kvm_timer_next_event(struct kvm_timer *ktimer);
+ktime_t kvm_timer_remaining(struct kvm_timer *ktimer);
Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -1,107 +1,176 @@
+/*
+ *
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/hrtimer.h>
 #include <asm/atomic.h>
 #include "kvm_timer.h"
 
-static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
-{
-	int restart_timer = 0;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially loosing timer events in the !reinject
-	 * case anyway.
-	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (ktimer->periodic) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		restart_timer = 1;
-	}
-
-	return restart_timer;
-}
-
-static enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	int restart_timer;
-	struct kvm_vcpu *vcpu;
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-
-	vcpu = ktimer->vcpu;
-	if (!vcpu)
-		return HRTIMER_NORESTART;
-
-	restart_timer = __kvm_timer_fn(vcpu, ktimer);
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}
 
-void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer)
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer,
+		    struct kvm_timer_ops *ops)
 {
 	ktimer->kvm = kvm;
-	hrtimer_init(&ktimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	ktimer->timer.function = kvm_timer_fn;
 	INIT_LIST_HEAD(&ktimer->vcpu_timer);
+	ktimer->ops = ops;
+	ktimer->can_inject = false;
 }
 
 void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu)
 {
 	ktimer->vcpu = vcpu;
-	list_add(&ktimer->vcpu_timer, &vcpu->arch.timers);
 }
 
 void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic)
 {
-	hrtimer_cancel(&ktimer->timer);
-	atomic_set(&ktimer->pending, 0);
 	ktimer->periodic = periodic;
 	ktimer->period = interval;
-	hrtimer_start(&ktimer->timer, ktime_add_ns(ktime_get(), interval),
-			HRTIMER_MODE_ABS);
+	ktimer->count_load_time = ktime_get();
+	ktimer->acked_events = 0;
+	ktimer->can_inject = true;
+
+	WARN_ON(interval == 0);
+
+	list_add(&ktimer->vcpu_timer, &ktimer->vcpu->arch.timers);
 }
 
 void kvm_timer_cancel(struct kvm_timer *ktimer)
 {
-	hrtimer_cancel(&ktimer->timer);
-	atomic_set(&ktimer->pending, 0);
+	if (!list_empty(&ktimer->vcpu_timer))
+		list_del_init(&ktimer->vcpu_timer);
 }
 
-int kvm_timer_has_pending(struct kvm_timer *ktimer)
+void kvm_timer_reset(struct kvm_timer *ktimer)
 {
-	return atomic_read(&ktimer->pending);
+	ktimer->can_inject = true;
 }
 
 void kvm_timer_ack(struct kvm_timer *ktimer)
 {
-	if (atomic_dec_return(&ktimer->pending) < 0)
-		atomic_inc(&ktimer->pending);
+	ktimer->acked_events++;
+	ktimer->can_inject = true;
 }
 
-void kvm_timer_reset(struct kvm_timer *ktimer)
+static ktime_t periodic_timer_next_event(struct kvm_timer *ktimer)
 {
-	atomic_set(&ktimer->pending, 0);
+	ktime_t last_acked_event;
+
+	last_acked_event = ktime_add_ns(ktimer->count_load_time,
+					ktimer->acked_events * ktimer->period);
+
+	return ktime_add_ns(last_acked_event, ktimer->period);
 }
 
-void kvm_migrate_timer(struct kvm_timer *ktimer)
+ktime_t kvm_timer_next_event(struct kvm_timer *ktimer)
 {
-	if (hrtimer_cancel(&ktimer->timer))
-		hrtimer_start_expires(&ktimer->timer, HRTIMER_MODE_ABS);
+	if (!ktimer->periodic)
+		return ktime_add_ns(ktimer->count_load_time, ktimer->period);
+	else
+		return periodic_timer_next_event(ktimer);
 }
 
 ktime_t kvm_timer_remaining(struct kvm_timer *ktimer)
 {
-	return hrtimer_expires_remaining(&ktimer->timer);
+	ktime_t now = ktime_get();
+
+	return ktime_sub(kvm_timer_next_event(ktimer), now);
 }
 
+struct kvm_timer *kvm_vcpu_injectable_timer_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_timer *ktimer, *ktimer_expire = NULL;
+	ktime_t expires = { .tv64 = KTIME_MAX };
+
+	list_for_each_entry(ktimer, &vcpu->arch.timers, vcpu_timer) {
+		ktime_t this_expires = { .tv64 = KTIME_MAX };
+
+		if (ktimer->can_inject)
+			this_expires = kvm_timer_next_event(ktimer);
+
+		if (this_expires.tv64 < expires.tv64) {
+			expires = this_expires;
+			ktimer_expire = ktimer;
+		}
+	}
+
+	return ktimer_expire;
+}
+
+/*
+ * when the next vcpu timer expires, in host timebase.
+ */
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu)
+{
+	ktime_t expires = { .tv64 = KTIME_MAX };
+	struct kvm_timer *ktimer = kvm_vcpu_injectable_timer_event(vcpu);
+
+	if (!ktimer)
+		return expires;
+
+	return kvm_timer_next_event(ktimer);
+}
+
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_timer *ktimer, *n;
+	ktime_t now = ktime_get();
+
+	list_for_each_entry_safe(ktimer, n, &vcpu->arch.timers, vcpu_timer) {
+		ktime_t expire;
+
+		if (!ktimer->can_inject)
+			continue;
+
+		expire = kvm_timer_next_event(ktimer);
+		if (ktime_to_ns(now) < ktime_to_ns(expire))
+			continue;
+
+		ktimer->can_inject = false;
+		ktimer->ops->inject(ktimer);
+		if (!ktimer->periodic)
+			list_del_init(&ktimer->vcpu_timer);
+	}
+}
+
+/* arm/disarm exit */
+
+static enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
+{
+	return HRTIMER_NORESTART;
+}
+
+void kvm_vcpu_init_armed_exit(struct kvm_vcpu *vcpu)
+{
+	hrtimer_init(&vcpu->arch.exit_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.exit_timer.function = kvm_timer_fn;
+}
+
+void kvm_vcpu_arm_exit(struct kvm_vcpu *vcpu)
+{
+	ktime_t expire;
+	ktime_t now;
+	struct kvm_timer *ktimer = kvm_vcpu_injectable_timer_event(vcpu);
+
+	if (!ktimer)
+		return;
+
+	now = ktime_get();
+	expire = kvm_timer_next_event(ktimer);
+
+	if (expire.tv64 != KTIME_MAX)
+		hrtimer_start(&vcpu->arch.exit_timer, expire, HRTIMER_MODE_ABS);
+}
+
+void kvm_vcpu_cleanup_timer(struct kvm_vcpu *vcpu)
+{
+	hrtimer_cancel(&vcpu->arch.exit_timer);
+}
+
+
Index: kvm-new/arch/x86/kvm/irq.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.h
+++ kvm-new/arch/x86/kvm/irq.h
@@ -94,9 +94,5 @@ void kvm_pic_reset(struct kvm_kpic_state
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
-
-int pit_has_pending_timer(struct kvm_vcpu *vcpu);
-int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
 #endif
Index: kvm-new/include/linux/kvm_host.h
===================================================================
--- kvm-new.orig/include/linux/kvm_host.h
+++ kvm-new/include/linux/kvm_host.h
@@ -30,15 +30,14 @@
  * vcpu->requests bit members
  */
 #define KVM_REQ_TLB_FLUSH          0
-#define KVM_REQ_MIGRATE_TIMER      1
-#define KVM_REQ_REPORT_TPR_ACCESS  2
-#define KVM_REQ_MMU_RELOAD         3
-#define KVM_REQ_TRIPLE_FAULT       4
-#define KVM_REQ_PENDING_TIMER      5
-#define KVM_REQ_UNHALT             6
-#define KVM_REQ_MMU_SYNC           7
-#define KVM_REQ_KVMCLOCK_UPDATE    8
-#define KVM_REQ_KICK               9
+#define KVM_REQ_REPORT_TPR_ACCESS  1
+#define KVM_REQ_MMU_RELOAD         2
+#define KVM_REQ_TRIPLE_FAULT       3
+#define KVM_REQ_PENDING_TIMER      4
+#define KVM_REQ_UNHALT             5
+#define KVM_REQ_MMU_SYNC           6
+#define KVM_REQ_KVMCLOCK_UPDATE    7
+#define KVM_REQ_KICK               8
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -469,11 +468,6 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
 enum kvm_stat_kind {
 	KVM_STAT_VM,
 	KVM_STAT_VCPU,

-- 



* [patch 5/8] KVM: timer: honor noreinject tunable
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (3 preceding siblings ...)
  2009-07-06  1:55 ` [patch 4/8] KVM: x86: replace hrtimer based timer emulation Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-06  1:55 ` [patch 6/8] KVM: timer: optimize next_timer_event and vcpu_arm_exit Marcelo Tosatti
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: handle-no-reinject --]
[-- Type: text/plain, Size: 1100 bytes --]

When reinjection is disabled, skip periodic timer events that the
guest has missed instead of injecting them back to back.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
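
A worked example of the adjustment (see no_reinject_adjust() below):
with a 10ms period and a deadline that passed 35ms ago, three full
periods were missed; crediting two of them as already acked means one
interrupt is injected instead of a back-to-back burst:

	delta = ktime_to_ns(ktime_sub(now, expire));		/* 35ms late */
	unacked_events = div_u64(delta, ktimer->period);	/* = 3 */
	if (unacked_events > 1)
		ktimer->acked_events += unacked_events - 1;	/* credit 2 */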

Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -58,6 +58,21 @@ void kvm_timer_ack(struct kvm_timer *kti
 	ktimer->can_inject = true;
 }
 
+static void no_reinject_adjust(struct kvm_timer *ktimer, ktime_t expire)
+{
+	u64 unacked_events, delta;
+	ktime_t now;
+
+	now = ktime_get();
+	delta = ktime_to_ns(ktime_sub(now, expire));
+	unacked_events = 0;
+
+	if (now.tv64 > expire.tv64)
+		unacked_events = div_u64(delta, ktimer->period);
+	if (unacked_events > 1)
+		ktimer->acked_events += unacked_events-1;
+}
+
 static ktime_t periodic_timer_next_event(struct kvm_timer *ktimer)
 {
 	ktime_t last_acked_event;
@@ -136,6 +151,8 @@ void kvm_inject_pending_timer_irqs(struc
 		ktimer->ops->inject(ktimer);
 		if (!ktimer->periodic)
 			list_del_init(&ktimer->vcpu_timer);
+		else if (unlikely(!ktimer->reinject))
+			no_reinject_adjust(ktimer, expire);
 	}
 }
 

-- 



* [patch 6/8] KVM: timer: optimize next_timer_event and vcpu_arm_exit
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (4 preceding siblings ...)
  2009-07-06  1:55 ` [patch 5/8] KVM: timer: honor noreinject tunable Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-06  1:55 ` [patch 7/8] KVM: PIT: removed unused code Marcelo Tosatti
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-armexit-optimize --]
[-- Type: text/plain, Size: 1445 bytes --]

This reduces the added entry/exit overhead to roughly 30 cycles.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
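
Two costs leave the hot path: the unconditional hrtimer_cancel() on
every exit (the kvm_vcpu_cleanup_timer() call goes away), and the
timer-list scan when the exit timer is already armed:

	/* kvm_vcpu_arm_exit() fast path: the exit hrtimer is still
	 * armed from a previous entry, so skip recomputing the deadline.
	 */
	if (hrtimer_active(&vcpu->arch.exit_timer))
		return;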

Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -135,14 +135,15 @@ ktime_t kvm_vcpu_next_timer_event(struct
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_timer *ktimer, *n;
-	ktime_t now = ktime_get();
 
 	list_for_each_entry_safe(ktimer, n, &vcpu->arch.timers, vcpu_timer) {
-		ktime_t expire;
+		ktime_t expire, now;
 
 		if (!ktimer->can_inject)
 			continue;
 
+		now = ktime_get();
+
 		expire = kvm_timer_next_event(ktimer);
 		if (ktime_to_ns(now) < ktime_to_ns(expire))
 			continue;
@@ -173,8 +174,12 @@ void kvm_vcpu_arm_exit(struct kvm_vcpu *
 {
 	ktime_t expire;
 	ktime_t now;
-	struct kvm_timer *ktimer = kvm_vcpu_injectable_timer_event(vcpu);
+	struct kvm_timer *ktimer;
+
+	if (hrtimer_active(&vcpu->arch.exit_timer))
+		return;
 
+	ktimer = kvm_vcpu_injectable_timer_event(vcpu);
 	if (!ktimer)
 		return;
 
Index: kvm-new/arch/x86/kvm/x86.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/x86.c
+++ kvm-new/arch/x86/kvm/x86.c
@@ -3567,8 +3567,6 @@ static int vcpu_enter_guest(struct kvm_v
 
 	preempt_enable();
 
-	kvm_vcpu_cleanup_timer(vcpu);
-
 	down_read(&vcpu->kvm->slots_lock);
 
 	/*

-- 



* [patch 7/8] KVM: PIT: removed unused code
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (5 preceding siblings ...)
  2009-07-06  1:55 ` [patch 6/8] KVM: timer: optimize next_timer_event and vcpu_arm_exit Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-06  1:55 ` [patch 8/8] kvmctl: time testcase Marcelo Tosatti
  2009-07-08 13:43 ` [patch 0/8] RFC: in-kernel timer emulation changes Avi Kivity
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: kvm-cleanup-pit --]
[-- Type: text/plain, Size: 1920 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-new/arch/x86/kvm/i8254.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.c
+++ kvm-new/arch/x86/kvm/i8254.c
@@ -228,10 +228,7 @@ static void kvm_pit_ack_irq(struct kvm_i
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
-	spin_lock(&ps->inject_lock);
 	kvm_timer_ack(&ps->pit_timer);
-	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
 }
 
 static void destroy_pit_timer(struct kvm_timer *pt)
@@ -252,7 +249,6 @@ static void create_pit_timer(struct kvm_
 	/* TODO The new value only affected after the retriggered */
 	kvm_timer_cancel(pt);
 
-	ps->irq_ack = 1;
 	kvm_timer_start(pt, interval, is_period);
 }
 
@@ -516,17 +512,14 @@ static void kvm_pit_reset(struct kvm_pit
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_timer_reset(&pit->pit_state.pit_timer);
-	pit->pit_state.irq_ack = 1;
 }
 
 static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 {
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 
-	if (!mask) {
+	if (!mask)
 		kvm_timer_reset(&pit->pit_state.pit_timer);
-		pit->pit_state.irq_ack = 1;
-	}
 }
 
 static const struct kvm_io_device_ops pit_dev_ops = {
@@ -587,7 +580,6 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
 
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
Index: kvm-new/arch/x86/kvm/i8254.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.h
+++ kvm-new/arch/x86/kvm/i8254.h
@@ -25,8 +25,6 @@ struct kvm_kpit_state {
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	spinlock_t inject_lock;
-	unsigned long irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };
 

-- 



* [patch 8/8] kvmctl: time testcase
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (6 preceding siblings ...)
  2009-07-06  1:55 ` [patch 7/8] KVM: PIT: removed unused code Marcelo Tosatti
@ 2009-07-06  1:55 ` Marcelo Tosatti
  2009-07-08 13:43 ` [patch 0/8] RFC: in-kernel timer emulation changes Avi Kivity
  8 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-06  1:55 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: timer-testcase --]
[-- Type: text/plain, Size: 19119 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm/kvm/user/config-x86-common.mak
===================================================================
--- qemu-kvm.orig/kvm/user/config-x86-common.mak
+++ qemu-kvm/kvm/user/config-x86-common.mak
@@ -56,6 +56,9 @@ $(TEST_DIR)/tsc.flat: $(cstart.o) $(TEST
 $(TEST_DIR)/apic.flat: $(cstart.o) $(TEST_DIR)/apic.o $(TEST_DIR)/vm.o \
 		       $(TEST_DIR)/print.o 
 
+$(TEST_DIR)/time.flat: $(cstart.o) $(TEST_DIR)/time.o $(TEST_DIR)/vm.o \
+		       $(TEST_DIR)/print.o
+
 $(TEST_DIR)/realmode.flat: $(TEST_DIR)/realmode.o
 	$(CC) -m32 -nostdlib -o $@ -Wl,-T,$(TEST_DIR)/realmode.lds $^
 
Index: qemu-kvm/kvm/user/config-x86_64.mak
===================================================================
--- qemu-kvm.orig/kvm/user/config-x86_64.mak
+++ qemu-kvm/kvm/user/config-x86_64.mak
@@ -7,6 +7,7 @@ CFLAGS += -D__x86_64__
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/irq.flat $(TEST_DIR)/sieve.flat \
       $(TEST_DIR)/simple.flat $(TEST_DIR)/stringio.flat \
       $(TEST_DIR)/memtest1.flat $(TEST_DIR)/emulator.flat \
-      $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat
+      $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat  \
+      $(TEST_DIR)/time.flat
 
 include config-x86-common.mak
Index: qemu-kvm/kvm/user/test/x86/io.h
===================================================================
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/io.h
@@ -0,0 +1,35 @@
+static inline void outb(unsigned char val, unsigned short port)
+{
+	asm volatile("outb %0, %w1": : "a"(val), "Nd" (port));
+}
+
+static inline void outw(unsigned short val, unsigned short port)
+{
+	asm volatile("outw %0, %w1": : "a"(val), "Nd" (port));
+}
+
+static inline void outl(unsigned long val, unsigned short port)
+{
+	asm volatile("outl %0, %w1": : "a"(val), "Nd" (port));
+}
+
+static inline unsigned char inb(unsigned short port)
+{
+	unsigned char val;
+	asm volatile("inb %w1, %0": "=a"(val) : "Nd" (port));
+	return val;
+}
+
+static inline short inw(unsigned short port)
+{
+	short val;
+	asm volatile("inw %w1, %0": "=a"(val) : "Nd" (port));
+	return val;
+}
+
+static inline unsigned int inl(unsigned short port)
+{
+	unsigned int val;
+	asm volatile("inl %w1, %0": "=a"(val) : "Nd" (port));
+	return val;
+}
Index: qemu-kvm/kvm/user/test/x86/time.c
===================================================================
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/time.c
@@ -0,0 +1,766 @@
+#include "libcflat.h"
+#include "apic.h"
+#include "vm.h"
+#include "io.h"
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+static void *g_apic;
+static void *g_ioapic;
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned u32;
+typedef unsigned long ulong;
+typedef unsigned long long u64;
+
+typedef u64 ns_t;
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+typedef struct {
+    unsigned short offset0;
+    unsigned short selector;
+    unsigned short ist : 3;
+    unsigned short : 5;
+    unsigned short type : 4;
+    unsigned short : 1;
+    unsigned short dpl : 2;
+    unsigned short p : 1;
+    unsigned short offset1;
+#ifdef __x86_64__
+    unsigned offset2;
+    unsigned reserved;
+#endif
+} idt_entry_t;
+
+typedef struct {
+    ulong rflags;
+    ulong cs;
+    ulong rip;
+    ulong func;
+    ulong regs[sizeof(ulong)*2];
+} isr_regs_t;
+
+#ifdef __x86_64__
+#  define R "r"
+#else
+#  define R "e"
+#endif
+
+extern char isr_entry_point[];
+
+asm (
+    "isr_entry_point: \n"
+#ifdef __x86_64__
+    "push %r15 \n\t"
+    "push %r14 \n\t"
+    "push %r13 \n\t"
+    "push %r12 \n\t"
+    "push %r11 \n\t"
+    "push %r10 \n\t"
+    "push %r9  \n\t"
+    "push %r8  \n\t"
+#endif
+    "push %"R "di \n\t"
+    "push %"R "si \n\t"
+    "push %"R "bp \n\t"
+    "push %"R "sp \n\t"
+    "push %"R "bx \n\t"
+    "push %"R "dx \n\t"
+    "push %"R "cx \n\t"
+    "push %"R "ax \n\t"
+#ifdef __x86_64__
+    "mov %rsp, %rdi \n\t"
+    "callq *8*16(%rsp) \n\t"
+#else
+    "push %esp \n\t"
+    "calll *4+4*8(%esp) \n\t"
+    "add $4, %esp \n\t"
+#endif
+    "pop %"R "ax \n\t"
+    "pop %"R "cx \n\t"
+    "pop %"R "dx \n\t"
+    "pop %"R "bx \n\t"
+    "pop %"R "bp \n\t"
+    "pop %"R "bp \n\t"
+    "pop %"R "si \n\t"
+    "pop %"R "di \n\t"
+#ifdef __x86_64__
+    "pop %r8  \n\t"
+    "pop %r9  \n\t"
+    "pop %r10 \n\t"
+    "pop %r11 \n\t"
+    "pop %r12 \n\t"
+    "pop %r13 \n\t"
+    "pop %r14 \n\t"
+    "pop %r15 \n\t"
+#endif
+#ifdef __x86_64__
+    "add $8, %rsp \n\t"
+    "iretq \n\t"
+#else
+    "add $4, %esp \n\t"
+    "iretl \n\t"
+#endif
+    );
+
+static idt_entry_t idt[256];
+
+static int g_fail;
+static int g_tests;
+
+static void report(const char *msg, int pass)
+{
+    ++g_tests;
+    printf("%s: %s\n", msg, (pass ? "PASS" : "FAIL"));
+    if (!pass)
+        ++g_fail;
+}
+
+static u32 apic_read(unsigned reg)
+{
+    return *(volatile u32 *)(g_apic + reg);
+}
+
+static void apic_write(unsigned reg, u32 val)
+{
+    *(volatile u32 *)(g_apic + reg) = val;
+}
+
+static void test_lapic_existence(void)
+{
+    u32 lvr;
+
+    lvr = apic_read(APIC_LVR);
+    printf("apic version: %x\n", lvr);
+    report("apic existence", (u16)lvr == 0x14);
+}
+
+static u16 read_cs(void)
+{
+    u16 v;
+
+    asm("mov %%cs, %0" : "=rm"(v));
+    return v;
+}
+
+static void init_idt(void)
+{
+    struct {
+        u16 limit;
+        ulong idt;
+    } __attribute__((packed)) idt_ptr = {
+        sizeof(idt_entry_t) * 256 - 1,
+        (ulong)&idt,
+    };
+
+    asm volatile("lidt %0" : : "m"(idt_ptr));
+}
+
+static void set_idt_entry(unsigned vec, void (*func)(isr_regs_t *regs))
+{
+    u8 *thunk = vmalloc(50);
+    ulong ptr = (ulong)thunk;
+    idt_entry_t ent = {
+        .offset0 = ptr,
+        .selector = read_cs(),
+        .ist = 0,
+        .type = 14,
+        .dpl = 0,
+        .p = 1,
+        .offset1 = ptr >> 16,
+#ifdef __x86_64__
+        .offset2 = ptr >> 32,
+#endif
+    };
+#ifdef __x86_64__
+    /* sub $8, %rsp */
+    *thunk++ = 0x48; *thunk++ = 0x83; *thunk++ = 0xec; *thunk++ = 0x08;
+    /* movl $func_low, (%rsp) */
+    *thunk++ = 0xc7; *thunk++ = 0x04; *thunk++ = 0x24;
+    *(u32 *)thunk = (ulong)func; thunk += 4;
+    /* movl $func_high, 4(%rsp) */
+    *thunk++ = 0xc7; *thunk++ = 0x44; *thunk++ = 0x24; *thunk++ = 0x04;
+    *(u32 *)thunk = (ulong)func >> 32; thunk += 4;
+    /* jmp isr_entry_point */
+    *thunk++ = 0xe9;
+    *(u32 *)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4);
+#else
+    /* push $func */
+    *thunk++ = 0x68;
+    *(u32 *)thunk = (ulong)func; thunk += 4;
+    /* jmp isr_entry_point */
+    *thunk++ = 0xe9;
+    *(u32 *)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4);
+#endif
+    idt[vec] = ent;
+}
+
+static void irq_disable(void)
+{
+    asm volatile("cli");
+}
+
+static void irq_enable(void)
+{
+    asm volatile("sti");
+}
+
+static void eoi(void)
+{
+    apic_write(APIC_EOI, 0);
+}
+
+static int ipi_count;
+
+static void self_ipi_isr(isr_regs_t *regs)
+{
+    ++ipi_count;
+    eoi();
+}
+
+static void test_self_ipi(void)
+{
+    int vec = 0xf1;
+
+    set_idt_entry(vec, self_ipi_isr);
+    irq_enable();
+    apic_write(APIC_ICR,
+               APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | vec);
+    asm volatile ("nop");
+    report("self ipi", ipi_count == 1);
+}
+
+static void ioapic_write_reg(unsigned reg, u32 value)
+{
+    *(volatile u32 *)g_ioapic = reg;
+    *(volatile u32 *)(g_ioapic + 0x10) = value;
+}
+
+typedef struct {
+    u8 vector;
+    u8 delivery_mode:3;
+    u8 dest_mode:1;
+    u8 delivery_status:1;
+    u8 polarity:1;
+    u8 remote_irr:1;
+    u8 trig_mode:1;
+    u8 mask:1;
+    u8 reserve:7;
+    u8 reserved[4];
+    u8 dest_id;
+} ioapic_redir_entry_t;
+
+static void ioapic_write_redir(unsigned line, ioapic_redir_entry_t e)
+{
+    ioapic_write_reg(0x10 + line * 2 + 0, ((u32 *)&e)[0]);
+    ioapic_write_reg(0x10 + line * 2 + 1, ((u32 *)&e)[1]);
+}
+
+static void set_ioapic_redir(unsigned line, unsigned vec)
+{
+    ioapic_redir_entry_t e = {
+        .vector = vec,
+        .delivery_mode = 0,
+        .trig_mode = 0,
+    };
+
+    ioapic_write_redir(line, e);
+}
+
+static void enable_apic(void)
+{
+    apic_write(0xf0, 0x1ff); /* spurious vector register */
+}
+
+/* interrupt handlers */
+
+struct int_table {
+    void (*func)(isr_regs_t *regs);
+    void (*irq_handler)(void *irq_priv);
+    void *irq_priv;
+};
+
+static struct int_table int_handlers[];
+
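+/* generate a small ISR per handler-table slot that dispatches and EOIs */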
+#define decl_irq_handler(N)                         \
+static void timer_int_##N(isr_regs_t *regs) {       \
+    struct int_table *t = &int_handlers[N];         \
+    t->irq_handler(t->irq_priv);                    \
+    eoi();                                          \
+}
+
+void set_irq_handler(int vec, void (*func)(void *irq_priv), void *irq_priv);
+
+void hlt(void) { asm volatile("hlt"); }
+
+#define NS_FREQ 1000000000ULL
+
+#define ns2cyc(ns) (((ns) * cpu_hz) / NS_FREQ)
+#define cyc2ns(cyc) (((cyc) * NS_FREQ) / cpu_hz)
+
+#define us_to_ns(n) (1000ULL * (n))
+#define ms_to_ns(n) (1000000ULL * (n))
+#define s_to_ns(n)  (1000000000ULL * (n))
+
+#define ns_to_ms(x) ((x) / 1000000)
+
+#define udelay(n) nsdelay(us_to_ns(n))
+#define mdelay(n) nsdelay(ms_to_ns(n))
+#define sdelay(n) nsdelay(s_to_ns(n))
+
+u64 cpu_hz;
+
+u64 rdtsc(void)
+{
+        unsigned a, d;
+
+        asm volatile("rdtsc" : "=a"(a), "=d"(d));
+        return a | (u64)d << 32;
+}
+
+void wrtsc(u64 tsc)
+{
+        unsigned a = tsc, d = tsc >> 32;
+
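+        /* MSR 0x10 is IA32_TIME_STAMP_COUNTER */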
+        asm volatile("wrmsr" : : "a"(a), "d"(d), "c"(0x10));
+}
+
+void nsdelay(u64 ns)
+{
+    u64 entry = cyc2ns(rdtsc());
+
+    do {
+        __asm__ volatile ("nop");
+    } while (cyc2ns(rdtsc()) - entry < ns);
+}
+
+struct clocksource {
+    char *name;
+    int (*init) (void);
+    u64 (*read) (void);
+    u64 freq;
+};
+
+/* return count in nanoseconds */
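+/* note: val * NS_FREQ overflows u64 once the raw count exceeds ~1.8e10
+ * (a few seconds of TSC at GHz rates); acceptable for these short tests */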
+u64 clocksource_read(struct clocksource *clock)
+{
+    u64 val = clock->read();
+
+    val = (val * NS_FREQ) / clock->freq;
+    return val;
+}
+
+enum clockevt_type { CLOCKEVT_PERIODIC, CLOCKEVT_ONESHOT, };
+
+struct clockevent {
+    char *name;
+    u64 (*init) (int vec);
+    int (*arm) (u64 count, enum clockevt_type);
+    void (*cancel)(void);
+    u64 (*remain) (void);
+
+    u64 freq;
+    unsigned vec;
+};
+
+void clock_arm(struct clockevent *clockevt, enum clockevt_type type,
+               u64 period)
+{
+    u64 count = (period * clockevt->freq) / NS_FREQ;
+
+    clockevt->arm(count, type);
+}
+
+/* -------- TSC clocksource ------------- */
+
+int tsc_init(void) { printf("%s\n", __func__); return 0; }
+u64 tsc_read(void) { return rdtsc(); }
+
+struct clocksource tsc = {
+    .name = "tsc",
+    .init = tsc_init,
+    .read = tsc_read,
+};
+
+/* --------- ACPI clocksource ----------- */
+
+#define ACPI_PORT 0xb008
+#define ACPI_FREQ 3579545
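+/* ACPI PM timer: standard 3.579545 MHz clock; 0xb008 assumes qemu's
+ * default PIIX4 PM base of 0xb000 (timer register at offset 8) */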
+
+int acpi_init(void) { printf("%s\n", __func__); return 0; }
+u64 acpi_read(void) { return inl(ACPI_PORT); }
+
+struct clocksource acpi = {
+    .name = "acpi",
+    .init = acpi_init,
+    .read = acpi_read,
+    .freq = ACPI_FREQ,
+};
+
+//struct clocksource *clocksources[] = { &tsc, &acpi, };
+struct clocksource *clocksources[] = { &tsc };
+
+/* --------- LAPIC clockevent ---------- */
+
+#define aprint(reg)  printf("%s = %x\n", #reg , apic_read(reg))
+
+static void dummy(void *irq_priv)
+{
+}
+
+u64 lapic_timer_init(int vec)
+{
+    u64 hz;
+
+    set_irq_handler(vec, dummy, NULL);
+    apic_write(APIC_LVTT, vec);
+    apic_write(APIC_TDCR, 0xB); /* divide by 1 */
+    apic_write(APIC_TMICT, 0xffffffff);
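+    /* let the timer count down for one second to measure its rate */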
+    sdelay(1);
+    hz = 0xffffffff - apic_read(APIC_TMCCT);
+    printf("%s: detected %d Hz timer\n", __func__, hz);
+    return hz;
+}
+
+int lapic_timer_arm(u64 count, enum clockevt_type type)
+{
+    if (type == CLOCKEVT_PERIODIC)
+        apic_write(APIC_LVTT, apic_read(APIC_LVTT) | 1 << 17);
+    /* divide count */
+    apic_write(APIC_TDCR, 0xB);
+    /* initial count */
+    apic_write(APIC_TMICT, count);
+    return 0;
+}
+
+void lapic_timer_cancel(void)
+{
+    apic_write(APIC_LVTT, apic_read(APIC_LVTT) & ~(1 << 17)); /* one-shot */
+    apic_write(APIC_TMICT, 0);
+}
+
+u64 lapic_timer_remain(void)
+{
+    return apic_read(APIC_TMCCT);
+}
+
+struct clockevent lapic_timer = {
+    .name = "lapic",
+    .init = lapic_timer_init,
+    .arm = lapic_timer_arm,
+    .cancel = lapic_timer_cancel,
+    .remain = lapic_timer_remain,
+};
+
+/* ---------- PIT clockevent --------- */
+
+#define PIT_FREQ 1193181
+#define PIT_CNT_0 0x40
+#define PIT_CNT_1 0x41
+#define PIT_CNT_2 0x42
+#define PIT_TCW 0x43
+
+u64 pit_timer_remain(void)
+{
+    u8 lo, hi;
+
+    outb(0x00, PIT_TCW);    /* latch counter 0 before reading it */
+    lo = inb(PIT_CNT_0);
+    hi = inb(PIT_CNT_0);
+    return lo | (u64)hi << 8;
+}
+
+u64 pit_timer_init(int vec)
+{
+    set_ioapic_redir(0, vec);
+    /* mask LINT0, int is coming through IO-APIC */
+    apic_write(APIC_LVT0, 1 << 16);
+    return PIT_FREQ;
+}
+
+int pit_timer_arm(u64 count, enum clockevt_type type)
+{
+    unsigned char ctrl_word = 0x30;
+
+    if (type == CLOCKEVT_PERIODIC)
+        ctrl_word |= 0x4;
+    outb(ctrl_word, PIT_TCW);
+    outb(count & 0xff, PIT_CNT_0);
+    outb((count & 0xff00) >> 8, PIT_CNT_0);
+    return 0;
+}
+
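+/* reprogram mode 0 with a zero (i.e. maximum) count: this stops periodic
+ * operation, though one stray one-shot interrupt may still fire later */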
+void pit_timer_cancel(void)
+{
+    unsigned char ctrl_word = 0x30;
+    outb(ctrl_word, PIT_TCW);
+    outb(0, PIT_CNT_0);
+    outb(0, PIT_CNT_0);
+}
+
+struct clockevent pit_timer = {
+    .name = "pit",
+    .init = pit_timer_init,
+    .arm = pit_timer_arm,
+    .cancel = pit_timer_cancel,
+    .remain = pit_timer_remain,
+};
+
+
+#define NR_CLOCKEVENTS 2
+
+/* clockevent initialization */
+struct clockevent *clockevents[NR_CLOCKEVENTS] = {
+    &pit_timer, &lapic_timer,
+};
+
+decl_irq_handler(0)
+decl_irq_handler(1)
+
+static struct int_table int_handlers[NR_CLOCKEVENTS] = {
+    { .func = timer_int_0 }, { .func = timer_int_1 },
+};
+
+#define TIMER_VEC_BASE 0x90
+
+void set_irq_handler(int vec, void (*func)(void *irq_priv), void *irq_priv)
+{
+    int int_table_idx = vec - TIMER_VEC_BASE;
+
+    if (int_table_idx >= NR_CLOCKEVENTS)
+        printf("%s invalid vec\n", __func__);
+
+    int_handlers[int_table_idx].irq_handler = func;
+    int_handlers[int_table_idx].irq_priv = irq_priv;
+}
+
+void init_interrupts(void)
+{
+    int i;
+
+    for (i = 0; i < NR_CLOCKEVENTS; i++) {
+        int vec = TIMER_VEC_BASE + i;
+
+        set_idt_entry(vec, int_handlers[i].func);
+    }
+}
+
+int init_clockevents(void)
+{
+    int i;
+    int vec = TIMER_VEC_BASE;
+
+    for (i = 0; i < ARRAY_SIZE(clockevents); i++) {
+        u64 freq = clockevents[i]->init(vec);
+        clockevents[i]->freq = freq;
+        clockevents[i]->vec = vec;
+        vec++;
+    }
+    return 0;
+}
+
+/*
+ *
+ * TODO:
+ * 1. test every divisor possible
+ * 2. monotonicity of clocksources
+ * 3. tests to mimic Linux calibration sites
+ * 4. SMP
+ *
+ */
+
+/* actual tests */
+
+#define TIME_TABLE_SZ 100
+struct time_table {
+    u64 val[TIME_TABLE_SZ];
+    int idx;
+    struct clocksource *source;
+    u64 period;
+};
+
+void time_table_record(struct time_table *t)
+{
+    t->val[t->idx] = clocksource_read(t->source);
+    t->idx++;
+    if (t->idx >= TIME_TABLE_SZ)
+        t->idx = 0;
+}
+
+void inspect_table(struct time_table *t)
+{
+    int i;
+    int samples = 0;
+    int percent_avg = 0;
+
+    for (i = 1; i < t->idx; i++) {
+        u64 fire_period = t->val[i] - t->val[i-1];
+
+        /* FIXME: handle clocksource wraparound */
+        if (t->val[i] < t->val[i-1])
+            break;
+        percent_avg += (fire_period * 100) / t->period;
+        samples++;
+    }
+
+    if (!samples)
+        return;
+
+    percent_avg /= samples;
+    printf(" %d ms period: avg fired period = %d percent of programmed\n",
+           (int)ns_to_ms(t->period), percent_avg);
+}
+
+static void timer_int_record(void *irq_priv)
+{
+    time_table_record(irq_priv);
+}
+
+void test_periodic_one_clock(struct clockevent *clockevt,
+                             struct clocksource *source, u64 period)
+{
+    int i;
+    struct time_table *t = vmalloc(sizeof(struct time_table));
+
+    t->idx = 0;
+    t->period = period;
+    t->source = source;
+
+    clockevt->cancel();
+    set_irq_handler(clockevt->vec, timer_int_record, t);
+
+    clock_arm(clockevt, CLOCKEVT_PERIODIC, period);
+
+    for (i=0;i<50;i++)
+        hlt();
+
+    clockevt->cancel();
+    inspect_table(t);
+    vfree(t);
+}
+
+static int periodic_periods_ms[] = { 1, 2, 10, 15, 20, 50, 100, 200 };
+
+void test_periodic_events(void)
+{
+    int i, n, x;
+
+    for (x = 0; x < ARRAY_SIZE(clocksources); x++) {
+        struct clocksource *clocksource = clocksources[x];
+
+        for (i = 0; i < ARRAY_SIZE(clockevents); i++) {
+            struct clockevent *clockevt = clockevents[i];
+
+            printf("clockevent = %s clocksource = %s\n", clockevt->name,
+                   clocksource->name);
+
+            for (n = 0; n < ARRAY_SIZE(periodic_freqs); n++)
+                test_periodic_one_clock(clockevt, clocksource,
+                                        ms_to_ns(periodic_freqs[n]));
+        }
+    }
+}
+
+static void int_handler_reinject(void *irq_priv)
+{
+    int *ints = (int *)irq_priv;
+    *ints += 1;
+}
+
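+/* run a 1ms periodic timer with interrupts disabled for 100ms; the
+ * interrupt count seen afterwards exposes the reinjection policy */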
+void test_reinjection(void)
+{
+    int i;
+    u64 period = ms_to_ns(1);
+
+    for (i = 0; i < ARRAY_SIZE(clockevents); i++) {
+        struct clockevent *clockevt = clockevents[i];
+        int ints = 0;
+
+        printf("clockevent = %s\n", clockevt->name);
+
+        clockevt->cancel();
+        set_irq_handler(clockevt->vec, int_handler_reinject, &ints);
+
+        clock_arm(clockevt, CLOCKEVT_PERIODIC, period);
+        irq_disable();
+        mdelay(100);
+        irq_enable();
+        printf("irqoff delay=100 ints=%d\n", ints);
+
+        clockevt->cancel();
+    }
+}
+
+/* early calibration against the PIT to detect the TSC frequency, which is
+ * needed later to derive the lapic timer frequency.
+ */
+int timer_isr;
+static void timer_int_handler(void *irq_priv)
+{
+    timer_isr++;
+}
+
+void early_calibrate_cpu_hz(void)
+{
+    u64 t1, t2;
+    int ints_per_sec = (PIT_FREQ/0xffff)+1;
+
+    timer_isr = 0;
+    t1 = rdtsc();
+    do {
+        pit_timer.arm(0xffff, CLOCKEVT_ONESHOT);
+        __asm__ volatile ("hlt");
+    } while (timer_isr < ints_per_sec);
+    t2 = rdtsc();
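+    /* ints_per_sec one-shots of 0xffff PIT counts span ~1.04s, so this
+     * overstates the TSC frequency by ~4%; close enough for these tests */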
+    cpu_hz = t2 - t1;
+    printf("detected %lld MHz cpu\n", cpu_hz);
+    tsc.freq = cpu_hz;
+}
+
+void early_calibrate(void)
+{
+    pit_timer.init(TIMER_VEC_BASE);
+    set_irq_handler(TIMER_VEC_BASE, timer_int_handler, NULL);
+    early_calibrate_cpu_hz();
+}
+
+int main()
+{
+    setup_vm();
+    init_interrupts();
+
+    g_apic = vmap(0xfee00000, 0x1000);
+    g_ioapic = vmap(0xfec00000, 0x1000);
+
+    test_lapic_existence();
+
+    enable_apic();
+    init_idt();
+
+    test_self_ipi();
+
+    early_calibrate();
+    init_clockevents();
+
+    test_reinjection();
+    test_periodic_events();
+
+    return g_fail != 0;
+}
+
+
+#if 0
+void calibrate(struct clocksource *csource, struct clockevent *cevent)
+{
+    u64 t1, t2;
+    int ints_per_sec = (cevent->freq / c
+
+    csource->wait();
+    t1 = clocksource_read(csource);
+    do {
+        cevent->arm(count, CLOCKEVT_ONESHOT);
+        __asm__ volatile ("hlt");
+    } while (timer_isr < ints_per_sec);
+    t2 = clocksource_read(csource);
+
+    printf("");
+}
+#endif
+

-- 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-06  1:55 ` [patch 4/8] KVM: x86: replace hrtimer based timer emulation Marcelo Tosatti
@ 2009-07-08 12:58   ` Gleb Natapov
  2009-07-08 13:17     ` Marcelo Tosatti
  2009-07-08 13:41   ` Avi Kivity
  1 sibling, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2009-07-08 12:58 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Excellent patch series.

On Sun, Jul 05, 2009 at 10:55:15PM -0300, Marcelo Tosatti wrote:
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>  {
> -	int ret;
> +	ktime_t now, expires;
>  
> -	ret = pit_has_pending_timer(vcpu);
> -	ret |= apic_has_pending_timer(vcpu);
> +	expires = kvm_vcpu_next_timer_event(vcpu);
> +	now = ktime_get();
> +	if (expires.tv64 <= now.tv64) {
> +		if (kvm_arch_interrupt_allowed(vcpu))
> +			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
You shouldn't unhalt vcpu here. Not every timer event will generate
interrupt (vector can be masked in pic/ioapic) or timer event can
generate NMI instead of interrupt. Leaving this code out probably means
that you can't remove kvm_inject_pending_timer_irqs() call from
__vcpu_run().

> +		return 1;
> +	}
>  
> -	return ret;
> +	return 0;
>  }
>  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>  

--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 12:58   ` Gleb Natapov
@ 2009-07-08 13:17     ` Marcelo Tosatti
  2009-07-08 13:39       ` Gleb Natapov
  2009-07-08 16:29       ` Gleb Natapov
  0 siblings, 2 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-08 13:17 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

[-- Attachment #1: Type: text/plain, Size: 1746 bytes --]

On Wed, Jul 08, 2009 at 03:58:19PM +0300, Gleb Natapov wrote:
> Excellent patch series.
> 
> On Sun, Jul 05, 2009 at 10:55:15PM -0300, Marcelo Tosatti wrote:
> >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> >  {
> > -	int ret;
> > +	ktime_t now, expires;
> >  
> > -	ret = pit_has_pending_timer(vcpu);
> > -	ret |= apic_has_pending_timer(vcpu);
> > +	expires = kvm_vcpu_next_timer_event(vcpu);
> > +	now = ktime_get();
> > +	if (expires.tv64 <= now.tv64) {
> > +		if (kvm_arch_interrupt_allowed(vcpu))
> > +			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
> You shouldn't unhalt vcpu here. Not every timer event will generate
> interrupt (vector can be masked in pic/ioapic)

Yeah. Note however that kvm_vcpu_next_timer_event only returns the
expiration time for events that have been acked (for timers that have
had events injected, but not acked it returns KTIME_MAX).

So, the code above will set one spurious unhalt if:

- inject timer irq
- guest acks irq
- guest mask irq
- unhalt (once)

I had a "kvm_timer_mask" callback before (along with the attached
patch), but decided to keep it simpler using the ack trick above.

I suppose one spurious unhalt is harmless, or is it a correctness issue?
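
A user-space sketch of the ack gating described above (the struct, flag,
and helper names are illustrative stand-ins, not the patch's actual code):

    /* hypothetical model: unacked timers report no expiry (KTIME_MAX) */
    typedef long long ktime_ns;
    #define KTIME_MAX_NS 0x7fffffffffffffffLL

    struct ktimer {
        ktime_ns expires;
        int injected;              /* event injected but not yet acked */
    };

    static ktime_ns next_timer_event(struct ktimer *t, int n)
    {
        ktime_ns min = KTIME_MAX_NS;
        int i;

        for (i = 0; i < n; i++) {
            if (t[i].injected)     /* pending ack: treat as KTIME_MAX */
                continue;
            if (t[i].expires < min)
                min = t[i].expires;
        }
        return min;
    }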

> or timer event can generate NMI instead of interrupt.

In the NMI case it should not unhalt the processor?

(but yes, bypassing the irq injection system is not a very beautiful
shortcut, but it's done in other places too, e.g. i8254.c NMI injection via
all cpus' LINT0).

> Leaving this code out probably means
> that you can't remove kvm_inject_pending_timer_irqs() call from
> __vcpu_run().
> 
> > +		return 1;
> > +	}
> >  
> > -	return ret;
> > +	return 0;
> >  }
> >  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
> >  
> 
> --
> 			Gleb.

[-- Attachment #2: kvm-int-mask-helpers --]
[-- Type: text/plain, Size: 3761 bytes --]

Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -122,6 +122,18 @@ static inline int get_priority(struct kv
 	return priority;
 }
 
+int pic_int_is_masked(struct kvm *kvm, int irq)
+{
+	u8 imr;
+	struct kvm_pic *s = pic_irqchip(kvm);
+
+	pic_lock(s);
+	imr = s->pics[SELECT_PIC(irq)].imr;
+	pic_unlock(s);
+
+	return (imr & (1 << irq));
+}
+
 /*
  * return the pic wanted interrupt. return -1 if none
  */
Index: kvm/arch/x86/kvm/irq.c
===================================================================
--- kvm.orig/arch/x86/kvm/irq.c
+++ kvm/arch/x86/kvm/irq.c
@@ -42,6 +42,29 @@ int kvm_cpu_has_pending_timer(struct kvm
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * The PIT interrupt can be masked on different levels: PIC/IOAPIC/LAPIC.
+ * Hide the maze from the PIT mask notifier, all it cares about is whether
+ * the interrupt can reach the processor.
+ */
+void kvm_pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+	bool masked = 0;
+	struct kvm *kvm;
+	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+	kvm = pit->kvm;
+
+	/* LAPIC hardware disabled or INTIN0 programmed as ExtINT */
+	if (kvm_apic_accept_pic_intr(kvm->vcpus[0])) {
+		masked = pic_int_is_masked(kvm, 0);
+	/* IOAPIC -> LAPIC */
+	} else
+		masked = ioapic_int_is_masked(kvm, 0);
+
+	kvm_pit_internal_mask_notifier(kvm, mask);
+}
+
+/*
  * check if there is pending interrupt without
  * intack.
  */
Index: kvm/virt/kvm/ioapic.c
===================================================================
--- kvm.orig/virt/kvm/ioapic.c
+++ kvm/virt/kvm/ioapic.c
@@ -101,6 +101,16 @@ static int ioapic_service(struct kvm_ioa
 	return injected;
 }
 
+int ioapic_int_is_masked(struct kvm *kvm, int pin)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (!ioapic)
+		return 1;
+
+	return ioapic->redirtbl[pin].fields.mask;
+}
+
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -558,9 +558,9 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	pit->pit_state.irq_ack = 1;
 }
 
-static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+void kvm_pit_internal_mask_notifier(struct kvm *kvm, bool mask)
 {
-	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+	struct kvm_pit *pit = kvm->arch.vpit;
 
 	if (!mask) {
 		atomic_set(&pit->pit_state.pit_timer.pending, 0);
@@ -614,8 +614,8 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 	kvm_pit_reset(pit);
 
-	pit->mask_notifier.func = pit_mask_notifer;
-	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+	pit->mask_notifier.func = kvm_pit_mask_notifier;
+	kvm_register_irq_mask_notifier(kvm, 0, &kvm->arch.vpit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
 	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
Index: kvm/arch/x86/kvm/irq.h
===================================================================
--- kvm.orig/arch/x86/kvm/irq.h
+++ kvm/arch/x86/kvm/irq.h
@@ -98,7 +98,13 @@ void __kvm_migrate_apic_timer(struct kvm
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
+void kvm_pit_internal_mask_notifier(struct kvm *kvm, bool mask);
+void kvm_pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask);
+
 int pit_has_pending_timer(struct kvm_vcpu *vcpu);
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
+int pic_int_is_masked(struct kvm *kvm, int irq);
+int ioapic_int_is_masked(struct kvm *kvm, int irq);
+
 #endif

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 1/8] KVM: timer interface unification
  2009-07-06  1:55 ` [patch 1/8] KVM: timer interface unification Marcelo Tosatti
@ 2009-07-08 13:33   ` Avi Kivity
  0 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2009-07-08 13:33 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 07/06/2009 04:55 AM, Marcelo Tosatti wrote:
> Hide details of timer emulation behind an interface, and unify
> the hrtimer based implementation.
>    

For bisectability, might want to split into three: introduce generic 
timer, migrate lapic timer, migrate pit timer.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 13:17     ` Marcelo Tosatti
@ 2009-07-08 13:39       ` Gleb Natapov
  2009-07-08 15:42         ` Marcelo Tosatti
  2009-07-08 16:29       ` Gleb Natapov
  1 sibling, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2009-07-08 13:39 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> On Wed, Jul 08, 2009 at 03:58:19PM +0300, Gleb Natapov wrote:
> > Excellent patch series.
> > 
> > On Sun, Jul 05, 2009 at 10:55:15PM -0300, Marcelo Tosatti wrote:
> > >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> > >  {
> > > -	int ret;
> > > +	ktime_t now, expires;
> > >  
> > > -	ret = pit_has_pending_timer(vcpu);
> > > -	ret |= apic_has_pending_timer(vcpu);
> > > +	expires = kvm_vcpu_next_timer_event(vcpu);
> > > +	now = ktime_get();
> > > +	if (expires.tv64 <= now.tv64) {
> > > +		if (kvm_arch_interrupt_allowed(vcpu))
> > > +			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
> > You shouldn't unhalt vcpu here. Not every timer event will generate
> > interrupt (vector can be masked in pic/ioapic)
> 
> Yeah. Note however that kvm_vcpu_next_timer_event only returns the
> expiration time for events that have been acked (for timers that have
> had events injected, but not acked it returns KTIME_MAX).
> 
> So, the code above will set one spurious unhalt if:
> 
> - inject timer irq
> - guest acks irq
> - guest mask irq
> - unhalt (once)
> 
> I had a "kvm_timer_mask" callback before (along with the attached
> patch), but decided to keep it simpler using the ack trick above.
> 
> I suppose one spurious unhalt is harmless, or is it a correctness issue?
> 
This is a correctness issue. We should be as close to a real CPU as
possible. This will save us many hours of debugging later :)

> > or timer event can generate NMI instead of interrupt.
> 
> In the NMI case it should not unhalt the processor?
Why? It should. It should jump to NMI handler.

> 
> (but yes, bypassing the irq injection system its not a very beatiful
> shortcut, but its done in other places too eg i8254.c NMI injection via
> all cpus LINT0).
> 
> > Leaving this code out probably means
> > that you can't remove kvm_inject_pending_timer_irqs() call from
> > __vcpu_run().
> > 
> > > +		return 1;
> > > +	}
> > >  
> > > -	return ret;
> > > +	return 0;
> > >  }
> > >  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
> > >  
> > 
> > --
> > 			Gleb.

> Index: kvm/arch/x86/kvm/i8259.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8259.c
> +++ kvm/arch/x86/kvm/i8259.c
> @@ -122,6 +122,18 @@ static inline int get_priority(struct kv
>  	return priority;
>  }
>  
> +int pic_int_is_masked(struct kvm *kvm, int irq)
> +{
> +	u8 imr;
> +	struct kvm_pic *s = pic_irqchip(kvm);
> +
> +	pic_lock(s);
> +	imr = s->pics[SELECT_PIC(irq)].imr;
> +	pic_unlock(s);
> +
> +	return (imr & (1 << irq));
> +}
> +
>  /*
>   * return the pic wanted interrupt. return -1 if none
>   */
> Index: kvm/arch/x86/kvm/irq.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/irq.c
> +++ kvm/arch/x86/kvm/irq.c
> @@ -42,6 +42,29 @@ int kvm_cpu_has_pending_timer(struct kvm
>  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>  
>  /*
> + * The PIT interrupt can be masked on different levels: PIC/IOAPIC/LAPIC.
> + * Hide the maze from the PIT mask notifier, all it cares about is whether
> + * the interrupt can reach the processor.
> + */
> +void kvm_pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask)
> +{
> +	bool masked = 0;
> +	struct kvm *kvm;
> +	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
> +
> +	kvm = pit->kvm;
> +
> +	/* LAPIC hardware disabled or INTIN0 programmed as ExtINT */
> +	if (kvm_apic_accept_pic_intr(kvm->vcpus[0])) {
> +		masked = pic_int_is_masked(kvm, 0);
> +	/* IOAPIC -> LAPIC */
> +	} else
> +		masked = ioapic_int_is_masked(kvm, 0);
> +
> +	kvm_pit_internal_mask_notifier(kvm, mask);
> +}
> +
> +/*
>   * check if there is pending interrupt without
>   * intack.
>   */
> Index: kvm/virt/kvm/ioapic.c
> ===================================================================
> --- kvm.orig/virt/kvm/ioapic.c
> +++ kvm/virt/kvm/ioapic.c
> @@ -101,6 +101,16 @@ static int ioapic_service(struct kvm_ioa
>  	return injected;
>  }
>  
> +int ioapic_int_is_masked(struct kvm *kvm, int pin)
> +{
> +	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> +
> +	if (!ioapic)
> +		return 1;
> +
> +	return ioapic->redirtbl[pin].fields.mask;
> +}
> +
>  static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
>  {
>  	unsigned index;
> Index: kvm/arch/x86/kvm/i8254.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.c
> +++ kvm/arch/x86/kvm/i8254.c
> @@ -558,9 +558,9 @@ void kvm_pit_reset(struct kvm_pit *pit)
>  	pit->pit_state.irq_ack = 1;
>  }
>  
> -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
> +void kvm_pit_internal_mask_notifier(struct kvm *kvm, bool mask)
>  {
> -	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
> +	struct kvm_pit *pit = kvm->arch.vpit;
>  
>  	if (!mask) {
>  		atomic_set(&pit->pit_state.pit_timer.pending, 0);
> @@ -614,8 +614,8 @@ struct kvm_pit *kvm_create_pit(struct kv
>  
>  	kvm_pit_reset(pit);
>  
> -	pit->mask_notifier.func = pit_mask_notifer;
> -	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
> +	pit->mask_notifier.func = kvm_pit_mask_notifier;
> +	kvm_register_irq_mask_notifier(kvm, 0, &kvm->arch.vpit->mask_notifier);
>  
>  	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
>  	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
> Index: kvm/arch/x86/kvm/irq.h
> ===================================================================
> --- kvm.orig/arch/x86/kvm/irq.h
> +++ kvm/arch/x86/kvm/irq.h
> @@ -98,7 +98,13 @@ void __kvm_migrate_apic_timer(struct kvm
>  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
>  void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
>  
> +void kvm_pit_internal_mask_notifier(struct kvm *kvm, bool mask);
> +void kvm_pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask);
> +
>  int pit_has_pending_timer(struct kvm_vcpu *vcpu);
>  int apic_has_pending_timer(struct kvm_vcpu *vcpu);
>  
> +int pic_int_is_masked(struct kvm *kvm, int irq);
> +int ioapic_int_is_masked(struct kvm *kvm, int irq);
> +
>  #endif


--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-06  1:55 ` [patch 4/8] KVM: x86: replace hrtimer based timer emulation Marcelo Tosatti
  2009-07-08 12:58   ` Gleb Natapov
@ 2009-07-08 13:41   ` Avi Kivity
  2009-07-08 16:24     ` Marcelo Tosatti
  1 sibling, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2009-07-08 13:41 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 07/06/2009 04:55 AM, Marcelo Tosatti wrote:
> Replace hrtimer based timer emulation with host timebase (ktime_t)
> comparisons on guest entry.
>
> This avoids host load when guests are scheduled out, removes a
> spinlock acquision on entry (i8254.c's inject_lock), and makes future
> improvements easier.
>    

I wonder if we're really winning with this.  Guests should be 
scheduled-out-but-not-halted rarely, and in all other cases we need to 
keep the timer.  A timer comparison on each guest entry might be 
expensive (maybe not so much with tsc based timers).
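
For reference, the per-entry check being discussed reduces to roughly the
following (a user-space sketch assuming the per-vcpu timer list caches its
earliest expiry; all names are illustrative, not the patch's actual code):

    typedef long long ktime_ns;

    struct vcpu_timers {
        ktime_ns next_expiry;      /* earliest armed timer, kept up to date */
    };

    /* fast path on guest entry: a single compare in the common case */
    static int entry_timer_check(const struct vcpu_timers *t, ktime_ns now)
    {
        if (now < t->next_expiry)
            return 0;              /* nothing expired, enter the guest */
        return 1;                  /* walk the timer list and inject */
    }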

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 0/8] RFC: in-kernel timer emulation changes
  2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
                   ` (7 preceding siblings ...)
  2009-07-06  1:55 ` [patch 8/8] kvmctl: time testcase Marcelo Tosatti
@ 2009-07-08 13:43 ` Avi Kivity
  8 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2009-07-08 13:43 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 07/06/2009 04:55 AM, Marcelo Tosatti wrote:
> This still needs to survive kvm-autotest, posting early for comments.
>
> It replaces hrtimer based emulation with ktime_t comparisons on guest
> entry (avoids host load when guests are scheduled out, removes a
> spinlock acquision on entry (i8254.c's inject_lock), and makes future
> improvements easier).
>    

Overall, very nice.  I await the autotest results with interest :)

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 13:39       ` Gleb Natapov
@ 2009-07-08 15:42         ` Marcelo Tosatti
  2009-07-08 16:13           ` Gleb Natapov
  0 siblings, 1 reply; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-08 15:42 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Wed, Jul 08, 2009 at 04:39:56PM +0300, Gleb Natapov wrote:
> On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> > On Wed, Jul 08, 2009 at 03:58:19PM +0300, Gleb Natapov wrote:
> > > Excellent patch series.
> > > 
> > > On Sun, Jul 05, 2009 at 10:55:15PM -0300, Marcelo Tosatti wrote:
> > > >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> > > >  {
> > > > -	int ret;
> > > > +	ktime_t now, expires;
> > > >  
> > > > -	ret = pit_has_pending_timer(vcpu);
> > > > -	ret |= apic_has_pending_timer(vcpu);
> > > > +	expires = kvm_vcpu_next_timer_event(vcpu);
> > > > +	now = ktime_get();
> > > > +	if (expires.tv64 <= now.tv64) {
> > > > +		if (kvm_arch_interrupt_allowed(vcpu))
> > > > +			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
> > > You shouldn't unhalt vcpu here. Not every timer event will generate
> > > interrupt (vector can be masked in pic/ioapic)
> > 
> > Yeah. Note however that kvm_vcpu_next_timer_event only returns the
> > expiration time for events that have been acked (for timers that have
> > had events injected, but not acked it returns KTIME_MAX).
> > 
> > So, the code above will set one spurious unhalt if:
> > 
> > - inject timer irq
> > - guest acks irq
> > - guest mask irq
> > - unhalt (once)
> > 
> > I had a "kvm_timer_mask" callback before (along with the attached
> > patch), but decided to keep it simpler using the ack trick above.
> > 
> > I suppose one spurious unhalt is harmless, or is it a correctness issue?
> > 
> This is correctness issue. We should be as close to real CPU as
> possible. This will save us may hours of debugging later :)

Hmm, fine. Will update the kvm_timer_mask patch below and let you know.

> > > or timer event can generate NMI instead of interrupt.
> > 
> > In the NMI case it should not unhalt the processor?
> Why? It should. It should jump to NMI handler.

I meant unhalt as in KVM_REQ_UNHALT so vcpu_enter_guest runs.

What did you mention about ISR/IRR again?


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 15:42         ` Marcelo Tosatti
@ 2009-07-08 16:13           ` Gleb Natapov
  0 siblings, 0 replies; 23+ messages in thread
From: Gleb Natapov @ 2009-07-08 16:13 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Wed, Jul 08, 2009 at 12:42:52PM -0300, Marcelo Tosatti wrote:
> On Wed, Jul 08, 2009 at 04:39:56PM +0300, Gleb Natapov wrote:
> > On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> > > On Wed, Jul 08, 2009 at 03:58:19PM +0300, Gleb Natapov wrote:
> > > > Excellent patch series.
> > > > 
> > > > On Sun, Jul 05, 2009 at 10:55:15PM -0300, Marcelo Tosatti wrote:
> > > > >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> > > > >  {
> > > > > -	int ret;
> > > > > +	ktime_t now, expires;
> > > > >  
> > > > > -	ret = pit_has_pending_timer(vcpu);
> > > > > -	ret |= apic_has_pending_timer(vcpu);
> > > > > +	expires = kvm_vcpu_next_timer_event(vcpu);
> > > > > +	now = ktime_get();
> > > > > +	if (expires.tv64 <= now.tv64) {
> > > > > +		if (kvm_arch_interrupt_allowed(vcpu))
> > > > > +			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
> > > > You shouldn't unhalt vcpu here. Not every timer event will generate
> > > > interrupt (vector can be masked in pic/ioapic)
> > > 
> > > Yeah. Note however that kvm_vcpu_next_timer_event only returns the
> > > expiration time for events that have been acked (for timers that have
> > > had events injected, but not acked it returns KTIME_MAX).
> > > 
> > > So, the code above will set one spurious unhalt if:
> > > 
> > > - inject timer irq
> > > - guest acks irq
> > > - guest mask irq
> > > - unhalt (once)
> > > 
> > > I had a "kvm_timer_mask" callback before (along with the attached
> > > patch), but decided to keep it simpler using the ack trick above.
> > > 
> > > I suppose one spurious unhalt is harmless, or is it a correctness issue?
> > > 
> > This is correctness issue. We should be as close to real CPU as
> > possible. This will save us may hours of debugging later :)
> 
> Hum, fine. Will update the kvm_timer_mask patch below and let you know.
> 
> > > > or timer event can generate NMI instead of interrupt.
> > > 
> > > In the NMI case it should not unhalt the processor?
> > Why? It should. It should jump to NMI handler.
> 
> I meant unhalt as in KVM_REQ_UNHALT so vcpu_enter_guest runs.
> 
Yes, it should. Inside vcpu_enter_guest() the NMI will be injected
and the NMI handler will be executed.

> What did you mention about ISR/IRR again?
On real HW the following may happen:
 Timer interrupt delivered to the apic and placed into IRR
 Timer interrupt delivered to the cpu and moved from IRR to ISR
 New timer interrupt delivered to the apic and placed into IRR before
  the previous one is acked.

In your patch ktimer->can_inject is set to false when a timer is injected,
and the next interrupt is injected only after the OS acks the previous
timer, so the above situation cannot happen. I don't know if this is
important or not. It is possible to write code that works only with the
former behaviour, but I don't see why somebody would want to do that. We
can emulate the former behaviour, though, by not relying on acks to count
delivered events and instead making the ->inject callback return a status
indicating whether the interrupt was delivered to the apic or not.
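
To illustrate, here is a minimal user-space model of the two behaviours
(all names are illustrative): the IRR/ISR pair lets a second event be
accepted while one is still in service, and inject() reports whether the
apic took the event:

    struct toy_apic {
        int irr, isr;              /* one vector's pending/in-service bits */
    };

    /* returns 1 if the event was accepted, 0 if it was coalesced */
    static int inject(struct toy_apic *a)
    {
        if (a->irr)
            return 0;              /* already pending: event coalesced */
        a->irr = 1;
        return 1;
    }

    static void cpu_take_interrupt(struct toy_apic *a)
    {
        a->irr = 0;
        a->isr = 1;                /* in service until the guest EOIs */
    }

    static void apic_eoi(struct toy_apic *a)
    {
        a->isr = 0;
    }

With this model inject() can succeed while isr is still set, matching the
hardware pipelining above; gating injection on the ack instead allows only
one outstanding event per timer.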

--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 13:41   ` Avi Kivity
@ 2009-07-08 16:24     ` Marcelo Tosatti
  2009-07-08 16:36       ` Avi Kivity
  0 siblings, 1 reply; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-08 16:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Wed, Jul 08, 2009 at 04:41:22PM +0300, Avi Kivity wrote:
> On 07/06/2009 04:55 AM, Marcelo Tosatti wrote:
>> Replace hrtimer based timer emulation with host timebase (ktime_t)
>> comparisons on guest entry.
>>
>> This avoids host load when guests are scheduled out, removes a
>> spinlock acquision on entry (i8254.c's inject_lock), and makes future
>> improvements easier.
>>    
>
> I wonder if we're really winning with this.  Guests should be  
> scheduled-out-but-not-halted rarely, and in all other cases we need to  
> keep the timer.  A timer comparison on each guest entry might be  
> expensive (maybe not so much with tsc based timers).

Any activity outside of guest mode that takes more than the period of
the timer (think 1000Hz) causes unnecessary host load.

Booting a RHEL5 UP without VNC or serial output on an idle host:

timer_int_normal=95416 timer_interrupt_accumulated=873

(and it continues to increase at that rate, roughly 1%). 

Now factor in multiple guests, loaded host, and you'll probably see more
than that.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 13:17     ` Marcelo Tosatti
  2009-07-08 13:39       ` Gleb Natapov
@ 2009-07-08 16:29       ` Gleb Natapov
  2009-07-08 16:37         ` Marcelo Tosatti
  1 sibling, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2009-07-08 16:29 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> (but yes, bypassing the irq injection system its not a very beatiful
> shortcut, but its done in other places too eg i8254.c NMI injection via
> all cpus LINT0).
> 
I've looked at this. Why do you say i8254.c NMI injection bypasses the irq
injection system? It looks to me like it uses the usual way to send
interrupts to all cpus.

--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 16:24     ` Marcelo Tosatti
@ 2009-07-08 16:36       ` Avi Kivity
  0 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2009-07-08 16:36 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 07/08/2009 07:24 PM, Marcelo Tosatti wrote:
>> I wonder if we're really winning with this.  Guests should be
>> scheduled-out-but-not-halted rarely, and in all other cases we need to
>> keep the timer.  A timer comparison on each guest entry might be
>> expensive (maybe not so much with tsc based timers).
>>      
>
> Any activity outside of guest mode that takes more than the period of
> the timer (think 1000Hz) causes unnecessary host load.
>
> Booting a RHEL5 UP without VNC or serial output on an idle host:
>
> timer_int_normal=95416 timer_interrupt_accumulated=873
>
> (and it continues to increase in that rate, roughly 1%).
>
> Now factor in multiple guests, loaded host, and you'll probably see more
> than that.
>    

If you're using qcow, you may be seeing the non-aio accesses.  
Otherwise, I can't think of anything that could cause 1ms latency.  A 
random host process? A long mmu resync?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 16:29       ` Gleb Natapov
@ 2009-07-08 16:37         ` Marcelo Tosatti
  2009-07-08 16:40           ` Gleb Natapov
  0 siblings, 1 reply; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-08 16:37 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Wed, Jul 08, 2009 at 07:29:58PM +0300, Gleb Natapov wrote:
> On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> > (but yes, bypassing the irq injection system its not a very beatiful
> > shortcut, but its done in other places too eg i8254.c NMI injection via
> > all cpus LINT0).
> > 
> I've looked at this. Why do you say i8254.c NMI injection bypass the irq
> injection system? It look to me like it uses usual way to send interrupt
> to all cpus.

It goes through apic_accept_irq so it's somewhat fine. But it accesses
the lapics of other vcpus locklessly (which does not happen via the
kvm_set_irq path, due to irq_lock protection).


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 16:37         ` Marcelo Tosatti
@ 2009-07-08 16:40           ` Gleb Natapov
  2009-07-08 16:47             ` Marcelo Tosatti
  0 siblings, 1 reply; 23+ messages in thread
From: Gleb Natapov @ 2009-07-08 16:40 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Wed, Jul 08, 2009 at 01:37:39PM -0300, Marcelo Tosatti wrote:
> On Wed, Jul 08, 2009 at 07:29:58PM +0300, Gleb Natapov wrote:
> > On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> > > (but yes, bypassing the irq injection system its not a very beatiful
> > > shortcut, but its done in other places too eg i8254.c NMI injection via
> > > all cpus LINT0).
> > > 
> > I've looked at this. Why do you say i8254.c NMI injection bypass the irq
> > injection system? It look to me like it uses usual way to send interrupt
> > to all cpus.
> 
> It goes through apic_accept_irq so its somewhat fine. But it accesses
> the lapics of other vcpus locklessly (which does not happen via
> kvm_set_irq path due to irq_lock protection).
Ah, that's the bug then. But otherwise it goes through the usual interrupt
logic. Do you know why the lock is missing? Due to a potential deadlock, or
did we just forget to lock?

--
			Gleb.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
  2009-07-08 16:40           ` Gleb Natapov
@ 2009-07-08 16:47             ` Marcelo Tosatti
  0 siblings, 0 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2009-07-08 16:47 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Wed, Jul 08, 2009 at 07:40:57PM +0300, Gleb Natapov wrote:
> On Wed, Jul 08, 2009 at 01:37:39PM -0300, Marcelo Tosatti wrote:
> > On Wed, Jul 08, 2009 at 07:29:58PM +0300, Gleb Natapov wrote:
> > > On Wed, Jul 08, 2009 at 10:17:21AM -0300, Marcelo Tosatti wrote:
> > > > (but yes, bypassing the irq injection system its not a very beatiful
> > > > shortcut, but its done in other places too eg i8254.c NMI injection via
> > > > all cpus LINT0).
> > > > 
> > > I've looked at this. Why do you say i8254.c NMI injection bypass the irq
> > > injection system? It look to me like it uses usual way to send interrupt
> > > to all cpus.
> > 
> > It goes through apic_accept_irq so its somewhat fine. But it accesses
> > the lapics of other vcpus locklessly (which does not happen via
> > kvm_set_irq path due to irq_lock protection).
> Ah, that the bug then. But otherwise it goes through usual interrupt
> logic. Do you know why the lock is missing? Due to potential deadlock or
> we just forget to lock?

There is no lock to protect the lapics, right; normally ISR setting is
lockless, which is fine, but kvm_apic_local_deliver also reads a
register. Hmm, should be fine though.


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2009-07-08 16:47 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-06  1:55 [patch 0/8] RFC: in-kernel timer emulation changes Marcelo Tosatti
2009-07-06  1:55 ` [patch 1/8] KVM: timer interface unification Marcelo Tosatti
2009-07-08 13:33   ` Avi Kivity
2009-07-06  1:55 ` [patch 2/8] KVM: move lapic timer ack to EOI handler Marcelo Tosatti
2009-07-06  1:55 ` [patch 3/8] KVM: x86: per-vcpu timer list Marcelo Tosatti
2009-07-06  1:55 ` [patch 4/8] KVM: x86: replace hrtimer based timer emulation Marcelo Tosatti
2009-07-08 12:58   ` Gleb Natapov
2009-07-08 13:17     ` Marcelo Tosatti
2009-07-08 13:39       ` Gleb Natapov
2009-07-08 15:42         ` Marcelo Tosatti
2009-07-08 16:13           ` Gleb Natapov
2009-07-08 16:29       ` Gleb Natapov
2009-07-08 16:37         ` Marcelo Tosatti
2009-07-08 16:40           ` Gleb Natapov
2009-07-08 16:47             ` Marcelo Tosatti
2009-07-08 13:41   ` Avi Kivity
2009-07-08 16:24     ` Marcelo Tosatti
2009-07-08 16:36       ` Avi Kivity
2009-07-06  1:55 ` [patch 5/8] KVM: timer: honor noreinject tunable Marcelo Tosatti
2009-07-06  1:55 ` [patch 6/8] KVM: timer: optimize next_timer_event and vcpu_arm_exit Marcelo Tosatti
2009-07-06  1:55 ` [patch 7/8] KVM: PIT: removed unused code Marcelo Tosatti
2009-07-06  1:55 ` [patch 8/8] kvmctl: time testcase Marcelo Tosatti
2009-07-08 13:43 ` [patch 0/8] RFC: in-kernel timer emulation changes Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).