From mboxrd@z Thu Jan  1 00:00:00 1970
From: "Zhang Haoyu"
Subject: [PATCH] kvm: ioapic: conditionally delay irq delivery during eoi broadcast
Date: Thu, 11 Sep 2014 16:47:04 +0800
Message-ID: <201409111647023813822@sangfor.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Cc: "qemu-devel", "Paolo Bonzini"
To: "kvm", "Jason Wang", "Jan Kiszka", "Michael S. Tsirkin"
Return-path:
Received: from smtp.sanfor.com ([58.251.49.30]:32960 "EHLO mail.sangfor.com"
	rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP
	id S1752838AbaIKIyq (ORCPT ); Thu, 11 Sep 2014 04:54:46 -0400
Sender: kvm-owner@vger.kernel.org
List-ID:

Currently, we call ioapic_service() immediately when we find the irq is
still active during eoi broadcast. But on real hardware there is some
delay between the EOI write and the irq delivery (system bus latency?),
so we need to emulate this behavior. Otherwise, a guest that has not
registered a proper irq handler would stay in its interrupt routine,
because the irq would be re-injected immediately after the guest enables
interrupts. The guest then cannot make forward progress and may miss the
chance to get a proper irq handler registered (one example is a Windows
guest resuming from hibernation).

As there is no way to distinguish an unhandled irq from a newly raised
one, this patch solves the problem by scheduling a delayed work item
when the count of irqs injected during eoi broadcast exceeds a
threshold. After this patch, the guest can make a little forward
progress while no suitable irq handler is registered, in case it
registers one very soon; and for a guest with a bad-irq detection
routine (such as note_interrupt() in Linux), the bad irq is still
recognized as quickly as before.

Cc: Michael S. Tsirkin
Signed-off-by: Jason Wang
Signed-off-by: Zhang Haoyu
---
 include/trace/events/kvm.h | 20 +++++++++++++++++++
 virt/kvm/ioapic.c          | 50 ++++++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/ioapic.h          |  6 ++++++
 3 files changed, 74 insertions(+), 2 deletions(-)
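The counting logic is easier to evaluate in isolation, so below is a
minimal, compilable toy model of the detect-and-defer pattern the patch
applies. Everything in it (irq_line, deliver(), defer_delivery()) is a
hypothetical stand-in for the real ioapic_service() and
schedule_delayed_work() calls; only the threshold bookkeeping mirrors
the patch.

#include <stdbool.h>
#include <stdio.h>

#define SUCCESSIVE_IRQ_MAX 10000	/* same threshold as the patch */

struct irq_line {
	bool pending;			/* irq still asserted at EOI time */
	unsigned int eoi_count;		/* successive re-injections seen */
};

/* Stand-in for ioapic_service(): immediate re-injection. */
static void deliver(struct irq_line *line)
{
	(void)line;			/* the real code injects the irq here */
}

/* Stand-in for schedule_delayed_work(): retry later instead of now. */
static void defer_delivery(struct irq_line *line)
{
	(void)line;
	printf("storm suspected, deferring re-injection\n");
}

/* Models the EOI path for one level-triggered, unmasked pin. */
static void on_eoi(struct irq_line *line)
{
	if (!line->pending) {
		line->eoi_count = 0;	/* a handled irq resets the counter */
		return;
	}
	if (++line->eoi_count == SUCCESSIVE_IRQ_MAX) {
		defer_delivery(line);	/* give the guest breathing room */
		line->eoi_count = 0;
	} else {
		deliver(line);		/* normal case: re-inject at once */
	}
}

int main(void)
{
	/* a guest with no handler EOIs the same still-pending irq forever */
	struct irq_line line = { .pending = true, .eoi_count = 0 };
	int i;

	for (i = 0; i < SUCCESSIVE_IRQ_MAX; i++)
		on_eoi(&line);
	return 0;
}

With pending never cleared, the loop hits the threshold exactly once
and defers instead of re-injecting, which is the behavior the new
branch in __kvm_ioapic_update_eoi() below introduces.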
"|masked" : "") +); + TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e8ce34c..8e1dc67 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -405,6 +405,24 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) spin_unlock(&ioapic->lock); } +static void kvm_ioapic_eoi_inject_work(struct work_struct *work) +{ + int i; + struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, + eoi_inject.work); + spin_lock(&ioapic->lock); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) + continue; + + if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) + ioapic_service(ioapic, i, false); + } + spin_unlock(&ioapic->lock); +} + static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, struct kvm_ioapic *ioapic, int vector, int trigger_mode) { @@ -435,8 +453,32 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (ioapic->irr & (1 << i)) - ioapic_service(ioapic, i, false); + if (!ent->fields.mask && (ioapic->irr & (1 << i))) { + ++ioapic->irq_eoi[i]; + if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { + /* + * Real hardware does not deliver the irq so + * immediately during eoi broadcast, so we need + * to emulate this behavior. Otherwise, for + * guests who has not registered handler of a + * level irq, this irq would be injected + * immediately after guest enables interrupt + * (which happens usually at the end of the + * common interrupt routine). This would lead + * guest can't move forward and may miss the + * possibility to get proper irq handler + * registered. So we need to give some breath to + * guest. 
+ */ + schedule_delayed_work(&ioapic->eoi_inject, 1); + ioapic->irq_eoi[i] = 0; + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); + } else { + ioapic_service(ioapic, i, false); + } + } else { + ioapic->irq_eoi[i] = 0; + } } } @@ -565,12 +607,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) { int i; + cancel_delayed_work_sync(&ioapic->eoi_inject); for (i = 0; i < IOAPIC_NUM_PINS; i++) ioapic->redirtbl[i].fields.mask = 1; ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; ioapic->ioregsel = 0; ioapic->irr = 0; ioapic->id = 0; + memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); rtc_irq_eoi_tracking_reset(ioapic); update_handled_vectors(ioapic); } @@ -589,6 +633,7 @@ int kvm_ioapic_init(struct kvm *kvm) if (!ioapic) return -ENOMEM; spin_lock_init(&ioapic->lock); + INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); @@ -609,6 +654,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + cancel_delayed_work_sync(&ioapic->eoi_inject); if (ioapic) { kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm->arch.vioapic = NULL; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 90d43e9..260ac0a 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -34,6 +34,10 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 +/* max successive count of the same irq during ioapic eoi broadcast, if this limitation reached, + which can be considered as interrupt storm happened */ +#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 + #ifdef CONFIG_X86 #define RTC_GSI 8 #else @@ -59,6 +63,8 @@ struct kvm_ioapic { spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); struct rtc_status rtc_status; + struct delayed_work eoi_inject; + u32 irq_eoi[IOAPIC_NUM_PINS]; }; #ifdef DEBUG -- 1.7.12.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:39144) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XS09Y-0007Iv-QL for qemu-devel@nongnu.org; Thu, 11 Sep 2014 04:55:01 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XS09T-0008Io-RD for qemu-devel@nongnu.org; Thu, 11 Sep 2014 04:54:56 -0400 Received: from [58.251.49.30] (port=56238 helo=mail.sangfor.com) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XS09T-0008IA-8J for qemu-devel@nongnu.org; Thu, 11 Sep 2014 04:54:51 -0400 Date: Thu, 11 Sep 2014 16:47:04 +0800 From: "Zhang Haoyu" Message-ID: <201409111647023813822@sangfor.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Subject: [Qemu-devel] [PATCH] kvm: ioapic: conditionally delay irq delivery duringeoi broadcast List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: kvm , Jason Wang , Jan Kiszka , "Michael S.Tsirkin" Cc: Paolo Bonzini , qemu-devel Currently, we call ioapic_service() immediately when we find the irq is still active during eoi broadcast. But for real hardware, there's some dealy between the EOI writing and irq delivery (system bus latency?). So we need to emulate this behavior. Otherwise, for a guest who haven't register a proper irq handler , it would stay in the interrupt routine as this irq would be re-injected immediately after guest enables interrupt. This would lead guest can't move forward and may miss the possibility to get proper irq handler registered (one example is windows guest resuming from hibernation). 
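P.S.: the guest-side "bad irq detection" mentioned in the commit message
relies on a similar counting scheme. For comparison, here is a
much-simplified, compilable sketch in the spirit of Linux's
note_interrupt(); the names and structure are illustrative only (the
real logic lives in kernel/irq/spurious.c and handles many more cases),
though the 100000/99900 window matches the kernel's.

#include <stdbool.h>
#include <stdio.h>

#define IRQ_WINDOW	100000	/* irqs per detection window */
#define UNHANDLED_LIMIT	 99900	/* unhandled irqs that trip the check */

struct irq_stats {
	unsigned int count;	/* irqs seen in the current window */
	unsigned int unhandled;	/* irqs that no handler claimed */
	bool disabled;		/* line masked after storm detection */
};

/* Called by the guest after running all handlers for one interrupt. */
static void note_irq(struct irq_stats *st, bool handled)
{
	if (!handled)
		st->unhandled++;
	if (++st->count < IRQ_WINDOW)
		return;
	if (st->unhandled > UNHANDLED_LIMIT) {
		st->disabled = true;	/* "nobody cared": mask the line */
		printf("irq disabled: nobody cared\n");
	}
	st->count = 0;
	st->unhandled = 0;
}

int main(void)
{
	struct irq_stats st = { 0, 0, false };

	/* an unhandled level irq refires until the detector masks it */
	while (!st.disabled)
		note_irq(&st, false);
	return 0;
}

Once the delayed-work path above lets the guest make progress, this
kind of detector gets a chance to run and mask the offending line,
which is why a bad irq is still recognized as quickly as before.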