From mboxrd@z Thu Jan 1 00:00:00 1970 From: Paolo Bonzini Subject: Re: [PATCH] kvm: ioapic: conditionally delay irq delivery duringeoi broadcast Date: Fri, 12 Sep 2014 18:01:23 +0200 Message-ID: <541318D3.8010605@redhat.com> References: <201409111647023813822@sangfor.com> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Cc: qemu-devel To: Zhang Haoyu , kvm , Jason Wang , Jan Kiszka , "Michael S.Tsirkin" Return-path: Received: from mx1.redhat.com ([209.132.183.28]:11300 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751822AbaILQBi (ORCPT ); Fri, 12 Sep 2014 12:01:38 -0400 In-Reply-To: <201409111647023813822@sangfor.com> Sender: kvm-owner@vger.kernel.org List-ID: Il 11/09/2014 10:47, Zhang Haoyu ha scritto: > Currently, we call ioapic_service() immediately when we find the irq is still > active during eoi broadcast. But for real hardware, there's some dealy between > the EOI writing and irq delivery (system bus latency?). So we need to emulate > this behavior. Otherwise, for a guest who haven't register a proper irq handler > , it would stay in the interrupt routine as this irq would be re-injected > immediately after guest enables interrupt. This would lead guest can't move > forward and may miss the possibility to get proper irq handler registered (one > example is windows guest resuming from hibernation). > > As there's no way to differ the unhandled irq from new raised ones, this patch > solve this problems by scheduling a delayed work when the count of irq injected > during eoi broadcast exceeds a threshold value. After this patch, the guest can > move a little forward when there's no suitable irq handler in case it may > register one very soon and for guest who has a bad irq detection routine ( such > as note_interrupt() in linux ), this bad irq would be recognized soon as in the > past. > > Cc: Michael S. Tsirkin > Signed-off-by: Jason Wang > Signed-off-by: Zhang Haoyu > --- > include/trace/events/kvm.h | 20 +++++++++++++++++++ > virt/kvm/ioapic.c | 50 ++++++++++++++++++++++++++++++++++++++++++++-- > virt/kvm/ioapic.h | 6 ++++++ > 3 files changed, 74 insertions(+), 2 deletions(-) > > diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h > index 908925a..ab679c3 100644 > --- a/include/trace/events/kvm.h > +++ b/include/trace/events/kvm.h > @@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, > __entry->coalesced ? " (coalesced)" : "") > ); > > +TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, > + TP_PROTO(__u64 e), > + TP_ARGS(e), > + > + TP_STRUCT__entry( > + __field( __u64, e ) > + ), > + > + TP_fast_assign( > + __entry->e = e; > + ), > + > + TP_printk("dst %x vec=%u (%s|%s|%s%s)", > + (u8)(__entry->e >> 56), (u8)__entry->e, > + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), > + (__entry->e & (1<<11)) ? "logical" : "physical", > + (__entry->e & (1<<15)) ? "level" : "edge", > + (__entry->e & (1<<16)) ? "|masked" : "") > +); > + > TRACE_EVENT(kvm_msi_set_irq, > TP_PROTO(__u64 address, __u64 data), > TP_ARGS(address, data), > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c > index e8ce34c..8e1dc67 100644 > --- a/virt/kvm/ioapic.c > +++ b/virt/kvm/ioapic.c > @@ -405,6 +405,24 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) > spin_unlock(&ioapic->lock); > } > > +static void kvm_ioapic_eoi_inject_work(struct work_struct *work) > +{ > + int i; > + struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, > + eoi_inject.work); > + spin_lock(&ioapic->lock); > + for (i = 0; i < IOAPIC_NUM_PINS; i++) { > + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; > + > + if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) > + continue; > + > + if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) > + ioapic_service(ioapic, i, false); > + } > + spin_unlock(&ioapic->lock); > +} > + > static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, > struct kvm_ioapic *ioapic, int vector, int trigger_mode) > { > @@ -435,8 +453,32 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, > > ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); > ent->fields.remote_irr = 0; > - if (ioapic->irr & (1 << i)) > - ioapic_service(ioapic, i, false); > + if (!ent->fields.mask && (ioapic->irr & (1 << i))) { > + ++ioapic->irq_eoi[i]; > + if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { > + /* > + * Real hardware does not deliver the irq so > + * immediately during eoi broadcast, so we need > + * to emulate this behavior. Otherwise, for > + * guests who has not registered handler of a > + * level irq, this irq would be injected > + * immediately after guest enables interrupt > + * (which happens usually at the end of the > + * common interrupt routine). This would lead > + * guest can't move forward and may miss the > + * possibility to get proper irq handler > + * registered. So we need to give some breath to > + * guest. > + */ > + schedule_delayed_work(&ioapic->eoi_inject, 1); > + ioapic->irq_eoi[i] = 0; > + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); > + } else { > + ioapic_service(ioapic, i, false); > + } > + } else { > + ioapic->irq_eoi[i] = 0; > + } > } > } > > @@ -565,12 +607,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) > { > int i; > > + cancel_delayed_work_sync(&ioapic->eoi_inject); > for (i = 0; i < IOAPIC_NUM_PINS; i++) > ioapic->redirtbl[i].fields.mask = 1; > ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; > ioapic->ioregsel = 0; > ioapic->irr = 0; > ioapic->id = 0; > + memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); > rtc_irq_eoi_tracking_reset(ioapic); > update_handled_vectors(ioapic); > } > @@ -589,6 +633,7 @@ int kvm_ioapic_init(struct kvm *kvm) > if (!ioapic) > return -ENOMEM; > spin_lock_init(&ioapic->lock); > + INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); > kvm->arch.vioapic = ioapic; > kvm_ioapic_reset(ioapic); > kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); > @@ -609,6 +654,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) > { > struct kvm_ioapic *ioapic = kvm->arch.vioapic; > > + cancel_delayed_work_sync(&ioapic->eoi_inject); > if (ioapic) { > kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); > kvm->arch.vioapic = NULL; > diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h > index 90d43e9..260ac0a 100644 > --- a/virt/kvm/ioapic.h > +++ b/virt/kvm/ioapic.h > @@ -34,6 +34,10 @@ struct kvm_vcpu; > #define IOAPIC_INIT 0x5 > #define IOAPIC_EXTINT 0x7 > > +/* max successive count of the same irq during ioapic eoi broadcast, if this limitation reached, > + which can be considered as interrupt storm happened */ > +#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 > + > #ifdef CONFIG_X86 > #define RTC_GSI 8 > #else > @@ -59,6 +63,8 @@ struct kvm_ioapic { > spinlock_t lock; > DECLARE_BITMAP(handled_vectors, 256); > struct rtc_status rtc_status; > + struct delayed_work eoi_inject; > + u32 irq_eoi[IOAPIC_NUM_PINS]; > }; > > #ifdef DEBUG > Thanks, I'll apply the patch for 3.18. I've edited a bit the comments for English. Paolo From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40177) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XSTI0-0007WM-B1 for qemu-devel@nongnu.org; Fri, 12 Sep 2014 12:01:41 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XSTHv-0002MC-5L for qemu-devel@nongnu.org; Fri, 12 Sep 2014 12:01:36 -0400 Received: from mx1.redhat.com ([209.132.183.28]:49950) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XSTHu-0002Lo-L6 for qemu-devel@nongnu.org; Fri, 12 Sep 2014 12:01:31 -0400 Message-ID: <541318D3.8010605@redhat.com> Date: Fri, 12 Sep 2014 18:01:23 +0200 From: Paolo Bonzini MIME-Version: 1.0 References: <201409111647023813822@sangfor.com> In-Reply-To: <201409111647023813822@sangfor.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH] kvm: ioapic: conditionally delay irq delivery duringeoi broadcast List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Zhang Haoyu , kvm , Jason Wang , Jan Kiszka , "Michael S.Tsirkin" Cc: qemu-devel Il 11/09/2014 10:47, Zhang Haoyu ha scritto: > Currently, we call ioapic_service() immediately when we find the irq is still > active during eoi broadcast. But for real hardware, there's some dealy between > the EOI writing and irq delivery (system bus latency?). So we need to emulate > this behavior. Otherwise, for a guest who haven't register a proper irq handler > , it would stay in the interrupt routine as this irq would be re-injected > immediately after guest enables interrupt. This would lead guest can't move > forward and may miss the possibility to get proper irq handler registered (one > example is windows guest resuming from hibernation). > > As there's no way to differ the unhandled irq from new raised ones, this patch > solve this problems by scheduling a delayed work when the count of irq injected > during eoi broadcast exceeds a threshold value. After this patch, the guest can > move a little forward when there's no suitable irq handler in case it may > register one very soon and for guest who has a bad irq detection routine ( such > as note_interrupt() in linux ), this bad irq would be recognized soon as in the > past. > > Cc: Michael S. Tsirkin > Signed-off-by: Jason Wang > Signed-off-by: Zhang Haoyu > --- > include/trace/events/kvm.h | 20 +++++++++++++++++++ > virt/kvm/ioapic.c | 50 ++++++++++++++++++++++++++++++++++++++++++++-- > virt/kvm/ioapic.h | 6 ++++++ > 3 files changed, 74 insertions(+), 2 deletions(-) > > diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h > index 908925a..ab679c3 100644 > --- a/include/trace/events/kvm.h > +++ b/include/trace/events/kvm.h > @@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, > __entry->coalesced ? " (coalesced)" : "") > ); > > +TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, > + TP_PROTO(__u64 e), > + TP_ARGS(e), > + > + TP_STRUCT__entry( > + __field( __u64, e ) > + ), > + > + TP_fast_assign( > + __entry->e = e; > + ), > + > + TP_printk("dst %x vec=%u (%s|%s|%s%s)", > + (u8)(__entry->e >> 56), (u8)__entry->e, > + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), > + (__entry->e & (1<<11)) ? "logical" : "physical", > + (__entry->e & (1<<15)) ? "level" : "edge", > + (__entry->e & (1<<16)) ? "|masked" : "") > +); > + > TRACE_EVENT(kvm_msi_set_irq, > TP_PROTO(__u64 address, __u64 data), > TP_ARGS(address, data), > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c > index e8ce34c..8e1dc67 100644 > --- a/virt/kvm/ioapic.c > +++ b/virt/kvm/ioapic.c > @@ -405,6 +405,24 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) > spin_unlock(&ioapic->lock); > } > > +static void kvm_ioapic_eoi_inject_work(struct work_struct *work) > +{ > + int i; > + struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, > + eoi_inject.work); > + spin_lock(&ioapic->lock); > + for (i = 0; i < IOAPIC_NUM_PINS; i++) { > + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; > + > + if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) > + continue; > + > + if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) > + ioapic_service(ioapic, i, false); > + } > + spin_unlock(&ioapic->lock); > +} > + > static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, > struct kvm_ioapic *ioapic, int vector, int trigger_mode) > { > @@ -435,8 +453,32 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, > > ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); > ent->fields.remote_irr = 0; > - if (ioapic->irr & (1 << i)) > - ioapic_service(ioapic, i, false); > + if (!ent->fields.mask && (ioapic->irr & (1 << i))) { > + ++ioapic->irq_eoi[i]; > + if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { > + /* > + * Real hardware does not deliver the irq so > + * immediately during eoi broadcast, so we need > + * to emulate this behavior. Otherwise, for > + * guests who has not registered handler of a > + * level irq, this irq would be injected > + * immediately after guest enables interrupt > + * (which happens usually at the end of the > + * common interrupt routine). This would lead > + * guest can't move forward and may miss the > + * possibility to get proper irq handler > + * registered. So we need to give some breath to > + * guest. > + */ > + schedule_delayed_work(&ioapic->eoi_inject, 1); > + ioapic->irq_eoi[i] = 0; > + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); > + } else { > + ioapic_service(ioapic, i, false); > + } > + } else { > + ioapic->irq_eoi[i] = 0; > + } > } > } > > @@ -565,12 +607,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) > { > int i; > > + cancel_delayed_work_sync(&ioapic->eoi_inject); > for (i = 0; i < IOAPIC_NUM_PINS; i++) > ioapic->redirtbl[i].fields.mask = 1; > ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; > ioapic->ioregsel = 0; > ioapic->irr = 0; > ioapic->id = 0; > + memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); > rtc_irq_eoi_tracking_reset(ioapic); > update_handled_vectors(ioapic); > } > @@ -589,6 +633,7 @@ int kvm_ioapic_init(struct kvm *kvm) > if (!ioapic) > return -ENOMEM; > spin_lock_init(&ioapic->lock); > + INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); > kvm->arch.vioapic = ioapic; > kvm_ioapic_reset(ioapic); > kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); > @@ -609,6 +654,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) > { > struct kvm_ioapic *ioapic = kvm->arch.vioapic; > > + cancel_delayed_work_sync(&ioapic->eoi_inject); > if (ioapic) { > kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); > kvm->arch.vioapic = NULL; > diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h > index 90d43e9..260ac0a 100644 > --- a/virt/kvm/ioapic.h > +++ b/virt/kvm/ioapic.h > @@ -34,6 +34,10 @@ struct kvm_vcpu; > #define IOAPIC_INIT 0x5 > #define IOAPIC_EXTINT 0x7 > > +/* max successive count of the same irq during ioapic eoi broadcast, if this limitation reached, > + which can be considered as interrupt storm happened */ > +#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 > + > #ifdef CONFIG_X86 > #define RTC_GSI 8 > #else > @@ -59,6 +63,8 @@ struct kvm_ioapic { > spinlock_t lock; > DECLARE_BITMAP(handled_vectors, 256); > struct rtc_status rtc_status; > + struct delayed_work eoi_inject; > + u32 irq_eoi[IOAPIC_NUM_PINS]; > }; > > #ifdef DEBUG > Thanks, I'll apply the patch for 3.18. I've edited a bit the comments for English. Paolo