All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Remove MSI IRQ storms prevention logic
@ 2011-07-15  2:44 Haitao Shan
  0 siblings, 0 replies; only message in thread
From: Haitao Shan @ 2011-07-15  2:44 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 8510 bytes --]

Hi, Keir,

The attached patch removes the unneeded MSI IRQ prevention logic.

The reason is:
1. The logic has negative impact on 10G NIC performance (assigned to
guest) by lowering the interrupt frequency that Xen can handle.
2. Xen already has IRQ rate limit logic, which can also help to
prevent IRQ storms.

Signed-off-by:          Shan Haitao <haitao.shan@intel.com>

diff -r 00d2c5ca26fd xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/arch/x86/hvm/vlapic.c	Thu Jul 14 23:32:38 2011 +0800
@@ -400,8 +400,6 @@ void vlapic_EOI_set(struct vlapic *vlapi

     if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
         vioapic_update_EOI(vlapic_domain(vlapic), vector);
-
-    hvm_dpci_msi_eoi(current->domain, vector);
 }

 int vlapic_ipi(
diff -r 00d2c5ca26fd xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/arch/x86/irq.c	Thu Jul 14 23:32:38 2011 +0800
@@ -790,22 +790,6 @@ static inline void clear_pirq_eoi(struct
         clear_bit(irq, d->arch.pv_domain.pirq_eoi_map);
 }

-static void _irq_guest_eoi(struct irq_desc *desc)
-{
-    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
-    unsigned int i, irq = desc - irq_desc;
-
-    if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
-        return;
-
-    for ( i = 0; i < action->nr_guests; ++i )
-        clear_pirq_eoi(action->guest[i],
-                       domain_irq_to_pirq(action->guest[i], irq));
-
-    desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
-    desc->handler->enable(irq);
-}
-
 static void set_eoi_ready(void *data);

 static void irq_guest_eoi_timer_fn(void *data)
@@ -849,9 +833,6 @@ static void irq_guest_eoi_timer_fn(void
         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
         spin_lock_irq(&desc->lock);
         break;
-    case ACKTYPE_NONE:
-        _irq_guest_eoi(desc);
-        break;
     }

  out:
@@ -863,7 +844,7 @@ static void __do_IRQ_guest(int irq)
     struct irq_desc         *desc = irq_to_desc(irq);
     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
     struct domain      *d;
-    int                 i, sp, already_pending = 0;
+    int                 i, sp;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
     int vector = get_irq_regs()->entry_vector;

@@ -897,45 +878,16 @@ static void __do_IRQ_guest(int irq)
         if ( (action->ack_type != ACKTYPE_NONE) &&
              !test_and_set_bool(pirq->masked) )
             action->in_flight++;
-        if ( hvm_do_IRQ_dpci(d, pirq) )
-        {
-            if ( action->ack_type == ACKTYPE_NONE )
-            {
-                already_pending += !!(desc->status & IRQ_INPROGRESS);
-                desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */
-            }
-        }
-        else if ( send_guest_pirq(d, pirq) &&
-                  (action->ack_type == ACKTYPE_NONE) )
-        {
-            already_pending++;
-        }
+        if ( !hvm_do_IRQ_dpci(d, pirq) )
+            send_guest_pirq(d, pirq);
     }

-    stop_timer(&action->eoi_timer);
-
-    if ( (action->ack_type == ACKTYPE_NONE) &&
-         (already_pending == action->nr_guests) )
+    if ( action->ack_type != ACKTYPE_NONE )
     {
-        desc->handler->disable(irq);
-        desc->status |= IRQ_GUEST_EOI_PENDING;
-        for ( i = 0; i < already_pending; ++i )
-        {
-            d = action->guest[i];
-            set_pirq_eoi(d, domain_irq_to_pirq(d, irq));
-            /*
-             * Could check here whether the guest unmasked the event by now
-             * (or perhaps just re-issue the send_guest_pirq()), and if it
-             * can now accept the event,
-             * - clear all the pirq_eoi bits we already set,
-             * - re-enable the vector, and
-             * - skip the timer setup below.
-             */
-        }
+        stop_timer(&action->eoi_timer);
+        migrate_timer(&action->eoi_timer, smp_processor_id());
+        set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
     }
-
-    migrate_timer(&action->eoi_timer, smp_processor_id());
-    set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
 }

 /*
@@ -1183,13 +1135,6 @@ void desc_guest_eoi(struct irq_desc *des
     action = (irq_guest_action_t *)desc->action;
     irq = desc - irq_desc;

-    if ( action->ack_type == ACKTYPE_NONE )
-    {
-        ASSERT(!pirq->masked);
-        stop_timer(&action->eoi_timer);
-        _irq_guest_eoi(desc);
-    }
-
     if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
          unlikely(--action->in_flight != 0) )
     {
@@ -1468,10 +1413,6 @@ static irq_guest_action_t *__pirq_guest_
             spin_lock_irq(&desc->lock);
         }
         break;
-    case ACKTYPE_NONE:
-        stop_timer(&action->eoi_timer);
-        _irq_guest_eoi(desc);
-        break;
     }

     /*
@@ -1509,7 +1450,7 @@ static irq_guest_action_t *__pirq_guest_
     BUG_ON(!cpus_empty(action->cpu_eoi_map));

     desc->action = NULL;
-    desc->status &= ~(IRQ_GUEST|IRQ_GUEST_EOI_PENDING|IRQ_INPROGRESS);
+    desc->status &= ~(IRQ_GUEST|IRQ_INPROGRESS);
     desc->handler->shutdown(irq);

     /* Caller frees the old guest descriptor block. */
diff -r 00d2c5ca26fd xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/drivers/passthrough/io.c	Thu Jul 14 23:32:38 2011 +0800
@@ -421,58 +421,6 @@ int hvm_do_IRQ_dpci(struct domain *d, st
 }

 #ifdef SUPPORT_MSI_REMAPPING
-/* called with d->event_lock held */
-static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
-{
-    irq_desc_t *desc;
-
-    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
-         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
-    {
-        struct pirq *pirq = dpci_pirq(pirq_dpci);
-
-         BUG_ON(!local_irq_is_enabled());
-         desc = pirq_spin_lock_irq_desc(pirq, NULL);
-         if ( !desc )
-            return;
-
-         desc->status &= ~IRQ_INPROGRESS;
-         desc_guest_eoi(desc, pirq);
-    }
-}
-
-static int _hvm_dpci_msi_eoi(struct domain *d,
-                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
-{
-    int vector = (long)arg;
-
-    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
-         (pirq_dpci->gmsi.gvec == vector) )
-    {
-        int dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
-        int dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
-
-        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
-                               dest_mode) )
-        {
-            __msi_pirq_eoi(pirq_dpci);
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-void hvm_dpci_msi_eoi(struct domain *d, int vector)
-{
-    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
-       return;
-
-    spin_lock(&d->event_lock);
-    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
-    spin_unlock(&d->event_lock);
-}
-
 static int hvm_pci_msi_assert(struct domain *d,
                               struct hvm_pirq_dpci *pirq_dpci)
 {
@@ -510,14 +458,6 @@ static int _hvm_dirq_assist(struct domai
             else
                 hvm_pci_intx_assert(d, device, intx);
             pirq_dpci->pending++;
-
-#ifdef SUPPORT_MSI_REMAPPING
-            if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
-            {
-                /* for translated MSI to INTx interrupt, eoi as early
as possible */
-                __msi_pirq_eoi(pirq_dpci);
-            }
-#endif
         }

         /*
diff -r 00d2c5ca26fd xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/include/asm-x86/hvm/io.h	Thu Jul 14 23:32:38 2011 +0800
@@ -139,6 +139,5 @@ struct hvm_hw_stdvga {
 void stdvga_init(struct domain *d);
 void stdvga_deinit(struct domain *d);

-extern void hvm_dpci_msi_eoi(struct domain *d, int vector);
 #endif /* __ASM_X86_HVM_IO_H__ */

diff -r 00d2c5ca26fd xen/include/xen/irq.h
--- a/xen/include/xen/irq.h	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/include/xen/irq.h	Thu Jul 14 23:32:38 2011 +0800
@@ -25,7 +25,6 @@ struct irqaction {
 #define IRQ_PENDING	4	/* IRQ pending - replay on enable */
 #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
-#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_MOVE_PENDING      64  /* IRQ is migrating to another CPUs */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */

[-- Attachment #2: remove_unneeded_msi_logic.patch --]
[-- Type: application/octet-stream, Size: 8157 bytes --]

diff -r 00d2c5ca26fd xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/arch/x86/hvm/vlapic.c	Thu Jul 14 23:32:38 2011 +0800
@@ -400,8 +400,6 @@ void vlapic_EOI_set(struct vlapic *vlapi
 
     if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
         vioapic_update_EOI(vlapic_domain(vlapic), vector);
-
-    hvm_dpci_msi_eoi(current->domain, vector);
 }
 
 int vlapic_ipi(
diff -r 00d2c5ca26fd xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/arch/x86/irq.c	Thu Jul 14 23:32:38 2011 +0800
@@ -790,22 +790,6 @@ static inline void clear_pirq_eoi(struct
         clear_bit(irq, d->arch.pv_domain.pirq_eoi_map);
 }
 
-static void _irq_guest_eoi(struct irq_desc *desc)
-{
-    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
-    unsigned int i, irq = desc - irq_desc;
-
-    if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
-        return;
-
-    for ( i = 0; i < action->nr_guests; ++i )
-        clear_pirq_eoi(action->guest[i],
-                       domain_irq_to_pirq(action->guest[i], irq));
-
-    desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
-    desc->handler->enable(irq);
-}
-
 static void set_eoi_ready(void *data);
 
 static void irq_guest_eoi_timer_fn(void *data)
@@ -849,9 +833,6 @@ static void irq_guest_eoi_timer_fn(void 
         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
         spin_lock_irq(&desc->lock);
         break;
-    case ACKTYPE_NONE:
-        _irq_guest_eoi(desc);
-        break;
     }
 
  out:
@@ -863,7 +844,7 @@ static void __do_IRQ_guest(int irq)
     struct irq_desc         *desc = irq_to_desc(irq);
     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
     struct domain      *d;
-    int                 i, sp, already_pending = 0;
+    int                 i, sp;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
     int vector = get_irq_regs()->entry_vector;
 
@@ -897,45 +878,16 @@ static void __do_IRQ_guest(int irq)
         if ( (action->ack_type != ACKTYPE_NONE) &&
              !test_and_set_bool(pirq->masked) )
             action->in_flight++;
-        if ( hvm_do_IRQ_dpci(d, pirq) )
-        {
-            if ( action->ack_type == ACKTYPE_NONE )
-            {
-                already_pending += !!(desc->status & IRQ_INPROGRESS);
-                desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */
-            }
-        }
-        else if ( send_guest_pirq(d, pirq) &&
-                  (action->ack_type == ACKTYPE_NONE) )
-        {
-            already_pending++;
-        }
+        if ( !hvm_do_IRQ_dpci(d, pirq) )
+            send_guest_pirq(d, pirq);
     }
 
-    stop_timer(&action->eoi_timer);
-
-    if ( (action->ack_type == ACKTYPE_NONE) &&
-         (already_pending == action->nr_guests) )
+    if ( action->ack_type != ACKTYPE_NONE )
     {
-        desc->handler->disable(irq);
-        desc->status |= IRQ_GUEST_EOI_PENDING;
-        for ( i = 0; i < already_pending; ++i )
-        {
-            d = action->guest[i];
-            set_pirq_eoi(d, domain_irq_to_pirq(d, irq));
-            /*
-             * Could check here whether the guest unmasked the event by now
-             * (or perhaps just re-issue the send_guest_pirq()), and if it
-             * can now accept the event,
-             * - clear all the pirq_eoi bits we already set,
-             * - re-enable the vector, and
-             * - skip the timer setup below.
-             */
-        }
+        stop_timer(&action->eoi_timer);
+        migrate_timer(&action->eoi_timer, smp_processor_id());
+        set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
     }
-
-    migrate_timer(&action->eoi_timer, smp_processor_id());
-    set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
 }
 
 /*
@@ -1183,13 +1135,6 @@ void desc_guest_eoi(struct irq_desc *des
     action = (irq_guest_action_t *)desc->action;
     irq = desc - irq_desc;
 
-    if ( action->ack_type == ACKTYPE_NONE )
-    {
-        ASSERT(!pirq->masked);
-        stop_timer(&action->eoi_timer);
-        _irq_guest_eoi(desc);
-    }
-
     if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
          unlikely(--action->in_flight != 0) )
     {
@@ -1468,10 +1413,6 @@ static irq_guest_action_t *__pirq_guest_
             spin_lock_irq(&desc->lock);
         }
         break;
-    case ACKTYPE_NONE:
-        stop_timer(&action->eoi_timer);
-        _irq_guest_eoi(desc);
-        break;
     }
 
     /*
@@ -1509,7 +1450,7 @@ static irq_guest_action_t *__pirq_guest_
     BUG_ON(!cpus_empty(action->cpu_eoi_map));
 
     desc->action = NULL;
-    desc->status &= ~(IRQ_GUEST|IRQ_GUEST_EOI_PENDING|IRQ_INPROGRESS);
+    desc->status &= ~(IRQ_GUEST|IRQ_INPROGRESS);
     desc->handler->shutdown(irq);
 
     /* Caller frees the old guest descriptor block. */
diff -r 00d2c5ca26fd xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/drivers/passthrough/io.c	Thu Jul 14 23:32:38 2011 +0800
@@ -421,58 +421,6 @@ int hvm_do_IRQ_dpci(struct domain *d, st
 }
 
 #ifdef SUPPORT_MSI_REMAPPING
-/* called with d->event_lock held */
-static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
-{
-    irq_desc_t *desc;
-
-    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
-         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
-    {
-        struct pirq *pirq = dpci_pirq(pirq_dpci);
-
-         BUG_ON(!local_irq_is_enabled());
-         desc = pirq_spin_lock_irq_desc(pirq, NULL);
-         if ( !desc )
-            return;
-
-         desc->status &= ~IRQ_INPROGRESS;
-         desc_guest_eoi(desc, pirq);
-    }
-}
-
-static int _hvm_dpci_msi_eoi(struct domain *d,
-                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
-{
-    int vector = (long)arg;
-
-    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
-         (pirq_dpci->gmsi.gvec == vector) )
-    {
-        int dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
-        int dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
-
-        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
-                               dest_mode) )
-        {
-            __msi_pirq_eoi(pirq_dpci);
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-void hvm_dpci_msi_eoi(struct domain *d, int vector)
-{
-    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
-       return;
-
-    spin_lock(&d->event_lock);
-    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
-    spin_unlock(&d->event_lock);
-}
-
 static int hvm_pci_msi_assert(struct domain *d,
                               struct hvm_pirq_dpci *pirq_dpci)
 {
@@ -510,14 +458,6 @@ static int _hvm_dirq_assist(struct domai
             else
                 hvm_pci_intx_assert(d, device, intx);
             pirq_dpci->pending++;
-
-#ifdef SUPPORT_MSI_REMAPPING
-            if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
-            {
-                /* for translated MSI to INTx interrupt, eoi as early as possible */
-                __msi_pirq_eoi(pirq_dpci);
-            }
-#endif
         }
 
         /*
diff -r 00d2c5ca26fd xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/include/asm-x86/hvm/io.h	Thu Jul 14 23:32:38 2011 +0800
@@ -139,6 +139,5 @@ struct hvm_hw_stdvga {
 void stdvga_init(struct domain *d);
 void stdvga_deinit(struct domain *d);
 
-extern void hvm_dpci_msi_eoi(struct domain *d, int vector);
 #endif /* __ASM_X86_HVM_IO_H__ */
 
diff -r 00d2c5ca26fd xen/include/xen/irq.h
--- a/xen/include/xen/irq.h	Fri Jul 08 18:35:24 2011 +0100
+++ b/xen/include/xen/irq.h	Thu Jul 14 23:32:38 2011 +0800
@@ -25,7 +25,6 @@ struct irqaction {
 #define IRQ_PENDING	4	/* IRQ pending - replay on enable */
 #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
-#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_MOVE_PENDING      64  /* IRQ is migrating to another CPUs */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-07-15  2:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-15  2:44 [PATCH] Remove MSI IRQ storms prevention logic Haitao Shan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.