linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* PATCH: 2.6.10 - Misrouted IRQ recovery for review
@ 2004-12-28 15:58 Alan Cox
  2004-12-28 17:11 ` Arjan van de Ven
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Alan Cox @ 2004-12-28 15:58 UTC (permalink / raw)
  To: mingo, Linux Kernel Mailing List

Ported to the new kernel/irq code.

diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.10/kernel/irq/handle.c linux-2.6.10/kernel/irq/handle.c
--- linux.vanilla-2.6.10/kernel/irq/handle.c	2004-12-25 21:15:46.000000000 +0000
+++ linux-2.6.10/kernel/irq/handle.c	2004-12-26 23:20:04.000000000 +0000
@@ -130,7 +130,7 @@
 		desc->handler->ack(irq);
 		action_ret = handle_IRQ_event(irq, regs, desc->action);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, regs);
 		desc->handler->end(irq);
 		return 1;
 	}
@@ -184,7 +184,7 @@
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, regs);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.10/kernel/irq/spurious.c linux-2.6.10/kernel/irq/spurious.c
--- linux.vanilla-2.6.10/kernel/irq/spurious.c	2004-12-25 21:15:46.000000000 +0000
+++ linux-2.6.10/kernel/irq/spurious.c	2004-12-26 23:26:55.000000000 +0000
@@ -11,6 +11,77 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 
+static int irqfixup;
+
+/*
+ *	Recovery handler for misrouted interrupts. 
+ */
+
+static int misrouted_irq(int irq, struct pt_regs *regs)
+{
+	int i;
+	irq_desc_t *desc;
+	int ok = 0;
+	int work = 0;	/* Did we do work for a real IRQ */
+	for(i = 1; i < NR_IRQS; i++)
+	{
+		struct irqaction *action;
+		if(i == irq)	/* Already tried */
+			continue;
+		desc = &irq_desc[i];
+		spin_lock(&desc->lock);
+		action = desc->action;
+		/* Already running on another processor */
+		if(desc->status & IRQ_INPROGRESS)
+		{
+			/* Already running: If it is shared get the other
+			   CPU to go looking for our mystery interrupt too */
+			if(desc->action && (desc->action->flags & SA_SHIRQ))
+				desc->status |= IRQ_PENDING;
+			spin_unlock(&desc->lock);
+			continue;
+		}
+		/* Honour the normal IRQ locking */
+		desc->status |= IRQ_INPROGRESS;
+		spin_unlock(&desc->lock);
+		while(action)
+		{
+			/* Only shared IRQ handlers are safe to call */
+			if(action->flags & SA_SHIRQ)
+			{
+				if(action->handler(i, action->dev_id, regs) == IRQ_HANDLED)
+					ok = 1;
+			}
+			action = action->next;
+		}
+		local_irq_disable();
+		/* Now clean up the flags */
+		spin_lock(&desc->lock);
+		action = desc->action;
+
+		/* While we were looking for a fixup someone queued a real
+		   IRQ clashing with our walk */
+
+		while((desc->status & IRQ_PENDING) && action)
+		{
+			/* Perform real IRQ processing for the IRQ we deferred */
+			work = 1;
+			spin_unlock(&desc->lock);
+			handle_IRQ_event(i, regs, action);
+			spin_lock(&desc->lock);
+			desc->status &= ~IRQ_PENDING;
+		}
+		desc->status &= ~IRQ_INPROGRESS;
+		/* If we did actual work for the real IRQ line we must
+		   let the IRQ controller clean up too */
+		if(work)
+			desc->handler->end(i);
+		spin_unlock(&desc->lock);
+	}
+	/* So the caller can adjust the irq error counts */
+	return ok;
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -31,7 +102,7 @@
 		printk(KERN_ERR "irq event %d: bogus return value %x\n",
 				irq, action_ret);
 	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+		printk(KERN_ERR "irq %d: nobody cared (try booting with the \"irqpoll\" option.\n", irq);
 	}
 	dump_stack();
 	printk(KERN_ERR "handlers:\n");
@@ -55,7 +126,7 @@
 	}
 }
 
-void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, struct pt_regs *regs)
 {
 	if (action_ret != IRQ_HANDLED) {
 		desc->irqs_unhandled++;
@@ -63,6 +134,15 @@
 			report_bad_irq(irq, desc, action_ret);
 	}
 
+	if(unlikely(irqfixup)) { /* Don't punish working computers */
+		if((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
+			int ok;
+			ok = misrouted_irq(irq, regs);
+			if(action_ret == IRQ_NONE)
+				desc->irqs_unhandled -= ok;
+		}
+	}
+
 	desc->irq_count++;
 	if (desc->irq_count < 100000)
 		return;
@@ -94,3 +174,22 @@
 
 __setup("noirqdebug", noirqdebug_setup);
 
+static int __init irqfixup_setup(char *str)
+{
+	irqfixup = 1;
+	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+	printk(KERN_WARNING "This may impact system performance.\n");
+	return 1;
+}
+
+__setup("irqfixup", irqfixup_setup);
+
+static int __init irqpoll_setup(char *str)
+{
+	irqfixup = 2;
+	printk(KERN_WARNING "Misrouted IRQ fixup and polling support enabled.\n");
+	printk(KERN_WARNING "This may significantly impact system performance.\n");
+	return 1;
+}
+
+__setup("irqpoll", irqpoll_setup);


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 17:11 ` Arjan van de Ven
@ 2004-12-28 16:37   ` Alan Cox
  2004-12-28 17:28   ` Dmitry Torokhov
  1 sibling, 0 replies; 14+ messages in thread
From: Alan Cox @ 2004-12-28 16:37 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: mingo, Linux Kernel Mailing List

> one question; I see you start passing a struct pt_regs around all over
> the place; does *anything* actually use that animal, or should we
> consider just passing a NULL .....
> (and eventually in 2.7 remove the parameter entirely from irq handlers?)

On x86-32 at least it is used because of the IRQ 13 handling for 386
systems on a maths FPU trap.

Alan


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 15:58 PATCH: 2.6.10 - Misrouted IRQ recovery for review Alan Cox
@ 2004-12-28 17:11 ` Arjan van de Ven
  2004-12-28 16:37   ` Alan Cox
  2004-12-28 17:28   ` Dmitry Torokhov
  2004-12-29 13:57 ` Ingo Molnar
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 14+ messages in thread
From: Arjan van de Ven @ 2004-12-28 17:11 UTC (permalink / raw)
  To: Alan Cox; +Cc: mingo, Linux Kernel Mailing List

On Tue, 2004-12-28 at 15:58 +0000, Alan Cox wrote:
> Ported to the new kernel/irq code.


one question; I see you start passing a struct pt_regs around all over
the place; does *anything* actually use that animal, or should we
consider just passing a NULL .....
(and eventually in 2.7 remove the parameter entirely from irq handlers?)


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 17:11 ` Arjan van de Ven
  2004-12-28 16:37   ` Alan Cox
@ 2004-12-28 17:28   ` Dmitry Torokhov
  2004-12-28 18:25     ` David S. Miller
  1 sibling, 1 reply; 14+ messages in thread
From: Dmitry Torokhov @ 2004-12-28 17:28 UTC (permalink / raw)
  To: linux-kernel; +Cc: Arjan van de Ven, Alan Cox, mingo

On Tuesday 28 December 2004 12:11 pm, Arjan van de Ven wrote:
> On Tue, 2004-12-28 at 15:58 +0000, Alan Cox wrote:
> > Ported to the new kernel/irq code.
> 
> 
> one question; I see you start passing a struct pt_regs around all over
> the place; does *anything* actually use that animal, or should we
> consider just passing a NULL .....
> (and eventually in 2.7 remove the parameter entirely from irq handlers?)
> 

From what I saw the only thing that presently uses pt_regs is SysRq
handler to print the call trace and if we slightly change the semantics
(instead of printing the trace immediately raise a flag and when next
interrupt arrives check it in do_IRQ and print the trace from there -
I even had some patches) we could drop pt_regs. I would very much like
to do so at least for input drivers.

-- 
Dmitry

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 17:28   ` Dmitry Torokhov
@ 2004-12-28 18:25     ` David S. Miller
  2004-12-28 18:50       ` Dmitry Torokhov
  0 siblings, 1 reply; 14+ messages in thread
From: David S. Miller @ 2004-12-28 18:25 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: linux-kernel, arjan, alan, mingo

On Tue, 28 Dec 2004 12:28:27 -0500
Dmitry Torokhov <dtor_core@ameritech.net> wrote:

> From what I saw the only thing that presently uses pt_regs is SysRq
> handler to print the call trace and if we slightly change the semantics
> (instead of printing the trace immediately raise a flag and when next
> interrupt arrives check it in do_IRQ and print the trace from there -
> I even had some patches) we could drop pt_regs. I would very much like
> to do so at least for input drivers.

Are you going to take a snapshot at IRQ time?  If not, then I'm
pretty much against this change.  When I do a sysrq regs dump,
I want the exact pt_regs values at interrupt time, not some
random value later in time.

Perhaps instead you could raise a flag in the input driver, and
at the top-level interrupt dispatch arch code do the register
sysrq dump.  This gives the same semantics as present, and also
allows you what you want for the input drivers.

But, even with this, there is the x86 interrupt handler Alan
mentioned which wants the pt_regs too.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 18:25     ` David S. Miller
@ 2004-12-28 18:50       ` Dmitry Torokhov
  2004-12-28 18:53         ` David S. Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Dmitry Torokhov @ 2004-12-28 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: David S. Miller, arjan, alan, mingo

On Tuesday 28 December 2004 01:25 pm, David S. Miller wrote:
> On Tue, 28 Dec 2004 12:28:27 -0500
> Dmitry Torokhov <dtor_core@ameritech.net> wrote:
> 
> > From what I saw the only thing that presently uses pt_regs is SysRq
> > handler to print the call trace and if we slightly change the semantics
> > (instead of printing the trace immediately raise a flag and when next
> > interrupt arrives check it in do_IRQ and print the trace from there -
> > I even had some patches) we could drop pt_regs. I would very much like
> > to do so at least for input drivers.
> 
> Are you going to take a snapshot at IRQ time?  If not, then I'm
> pretty much against this change.  When I do a sysrq regs dump,
> I want the exact pt_regs values at interrupt time, not some
> random value later in time.
> 
> Perhaps instead you could raise a flag in the input driver, and
> at the top-level interrupt dispatch arch code do the register
> sysrq dump.  This gives the same semantics as present, and also
> allows you what you want for the input drivers.
> 

Please look at the patch below (handful of arches only and against
some old tree, but you'll see what I wanted to do). What I meant
by changing the semantics is that reporting is delayed by 1 interrupt.

> But, even with this, there is the x86 interrupt handler Alan
> mentioned which wants the pt_regs too.

This is for only one IRQ handler I believe which I think we can
do special-case for. Is it for math-emulation only?

--  
Dmitry

===== arch/alpha/kernel/irq.c 1.29 vs edited =====
--- 1.29/arch/alpha/kernel/irq.c	Thu Apr 22 03:40:34 2004
+++ edited/arch/alpha/kernel/irq.c	Wed May 12 23:59:19 2004
@@ -25,6 +25,7 @@
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/sysrq.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -679,6 +680,8 @@
 	spin_unlock(&desc->lock);
 
 	irq_exit();
+
+	sysrq_irq_show_registers(regs);
 }
 
 /*
===== arch/i386/kernel/irq.c 1.52 vs edited =====
--- 1.52/arch/i386/kernel/irq.c	Mon Apr 12 12:54:45 2004
+++ edited/arch/i386/kernel/irq.c	Wed May 12 23:30:51 2004
@@ -34,6 +34,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
+#include <linux/sysrq.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -569,6 +570,8 @@
 	spin_unlock(&desc->lock);
 
 	irq_exit();
+
+	sysrq_irq_show_registers(&regs);
 
 	return 1;
 }
===== arch/ia64/kernel/irq.c 1.37 vs edited =====
--- 1.37/arch/ia64/kernel/irq.c	Fri Feb 27 20:13:48 2004
+++ edited/arch/ia64/kernel/irq.c	Wed May 12 23:58:07 2004
@@ -35,6 +35,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
+#include <linux/sysrq.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -524,6 +525,9 @@
 		desc->handler->end(irq);
 		spin_unlock(&desc->lock);
 	}
+
+	sysrq_irq_show_registers(regs);
+
 	return 1;
 }
 
===== arch/ppc/kernel/irq.c 1.36 vs edited =====
--- 1.36/arch/ppc/kernel/irq.c	Wed Feb 18 22:42:58 2004
+++ edited/arch/ppc/kernel/irq.c	Thu May 13 00:04:55 2004
@@ -46,6 +46,7 @@
 #include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/cpumask.h>
+#include <linux/sysrq.h>
 
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
@@ -531,6 +532,7 @@
 		/* That's not SMP safe ... but who cares ? */
 		ppc_spurious_interrupts++;
         irq_exit();
+	sysrq_irq_show_registers(regs);
 }
 
 unsigned long probe_irq_on (void)
===== arch/ppc64/kernel/irq.c 1.52 vs edited =====
--- 1.52/arch/ppc64/kernel/irq.c	Mon Apr 12 12:54:06 2004
+++ edited/arch/ppc64/kernel/irq.c	Thu May 13 00:06:17 2004
@@ -41,6 +41,7 @@
 #include <linux/proc_fs.h>
 #include <linux/random.h>
 #include <linux/kallsyms.h>
+#include <linux/sysrq.h>
 
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
@@ -617,6 +618,8 @@
 		timer_interrupt(regs);
 	}
 
+	sysrq_irq_show_registers(regs);
+
 	return 1; /* lets ret_from_int know we can do checks */
 }
 
@@ -645,6 +648,8 @@
 		ppc_spurious_interrupts++;
 
 	irq_exit();
+
+	sysrq_irq_show_registers(regs);
 
 	return 1; /* lets ret_from_int know we can do checks */
 }
===== arch/sparc/kernel/irq.c 1.28 vs edited =====
--- 1.28/arch/sparc/kernel/irq.c	Sun Feb 22 17:34:53 2004
+++ edited/arch/sparc/kernel/irq.c	Thu May 13 00:07:40 2004
@@ -30,6 +30,7 @@
 #include <linux/threads.h>
 #include <linux/spinlock.h>
 #include <linux/seq_file.h>
+#include <linux/sysrq.h>
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -341,6 +342,7 @@
 	} while (action);
 	enable_pil_irq(irq);
 	irq_exit();
+	sysrq_irq_show_registers(regs);
 }
 
 #ifdef CONFIG_BLK_DEV_FD
===== arch/sparc64/kernel/irq.c 1.40 vs edited =====
--- 1.40/arch/sparc64/kernel/irq.c	Tue Feb 24 22:04:19 2004
+++ edited/arch/sparc64/kernel/irq.c	Thu May 13 00:14:43 2004
@@ -21,6 +21,7 @@
 #include <linux/delay.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/sysrq.h>
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -822,6 +823,7 @@
 		bp->flags &= ~IBF_INPROGRESS;
 	}
 	irq_exit();
+	sysrq_irq_show_registers(regs);
 }
 
 #ifdef CONFIG_BLK_DEV_FD
===== arch/x86_64/kernel/irq.c 1.22 vs edited =====
--- 1.22/arch/x86_64/kernel/irq.c	Wed Feb 18 22:42:58 2004
+++ edited/arch/x86_64/kernel/irq.c	Thu May 13 00:02:07 2004
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/sysrq.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -405,6 +406,9 @@
 	spin_unlock(&desc->lock);
 
 	irq_exit();
+
+	sysrq_irq_show_registers(regs);
+
 	return 1;
 }
 
===== drivers/char/sysrq.c 1.29 vs edited =====
--- 1.29/drivers/char/sysrq.c	Mon Jan 19 18:38:11 2004
+++ edited/drivers/char/sysrq.c	Wed May 12 23:39:52 2004
@@ -135,12 +135,33 @@
 
 
 /* SHOW SYSRQ HANDLERS BLOCK */
+unsigned int sysrq_register_dump_requested;
+static spinlock_t show_registers_lock = SPIN_LOCK_UNLOCKED;
+
+void __sysrq_irq_show_registers(struct pt_regs *pt_regs)
+{
+	unsigned long flags;
+	int doit = 0;
+
+	spin_lock_irqsave(&show_registers_lock, flags);
+	if (sysrq_register_dump_requested) {
+		sysrq_register_dump_requested--;
+		doit = 1;
+	}
+	spin_unlock_irqrestore(&show_registers_lock, flags);
+
+	if (doit)
+		show_regs(pt_regs);
+}
 
 static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty) 
 {
-	if (pt_regs)
-		show_regs(pt_regs);
+	unsigned long flags;
+
+	spin_lock_irqsave(&show_registers_lock, flags);
+	sysrq_register_dump_requested++;
+	spin_unlock_irqrestore(&show_registers_lock, flags);
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
===== include/linux/sysrq.h 1.5 vs edited =====
--- 1.5/include/linux/sysrq.h	Wed May  7 23:18:01 2003
+++ edited/include/linux/sysrq.h	Thu May 13 00:15:32 2004
@@ -31,13 +31,26 @@
 
 void handle_sysrq(int, struct pt_regs *, struct tty_struct *);
 
-/* 
+/*
  * Nonlocking version of handle sysrq, used by sysrq handlers that need to
  * call sysrq handlers
  */
 
 void __handle_sysrq_nolock(int, struct pt_regs *, struct tty_struct *);
 
+
+/*
+ * Check whether register dump has been requested and print it
+ */
+extern unsigned int sysrq_register_dump_requested;
+void __sysrq_irq_show_registers(struct pt_regs *);
+static inline void sysrq_irq_show_registers(struct pt_regs *pt_regs)
+{
+	if (unlikely(sysrq_register_dump_requested != 0))
+		__sysrq_irq_show_registers(pt_regs);
+}
+
+
 /*
  * Sysrq registration manipulation functions
  */
@@ -70,7 +83,7 @@
 	__sysrq_unlock_table();
 	return retval;
 }
-	
+
 static inline int register_sysrq_key(int key, struct sysrq_key_op *op_p)
 {
 	return __sysrq_swap_key_ops(key, op_p, NULL);
@@ -90,5 +103,9 @@
 
 #define register_sysrq_key(ig,nore) __reterr()
 #define unregister_sysrq_key(ig,nore) __reterr()
+
+static inline void sysrq_irq_show_registers(struct pt_regs *pt_regs)
+{
+}
 
 #endif

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 18:50       ` Dmitry Torokhov
@ 2004-12-28 18:53         ` David S. Miller
  2004-12-28 19:21           ` Dmitry Torokhov
  0 siblings, 1 reply; 14+ messages in thread
From: David S. Miller @ 2004-12-28 18:53 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: linux-kernel, arjan, alan, mingo

On Tue, 28 Dec 2004 13:50:40 -0500
Dmitry Torokhov <dtor_core@ameritech.net> wrote:

> Please look at the patch below (handful of arches only and against
> some old tree, but you'll see what I wanted to do). What I meant
> by changing the semantics is that reporting is delayed by 1 interrupt.

This looks exactly like what I was looking for.  I think I misunderstood
your original description, which is why it is always best to communicate
ideas using patches :)

My misunderstanding was that I thought that your flag would work
like this:

1) input interrupt occurs, flag is set
2) IRQ handling completes
3) some new IRQ arrives, and this is when we test
   the flag for dumping sysrq regs

That, fortunately, is not what your patch is doing.

> This is for only one IRQ handler I believe which I think we can
> do special-case for. Is it for math-emulation only?

I rather believe it is for vm86 IRQ handling.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 18:53         ` David S. Miller
@ 2004-12-28 19:21           ` Dmitry Torokhov
  2004-12-28 19:26             ` David S. Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Dmitry Torokhov @ 2004-12-28 19:21 UTC (permalink / raw)
  To: David S. Miller; +Cc: linux-kernel, arjan, alan, mingo

On Tuesday 28 December 2004 01:53 pm, David S. Miller wrote:
> On Tue, 28 Dec 2004 13:50:40 -0500
> Dmitry Torokhov <dtor_core@ameritech.net> wrote:
> 
> > Please look at the patch below (handful of arches only and against
> > some old tree, but you'll see what I wanted to do). What I meant
> > by changing the semantics is that reporting is delayed by 1 interrupt.
> 
> This looks exactly like what I was looking for.  I think I misunderstood
> your original description, which is why it is always best to communicate
> ideas using patches :)
> 
> My misunderstanding was that I thought that your flag would work
> like this:
> 
> 1) input interrupt occurs, flag is set
> 2) IRQ handling completes
> 3) some new IRQ arrives, and this is when we test
>    the flag for dumping sysrq regs
> 
> That, fortunately, is not what your patch is doing.

Well, it kind of does... I mean if register dump is somehow requested 
from outside of interrupt context then you'll get dump of the next hard
IRQ. The same goes for softirqs I guess.
  
-- 
Dmitry

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 19:21           ` Dmitry Torokhov
@ 2004-12-28 19:26             ` David S. Miller
  0 siblings, 0 replies; 14+ messages in thread
From: David S. Miller @ 2004-12-28 19:26 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: linux-kernel, arjan, alan, mingo

On Tue, 28 Dec 2004 14:21:49 -0500
Dmitry Torokhov <dtor_core@ameritech.net> wrote:

> Well, it kind of does... I mean if register dump is somehow requested 
> from outside of interrupt context then you'll get dump of the next hard
> IRQ. The same goes for softirqs I guess.

Oh yes, that's right.  I remember one of the USB host controller
driver authors wanting to do URB processing in softirq context
and he couldn't because of the input layer using the pt_regs.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 15:58 PATCH: 2.6.10 - Misrouted IRQ recovery for review Alan Cox
  2004-12-28 17:11 ` Arjan van de Ven
@ 2004-12-29 13:57 ` Ingo Molnar
  2005-01-01 17:59 ` Pavel Machek
  2005-01-01 20:41 ` Bill Davidsen
  3 siblings, 0 replies; 14+ messages in thread
From: Ingo Molnar @ 2004-12-29 13:57 UTC (permalink / raw)
  To: Alan Cox; +Cc: Linux Kernel Mailing List, Andrew Morton, Christoph Hellwig


On Tue, 28 Dec 2004, Alan Cox wrote:

> Ported to the new kernel/irq code.

looks good to me. I think it might make sense to default-enable it for
testing and try it in -mm, to see how acceptable it would be for
mainstream (and for non-x86 architectures)? In theory this should not
break systems that have a perfect IRQ routing setup, and it could make a
crucial difference for systems that have IRQ routing problems. The current
opt-in flag will not give enough testing i believe.

	Ingo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 15:58 PATCH: 2.6.10 - Misrouted IRQ recovery for review Alan Cox
  2004-12-28 17:11 ` Arjan van de Ven
  2004-12-29 13:57 ` Ingo Molnar
@ 2005-01-01 17:59 ` Pavel Machek
  2005-01-01 20:41 ` Bill Davidsen
  3 siblings, 0 replies; 14+ messages in thread
From: Pavel Machek @ 2005-01-01 17:59 UTC (permalink / raw)
  To: Alan Cox; +Cc: mingo, Linux Kernel Mailing List

Hi!

> Ported to the new kernel/irq code.

Perhaps some Documentation/ patch would be nice?

I always thought manually polling interrupt handlers might be useful,
and it indeed was very useful on the Philips Velo 1...
								Pavel

> +static int __init irqfixup_setup(char *str)
> +{
> +	irqfixup = 1;
> +	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
> +	printk(KERN_WARNING "This may impact system performance.\n");
> +	return 1;
> +}
> +
> +__setup("irqfixup", irqfixup_setup);
> +
> +static int __init irqpoll_setup(char *str)
> +{
> +	irqfixup = 2;
> +	printk(KERN_WARNING "Misrouted IRQ fixup and polling support enabled.\n");
> +	printk(KERN_WARNING "This may significantly impact system performance.\n");
> +	return 1;
> +}
> +
> +__setup("irqpoll", irqpoll_setup);
> 

-- 
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2004-12-28 15:58 PATCH: 2.6.10 - Misrouted IRQ recovery for review Alan Cox
                   ` (2 preceding siblings ...)
  2005-01-01 17:59 ` Pavel Machek
@ 2005-01-01 20:41 ` Bill Davidsen
  2005-01-02 15:18   ` Alan Cox
  3 siblings, 1 reply; 14+ messages in thread
From: Bill Davidsen @ 2005-01-01 20:41 UTC (permalink / raw)
  To: Alan Cox; +Cc: mingo, Linux Kernel Mailing List

Alan Cox wrote:
> Ported to the new kernel/irq code.

	[snip]

>  	} else {
> -		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
> +		printk(KERN_ERR "irq %d: nobody cared (try booting with the \"irqpoll\" option.\n", irq);
>  	}
>  	dump_stack();
>  	printk(KERN_ERR "handlers:\n");
	[snip]

I saw this message coming out of ac2 with my runaway IRQ 18 problem, so 
I tried irqpoll, and it just "went away" beyond sysreq or other gentle 
recovery.

I suspect that the problem lies in sharing the shared IRQ, and that 
polling doesn't solve the problem, just changes it to a hang waiting for 
the misrouted IRQ. Still poking for the real cause, no patch or 
anything, but acpi={off,ht}, noapic, pci=routeirq, etc have no benefit 
(for me).


-- 
bill davidsen <davidsen@tmr.com>
   CTO TMR Associates, Inc
   Doing interesting things with small computers since 1979

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2005-01-01 20:41 ` Bill Davidsen
@ 2005-01-02 15:18   ` Alan Cox
  2005-01-02 17:47     ` Bill Davidsen
  0 siblings, 1 reply; 14+ messages in thread
From: Alan Cox @ 2005-01-02 15:18 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: mingo, Linux Kernel Mailing List

> I saw this message coming out of ac2 with my runaway IRQ 18 problem, so 
> I tried irqpoll, and it just "went away" beyond sysreq or other gentle 
> recovery.

That means that the cause of the IRQ that hung your machine was not one
we had any driver for. That's generally BIOS bogosities on a large scale.
The irqpoll code can recover from cases where an IRQ turns up on the
wrong IRQ line but for a registered driver and when an IRQ fails to turn
up in which case the timer tick picks it up on x86 (which may or may not
make it "useful").

> I suspect that the problem lies in sharing the shared IRQ, and that 
> polling doesn't solve the problem, just changes it to a hang waiting for 
> the misrouted IRQ. Still poking for the real cause, no patch or 
> anything, but acpi={off,ht}, noapic, pci=routeirq, etc have no benefit 
> (for me).

That wouldn't really fit how the hardware works. You appear to have some
unsupported device connected to that line and asserting IRQ right from
boot.

Alan


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: PATCH: 2.6.10 - Misrouted IRQ recovery for review
  2005-01-02 15:18   ` Alan Cox
@ 2005-01-02 17:47     ` Bill Davidsen
  0 siblings, 0 replies; 14+ messages in thread
From: Bill Davidsen @ 2005-01-02 17:47 UTC (permalink / raw)
  To: Alan Cox; +Cc: mingo, Linux Kernel Mailing List

Alan Cox wrote:
>>I saw this message coming out of ac2 with my runaway IRQ 18 problem, so 
>>I tried irqpoll, and it just "went away" beyond sysreq or other gentle 
>>recovery.
> 
> 
> That means that the cause of the IRQ that hung your machine was not one
> we had any driver for. That's generally BIOS bogosities on a large scale.
> The irqpoll code can recover from cases where an IRQ turns up on the
> wrong IRQ line but for a registered driver and when an IRQ fails to turn
> up in which case the timer tick picks it up on x86 (which may or may not
> make it "useful").
> 
> 
>>I suspect that the problem lies in sharing the shared IRQ, and that 
>>polling doesn't solve the problem, just changes it to a hang waiting for 
>>the misrouted IRQ. Still poking for the real cause, no patch or 
>>anything, but acpi={off,ht}, noapic, pci=routeirq, etc have no benefit 
>>(for me).
> 
> 
> That wouldn't really fit how the hardware works. You appear to have some
> unsupported device connected to that line and asserting IRQ right from
> boot.

I cautiously say I don't think that's the case. I can boot and run in 
console mode for hours, as long as I don't do anything which accesses 
the DVD burner master on ide1. Both drives on ide0 work fine, network 
works, audio works, etc.

The instant I use hdc (which *is* IRQ shared with ide0) I get the storm 
and it continues until reboot. I have tried about every acpi=, pci=, and 
noapic option I can find, without success. However, some options do move 
the IRQ for both ide interfaces to IRQ 11, where the behaviour is identical.

Under 2.4.22 from FC1 it all works fine. I do use a vaimraid driver in 
2.4 (taints kernel), but everything works fine with or without it 
loaded, and the VIAraid comes up on another IRQ in any case.

Feel free to tell me this could still be another device, but it behaves 
as if it were the ide1 (DVD) access. Oh, I did try making ide1=nodma 
without change.

Thanks for the feedback, though.

-- 
bill davidsen <davidsen@tmr.com>
   CTO TMR Associates, Inc
   Doing interesting things with small computers since 1979

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2005-01-02 17:35 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-12-28 15:58 PATCH: 2.6.10 - Misrouted IRQ recovery for review Alan Cox
2004-12-28 17:11 ` Arjan van de Ven
2004-12-28 16:37   ` Alan Cox
2004-12-28 17:28   ` Dmitry Torokhov
2004-12-28 18:25     ` David S. Miller
2004-12-28 18:50       ` Dmitry Torokhov
2004-12-28 18:53         ` David S. Miller
2004-12-28 19:21           ` Dmitry Torokhov
2004-12-28 19:26             ` David S. Miller
2004-12-29 13:57 ` Ingo Molnar
2005-01-01 17:59 ` Pavel Machek
2005-01-01 20:41 ` Bill Davidsen
2005-01-02 15:18   ` Alan Cox
2005-01-02 17:47     ` Bill Davidsen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).