Re: [PATCH] interrupt: discover and disable very frequent interrupts

* Re: [PATCH] interrupt: discover and disable very frequent interrupts
@ 2022-09-30  8:31 张鑫城
  0 siblings, 0 replies; 11+ messages in thread
From: 张鑫城 @ 2022-09-30  8:31 UTC (permalink / raw)
  To: hdegoede, tglx
  Cc: linux-kernel, maz, oleksandr, bigeasy, mark.rutland, michael

Hi,

Thank you very much for your valuable suggestions. I have modified the patch as follows:

Subject: [PATCH] interrupt: discover and disable very frequent interrupts

In some cases, a peripheral's interrupt is triggered so frequently that
the CPU is kept busy processing that interrupt, which eventually causes
RCU to report an rcu_sched self-detected stall on the CPU:

[  838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[  838.137189] rcu:     0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004 softirq=9993/9993 fqs=97428
[  838.146912] rcu:      (t=195015 jiffies g=6773 q=0)
[  838.151516] Task dump for CPU 0:
[  838.154730] systemd-sleep   R  running task        0  3445      1 0x0000000a

Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/spurious.c   | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+	u32 gap_count;
+	u64 gap_time;
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..75bd0088446a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -188,19 +188,21 @@ static inline int bad_action_ret(irqreturn_t action_ret)
  *
  * (The other 100-of-100,000 interrupts may have been a correctly
  *  functioning device sharing an IRQ with the failing one)
+ *
+ * Some faulty hardware triggers interrupts so frequently that the CPU
+ * ends up processing hardware interrupts all the time. When this is
+ * detected, the interrupt is disabled.
  */
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, const char *msg)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irqaction *action;
 	unsigned long flags;
 
 	if (bad_action_ret(action_ret)) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
+		printk(msg, irq, action_ret);
 	} else {
-		printk(KERN_ERR "irq %d: nobody cared (try booting with "
-				"the \"irqpoll\" option)\n", irq);
+		printk(msg, irq);
 	}
 	dump_stack();
 	printk(KERN_ERR "handlers:\n");
@@ -228,7 +230,7 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 
 	if (count > 0) {
 		count--;
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq event %d: bogus return value %x\n");
 	}
 }
 
@@ -282,6 +284,25 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		return;
 	}
 
+	if((desc->gap_count & 0xffff0000) == 0)
+		desc->gap_time = get_jiffies_64();
+
+	desc->gap_count ++;
+
+	if((desc->gap_count & 0x0000ffff) >= 2000) {
+		if((get_jiffies_64() - desc->gap_time) < HZ) {
+			desc->gap_count += 0x00010000;
+			desc->gap_count &= 0xffff0000;
+		} else {
+			desc->gap_count = 0;
+		}
+
+		if((desc->gap_count >> 16) > 30) {
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: triggered too frequently\n");
+			irq_disable(desc);
+		}
+	}
+
 	/*
 	 * We cannot call note_interrupt from the threaded handler
 	 * because we need to look at the compound of all handlers
@@ -416,7 +437,8 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		/*
 		 * The interrupt is stuck
 		 */
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: nobody cared (try booting"
+				"with the \"irqpoll\" option)\n");
 		/*
 		 * Now kill the IRQ
 		 */
--
2.20.1


Regards,

Zhang Xincheng

^ permalink raw reply related	[flat|nested] 11+ messages in thread