linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] interrupt: discover and disable very frequent interrupts
@ 2022-09-30  6:40 Zhang Xincheng
  2022-09-30  7:05 ` Hans de Goede
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Zhang Xincheng @ 2022-09-30  6:40 UTC (permalink / raw)
  To: tglx
  Cc: linux-kernel, maz, oleksandr, hdegoede, bigeasy, mark.rutland,
	michael, zhangxincheng

From: zhangxincheng <zhangxincheng@uniontech.com>

In some cases, a peripheral's interrupt will be triggered frequently,
which will keep the CPU processing the interrupt and eventually cause
the RCU to report rcu_sched self-detected stall on the CPU.

[  838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[  838.137189] rcu:     0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[  838.146912] rcu:      (t=195015 jiffies g=6773 q=0)
[  838.151516] Task dump for CPU 0:
[  838.154730] systemd-sleep   R  running task        0  3445      1 0x0000000a

Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/spurious.c   | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+	u32 gap_count;
+	u64 gap_time;
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..b7162a10d92c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -222,6 +222,38 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
+/*
+ * Some bad hardware will trigger interrupts very frequently, which will
+ * cause the CPU to process hardware interrupts all the time. So when
+ * we find this out, the interrupt should be disabled.
+ */
+static void __report_so_frequently_irq(struct irq_desc *desc, irqreturn_t action_ret)
+{
+	unsigned int irq = irq_desc_get_irq(desc);
+	struct irqaction *action;
+	unsigned long flags;
+
+	printk(KERN_ERR "irq %d: triggered too frequently\n",irq);
+	dump_stack();
+	printk(KERN_ERR "handlers:\n");
+
+	/*
+	 * We need to take desc->lock here. note_interrupt() is called
+	 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
+	 * with something else removing an action. It's ok to take
+	 * desc->lock here. See synchronize_irq().
+	 */
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	for_each_action_of_desc(desc, action) {
+		printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
+		if (action->thread_fn)
+			printk(KERN_CONT " threaded [<%p>] %pf",
+					action->thread_fn, action->thread_fn);
+		printk(KERN_CONT "\n");
+	}
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+}
+
 static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 {
 	static int count = 100;
@@ -273,6 +305,26 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 {
 	unsigned int irq;
 
+	if((desc->gap_count & 0xffff0000) == 0)
+		desc->gap_time = get_jiffies_64();
+
+	desc->gap_count ++;
+
+	if((desc->gap_count & 0x0000ffff) >= 2000) {
+		if((get_jiffies_64() - desc->gap_time) < HZ) {
+			desc->gap_count += 0x00010000;
+			desc->gap_count &= 0xffff0000;
+		} else {
+			desc->gap_count = 0;
+		}
+
+		if((desc->gap_count >> 16) > 30) {
+			__report_so_frequently_irq(desc, action_ret);
+			irq_disable(desc);
+		}
+	}
+
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread
* Re: [PATCH] interrupt: discover and disable very frequent interrupts
@ 2022-09-30  8:31 张鑫城
  0 siblings, 0 replies; 12+ messages in thread
From: 张鑫城 @ 2022-09-30  8:31 UTC (permalink / raw)
  To: hdegoede, tglx
  Cc: linux-kernel, maz, oleksandr, bigeasy, mark.rutland, michael

Hi,

Thank you very much for your valuable suggestions, I have modified the patch as follows:

Subject: [PATCH] interrupt: discover and disable very frequent interrupts

In some cases, a peripheral's interrupt will be triggered frequently,
which will keep the CPU processing the interrupt and eventually cause
the RCU to report rcu_sched self-detected stall on the CPU.

[  838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[  838.137189] rcu:     0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[  838.146912] rcu:      (t=195015 jiffies g=6773 q=0)
[  838.151516] Task dump for CPU 0:
[  838.154730] systemd-sleep   R  running task        0  3445      1 0x0000000a

Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/spurious.c   | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+	u32 gap_count;
+	u64 gap_time;
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..75bd0088446a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -188,19 +188,21 @@ static inline int bad_action_ret(irqreturn_t action_ret)
  *
  * (The other 100-of-100,000 interrupts may have been a correctly
  *  functioning device sharing an IRQ with the failing one)
+ *
+ * Some bad hardware will trigger interrupts very frequently, which will
+ * cause the CPU to process hardware interrupts all the time. So when
+ * we find this out, the interrupt should be disabled.
  */
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, const char *msg)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irqaction *action;
 	unsigned long flags;
 
 	if (bad_action_ret(action_ret)) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
+		printk(msg, irq, action_ret);
 	} else {
-		printk(KERN_ERR "irq %d: nobody cared (try booting with "
-				"the \"irqpoll\" option)\n", irq);
+		printk(msg, irq);
 	}
 	dump_stack();
 	printk(KERN_ERR "handlers:\n");
@@ -228,7 +230,7 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 
 	if (count > 0) {
 		count--;
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq event %d: bogus return value %x\n");
 	}
 }
 
@@ -282,6 +284,25 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		return;
 	}
 
+	if((desc->gap_count & 0xffff0000) == 0)
+		desc->gap_time = get_jiffies_64();
+
+	desc->gap_count ++;
+
+	if((desc->gap_count & 0x0000ffff) >= 2000) {
+		if((get_jiffies_64() - desc->gap_time) < HZ) {
+			desc->gap_count += 0x00010000;
+			desc->gap_count &= 0xffff0000;
+		} else {
+			desc->gap_count = 0;
+		}
+
+		if((desc->gap_count >> 16) > 30) {
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: triggered too frequently\n");
+			irq_disable(desc);
+		}
+	}
+
 	/*
 	 * We cannot call note_interrupt from the threaded handler
 	 * because we need to look at the compound of all handlers
@@ -416,7 +437,8 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		/*
 		 * The interrupt is stuck
 		 */
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: nobody cared (try booting"
+				"with the \"irqpoll\" option)\n");
 		/*
 		 * Now kill the IRQ
 		 */
--
2.20.1


Regards,

Zhang Xincheng

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-10-18  1:07 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-30  6:40 [PATCH] interrupt: discover and disable very frequent interrupts Zhang Xincheng
2022-09-30  7:05 ` Hans de Goede
2022-09-30  9:23 ` Marc Zyngier
2022-09-30  9:57   ` Zhang Xincheng
2022-09-30 10:37     ` Marc Zyngier
2022-10-09  1:31       ` Zhang Xincheng
2022-10-09  2:13         ` Marc Zyngier
2022-10-09 10:02           ` Zhang Xincheng
2022-10-17 11:21             ` Thomas Gleixner
2022-10-18  1:05               ` Zhang Xincheng
2022-10-08  5:21 ` [interrupt] 998288b7e8: RIP:cpuidle_enter_state kernel test robot
2022-09-30  8:31 [PATCH] interrupt: discover and disable very frequent interrupts 张鑫城

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).