From: Anson Huang <anson.huang@nxp.com>
To: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>,
Jacky Bai <ping.bai@nxp.com>,
"linux-pm@vger.kernel.org" <linux-pm@vger.kernel.org>
Subject: RE: About CPU hot-plug stress test failed in cpufreq driver
Date: Mon, 25 Nov 2019 09:43:22 +0000 [thread overview]
Message-ID: <DB3PR0402MB391638C66C7EB93B5156A2D2F54A0@DB3PR0402MB3916.eurprd04.prod.outlook.com> (raw)
In-Reply-To: <DB3PR0402MB39165E40800E42C2E5635C7CF54A0@DB3PR0402MB3916.eurprd04.prod.outlook.com>
Hi, Rafael

I tried writing the necessary info into DRAM instead of calling pr_info() directly, and the issue can then be reproduced easily again. When it happens, the log shows that the last irq_work_sync() did NOT finish correctly. Below are the log and the patch I added; the test case simply repeats removing CPU1/CPU2/CPU3 and then adding them back.

When the issue happens, the log below shows that in the last round of removing CPUs, irq_work_sync() worked as expected for CPU1/CPU2/CPU3 (work->cpu is -1 in each case), but then, when CPU1 was added back, the irq_work flags were still pending/busy and the issue happened:
[ 589.121091] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
LOG:
CPUHotplug: 4758 times remaining
[ 582.829724] CPU1: shutdown
[ 582.832453] psci: CPU1 killed.
[ 582.869673] CPU2: shutdown
[ 582.872427] psci: CPU2 killed.
[ 582.909674] CPU3: shutdown
[ 582.912402] psci: CPU3 killed.
[ 583.946543] Detected VIPT I-cache on CPU1
[ 583.946586] GICv3: CPU1: found redistributor 1 region 0:0x0000000051b20000
[ 583.946651] CPU1: Booted secondary processor 0x0000000001 [0x410fd042]
[ 583.980754] Detected VIPT I-cache on CPU2
[ 583.980791] GICv3: CPU2: found redistributor 2 region 0:0x0000000051b40000
[ 583.980850] CPU2: Booted secondary processor 0x0000000002 [0x410fd042]
[ 584.029700] Detected VIPT I-cache on CPU3
[ 584.029741] GICv3: CPU3: found redistributor 3 region 0:0x0000000051b60000
[ 584.029807] CPU3: Booted secondary processor 0x0000000003 [0x410fd042]
CPUHotplug: 4757 times remaining
[ 585.141718] CPU1: shutdown
[ 585.144439] psci: CPU1 killed.
[ 585.193683] CPU2: shutdown
[ 585.196408] psci: CPU2 killed.
[ 585.241680] CPU3: shutdown
[ 585.244406] psci: CPU3 killed.
[ 586.273827] Detected VIPT I-cache on CPU1
[ 586.273866] GICv3: CPU1: found redistributor 1 region 0:0x0000000051b20000
[ 586.273926] CPU1: Booted secondary processor 0x0000000001 [0x410fd042]
[ 586.308727] Detected VIPT I-cache on CPU2
[ 586.308765] GICv3: CPU2: found redistributor 2 region 0:0x0000000051b40000
[ 586.308826] CPU2: Booted secondary processor 0x0000000002 [0x410fd042]
[ 586.357685] Detected VIPT I-cache on CPU3
[ 586.357728] GICv3: CPU3: found redistributor 3 region 0:0x0000000051b60000
[ 586.357791] CPU3: Booted secondary processor 0x0000000003 [0x410fd042]
CPUHotplug: 4756 times remaining
[ 587.469734] CPU1: shutdown
[ 587.472455] psci: CPU1 killed.
[ 587.509680] CPU2: shutdown
[ 587.512408] psci: CPU2 killed.
[ 587.549685] CPU3: shutdown
[ 587.552405] psci: CPU3 killed.
[ 588.585770] Detected VIPT I-cache on CPU1
[ 588.585814] GICv3: CPU1: found redistributor 1 region 0:0x0000000051b20000
[ 588.585878] CPU1: Booted secondary processor 0x0000000001 [0x410fd042]
[ 589.121091] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.125647] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.130203] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.134758] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.139315] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.143877] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.148431] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.152991] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.157550] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.162113] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
[ 589.166673] timeout, i 2936, 1 -1 2 -1 3 -1 1 3
Patch:
+++ b/kernel/irq_work.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/jiffies.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
@@ -23,7 +24,7 @@
static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
-
+static int log[500 * 8 * 1024];
/*
* Claim the entry so that no one else will poke at it.
*/
@@ -78,6 +79,7 @@ bool irq_work_queue(struct irq_work *work)
if (!irq_work_claim(work))
return false;
+ work->cpu = smp_processor_id();
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
__irq_work_queue_local(work);
@@ -105,6 +107,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
/* Only queue if not already pending */
if (!irq_work_claim(work))
return false;
+ work->cpu = cpu;
preempt_disable();
if (cpu != smp_processor_id()) {
@@ -161,6 +164,7 @@ static void irq_work_run_list(struct llist_head *list)
*/
flags = work->flags & ~IRQ_WORK_PENDING;
xchg(&work->flags, flags);
+ work->cpu = -1;
work->func(work);
/*
@@ -197,9 +201,22 @@ void irq_work_tick(void)
*/
void irq_work_sync(struct irq_work *work)
{
+ unsigned long timeout = jiffies + msecs_to_jiffies(500);
+ static int i = 0;
+
lockdep_assert_irqs_enabled();
- while (work->flags & IRQ_WORK_BUSY)
+ log[i++] = smp_processor_id();
+ log[i++] = work->cpu;
+
+ while (work->flags & IRQ_WORK_BUSY) {
+ if (time_after(jiffies, timeout))
+ pr_warn("timeout, i %d, %d %d %d %d %d %d %d %d\n",
+ i, log[i - 8], log[i - 7], log[i - 6], log[i - 5],
+ log[i - 4], log[i - 3], log[i - 2], log[i - 1]);
cpu_relax();
+ }
}
EXPORT_SYMBOL_GPL(irq_work_sync);
Thanks,
Anson
> Subject: RE: About CPU hot-plug stress test failed in cpufreq driver
>
> Hi, Rafael
> With pr_info() added in irq_work_sync(), the issue can NOT be
> reproduced any more. Could there be a race there that the pr_info()
> eliminates? I will keep running the test with the pr_info in place to
> see if I have any luck reproducing it.
>
> Anson
>
> > On Fri, Nov 22, 2019 at 6:15 AM Anson Huang <anson.huang@nxp.com>
> > wrote:
> > >
> > > Hi, Rafael
> > > Theoretically, yes, the CPU being offline will run the irq
> > > work list to make sure the irq work pending on it will be clear, but
> > > the fact is NOT,
> >
> > So this looks like a problem with irq_work_sync() working not as expected.
> >
> > > both ondemand and schedutil governor can reproduce this
> > > issue if
> > running stress CPU hotplug test.
> > > I tried add a "int cpu" in irq work structure to record CPU
> > > number
> > which has irq work pending, when issue happen, I can see the irq work
> > is pending at CPU #3 which is already offline, this is why issue
> > happen, but I don't know how it happens...
> > >
> > > diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
> > > index
> > > b11fcdf..f8da06f9 100644
> > > --- a/include/linux/irq_work.h
> > > +++ b/include/linux/irq_work.h
> > > @@ -25,6 +25,7 @@ struct irq_work {
> > > unsigned long flags;
> > > struct llist_node llnode;
> > > void (*func)(struct irq_work *);
> > > + int cpu;
> > > };
> > >
> > > static inline
> > > diff --git a/kernel/irq_work.c b/kernel/irq_work.c index
> > > d42acaf..2e893d5 100644
> > > --- a/kernel/irq_work.c
> > > +++ b/kernel/irq_work.c
> > > @@ -10,6 +10,7 @@
> > > #include <linux/kernel.h>
> > > #include <linux/export.h>
> > > #include <linux/irq_work.h>
> > > +#include <linux/jiffies.h>
> > > #include <linux/percpu.h>
> > > #include <linux/hardirq.h>
> > > #include <linux/irqflags.h>
> > > @@ -78,6 +79,7 @@ bool irq_work_queue(struct irq_work *work)
> > > if (!irq_work_claim(work))
> > > return false;
> > >
> > > + work->cpu = smp_processor_id();
> > > /* Queue the entry and raise the IPI if needed. */
> > > preempt_disable();
> > > __irq_work_queue_local(work); @@ -105,6 +107,7 @@ bool
> > > irq_work_queue_on(struct irq_work *work,
> > int cpu)
> > > /* Only queue if not already pending */
> > > if (!irq_work_claim(work))
> > > return false;
> > > + work->cpu = cpu;
> > >
> > > preempt_disable();
> > > if (cpu != smp_processor_id()) { @@ -161,6 +164,7 @@ static
> > > void irq_work_run_list(struct llist_head *list)
> > > */
> > > flags = work->flags & ~IRQ_WORK_PENDING;
> > > xchg(&work->flags, flags);
> > > + work->cpu = -1;
> > >
> > > work->func(work);
> > > /*
> > > @@ -197,9 +201,13 @@ void irq_work_tick(void)
> > > */
> > > void irq_work_sync(struct irq_work *work) {
> > > + unsigned long timeout = jiffies + msecs_to_jiffies(500);
> > > lockdep_assert_irqs_enabled();
> >
> > Can you please add something like
> >
> > pr_info("%s: CPU %d\n", __func__, work->cpu);
> >
> > here re-run the test and collect a log again?
> >
> > I need to know if irq_work_sync() runs during CPU offline as expected.
> >
> > >
> > > - while (work->flags & IRQ_WORK_BUSY)
> > > + while (work->flags & IRQ_WORK_BUSY) {
> > > + if (time_after(jiffies, timeout))
> > > + pr_warn("irq_work_sync 500ms timeout, work
> > > + cpu %d\n", work->cpu);
> > > cpu_relax();
> > > + }
> > > }
> > > EXPORT_SYMBOL_GPL(irq_work_sync);