linux-kernel.vger.kernel.org archive mirror
* [Question]  softlockup in run_timer_softirq
@ 2023-02-10  9:50 liujian (CE)
  2023-02-13 20:00 ` John Stultz
  0 siblings, 1 reply; 9+ messages in thread
From: liujian (CE) @ 2023-02-10  9:50 UTC (permalink / raw)
  To: jstultz, tglx, sboyd, linux-kernel; +Cc: liujian (CE)

Hi, 

During syzkaller (syz) testing, we hit many softlockups in various timer
handler functions.

We analyzed __run_timers() and found the following problem.

In the while loop of __run_timers(), if there are too many timers or the
timer handler functions are too slow, the time spent expiring each batch
can keep pushing jiffies past the timer wheel's next_expiry, so the loop
never terminates.
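
For reference, a simplified, paraphrased sketch of that loop (abridged
from kernel/time/timer.c; details vary between kernel versions), with the
condition that keeps it spinning annotated:

static inline void __run_timers(struct timer_base *base)
{
	struct hlist_head heads[LVL_DEPTH];
	int levels;

	if (time_before(jiffies, base->next_expiry))
		return;

	timer_base_lock_expiry(base);
	raw_spin_lock_irq(&base->lock);

	/*
	 * The loop continues as long as jiffies has caught up with the
	 * wheel's next expiry.  If expiring one batch of timers takes
	 * longer than the interval until the next expiry (e.g. handlers
	 * that are slow and re-arm themselves), this condition is true
	 * again on the next iteration and the loop never exits.
	 */
	while (time_after_eq(jiffies, base->clk) &&
	       time_after_eq(jiffies, base->next_expiry)) {
		levels = collect_expired_timers(base, heads);
		base->clk++;
		base->next_expiry = __next_timer_interrupt(base);

		while (levels--)
			expire_timers(base, heads + levels); /* run handlers */
	}

	raw_spin_unlock_irq(&base->lock);
	timer_base_unlock_expiry(base);
}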

An extreme test case [1] is constructed to reproduce the problem.

Is this a kernel problem, or simply an unreasonable use of timers?

Can we limit the running time of __run_timers(), for example as in [2]?

Does anyone have a good idea to solve this problem? 
Thank you.


[1]
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/delay.h>

static int stop = 1;

/* number of timers */
static int size = 1000;
module_param(size, int, 0644);
MODULE_PARM_DESC(size, "number of timers");

/* re-arm period of each timer, in jiffies */
static int interval = 100;
module_param(interval, int, 0644);
MODULE_PARM_DESC(interval, "timer period in jiffies");

/* busy time spent in each timer handler, in microseconds */
static int dt = 200;
module_param(dt, int, 0644);
MODULE_PARM_DESC(dt, "handler busy time in microseconds");

struct wrapper {
	struct timer_list timer;
	spinlock_t lock;
};

static struct wrapper *wr;

static void timer_func(struct timer_list *t)
{
	struct wrapper *w = from_timer(w, t, timer);

	spin_lock_bh(&(w->lock));
	if (stop == 0) {
		udelay(dt); /* simulate a slow timer handler */
	}
	spin_unlock_bh(&(w->lock));

	if (stop == 0) {
		mod_timer(&(w->timer), jiffies + interval);
	}
}

static int __init maint_init(void)
{
	int i;

	wr = kcalloc(size, sizeof(struct wrapper), GFP_KERNEL);
	if (!wr)
		return -ENOMEM;

	for (i = 0; i < size; i++) {
		struct wrapper *w = &wr[i];
		spin_lock_init(&(w->lock));
		timer_setup(&(w->timer), timer_func, 0);
		mod_timer(&(w->timer), jiffies + 20);
	}
	stop = 0;

	return 0;
}

static void __exit maint_exit(void)
{
	int i;

	stop = 1;
	udelay(100); /* crude: let in-flight handlers observe stop == 1 */
	for (i = 0; i < size; i++) {
		struct wrapper *w = &wr[i];
		del_timer_sync(&(w->timer));
	}
	kfree(wr);
}

module_init(maint_init);
module_exit(maint_exit);
MODULE_LICENSE("GPL");


insmod timer_test.ko size=1000 interval=100 dt=200
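
With these parameters, and assuming HZ=1000 (so interval=100 jiffies is
100 ms), one pass over the expired timers costs roughly
1000 * 200 us = 200 ms of handler time, while each handler re-arms itself
only 100 ms ahead.  By the time one batch has been expired, the wheel's
next_expiry is therefore already in the past again, __run_timers() keeps
iterating, and the softlockup watchdog eventually fires.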

[2]
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 63a8ce7177dd..a215916f26cf 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -223,6 +223,9 @@  static DEFINE_MUTEX(timer_keys_mutex);
 static void timer_update_keys(struct work_struct *work);
 static DECLARE_WORK(timer_update_work, timer_update_keys);
 
+static unsigned int sysctl_timer_time_limit = 0;
+
+#ifdef CONFIG_SYSCTL
 #ifdef CONFIG_SMP
 static unsigned int sysctl_timer_migration = 1;
 
@@ -236,7 +239,6 @@  static void timers_update_migration(void)
 		static_branch_disable(&timers_migration_enabled);
 }
 
-#ifdef CONFIG_SYSCTL
 static int timer_migration_handler(struct ctl_table *table, int write,
 			    void *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -249,8 +251,12 @@  static int timer_migration_handler(struct ctl_table *table, int write,
 	mutex_unlock(&timer_keys_mutex);
 	return ret;
 }
+#else
+static inline void timers_update_migration(void) { }
+#endif /* !CONFIG_SMP */
 
 static struct ctl_table timer_sysctl[] = {
+#ifdef CONFIG_SMP
 	{
 		.procname	= "timer_migration",
 		.data		= &sysctl_timer_migration,
@@ -260,6 +266,15 @@  static struct ctl_table timer_sysctl[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+#endif
+	{
+		.procname	= "timer_time_limit",
+		.data		= &sysctl_timer_time_limit,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
 	{}
 };
 
@@ -270,9 +285,6 @@  static int __init timer_sysctl_init(void)
 }
 device_initcall(timer_sysctl_init);
 #endif /* CONFIG_SYSCTL */
-#else /* CONFIG_SMP */
-static inline void timers_update_migration(void) { }
-#endif /* !CONFIG_SMP */
 
 static void timer_update_keys(struct work_struct *work)
 {
@@ -1992,7 +2004,7 @@  void timer_clear_idle(void)
  * __run_timers - run all expired timers (if any) on this CPU.
  * @base: the timer vector to be processed.
  */
-static inline void __run_timers(struct timer_base *base)
+static inline void __run_timers(struct timer_base *base, unsigned long time_limit)
 {
 	struct hlist_head heads[LVL_DEPTH];
 	int levels;
@@ -2020,6 +2032,13 @@  static inline void __run_timers(struct timer_base *base)
 
 		while (levels--)
 			expire_timers(base, heads + levels);
+
+		if (unlikely(time_limit &&
+		    time_after_eq(jiffies, time_limit))) {
+			if (time_after_eq(jiffies, base->next_expiry))
+				raise_softirq(TIMER_SOFTIRQ);
+			break;
+		}
 	}
 	raw_spin_unlock_irq(&base->lock);
 	timer_base_unlock_expiry(base);
@@ -2031,10 +2050,14 @@  static inline void __run_timers(struct timer_base *base)
 static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+	unsigned long time_limit = 0;
+
+	if (sysctl_timer_time_limit)
+		time_limit = jiffies + msecs_to_jiffies(sysctl_timer_time_limit);
 
-	__run_timers(base);
+	__run_timers(base, time_limit);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
-		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]), time_limit);
 }
 
 /*
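
(For context: timer_sysctl is registered under the "kernel" sysctl
directory by timer_sysctl_init(), so with a patch along these lines the
knob would presumably show up as /proc/sys/kernel/timer_time_limit next
to the existing timer_migration entry, taking a value in milliseconds
with 0 meaning "no limit", matching the msecs_to_jiffies() conversion in
run_timer_softirq() above.)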


* Re: [Question] softlockup in run_timer_softirq
  2023-02-10  9:50 [Question] softlockup in run_timer_softirq liujian (CE)
@ 2023-02-13 20:00 ` John Stultz
  2023-02-15  8:34   ` liujian (CE)
  2023-03-03 10:55   ` liujian (CE)
  0 siblings, 2 replies; 9+ messages in thread
From: John Stultz @ 2023-02-13 20:00 UTC (permalink / raw)
  To: liujian (CE); +Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney

On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
>
> During the syz test, we encountered many problems with various timer handler
> functions softlockup.
>
> We analyze __run_timers() and find the following problem.
>
> In the while loop of __run_timers(), because there are too many timers or
> improper timer handler functions, if the processing time of the expired
> timers is always greater than the time wheel's next_expiry, the function
> will loop infinitely.
>
> The following extreme test case can be used to reproduce the problem.
> An extreme test case[1] is constructed to reproduce the problem.

Thanks for reporting and sending out this data:

First, any chance you might submit this as an in-kernel stress test?
Maybe utilizing the kernel/torture.c framework?

(Though the test may need to occasionally take a break so the system
can eventually catch up)

> Is this a problem or an unreasonable use?
>
> Can we limit the running time of __run_timers() [2]?
>
> Does anyone have a good idea to solve this problem?

So your patch reminds me of Peter's softirq_needs_break() logic:
  https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=core/softirq

Maybe your patch could extend that series to cover the timer softirq as well?
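
Peter's series is only linked above, not reproduced here.  Purely as a
hypothetical illustration of the idea (not Peter's actual code; the real
signature and implementation in that tree will differ), the helper boils
down to something like:

/* Hypothetical sketch: give each softirq invocation a time budget. */
static unsigned long softirq_deadline;	/* made-up per-invocation deadline */

static inline void softirq_set_budget(unsigned long budget_jiffies)
{
	softirq_deadline = jiffies + budget_jiffies;
}

static inline bool softirq_needs_break(struct softirq_action *h)
{
	/* Stop once the budget is used up or a reschedule is pending. */
	return time_after_eq(jiffies, softirq_deadline) || need_resched();
}

A handler that sees "true" is expected to bail out and re-raise its
softirq so the remaining work gets picked up in a later pass (or by
ksoftirqd).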

thanks
-john


* RE: [Question] softlockup in run_timer_softirq
  2023-02-13 20:00 ` John Stultz
@ 2023-02-15  8:34   ` liujian (CE)
  2023-05-02  3:06     ` John Stultz
  2023-03-03 10:55   ` liujian (CE)
  1 sibling, 1 reply; 9+ messages in thread
From: liujian (CE) @ 2023-02-15  8:34 UTC (permalink / raw)
  To: John Stultz; +Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney



> -----Original Message-----
> From: John Stultz [mailto:jstultz@google.com]
> Sent: Tuesday, February 14, 2023 4:01 AM
> To: liujian (CE) <liujian56@huawei.com>
> Cc: tglx@linutronix.de; sboyd@kernel.org; linux-kernel@vger.kernel.org;
> peterz@infradead.org; Paul E. McKenney <paulmck@kernel.org>
> Subject: Re: [Question] softlockup in run_timer_softirq
> 
> On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
> >
> > During the syz test, we encountered many problems with various timer
> > handler functions softlockup.
> >
> > We analyze __run_timers() and find the following problem.
> >
> > In the while loop of __run_timers(), because there are too many timers
> > or improper timer handler functions, if the processing time of the
> > expired timers is always greater than the time wheel's next_expiry,
> > the function will loop infinitely.
> >
> > The following extreme test case can be used to reproduce the problem.
> > An extreme test case[1] is constructed to reproduce the problem.
> 
> Thanks for reporting and sending out this data:
> 
> First, any chance you might submit this as a in-kernel-stress test?
> Maybe utilizing the kernel/torture.c framework?
> 
Okay, I'll look into that framework and put a test together.
> (Though the test may need to occasionally take a break so the system can
> eventually catch up)
> 
> > Is this a problem or an unreasonable use?
> >
> > Can we limit the running time of __run_timers() [2]?
> >
> > Does anyone have a good idea to solve this problem?
> 
> So your patch reminds me of Peter's softirq_needs_break() logic:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=co
> re/softirq
> 
> Maybe it could extend that series for the timer softirq as well?
> 
Thank you. Yes.
Based on Peter's patchset plus the extended patch for the timer softirq [1],
the soft lockup problem no longer occurs.

By the way, this seems to be a fairly old patchset; will it be pushed to
mainline? @John @Peter


 [1]
Author: Liu Jian <liujian56@huawei.com>
Date:   Tue Feb 14 09:53:46 2023 +0800

    softirq, timer: Use softirq_needs_break()
    
    In the while loop of __run_timers(), if there are too many timers or
    the timer handler functions are too slow, the time spent expiring each
    batch can keep pushing jiffies past the time wheel's next_expiry, and
    the loop never terminates.

    To prevent this, use the timeout/break logic provided by softirqs.  If
    the running time exceeds the limit, break out of the loop and raise an
    additional TIMER_SOFTIRQ so the remaining work runs later.
    
    Signed-off-by: Liu Jian <liujian56@huawei.com>

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 63a8ce7177dd..70744a469a39 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1992,7 +1992,7 @@ void timer_clear_idle(void)
  * __run_timers - run all expired timers (if any) on this CPU.
  * @base: the timer vector to be processed.
  */
-static inline void __run_timers(struct timer_base *base)
+static inline void __run_timers(struct timer_base *base, struct softirq_action *h)
 {
        struct hlist_head heads[LVL_DEPTH];
        int levels;
@@ -2020,6 +2020,12 @@ static inline void __run_timers(struct timer_base *base)
 
                while (levels--)
                        expire_timers(base, heads + levels);
+
+               if (softirq_needs_break(h)) {
+                       if (time_after_eq(jiffies, base->next_expiry))
+                               __raise_softirq_irqoff(TIMER_SOFTIRQ);
+                       break;
+               }
        }
        raw_spin_unlock_irq(&base->lock);
        timer_base_unlock_expiry(base);
@@ -2032,9 +2038,9 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
-       __run_timers(base);
+       __run_timers(base, h);
        if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
-               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]), h);
 }
 
 /*
> thanks
> -john



* RE: [Question] softlockup in run_timer_softirq
  2023-02-13 20:00 ` John Stultz
  2023-02-15  8:34   ` liujian (CE)
@ 2023-03-03 10:55   ` liujian (CE)
  2023-03-09  7:04     ` John Stultz
  1 sibling, 1 reply; 9+ messages in thread
From: liujian (CE) @ 2023-03-03 10:55 UTC (permalink / raw)
  To: John Stultz; +Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney



> -----Original Message-----
> From: liujian (CE)
> Sent: Wednesday, February 15, 2023 4:34 PM
> To: 'John Stultz' <jstultz@google.com>
> Cc: tglx@linutronix.de; sboyd@kernel.org; linux-kernel@vger.kernel.org;
> peterz@infradead.org; Paul E. McKenney <paulmck@kernel.org>
> Subject: RE: [Question] softlockup in run_timer_softirq
> 
> 
> 
> > -----Original Message-----
> > From: John Stultz [mailto:jstultz@google.com]
> > Sent: Tuesday, February 14, 2023 4:01 AM
> > To: liujian (CE) <liujian56@huawei.com>
> > Cc: tglx@linutronix.de; sboyd@kernel.org;
> > linux-kernel@vger.kernel.org; peterz@infradead.org; Paul E. McKenney
> > <paulmck@kernel.org>
> > Subject: Re: [Question] softlockup in run_timer_softirq
> >
> > On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
> > >
> > > During the syz test, we encountered many problems with various timer
> > > handler functions softlockup.
> > >
> > > We analyze __run_timers() and find the following problem.
> > >
> > > In the while loop of __run_timers(), because there are too many
> > > timers or improper timer handler functions, if the processing time
> > > of the expired timers is always greater than the time wheel's
> > > next_expiry, the function will loop infinitely.
> > >
> > > The following extreme test case can be used to reproduce the problem.
> > > An extreme test case[1] is constructed to reproduce the problem.
> >
> > Thanks for reporting and sending out this data:
> >
> > First, any chance you might submit this as a in-kernel-stress test?
> > Maybe utilizing the kernel/torture.c framework?
> >
> Okay,   I'll learn this framework and do this thing.
> > (Though the test may need to occasionally take a break so the system
> > can eventually catch up)
> >
> > > Is this a problem or an unreasonable use?
> > >
> > > Can we limit the running time of __run_timers() [2]?
> > >
> > > Does anyone have a good idea to solve this problem?
> >
> > So your patch reminds me of Peter's softirq_needs_break() logic:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?
> > h=co
> > re/softirq
> >
> > Maybe it could extend that series for the timer softirq as well?
> >
> Thank you. Yes.
> Base on the patchset and the extended patch for timer [1], the soft lockup
> problem does not occur.
> 
> By the way, I see this is a very old patchset?  Will this patchset push the main
> line? @John @Peter
> 
Hi Peter,
Do you have a plan to upstream this patchset, or other ideas?
I would like to use softirq_needs_break() to limit the runtime of the timer
softirq handler; I wonder whether this approach is appropriate.
Thank you.
> 
>  [1]
> Author: Liu Jian <liujian56@huawei.com>
> Date:   Tue Feb 14 09:53:46 2023 +0800
> 
>     softirq, timer: Use softirq_needs_break()
> 
>     In the while loop of __run_timers(), because there are too many timers or
>     improper timer handler functions, if the processing time of the expired
>     timers is always greater than the time wheel's next_expiry, the function
>     will loop infinitely.
> 
>     To prevent this, use the timeout/break logic provided by SoftIRQs.If the
>     running time exceeds the limit, break the loop and an additional
>     TIMER_SOFTIRQ is triggered.
> 
>     Signed-off-by: Liu Jian <liujian56@huawei.com>
> 
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c index
> 63a8ce7177dd..70744a469a39 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -1992,7 +1992,7 @@ void timer_clear_idle(void)
>   * __run_timers - run all expired timers (if any) on this CPU.
>   * @base: the timer vector to be processed.
>   */
> -static inline void __run_timers(struct timer_base *base)
> +static inline void __run_timers(struct timer_base *base, struct
> +softirq_action *h)
>  {
>         struct hlist_head heads[LVL_DEPTH];
>         int levels;
> @@ -2020,6 +2020,12 @@ static inline void __run_timers(struct timer_base
> *base)
> 
>                 while (levels--)
>                         expire_timers(base, heads + levels);
> +
> +               if (softirq_needs_break(h)) {
> +                       if (time_after_eq(jiffies, base->next_expiry))
> +                               __raise_softirq_irqoff(TIMER_SOFTIRQ);
> +                       break;
> +               }
>         }
>         raw_spin_unlock_irq(&base->lock);
>         timer_base_unlock_expiry(base);
> @@ -2032,9 +2038,9 @@ static __latent_entropy void
> run_timer_softirq(struct softirq_action *h)  {
>         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
> 
> -       __run_timers(base);
> +       __run_timers(base, h);
>         if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
> -               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
> +               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]), h);
>  }
> 
>  /*
> > thanks
> > -john



* Re: [Question] softlockup in run_timer_softirq
  2023-03-03 10:55   ` liujian (CE)
@ 2023-03-09  7:04     ` John Stultz
  0 siblings, 0 replies; 9+ messages in thread
From: John Stultz @ 2023-03-09  7:04 UTC (permalink / raw)
  To: liujian (CE); +Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney

On Fri, Mar 3, 2023 at 2:55 AM liujian (CE) <liujian56@huawei.com> wrote:
> From: liujian (CE)
> > From: John Stultz [mailto:jstultz@google.com]
> > > On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
> > > > Can we limit the running time of __run_timers() [2]?
> > > >
> > > > Does anyone have a good idea to solve this problem?
> > >
> > > So your patch reminds me of Peter's softirq_needs_break() logic:
> > >
> > > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?
> > > h=co
> > > re/softirq
> > >
> > > Maybe it could extend that series for the timer softirq as well?
> > >
> > Thank you. Yes.
> > Base on the patchset and the extended patch for timer [1], the soft lockup
> > problem does not occur.
> >
> > By the way, I see this is a very old patchset?  Will this patchset push the main
> > line? @John @Peter
> >
> Hi, peter,
> Do you have an upstream plan for this patchset? Or other ideas.
> I want to use softirq_needs_break() to limit the runtime of timer soft interrupt handler function, wonder if this is appropriate?

My understanding is that the series was proposed but maybe caused some
regressions for the networking softirqs it was initially targeting, so
it's been stalled out.

However, if you utilize the logic to help with the timer softirq
first, that might help land the logic, and then the networking folks
can try to adapt and slowly sort out the regressions.
So I'd suggest that if you have the patches working for softirqs and
showing a benefit, you submit them for review.

I personally was looking at the series for the block softirq (which I
got working against a 5.10 kernel), but unfortunately that code has
since been reworked to use lockless lists for its work items. That
makes it hard to stop when the need_break signal is set: once we've
dequeued entries from the list, we have to run them, as we can't
easily re-add them back in the proper list order. :/   Something I
need to re-visit eventually.
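
To illustrate the constraint: the consumer side of such a lockless list
looks roughly like the sketch below.  This is only an illustration, not
the actual block layer code; struct work_item and handle_one() are
made-up names.

#include <linux/llist.h>

struct work_item {
	struct llist_node node;
	/* ... payload ... */
};

static LLIST_HEAD(pending);	/* producers queue items with llist_add() */

static void handle_one(struct work_item *item)
{
	/* process one completed item (stand-in for the real handler) */
}

static void consume_all(void)
{
	struct work_item *item, *next;
	struct llist_node *batch;

	/*
	 * llist_del_all() atomically detaches the whole list.  From this
	 * point there is no cheap way to put an unprocessed tail back in
	 * its original position (llist_add() would interleave it with
	 * newer entries), so the consumer pretty much has to run every
	 * item it dequeued, even if it would like to break early.
	 */
	batch = llist_del_all(&pending);
	batch = llist_reverse_order(batch);	/* restore FIFO order */

	llist_for_each_entry_safe(item, next, batch, node)
		handle_one(item);
}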

thanks
-john


* Re: [Question] softlockup in run_timer_softirq
  2023-02-15  8:34   ` liujian (CE)
@ 2023-05-02  3:06     ` John Stultz
  2023-05-04  1:49       ` liujian (CE)
  0 siblings, 1 reply; 9+ messages in thread
From: John Stultz @ 2023-05-02  3:06 UTC (permalink / raw)
  To: liujian (CE), Frank Woo, Rhine Wu
  Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney

On Wed, Feb 15, 2023 at 12:34 AM liujian (CE) <liujian56@huawei.com> wrote:
> > On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
> > >
> > > During the syz test, we encountered many problems with various timer
> > > handler functions softlockup.
> > >
> > > We analyze __run_timers() and find the following problem.
> > >
> > > In the while loop of __run_timers(), because there are too many timers
> > > or improper timer handler functions, if the processing time of the
> > > expired timers is always greater than the time wheel's next_expiry,
> > > the function will loop infinitely.
> > >
> > > The following extreme test case can be used to reproduce the problem.
> > > An extreme test case[1] is constructed to reproduce the problem.
> >
> > Thanks for reporting and sending out this data:
> >
> > First, any chance you might submit this as a in-kernel-stress test?
> > Maybe utilizing the kernel/torture.c framework?
> >
> Okay,   I'll learn this framework and do this thing.
> > (Though the test may need to occasionally take a break so the system can
> > eventually catch up)
> >
> > > Is this a problem or an unreasonable use?
> > >
> > > Can we limit the running time of __run_timers() [2]?
> > >
> > > Does anyone have a good idea to solve this problem?
> >
> > So your patch reminds me of Peter's softirq_needs_break() logic:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=co
> > re/softirq
> >
> > Maybe it could extend that series for the timer softirq as well?
> >
> Thank you. Yes.
> Base on the patchset and the extended patch for timer [1], the soft lockup problem does not occur.
>
> By the way, I see this is a very old patchset?  Will this patchset push the main line? @John @Peter
>
>
>  [1]
> Author: Liu Jian <liujian56@huawei.com>
> Date:   Tue Feb 14 09:53:46 2023 +0800
>
>     softirq, timer: Use softirq_needs_break()
>
>     In the while loop of __run_timers(), because there are too many timers or
>     improper timer handler functions, if the processing time of the expired
>     timers is always greater than the time wheel's next_expiry, the function
>     will loop infinitely.
>
>     To prevent this, use the timeout/break logic provided by SoftIRQs.If the
>     running time exceeds the limit, break the loop and an additional
>     TIMER_SOFTIRQ is triggered.
>
>     Signed-off-by: Liu Jian <liujian56@huawei.com>
>
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index 63a8ce7177dd..70744a469a39 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -1992,7 +1992,7 @@ void timer_clear_idle(void)
>   * __run_timers - run all expired timers (if any) on this CPU.
>   * @base: the timer vector to be processed.
>   */
> -static inline void __run_timers(struct timer_base *base)
> +static inline void __run_timers(struct timer_base *base, struct softirq_action *h)
>  {
>         struct hlist_head heads[LVL_DEPTH];
>         int levels;
> @@ -2020,6 +2020,12 @@ static inline void __run_timers(struct timer_base *base)
>
>                 while (levels--)
>                         expire_timers(base, heads + levels);
> +
> +               if (softirq_needs_break(h)) {
> +                       if (time_after_eq(jiffies, base->next_expiry))
> +                               __raise_softirq_irqoff(TIMER_SOFTIRQ);
> +                       break;
> +               }
>         }
>         raw_spin_unlock_irq(&base->lock);
>         timer_base_unlock_expiry(base);
> @@ -2032,9 +2038,9 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
>  {
>         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
>
> -       __run_timers(base);
> +       __run_timers(base, h);
>         if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
> -               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
> +               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]), h);
>  }
>
>  /*

So I wanted to revive this old thread, as Frank Woo mentioned his team
has seen a similar issue as well.

Liujian: I'm curious if you've made any further progress with your
adapted patch on top of PeterZ's softirq_needs_break patch series?

Might it be worth re-submitting the whole series for consideration upstream?

thanks
-john


* Re: [Question] softlockup in run_timer_softirq
  2023-05-02  3:06     ` John Stultz
@ 2023-05-04  1:49       ` liujian (CE)
  2023-05-04  2:59         ` John Stultz
  0 siblings, 1 reply; 9+ messages in thread
From: liujian (CE) @ 2023-05-04  1:49 UTC (permalink / raw)
  To: John Stultz, Frank Woo, Rhine Wu
  Cc: tglx, sboyd, linux-kernel, peterz, Paul E. McKenney



On 2023/5/2 11:06, John Stultz wrote:
> On Wed, Feb 15, 2023 at 12:34 AM liujian (CE) <liujian56@huawei.com> wrote:
>>> On Fri, Feb 10, 2023 at 1:51 AM liujian (CE) <liujian56@huawei.com> wrote:
>>>>
>>>> During the syz test, we encountered many problems with various timer
>>>> handler functions softlockup.
>>>>
>>>> We analyze __run_timers() and find the following problem.
>>>>
>>>> In the while loop of __run_timers(), because there are too many timers
>>>> or improper timer handler functions, if the processing time of the
>>>> expired timers is always greater than the time wheel's next_expiry,
>>>> the function will loop infinitely.
>>>>
>>>> The following extreme test case can be used to reproduce the problem.
>>>> An extreme test case[1] is constructed to reproduce the problem.
>>>
>>> Thanks for reporting and sending out this data:
>>>
>>> First, any chance you might submit this as a in-kernel-stress test?
>>> Maybe utilizing the kernel/torture.c framework?
>>>
>> Okay,   I'll learn this framework and do this thing.
>>> (Though the test may need to occasionally take a break so the system can
>>> eventually catch up)
>>>
>>>> Is this a problem or an unreasonable use?
>>>>
>>>> Can we limit the running time of __run_timers() [2]?
>>>>
>>>> Does anyone have a good idea to solve this problem?
>>>
>>> So your patch reminds me of Peter's softirq_needs_break() logic:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=co
>>> re/softirq
>>>
>>> Maybe it could extend that series for the timer softirq as well?
>>>
>> Thank you. Yes.
>> Base on the patchset and the extended patch for timer [1], the soft lockup problem does not occur.
>>
>> By the way, I see this is a very old patchset?  Will this patchset push the main line? @John @Peter
>>
>>
>>   [1]
>> Author: Liu Jian <liujian56@huawei.com>
>> Date:   Tue Feb 14 09:53:46 2023 +0800
>>
>>      softirq, timer: Use softirq_needs_break()
>>
>>      In the while loop of __run_timers(), because there are too many timers or
>>      improper timer handler functions, if the processing time of the expired
>>      timers is always greater than the time wheel's next_expiry, the function
>>      will loop infinitely.
>>
>>      To prevent this, use the timeout/break logic provided by SoftIRQs.If the
>>      running time exceeds the limit, break the loop and an additional
>>      TIMER_SOFTIRQ is triggered.
>>
>>      Signed-off-by: Liu Jian <liujian56@huawei.com>
>>
>> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
>> index 63a8ce7177dd..70744a469a39 100644
>> --- a/kernel/time/timer.c
>> +++ b/kernel/time/timer.c
>> @@ -1992,7 +1992,7 @@ void timer_clear_idle(void)
>>    * __run_timers - run all expired timers (if any) on this CPU.
>>    * @base: the timer vector to be processed.
>>    */
>> -static inline void __run_timers(struct timer_base *base)
>> +static inline void __run_timers(struct timer_base *base, struct softirq_action *h)
>>   {
>>          struct hlist_head heads[LVL_DEPTH];
>>          int levels;
>> @@ -2020,6 +2020,12 @@ static inline void __run_timers(struct timer_base *base)
>>
>>                  while (levels--)
>>                          expire_timers(base, heads + levels);
>> +
>> +               if (softirq_needs_break(h)) {
>> +                       if (time_after_eq(jiffies, base->next_expiry))
>> +                               __raise_softirq_irqoff(TIMER_SOFTIRQ);
>> +                       break;
>> +               }
>>          }
>>          raw_spin_unlock_irq(&base->lock);
>>          timer_base_unlock_expiry(base);
>> @@ -2032,9 +2038,9 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
>>   {
>>          struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
>>
>> -       __run_timers(base);
>> +       __run_timers(base, h);
>>          if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
>> -               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
>> +               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]), h);
>>   }
>>
>>   /*
> 
> So I wanted to revive this old thread, as Frank Woo mentioned his team
> has seen a similar issue as well.
> 
> Liujian: I'm curious if you've made any further progress with your
> adapted patch ontop of PeterZ's softirq_needs_break patch series?
> 
Hi John,
   Only the commit ("softirq, timer: Use softirq_needs_break()") has been
added on top of Peter's patchset; no other modification has been made.
> Might it be worth re-submitting the whole series for consideration upstream?
> 
I very much agree and hope so, because we often run into similar
problems during fuzz testing (especially when the test machine is slow).
> thanks
> -john


* Re: [Question] softlockup in run_timer_softirq
  2023-05-04  1:49       ` liujian (CE)
@ 2023-05-04  2:59         ` John Stultz
  2023-05-05 11:37           ` liujian (CE)
  0 siblings, 1 reply; 9+ messages in thread
From: John Stultz @ 2023-05-04  2:59 UTC (permalink / raw)
  To: liujian (CE)
  Cc: Frank Woo, Rhine Wu, tglx, sboyd, linux-kernel, peterz, Paul E. McKenney

On Wed, May 3, 2023 at 6:50 PM liujian (CE) <liujian56@huawei.com> wrote:
> On 2023/5/2 11:06, John Stultz wrote:
> > So I wanted to revive this old thread, as Frank Woo mentioned his team
> > has seen a similar issue as well.
> >
> > Liujian: I'm curious if you've made any further progress with your
> > adapted patch ontop of PeterZ's softirq_needs_break patch series?
> >
> Hi John,
>    Only the commit ("softirq, timer: Use softirq_needs_break()") is
> added to the patchset of Peter, and no other modification is made.
> > Might it be worth re-submitting the whole series for consideration upstream?
> >
> I agree very much and expect, because we often encounter similar
> problems when doing fuzzy tests (especially when the test machine is poor).

Ok. Will you submit the series + your patch to the list for review and
consideration then?

Please include Frank and Rhine on CC so they can validate and provide
Tested-by: tags if it works for them as well.

thanks
-john


* Re: [Question] softlockup in run_timer_softirq
  2023-05-04  2:59         ` John Stultz
@ 2023-05-05 11:37           ` liujian (CE)
  0 siblings, 0 replies; 9+ messages in thread
From: liujian (CE) @ 2023-05-05 11:37 UTC (permalink / raw)
  To: John Stultz
  Cc: Frank Woo, Rhine Wu, tglx, sboyd, linux-kernel, peterz, Paul E. McKenney



On 2023/5/4 10:59, John Stultz wrote:
> On Wed, May 3, 2023 at 6:50 PM liujian (CE) <liujian56@huawei.com> wrote:
>> On 2023/5/2 11:06, John Stultz wrote:
>>> So I wanted to revive this old thread, as Frank Woo mentioned his team
>>> has seen a similar issue as well.
>>>
>>> Liujian: I'm curious if you've made any further progress with your
>>> adapted patch ontop of PeterZ's softirq_needs_break patch series?
>>>
>> Hi John,
>>     Only the commit ("softirq, timer: Use softirq_needs_break()") is
>> added to the patchset of Peter, and no other modification is made.
>>> Might it be worth re-submitting the whole series for consideration upstream?
>>>
>> I agree very much and expect, because we often encounter similar
>> problems when doing fuzzy tests (especially when the test machine is poor).
> 
> Ok. Will you submit the series + your patch to the list for review and
> consideration then?
> 
The patch [1] has been sent out. Please help review it. Thank you very much.
[1] 
https://lore.kernel.org/lkml/20230505113315.3307723-1-liujian56@huawei.com/

> Please include Frank and Rhine on CC so they can validate and provide
> Tested-by: tags if it works for them as well.
> 
> thanks
> -john

