All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
@ 2015-09-28 18:48 Yunhong Jiang
  2015-10-05 20:51 ` Yunhong Jiang
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Yunhong Jiang @ 2015-09-28 18:48 UTC (permalink / raw)
  To: tglx; +Cc: linux-kernel

Currently, when a new timer added to timer wheel for a nohz_active CPU,
the target CPU will always be waked up.

In fact, if the new added timer is after the base->next_timer, we don't
need wake up the target CPU since it will not change the sleep time. A
lazy wake up is better in such scenario.

I cooked a test scenario. On my 32 cores system, a driver on CPU 15
continuous enqueues timer to CPU 8/9/10/11 with random expire and then
checks the idle_calls difference after 10 seconds. Below data shows
that lazy wake up do reduce the wakeup a lot.

		w/o Lazy	w/ lazy
CPU 8:		135		88
CPU 9:		238		43
CPU 10:		157		83
CPU 11:		172		70

Signed-off-by: Yunhong Jiang <yunhong.jiang@linux.intel.com>
---
 kernel/time/timer.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d3f5e92f722a..a039d9e6b55a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -414,6 +414,8 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
+	bool kick_nohz = false;
+
 	/* Advance base->jiffies, if the base is empty */
 	if (!base->all_timers++)
 		base->timer_jiffies = jiffies;
@@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 	 */
 	if (!(timer->flags & TIMER_DEFERRABLE)) {
 		if (!base->active_timers++ ||
-		    time_before(timer->expires, base->next_timer))
+		    time_before(timer->expires, base->next_timer)) {
 			base->next_timer = timer->expires;
-	}
+			/*
+			 * CPU in dynticks need reevaluate the timer wheel
+			 * if newer timer added with next_timer updated.
+			 */
+			if (base->nohz_active)
+				kick_nohz = true;
+		}
+	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
+		kick_nohz = true;
 
 	/*
 	 * Check whether the other CPU is in dynticks mode and needs
@@ -441,11 +451,8 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 	 * require special care against races with idle_cpu(), lets deal
 	 * with that later.
 	 */
-	if (base->nohz_active) {
-		if (!(timer->flags & TIMER_DEFERRABLE) ||
-		    tick_nohz_full_cpu(base->cpu))
-			wake_up_nohz_cpu(base->cpu);
-	}
+	if (kick_nohz)
+		wake_up_nohz_cpu(base->cpu);
 }
 
 #ifdef CONFIG_TIMER_STATS
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-09-28 18:48 [PATCH] timer: Lazily wakup nohz CPU when adding new timer Yunhong Jiang
@ 2015-10-05 20:51 ` Yunhong Jiang
  2015-10-11 18:12 ` Thomas Gleixner
  2015-11-13 16:13 ` Frederic Weisbecker
  2 siblings, 0 replies; 12+ messages in thread
From: Yunhong Jiang @ 2015-10-05 20:51 UTC (permalink / raw)
  To: tglx; +Cc: linux-kernel

Ping for any response.

Thanks
--jyh

On Mon, Sep 28, 2015 at 11:48:16AM -0700, Yunhong Jiang wrote:
> Currently, when a new timer added to timer wheel for a nohz_active CPU,
> the target CPU will always be waked up.
> 
> In fact, if the new added timer is after the base->next_timer, we don't
> need wake up the target CPU since it will not change the sleep time. A
> lazy wake up is better in such scenario.
> 
> I cooked a test scenario. On my 32 cores system, a driver on CPU 15
> continuous enqueues timer to CPU 8/9/10/11 with random expire and then
> checks the idle_calls difference after 10 seconds. Below data shows
> that lazy wake up do reduce the wakeup a lot.
> 
> 		w/o Lazy	w/ lazy
> CPU 8:		135		88
> CPU 9:		238		43
> CPU 10:		157		83
> CPU 11:		172		70
> 
> Signed-off-by: Yunhong Jiang <yunhong.jiang@linux.intel.com>
> ---
>  kernel/time/timer.c | 21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index d3f5e92f722a..a039d9e6b55a 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -414,6 +414,8 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  
>  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  {
> +	bool kick_nohz = false;
> +
>  	/* Advance base->jiffies, if the base is empty */
>  	if (!base->all_timers++)
>  		base->timer_jiffies = jiffies;
> @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  	 */
>  	if (!(timer->flags & TIMER_DEFERRABLE)) {
>  		if (!base->active_timers++ ||
> -		    time_before(timer->expires, base->next_timer))
> +		    time_before(timer->expires, base->next_timer)) {
>  			base->next_timer = timer->expires;
> -	}
> +			/*
> +			 * CPU in dynticks need reevaluate the timer wheel
> +			 * if newer timer added with next_timer updated.
> +			 */
> +			if (base->nohz_active)
> +				kick_nohz = true;
> +		}
> +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> +		kick_nohz = true;
>  
>  	/*
>  	 * Check whether the other CPU is in dynticks mode and needs
> @@ -441,11 +451,8 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  	 * require special care against races with idle_cpu(), lets deal
>  	 * with that later.
>  	 */
> -	if (base->nohz_active) {
> -		if (!(timer->flags & TIMER_DEFERRABLE) ||
> -		    tick_nohz_full_cpu(base->cpu))
> -			wake_up_nohz_cpu(base->cpu);
> -	}
> +	if (kick_nohz)
> +		wake_up_nohz_cpu(base->cpu);
>  }
>  
>  #ifdef CONFIG_TIMER_STATS
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-09-28 18:48 [PATCH] timer: Lazily wakup nohz CPU when adding new timer Yunhong Jiang
  2015-10-05 20:51 ` Yunhong Jiang
@ 2015-10-11 18:12 ` Thomas Gleixner
  2015-10-20 22:47   ` Yunhong Jiang
  2015-11-13 16:13 ` Frederic Weisbecker
  2 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2015-10-11 18:12 UTC (permalink / raw)
  To: Yunhong Jiang; +Cc: linux-kernel

On Mon, 28 Sep 2015, Yunhong Jiang wrote:
>  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  {
> +	bool kick_nohz = false;
> +
>  	/* Advance base->jiffies, if the base is empty */
>  	if (!base->all_timers++)
>  		base->timer_jiffies = jiffies;
> @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  	 */
>  	if (!(timer->flags & TIMER_DEFERRABLE)) {
>  		if (!base->active_timers++ ||
> -		    time_before(timer->expires, base->next_timer))
> +		    time_before(timer->expires, base->next_timer)) {
>  			base->next_timer = timer->expires;
> -	}
> +			/*
> +			 * CPU in dynticks need reevaluate the timer wheel
> +			 * if newer timer added with next_timer updated.
> +			 */
> +			if (base->nohz_active)
> +				kick_nohz = true;
> +		}
> +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> +		kick_nohz = true;

Why do you want to kick the other cpu when a deferrable timer got added?
  
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-11 18:12 ` Thomas Gleixner
@ 2015-10-20 22:47   ` Yunhong Jiang
  2015-10-21 10:46     ` Viresh Kumar
  0 siblings, 1 reply; 12+ messages in thread
From: Yunhong Jiang @ 2015-10-20 22:47 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel, viresh.kumar

On Sun, Oct 11, 2015 at 08:12:39PM +0200, Thomas Gleixner wrote:
> On Mon, 28 Sep 2015, Yunhong Jiang wrote:
> >  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> >  {
> > +	bool kick_nohz = false;
> > +
> >  	/* Advance base->jiffies, if the base is empty */
> >  	if (!base->all_timers++)
> >  		base->timer_jiffies = jiffies;
> > @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> >  	 */
> >  	if (!(timer->flags & TIMER_DEFERRABLE)) {
> >  		if (!base->active_timers++ ||
> > -		    time_before(timer->expires, base->next_timer))
> > +		    time_before(timer->expires, base->next_timer)) {
> >  			base->next_timer = timer->expires;
> > -	}
> > +			/*
> > +			 * CPU in dynticks need reevaluate the timer wheel
> > +			 * if newer timer added with next_timer updated.
> > +			 */
> > +			if (base->nohz_active)
> > +				kick_nohz = true;
> > +		}
> > +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> > +		kick_nohz = true;
> 
> Why do you want to kick the other cpu when a deferrable timer got added?

This is what happens in current implementation and this patch does not 
change the logic. According to the comments, it's to avoid race with 
idle_cpu(). Frankly speaking, I didn't get the idea of the race.

Viresh, do you have any hints?

Thanks
--jyh

>   
> Thanks,
> 
> 	tglx
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-20 22:47   ` Yunhong Jiang
@ 2015-10-21 10:46     ` Viresh Kumar
  2015-10-22 21:40       ` Yunhong Jiang
  2015-10-27 15:11       ` Frederic Weisbecker
  0 siblings, 2 replies; 12+ messages in thread
From: Viresh Kumar @ 2015-10-21 10:46 UTC (permalink / raw)
  To: Yunhong Jiang, Frederic Weisbecker; +Cc: Thomas Gleixner, linux-kernel

Cc'ing Frederic.

On 20-10-15, 15:47, Yunhong Jiang wrote:
> On Sun, Oct 11, 2015 at 08:12:39PM +0200, Thomas Gleixner wrote:
> > On Mon, 28 Sep 2015, Yunhong Jiang wrote:
> > >  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > >  {
> > > +	bool kick_nohz = false;
> > > +
> > >  	/* Advance base->jiffies, if the base is empty */
> > >  	if (!base->all_timers++)
> > >  		base->timer_jiffies = jiffies;
> > > @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > >  	 */
> > >  	if (!(timer->flags & TIMER_DEFERRABLE)) {
> > >  		if (!base->active_timers++ ||
> > > -		    time_before(timer->expires, base->next_timer))
> > > +		    time_before(timer->expires, base->next_timer)) {
> > >  			base->next_timer = timer->expires;
> > > -	}
> > > +			/*
> > > +			 * CPU in dynticks need reevaluate the timer wheel
> > > +			 * if newer timer added with next_timer updated.
> > > +			 */
> > > +			if (base->nohz_active)
> > > +				kick_nohz = true;
> > > +		}
> > > +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> > > +		kick_nohz = true;
> > 
> > Why do you want to kick the other cpu when a deferrable timer got added?
> 
> This is what happens in current implementation and this patch does not 
> change the logic. According to the comments, it's to avoid race with 
> idle_cpu(). Frankly speaking, I didn't get the idea of the race.
> 
> Viresh, do you have any hints?

I haven't looked at the core since few months now and looks like I
don't remember anything :)

This thread is where we discussed it initially:
http://marc.info/?l=linux-kernel&m=139039035809125

AFAIU, this is why we kick the other CPU for a deferrable timer:
- The other CPU is a full-dynticks capable CPU and may be running
  tickless and we should serve the timer in time (even if it is
  deferrable) if the CPU isn't idle.
- We could have saved the kick for a full-dynticks idle CPU, but a
  race can happen where we thought the CPU is idle, but it has just
  started serving userspace tick-lessly. And the timer wouldn't be
  served for long time, even when the cpu was busy.

Ofcourse, Frederic will kick me if I forgot the lessons he gave me
earlier :)

-- 
viresh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-21 10:46     ` Viresh Kumar
@ 2015-10-22 21:40       ` Yunhong Jiang
  2015-10-23  2:19         ` Viresh Kumar
  2015-10-27 15:11       ` Frederic Weisbecker
  1 sibling, 1 reply; 12+ messages in thread
From: Yunhong Jiang @ 2015-10-22 21:40 UTC (permalink / raw)
  To: Viresh Kumar; +Cc: Frederic Weisbecker, Thomas Gleixner, linux-kernel

On Wed, Oct 21, 2015 at 04:16:31PM +0530, Viresh Kumar wrote:
> Cc'ing Frederic.
> 
> On 20-10-15, 15:47, Yunhong Jiang wrote:
> > On Sun, Oct 11, 2015 at 08:12:39PM +0200, Thomas Gleixner wrote:
> > > On Mon, 28 Sep 2015, Yunhong Jiang wrote:
> > > >  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > >  {
> > > > +	bool kick_nohz = false;
> > > > +
> > > >  	/* Advance base->jiffies, if the base is empty */
> > > >  	if (!base->all_timers++)
> > > >  		base->timer_jiffies = jiffies;
> > > > @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > >  	 */
> > > >  	if (!(timer->flags & TIMER_DEFERRABLE)) {
> > > >  		if (!base->active_timers++ ||
> > > > -		    time_before(timer->expires, base->next_timer))
> > > > +		    time_before(timer->expires, base->next_timer)) {
> > > >  			base->next_timer = timer->expires;
> > > > -	}
> > > > +			/*
> > > > +			 * CPU in dynticks need reevaluate the timer wheel
> > > > +			 * if newer timer added with next_timer updated.
> > > > +			 */
> > > > +			if (base->nohz_active)
> > > > +				kick_nohz = true;
> > > > +		}
> > > > +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> > > > +		kick_nohz = true;
> > > 
> > > Why do you want to kick the other cpu when a deferrable timer got added?
> > 
> > This is what happens in current implementation and this patch does not 
> > change the logic. According to the comments, it's to avoid race with 
> > idle_cpu(). Frankly speaking, I didn't get the idea of the race.
> > 
> > Viresh, do you have any hints?
> 
> I haven't looked at the core since few months now and looks like I
> don't remember anything :)
> 
> This thread is where we discussed it initially:
> http://marc.info/?l=linux-kernel&m=139039035809125
> 

Viresh, thanks for the link, it's helpful.

> AFAIU, this is why we kick the other CPU for a deferrable timer:
> - The other CPU is a full-dynticks capable CPU and may be running
>   tickless and we should serve the timer in time (even if it is
>   deferrable) if the CPU isn't idle.
> - We could have saved the kick for a full-dynticks idle CPU, but a
>   race can happen where we thought the CPU is idle, but it has just
>   started serving userspace tick-lessly. And the timer wouldn't be
>   served for long time, even when the cpu was busy.

Thanks for explaination. Frederic's reply on that thread 
(http://marc.info/?l=linux-pm&m=139048414803209&w=2) also gives clear 
information.

A naive question is, why it's sure a tick will happen when the tickless 
processor is in idle? Is it because scheduler load balance is sure to send a 
tick to the processor in future?

Thanks
--jyh

> 
> Ofcourse, Frederic will kick me if I forgot the lessons he gave me
> earlier :)
> 
> -- 
> viresh
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-22 21:40       ` Yunhong Jiang
@ 2015-10-23  2:19         ` Viresh Kumar
  2015-10-23 22:10           ` Yunhong Jiang
  0 siblings, 1 reply; 12+ messages in thread
From: Viresh Kumar @ 2015-10-23  2:19 UTC (permalink / raw)
  To: Yunhong Jiang; +Cc: Frederic Weisbecker, Thomas Gleixner, linux-kernel

On 22-10-15, 14:40, Yunhong Jiang wrote:
> A naive question is, why it's sure a tick will happen when the tickless 
> processor is in idle?

How do you get this impression? I don't think anyone has said that.

We are talking about deferrable timers, which by design are only
required if the target CPU is not-idle. If it is idle, then the timer
isn't required to be serviced until the CPU wakes up. And the CPU can
take whatever time it wants to wake up again.

> Is it because scheduler load balance is sure to send a 
> tick to the processor in future?

No. We aren't expecting the CPU to wake up any time soon. Just ignore
the deferrable timer.

-- 
viresh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-23  2:19         ` Viresh Kumar
@ 2015-10-23 22:10           ` Yunhong Jiang
  2015-10-24  3:20             ` Viresh Kumar
  0 siblings, 1 reply; 12+ messages in thread
From: Yunhong Jiang @ 2015-10-23 22:10 UTC (permalink / raw)
  To: Viresh Kumar; +Cc: Frederic Weisbecker, Thomas Gleixner, linux-kernel

On Fri, Oct 23, 2015 at 07:49:51AM +0530, Viresh Kumar wrote:
> On 22-10-15, 14:40, Yunhong Jiang wrote:
> > A naive question is, why it's sure a tick will happen when the tickless 
> > processor is in idle?
> 
> How do you get this impression? I don't think anyone has said that.

Viresh, thanks for your reply for my question.

I got this impression from Frederic's comments on 
http://marc.info/?l=linux-kernel&m=139048415303210&w=2, "So you simply rely 
on the next tick to see the new timer. This should work with 
CONFIG_NO_HZ_IDLE but not with CONFIG_NO_HZ_FULL since the target may be 
running without the tick".
Per my understanding of this comment, it means we can rely on the next tick 
for CONFIG_NO_HZ_IDLE, which means it's sure a tick will happen for 
CONFIG_NO_HZ_IDLE, am I right?

> 
> We are talking about deferrable timers, which by design are only
> required if the target CPU is not-idle. If it is idle, then the timer
> isn't required to be serviced until the CPU wakes up. And the CPU can
> take whatever time it wants to wake up again.

Hmm, per http://lxr.free-electrons.com/source/include/linux/timer.h#L51, the 
deferreable timer will be serviced when the CPU eventually wakes up "with a 
subsequent non-deferrable timer". If there is no non-deferrable timer, based 
on Frederic's comments, we in fact depends on next tick.

My confusion is, why we are sure there is next tick on CONFIG_NO_HZ_IDLE 
idle processor to wake it up. If there is no tick, and no other timer, will 
the timer get no chance to be waken up at all? I don't think "deferred for 
ever" is deferreable.

Thanks
-jyh

> 
> > Is it because scheduler load balance is sure to send a 
> > tick to the processor in future?
> 
> No. We aren't expecting the CPU to wake up any time soon. Just ignore
> the deferrable timer.
> 
> -- 
> viresh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-23 22:10           ` Yunhong Jiang
@ 2015-10-24  3:20             ` Viresh Kumar
  2015-10-26 16:26               ` Yunhong Jiang
  0 siblings, 1 reply; 12+ messages in thread
From: Viresh Kumar @ 2015-10-24  3:20 UTC (permalink / raw)
  To: Yunhong Jiang; +Cc: Frederic Weisbecker, Thomas Gleixner, linux-kernel

On 23-10-15, 15:10, Yunhong Jiang wrote:
> I got this impression from Frederic's comments on 
> http://marc.info/?l=linux-kernel&m=139048415303210&w=2, "So you simply rely 
> on the next tick to see the new timer. This should work with 
> CONFIG_NO_HZ_IDLE but not with CONFIG_NO_HZ_FULL since the target may be 
> running without the tick".
> Per my understanding of this comment, it means we can rely on the next tick 
> for CONFIG_NO_HZ_IDLE, which means it's sure a tick will happen for 
> CONFIG_NO_HZ_IDLE, am I right?

Yeah, the CPU wouldn't like in idle for ever but the time is not known
and it can be really really long.

> Hmm, per http://lxr.free-electrons.com/source/include/linux/timer.h#L51, the 
> deferreable timer will be serviced when the CPU eventually wakes up "with a 
> subsequent non-deferrable timer".

It will be an IPI mostly..

> If there is no non-deferrable timer, based 
> on Frederic's comments, we in fact depends on next tick.

So, the cpu will wake up when it receives an IPI. The first thing we
do then is to restart the tick and we will then service all the
pending deferred timers.

> My confusion is, why we are sure there is next tick on CONFIG_NO_HZ_IDLE 
> idle processor to wake it up. If there is no tick, and no other timer, will 
> the timer get no chance to be waken up at all? I don't think "deferred for 
> ever" is deferreable.

There are many kind of works we may want to do. If its really
important to be done earlier, then it should be serviced with a timer.

deferred timers are better used for activities, which are irrelevant
once the CPU is idle. One case is doing some per-cpu load tracking for
cpufreq governors or the work that vmstat does.

Even if the CPU wakes up after few hours (hypothetically), it
shouldn't matter.

-- 
viresh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-24  3:20             ` Viresh Kumar
@ 2015-10-26 16:26               ` Yunhong Jiang
  0 siblings, 0 replies; 12+ messages in thread
From: Yunhong Jiang @ 2015-10-26 16:26 UTC (permalink / raw)
  To: Viresh Kumar; +Cc: Frederic Weisbecker, Thomas Gleixner, linux-kernel

On Sat, Oct 24, 2015 at 08:50:54AM +0530, Viresh Kumar wrote:
> On 23-10-15, 15:10, Yunhong Jiang wrote:
> > I got this impression from Frederic's comments on 
> > http://marc.info/?l=linux-kernel&m=139048415303210&w=2, "So you simply rely 
> > on the next tick to see the new timer. This should work with 
> > CONFIG_NO_HZ_IDLE but not with CONFIG_NO_HZ_FULL since the target may be 
> > running without the tick".
> > Per my understanding of this comment, it means we can rely on the next tick 
> > for CONFIG_NO_HZ_IDLE, which means it's sure a tick will happen for 
> > CONFIG_NO_HZ_IDLE, am I right?
> 
> Yeah, the CPU wouldn't like in idle for ever but the time is not known
> and it can be really really long.
> 
> > Hmm, per http://lxr.free-electrons.com/source/include/linux/timer.h#L51, the 
> > deferreable timer will be serviced when the CPU eventually wakes up "with a 
> > subsequent non-deferrable timer".
> 
> It will be an IPI mostly..
> 
> > If there is no non-deferrable timer, based 
> > on Frederic's comments, we in fact depends on next tick.
> 
> So, the cpu will wake up when it receives an IPI. The first thing we
> do then is to restart the tick and we will then service all the
> pending deferred timers.
> 
> > My confusion is, why we are sure there is next tick on CONFIG_NO_HZ_IDLE 
> > idle processor to wake it up. If there is no tick, and no other timer, will 
> > the timer get no chance to be waken up at all? I don't think "deferred for 
> > ever" is deferreable.
> 
> There are many kind of works we may want to do. If its really
> important to be done earlier, then it should be serviced with a timer.
> 
> deferred timers are better used for activities, which are irrelevant
> once the CPU is idle. One case is doing some per-cpu load tracking for
> cpufreq governors or the work that vmstat does.
> 
> Even if the CPU wakes up after few hours (hypothetically), it
> shouldn't matter.

Viresh, thanks for the clarification.

So seems the original patch is correct to wakeup the full dyntick CPU even 
for deferred timer. Thomas/Fred, your idea?

Thanks
--jyh
> 
> -- 
> viresh
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-10-21 10:46     ` Viresh Kumar
  2015-10-22 21:40       ` Yunhong Jiang
@ 2015-10-27 15:11       ` Frederic Weisbecker
  1 sibling, 0 replies; 12+ messages in thread
From: Frederic Weisbecker @ 2015-10-27 15:11 UTC (permalink / raw)
  To: Viresh Kumar; +Cc: Yunhong Jiang, Thomas Gleixner, linux-kernel

On Wed, Oct 21, 2015 at 04:16:31PM +0530, Viresh Kumar wrote:
> Cc'ing Frederic.
> 
> On 20-10-15, 15:47, Yunhong Jiang wrote:
> > On Sun, Oct 11, 2015 at 08:12:39PM +0200, Thomas Gleixner wrote:
> > > On Mon, 28 Sep 2015, Yunhong Jiang wrote:
> > > >  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > >  {
> > > > +	bool kick_nohz = false;
> > > > +
> > > >  	/* Advance base->jiffies, if the base is empty */
> > > >  	if (!base->all_timers++)
> > > >  		base->timer_jiffies = jiffies;
> > > > @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > >  	 */
> > > >  	if (!(timer->flags & TIMER_DEFERRABLE)) {
> > > >  		if (!base->active_timers++ ||
> > > > -		    time_before(timer->expires, base->next_timer))
> > > > +		    time_before(timer->expires, base->next_timer)) {
> > > >  			base->next_timer = timer->expires;
> > > > -	}
> > > > +			/*
> > > > +			 * CPU in dynticks need reevaluate the timer wheel
> > > > +			 * if newer timer added with next_timer updated.
> > > > +			 */
> > > > +			if (base->nohz_active)
> > > > +				kick_nohz = true;
> > > > +		}
> > > > +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> > > > +		kick_nohz = true;
> > > 
> > > Why do you want to kick the other cpu when a deferrable timer got added?
> > 
> > This is what happens in current implementation and this patch does not 
> > change the logic. According to the comments, it's to avoid race with 
> > idle_cpu(). Frankly speaking, I didn't get the idea of the race.
> > 
> > Viresh, do you have any hints?
> 
> I haven't looked at the core since few months now and looks like I
> don't remember anything :)
> 
> This thread is where we discussed it initially:
> http://marc.info/?l=linux-kernel&m=139039035809125
> 
> AFAIU, this is why we kick the other CPU for a deferrable timer:
> - The other CPU is a full-dynticks capable CPU and may be running
>   tickless and we should serve the timer in time (even if it is
>   deferrable) if the CPU isn't idle.
> - We could have saved the kick for a full-dynticks idle CPU, but a
>   race can happen where we thought the CPU is idle, but it has just
>   started serving userspace tick-lessly. And the timer wouldn't be
>   served for long time, even when the cpu was busy.

Yeah, deferrable implies "idle deferrable", not "user deferrable". So if
the CPU is tickless in userland, we want to kick it to handle the
deferrable timer.

This is further optimizable by making sure that we don't kick nohz full
CPUs when they are idle as well.

That said the handling of deferrable timers is buggy in full dynticks because
get_next_timer_interrupt() ignores deferrable timers always. I should
pass an "ignore_deferrable" parameter for non-idle dynticks but I'm afraid
this will uncover timers that people don't want to see. But we'll have to
do it eventually and maybe audit which of these deferrable timers also implies
deferrable in userspace.

> 
> Ofcourse, Frederic will kick me if I forgot the lessons he gave me
> earlier :)

Oh I know too much how easy it is to forget what $CODE_I_REVIEWED_X_MONTH_AGO
actually does ;-)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] timer: Lazily wakup nohz CPU when adding new timer.
  2015-09-28 18:48 [PATCH] timer: Lazily wakup nohz CPU when adding new timer Yunhong Jiang
  2015-10-05 20:51 ` Yunhong Jiang
  2015-10-11 18:12 ` Thomas Gleixner
@ 2015-11-13 16:13 ` Frederic Weisbecker
  2 siblings, 0 replies; 12+ messages in thread
From: Frederic Weisbecker @ 2015-11-13 16:13 UTC (permalink / raw)
  To: Yunhong Jiang; +Cc: tglx, linux-kernel

On Mon, Sep 28, 2015 at 11:48:16AM -0700, Yunhong Jiang wrote:
> Currently, when a new timer added to timer wheel for a nohz_active CPU,
> the target CPU will always be waked up.
> 
> In fact, if the new added timer is after the base->next_timer, we don't
> need wake up the target CPU since it will not change the sleep time. A
> lazy wake up is better in such scenario.
> 
> I cooked a test scenario. On my 32 cores system, a driver on CPU 15
> continuous enqueues timer to CPU 8/9/10/11 with random expire and then
> checks the idle_calls difference after 10 seconds. Below data shows
> that lazy wake up do reduce the wakeup a lot.
> 
> 		w/o Lazy	w/ lazy
> CPU 8:		135		88
> CPU 9:		238		43
> CPU 10:		157		83
> CPU 11:		172		70
> 
> Signed-off-by: Yunhong Jiang <yunhong.jiang@linux.intel.com>
> ---
>  kernel/time/timer.c | 21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index d3f5e92f722a..a039d9e6b55a 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -414,6 +414,8 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  
>  static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  {
> +	bool kick_nohz = false;
> +
>  	/* Advance base->jiffies, if the base is empty */
>  	if (!base->all_timers++)
>  		base->timer_jiffies = jiffies;
> @@ -424,9 +426,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  	 */
>  	if (!(timer->flags & TIMER_DEFERRABLE)) {
>  		if (!base->active_timers++ ||
> -		    time_before(timer->expires, base->next_timer))
> +		    time_before(timer->expires, base->next_timer)) {
>  			base->next_timer = timer->expires;
> -	}
> +			/*
> +			 * CPU in dynticks need reevaluate the timer wheel
> +			 * if newer timer added with next_timer updated.
> +			 */
> +			if (base->nohz_active)
> +				kick_nohz = true;
> +		}
> +	} else if (base->nohz_active && tick_nohz_full_cpu(base->cpu))
> +		kick_nohz = true;
>  
>  	/*
>  	 * Check whether the other CPU is in dynticks mode and needs
> @@ -441,11 +451,8 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  	 * require special care against races with idle_cpu(), lets deal
>  	 * with that later.
>  	 */
> -	if (base->nohz_active) {
> -		if (!(timer->flags & TIMER_DEFERRABLE) ||
> -		    tick_nohz_full_cpu(base->cpu))
> -			wake_up_nohz_cpu(base->cpu);
> -	}
> +	if (kick_nohz)
> +		wake_up_nohz_cpu(base->cpu);
>  }


This patch makes sense. Thomas?

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-11-13 16:13 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-28 18:48 [PATCH] timer: Lazily wakup nohz CPU when adding new timer Yunhong Jiang
2015-10-05 20:51 ` Yunhong Jiang
2015-10-11 18:12 ` Thomas Gleixner
2015-10-20 22:47   ` Yunhong Jiang
2015-10-21 10:46     ` Viresh Kumar
2015-10-22 21:40       ` Yunhong Jiang
2015-10-23  2:19         ` Viresh Kumar
2015-10-23 22:10           ` Yunhong Jiang
2015-10-24  3:20             ` Viresh Kumar
2015-10-26 16:26               ` Yunhong Jiang
2015-10-27 15:11       ` Frederic Weisbecker
2015-11-13 16:13 ` Frederic Weisbecker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.