linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/core: Cache timer busy housekeeping target
@ 2019-07-01 12:24 Wanpeng Li
  2019-07-08  0:44 ` Wanpeng Li
  2019-07-09  1:39 ` Frederic Weisbecker
  0 siblings, 2 replies; 4+ messages in thread
From: Wanpeng Li @ 2019-07-01 12:24 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Peter Zijlstra, Ingo Molnar, Frederic Weisbecker,
	Thomas Gleixner

From: Wanpeng Li <wanpengli@tencent.com>

Cache the busy housekeeping target for timer instead of researching each time.
This patch reduces the total time of get_nohz_timer_target() for busy housekeeping 
CPU from 2u~3us to less than 1us which can be observed by ftrace.

Cc: Ingo Molnar <mingo@redhat.com> 
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 include/linux/hrtimer.h    | 1 +
 include/linux/sched/nohz.h | 2 +-
 kernel/sched/core.c        | 6 +++++-
 kernel/time/hrtimer.c      | 6 ++++--
 kernel/time/timer.c        | 7 +++++--
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2e8957e..0d8b271 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -198,6 +198,7 @@ enum  hrtimer_base_type {
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
 	unsigned int			cpu;
+	unsigned int			last_target_cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
 	unsigned int			hres_active		: 1,
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91f..0afb094 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -8,7 +8,7 @@
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void nohz_balance_enter_idle(int cpu);
-extern int get_nohz_timer_target(void);
+extern int get_nohz_timer_target(unsigned int cpu);
 #else
 static inline void nohz_balance_enter_idle(int cpu) { }
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7968e0f..f4ba63e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -549,11 +549,15 @@ void resched_cpu(int cpu)
  * selecting an idle CPU will add more delays to the timers than intended
  * (as that CPU's timer base may not be uptodate wrt jiffies etc).
  */
-int get_nohz_timer_target(void)
+int get_nohz_timer_target(unsigned int last_target_cpu)
 {
 	int i, cpu = smp_processor_id(), default_cpu = -1;
 	struct sched_domain *sd;
 
+	if (!idle_cpu(last_target_cpu) &&
+		housekeeping_cpu(last_target_cpu, HK_FLAG_TIMER))
+		return last_target_cpu;
+
 	if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
 		if (!idle_cpu(cpu))
 			return cpu;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff2..0d49bef 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
 					 int pinned)
 {
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	if (static_branch_likely(&timers_migration_enabled) && !pinned)
-		return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+	if (static_branch_likely(&timers_migration_enabled) && !pinned) {
+		base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
+		return &per_cpu(hrtimer_bases, base->last_target_cpu);
+	}
 #endif
 	return base;
 }
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba..6ae045a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -199,6 +199,7 @@ struct timer_base {
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
+	unsigned int		last_target_cpu;
 	bool			is_idle;
 	bool			must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
@@ -865,8 +866,10 @@ get_target_base(struct timer_base *base, unsigned tflags)
 {
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 	if (static_branch_likely(&timers_migration_enabled) &&
-	    !(tflags & TIMER_PINNED))
-		return get_timer_cpu_base(tflags, get_nohz_timer_target());
+	    !(tflags & TIMER_PINNED)) {
+		base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
+		return get_timer_cpu_base(tflags, base->last_target_cpu);
+	}
 #endif
 	return get_timer_this_cpu_base(tflags);
 }
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] sched/core: Cache timer busy housekeeping target
  2019-07-01 12:24 [PATCH] sched/core: Cache timer busy housekeeping target Wanpeng Li
@ 2019-07-08  0:44 ` Wanpeng Li
  2019-07-09  1:39 ` Frederic Weisbecker
  1 sibling, 0 replies; 4+ messages in thread
From: Wanpeng Li @ 2019-07-08  0:44 UTC (permalink / raw)
  To: LKML
  Cc: Ingo Molnar, Peter Zijlstra, Ingo Molnar, Frederic Weisbecker,
	Thomas Gleixner

Ping Frederic, Peterz, any comments?
On Mon, 1 Jul 2019 at 20:24, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> From: Wanpeng Li <wanpengli@tencent.com>
>
> Cache the busy housekeeping target for timer instead of researching each time.
> This patch reduces the total time of get_nohz_timer_target() for busy housekeeping
> CPU from 2u~3us to less than 1us which can be observed by ftrace.
>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  include/linux/hrtimer.h    | 1 +
>  include/linux/sched/nohz.h | 2 +-
>  kernel/sched/core.c        | 6 +++++-
>  kernel/time/hrtimer.c      | 6 ++++--
>  kernel/time/timer.c        | 7 +++++--
>  5 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
> index 2e8957e..0d8b271 100644
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -198,6 +198,7 @@ enum  hrtimer_base_type {
>  struct hrtimer_cpu_base {
>         raw_spinlock_t                  lock;
>         unsigned int                    cpu;
> +       unsigned int                    last_target_cpu;
>         unsigned int                    active_bases;
>         unsigned int                    clock_was_set_seq;
>         unsigned int                    hres_active             : 1,
> diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
> index 1abe91f..0afb094 100644
> --- a/include/linux/sched/nohz.h
> +++ b/include/linux/sched/nohz.h
> @@ -8,7 +8,7 @@
>
>  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
>  extern void nohz_balance_enter_idle(int cpu);
> -extern int get_nohz_timer_target(void);
> +extern int get_nohz_timer_target(unsigned int cpu);
>  #else
>  static inline void nohz_balance_enter_idle(int cpu) { }
>  #endif
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7968e0f..f4ba63e 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -549,11 +549,15 @@ void resched_cpu(int cpu)
>   * selecting an idle CPU will add more delays to the timers than intended
>   * (as that CPU's timer base may not be uptodate wrt jiffies etc).
>   */
> -int get_nohz_timer_target(void)
> +int get_nohz_timer_target(unsigned int last_target_cpu)
>  {
>         int i, cpu = smp_processor_id(), default_cpu = -1;
>         struct sched_domain *sd;
>
> +       if (!idle_cpu(last_target_cpu) &&
> +               housekeeping_cpu(last_target_cpu, HK_FLAG_TIMER))
> +               return last_target_cpu;
> +
>         if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
>                 if (!idle_cpu(cpu))
>                         return cpu;
> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> index 41dfff2..0d49bef 100644
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
>                                          int pinned)
>  {
>  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> -       if (static_branch_likely(&timers_migration_enabled) && !pinned)
> -               return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> +       if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> +               base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> +               return &per_cpu(hrtimer_bases, base->last_target_cpu);
> +       }
>  #endif
>         return base;
>  }
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index 343c7ba..6ae045a 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -199,6 +199,7 @@ struct timer_base {
>         unsigned long           clk;
>         unsigned long           next_expiry;
>         unsigned int            cpu;
> +       unsigned int            last_target_cpu;
>         bool                    is_idle;
>         bool                    must_forward_clk;
>         DECLARE_BITMAP(pending_map, WHEEL_SIZE);
> @@ -865,8 +866,10 @@ get_target_base(struct timer_base *base, unsigned tflags)
>  {
>  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
>         if (static_branch_likely(&timers_migration_enabled) &&
> -           !(tflags & TIMER_PINNED))
> -               return get_timer_cpu_base(tflags, get_nohz_timer_target());
> +           !(tflags & TIMER_PINNED)) {
> +               base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> +               return get_timer_cpu_base(tflags, base->last_target_cpu);
> +       }
>  #endif
>         return get_timer_this_cpu_base(tflags);
>  }
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] sched/core: Cache timer busy housekeeping target
  2019-07-01 12:24 [PATCH] sched/core: Cache timer busy housekeeping target Wanpeng Li
  2019-07-08  0:44 ` Wanpeng Li
@ 2019-07-09  1:39 ` Frederic Weisbecker
  2019-07-09  2:17   ` Wanpeng Li
  1 sibling, 1 reply; 4+ messages in thread
From: Frederic Weisbecker @ 2019-07-09  1:39 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Ingo Molnar, Thomas Gleixner

On Mon, Jul 01, 2019 at 08:24:37PM +0800, Wanpeng Li wrote:
> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> index 41dfff2..0d49bef 100644
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
>  					 int pinned)
>  {
>  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> -	if (static_branch_likely(&timers_migration_enabled) && !pinned)
> -		return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> +	if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> +		base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> +		return &per_cpu(hrtimer_bases, base->last_target_cpu);


I'm not sure this is exactly what we intend to cache here.

This doesn't return the last CPU for a given timer
(that would be timer->flags & TIMER_CPUMASK) but the last CPU that
was returned when "base" was passed.

First of all, it's always initialized to CPU 0, which is perhaps
not exactly what we want.

Also the result can be very stale and awkward. If for some reason we have:

        base(CPU 5)->last_target_cpu = 255

then later a timer is enqueued to CPU 5, the next time we re-enqueue that
timer will be to CPU 255, then the second re-enqueue will be to whatever
value we have in base(CPU 255)->last_target_cpu, etc...

For example imagine that:

	base(CPU 255)->last_target_cpu = 5

the timer will bounce between those two very distant CPU 5 and 255. So I think
you rather want "timer->flags & TIMER_CPUMASK". But note that those flags
can also be initialized to zero and therefore CPU 0, while we actually want
the CPU of the timer enqueuer for a first use. And I can't think of a
simple solution to solve that :-(  Perhaps keeping the enqueuer CPU as the
first choice (as we do upstream) is still the best thing we have.

Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] sched/core: Cache timer busy housekeeping target
  2019-07-09  1:39 ` Frederic Weisbecker
@ 2019-07-09  2:17   ` Wanpeng Li
  0 siblings, 0 replies; 4+ messages in thread
From: Wanpeng Li @ 2019-07-09  2:17 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Ingo Molnar, Peter Zijlstra, Ingo Molnar, Thomas Gleixner

On Tue, 9 Jul 2019 at 09:39, Frederic Weisbecker <frederic@kernel.org> wrote:
>
> On Mon, Jul 01, 2019 at 08:24:37PM +0800, Wanpeng Li wrote:
> > diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> > index 41dfff2..0d49bef 100644
> > --- a/kernel/time/hrtimer.c
> > +++ b/kernel/time/hrtimer.c
> > @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
> >                                        int pinned)
> >  {
> >  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> > -     if (static_branch_likely(&timers_migration_enabled) && !pinned)
> > -             return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> > +     if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> > +             base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> > +             return &per_cpu(hrtimer_bases, base->last_target_cpu);
>
>
> I'm not sure this is exactly what we intend to cache here.
>
> This doesn't return the last CPU for a given timer
> (that would be timer->flags & TIMER_CPUMASK) but the last CPU that
> was returned when "base" was passed.
>
> First of all, it's always initialized to CPU 0, which is perhaps
> not exactly what we want.
>
> Also the result can be very stale and awkward. If for some reason we have:
>
>         base(CPU 5)->last_target_cpu = 255
>
> then later a timer is enqueued to CPU 5, the next time we re-enqueue that
> timer will be to CPU 255, then the second re-enqueue will be to whatever
> value we have in base(CPU 255)->last_target_cpu, etc...
>
> For example imagine that:
>
>         base(CPU 255)->last_target_cpu = 5
>
> the timer will bounce between those two very distant CPU 5 and 255. So I think
> you rather want "timer->flags & TIMER_CPUMASK". But note that those flags
> can also be initialized to zero and therefore CPU 0, while we actually want
> the CPU of the timer enqueuer for a first use. And I can't think of a
> simple solution to solve that :-(  Perhaps keeping the enqueuer CPU as the
> first choice (as we do upstream) is still the best thing we have.

Got it, thanks for pointing out this.

Wanpeng

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-07-09  2:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-01 12:24 [PATCH] sched/core: Cache timer busy housekeeping target Wanpeng Li
2019-07-08  0:44 ` Wanpeng Li
2019-07-09  1:39 ` Frederic Weisbecker
2019-07-09  2:17   ` Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).