linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: tip-bot for Thomas Gleixner <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: josh@joshtriplett.org, arjan@infradead.org,
	paulmck@linux.vnet.ibm.com, tglx@linutronix.de, riel@redhat.com,
	linux@sciencehorizons.net, torvalds@linux-foundation.org,
	lenb@kernel.org, hpa@zytor.com, fweisbec@gmail.com,
	mingo@kernel.org, clm@fb.com, linux-kernel@vger.kernel.org,
	peterz@infradead.org, edumazet@google.com
Subject: [tip:timers/core] timers: Forward the wheel clock whenever possible
Date: Thu, 7 Jul 2016 01:48:01 -0700	[thread overview]
Message-ID: <tip-a683f390b93f4d1292f849fc48d28e322046120f@git.kernel.org> (raw)
In-Reply-To: <20160704094342.512039360@linutronix.de>

Commit-ID:  a683f390b93f4d1292f849fc48d28e322046120f
Gitweb:     http://git.kernel.org/tip/a683f390b93f4d1292f849fc48d28e322046120f
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 4 Jul 2016 09:50:36 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 7 Jul 2016 10:35:11 +0200

timers: Forward the wheel clock whenever possible

The wheel clock is stale when a CPU goes into a long idle sleep. This has the
side effect that timers which are queued end up in the outer wheel levels.
That results in coarser granularity.

To solve this, we keep track of the idle state and forward the wheel clock
whenever possible.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Chris Mason <clm@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: George Spelvin <linux@sciencehorizons.net>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20160704094342.512039360@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-internal.h |   1 +
 kernel/time/tick-sched.c    |  12 +++++
 kernel/time/timer.c         | 128 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 120 insertions(+), 21 deletions(-)

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 966a5a6..f738251 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -164,3 +164,4 @@ static inline void timers_update_migration(bool update_nohz) { }
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+void timer_clear_idle(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 69abc7b..5d81f9a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -700,6 +700,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
 		tick.tv64 = 0;
+
+		/*
+		 * Tell the timer code that the base is not idle, i.e. undo
+		 * the effect of get_next_timer_interrupt():
+		 */
+		timer_clear_idle();
 		/*
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
@@ -809,6 +815,12 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	tick_do_update_jiffies64(now);
 	cpu_load_update_nohz_stop();
 
+	/*
+	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
+	 * the clock forward checks in the enqueue path:
+	 */
+	timer_clear_idle();
+
 	calc_load_exit_idle();
 	touch_softlockup_watchdog_sched();
 	/*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 658051c..9339d71 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,9 +196,11 @@ struct timer_base {
 	spinlock_t		lock;
 	struct timer_list	*running_timer;
 	unsigned long		clk;
+	unsigned long		next_expiry;
 	unsigned int		cpu;
 	bool			migration_enabled;
 	bool			nohz_active;
+	bool			is_idle;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -519,24 +521,37 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 {
 	__internal_add_timer(base, timer);
 
+	if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+		return;
+
 	/*
-	 * Check whether the other CPU is in dynticks mode and needs
-	 * to be triggered to reevaluate the timer wheel.  We are
-	 * protected against the other CPU fiddling with the timer by
-	 * holding the timer base lock. This also makes sure that a
-	 * CPU on the way to stop its tick can not evaluate the timer
-	 * wheel.
-	 *
-	 * Spare the IPI for deferrable timers on idle targets though.
-	 * The next busy ticks will take care of it. Except full dynticks
-	 * require special care against races with idle_cpu(), lets deal
-	 * with that later.
+	 * TODO: This wants some optimizing similar to the code below, but we
+	 * will do that when we switch from push to pull for deferrable timers.
 	 */
-	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) {
-		if (!(timer->flags & TIMER_DEFERRABLE) ||
-		    tick_nohz_full_cpu(base->cpu))
+	if (timer->flags & TIMER_DEFERRABLE) {
+		if (tick_nohz_full_cpu(base->cpu))
 			wake_up_nohz_cpu(base->cpu);
+		return;
 	}
+
+	/*
+	 * We might have to IPI the remote CPU if the base is idle and the
+	 * timer is not deferrable. If the other CPU is on the way to idle
+	 * then it can't set base->is_idle as we hold the base lock:
+	 */
+	if (!base->is_idle)
+		return;
+
+	/* Check whether this is the new first expiring timer: */
+	if (time_after_eq(timer->expires, base->next_expiry))
+		return;
+
+	/*
+	 * Set the next expiry time and kick the CPU so it can reevaluate the
+	 * wheel:
+	 */
+	base->next_expiry = timer->expires;
+	wake_up_nohz_cpu(base->cpu);
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -844,10 +859,11 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
 }
 
-static inline struct timer_base *get_target_base(struct timer_base *base,
-						 unsigned tflags)
+#ifdef CONFIG_NO_HZ_COMMON
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
 {
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	if ((tflags & TIMER_PINNED) || !base->migration_enabled)
 		return get_timer_this_cpu_base(tflags);
 	return get_timer_cpu_base(tflags, get_nohz_timer_target());
@@ -856,6 +872,43 @@ static inline struct timer_base *get_target_base(struct timer_base *base,
 #endif
 }
 
+static inline void forward_timer_base(struct timer_base *base)
+{
+	/*
+	 * We only forward the base when it's idle and we have a delta between
+	 * base clock and jiffies.
+	 */
+	if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+		return;
+
+	/*
+	 * If the next expiry value is > jiffies, then we fast forward to
+	 * jiffies otherwise we forward to the next expiry value.
+	 */
+	if (time_after(base->next_expiry, jiffies))
+		base->clk = jiffies;
+	else
+		base->clk = base->next_expiry;
+}
+#else
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
+{
+	return get_timer_this_cpu_base(tflags);
+}
+
+static inline void forward_timer_base(struct timer_base *base) { }
+#endif
+
+static inline struct timer_base *
+get_target_base(struct timer_base *base, unsigned tflags)
+{
+	struct timer_base *target = __get_target_base(base, tflags);
+
+	forward_timer_base(target);
+	return target;
+}
+
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
  * that all timers which are tied to this base are locked, and the base itself
@@ -1417,16 +1470,49 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 
 	spin_lock(&base->lock);
 	nextevt = __next_timer_interrupt(base);
-	spin_unlock(&base->lock);
+	base->next_expiry = nextevt;
+	/*
+	 * We have a fresh next event. Check whether we can forward the base:
+	 */
+	if (time_after(nextevt, jiffies))
+		base->clk = jiffies;
+	else if (time_after(nextevt, base->clk))
+		base->clk = nextevt;
 
-	if (time_before_eq(nextevt, basej))
+	if (time_before_eq(nextevt, basej)) {
 		expires = basem;
-	else
+		base->is_idle = false;
+	} else {
 		expires = basem + (nextevt - basej) * TICK_NSEC;
+		/*
+		 * If we expect to sleep more than a tick, mark the base idle:
+		 */
+		if ((expires - basem) > TICK_NSEC)
+			base->is_idle = true;
+	}
+	spin_unlock(&base->lock);
 
 	return cmp_next_hrtimer_event(basem, expires);
 }
 
+/**
+ * timer_clear_idle - Clear the idle state of the timer base
+ *
+ * Called with interrupts disabled
+ */
+void timer_clear_idle(void)
+{
+	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
+	/*
+	 * We do this unlocked. The worst outcome is a remote enqueue sending
+	 * a pointless IPI, but taking the lock would just make the window for
+	 * sending the IPI a few instructions smaller for the cost of taking
+	 * the lock in the exit from idle path.
+	 */
+	base->is_idle = false;
+}
+
 static int collect_expired_timers(struct timer_base *base,
 				  struct hlist_head *heads)
 {
@@ -1440,7 +1526,7 @@ static int collect_expired_timers(struct timer_base *base,
 
 		/*
 		 * If the next timer is ahead of time forward to current
-		 * jiffies, otherwise forward to the next expiry time.
+		 * jiffies, otherwise forward to the next expiry time:
 		 */
 		if (time_after(next, jiffies)) {
 			/* The call site will increment clock! */

  reply	other threads:[~2016-07-07  9:30 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-04  9:50 [patch 4 00/22] timer: Refactor the timer wheel Thomas Gleixner
2016-07-04  9:50 ` [patch 4 01/22] timer: Make pinned a timer property Thomas Gleixner
2016-07-07  8:39   ` [tip:timers/core] timers: Make 'pinned' " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 02/22] x86/apic/uv: Initialize timer as pinned Thomas Gleixner
2016-07-07  8:40   ` [tip:timers/core] timers, x86/apic/uv: Initialize the UV heartbeat " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 03/22] x86/mce: Initialize " Thomas Gleixner
2016-07-07  8:40   ` [tip:timers/core] timers, x86/mce: Initialize MCE restart " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 04/22] cpufreq/powernv: Initialize " Thomas Gleixner
2016-07-07  8:41   ` [tip:timers/core] timers, cpufreq/powernv: Initialize the gpstate " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 05/22] driver/net/ethernet/tile: Initialize " Thomas Gleixner
2016-07-07  8:41   ` [tip:timers/core] timers, driver/net/ethernet/tile: Initialize the egress " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 06/22] drivers/tty/metag_da: Initialize " Thomas Gleixner
2016-07-07  8:42   ` [tip:timers/core] timers, drivers/tty/metag_da: Initialize the poll " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 07/22] drivers/tty/mips_ejtag: Initialize " Thomas Gleixner
2016-07-07  8:42   ` [tip:timers/core] timers, drivers/tty/mips_ejtag: Initialize the poll " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 08/22] net/ipv4/inet: Initialize timers " Thomas Gleixner
2016-07-07  8:43   ` [tip:timers/core] timers, net/ipv4/inet: Initialize connection request " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 09/22] timer: Remove mod_timer_pinned Thomas Gleixner
2016-07-07  8:43   ` [tip:timers/core] timers: Remove the deprecated mod_timer_pinned() API tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 10/22] signal: Use hrtimer for sigtimedwait Thomas Gleixner
2016-07-07  8:43   ` [tip:timers/core] signals: Use hrtimer for sigtimedwait() tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 11/22] hlist: Add hlist_is_singular_node() helper Thomas Gleixner
2016-07-07  8:44   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 12/22] timer: Give a few structs and members proper names Thomas Gleixner
2016-07-07  8:44   ` [tip:timers/core] timers: " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 13/22] timer: Reduce the CPU index space to 256k Thomas Gleixner
2016-07-07  8:45   ` [tip:timers/core] timers: " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 14/22] timer: Switch to a non cascading wheel Thomas Gleixner
2016-07-07  8:45   ` [tip:timers/core] timers: Switch to a non-cascading wheel tip-bot for Thomas Gleixner
2016-08-11 15:21   ` [patch 4 14/22] timer: Switch to a non cascading wheel Jouni Malinen
2016-08-11 20:25     ` [PREEMPT-RT] " rcochran
2016-08-13  9:12       ` Jouni Malinen
2016-08-16  9:46         ` Richard Cochran
2016-08-16 14:35           ` Eric Dumazet
2016-08-17  9:05           ` Jouni Malinen
2016-08-17  9:23             ` rcochran
2016-08-12 17:50     ` Rik van Riel
2016-08-12 19:14       ` Paul E. McKenney
2016-08-16  8:55         ` Richard Cochran
2016-08-16  7:57       ` Richard Cochran
2016-07-04  9:50 ` [patch 4 15/22] timer: Remove slack leftovers Thomas Gleixner
2016-07-07  8:46   ` [tip:timers/core] timers: Remove set_timer_slack() leftovers tip-bot for Thomas Gleixner
2016-07-22 11:31   ` [patch 4 15/22] timer: Remove slack leftovers Jason A. Donenfeld
2016-07-22 13:04     ` Thomas Gleixner
2016-07-22 15:18       ` Jason A. Donenfeld
2016-07-22 22:54         ` Jason A. Donenfeld
2016-07-04  9:50 ` [patch 4 16/22] timer: Move __run_timers() function Thomas Gleixner
2016-07-07  8:46   ` [tip:timers/core] timers: " tip-bot for Anna-Maria Gleixner
2016-07-04  9:50 ` [patch 4 17/22] timer: Optimize collect timers for NOHZ Thomas Gleixner
2016-07-07  8:47   ` [tip:timers/core] timers: Optimize collect_expired_timers() " tip-bot for Anna-Maria Gleixner
2016-07-04  9:50 ` [patch 4 18/22] tick/sched: Remove pointless empty function Thomas Gleixner
2016-07-07  8:47   ` [tip:timers/core] timers/nohz: Remove pointless tick_nohz_kick_tick() function tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 19/22] timer: Forward wheel clock whenever possible Thomas Gleixner
2016-07-07  8:48   ` tip-bot for Thomas Gleixner [this message]
2016-07-04  9:50 ` [patch 4 20/22] timer: Only wake softirq if necessary Thomas Gleixner
2016-07-07  8:48   ` [tip:timers/core] timers: " tip-bot for Thomas Gleixner
2016-07-04  9:50 ` [patch 4 21/22] timer: Split out index calculation Thomas Gleixner
2016-07-07  8:48   ` [tip:timers/core] timers: " tip-bot for Anna-Maria Gleixner
2016-07-04  9:50 ` [patch 4 22/22] timer: Optimization for same expiry time in mod_timer() Thomas Gleixner
2016-07-07  8:49   ` [tip:timers/core] timers: Implement optimization " tip-bot for Anna-Maria Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-a683f390b93f4d1292f849fc48d28e322046120f@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=arjan@infradead.org \
    --cc=clm@fb.com \
    --cc=edumazet@google.com \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=josh@joshtriplett.org \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=linux@sciencehorizons.net \
    --cc=mingo@kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).