All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	John Stultz <john.stultz@linaro.org>,
	Eric Dumazet <edumazet@google.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>,
	linux-pm@vger.kernel.org, Arjan van de Ven <arjan@infradead.org>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Rik van Riel <riel@redhat.com>,
	Anna-Maria Behnsen <anna-maria@linutronix.de>,
	Richard Cochran <richardcochran@gmail.com>
Subject: [PATCH v3 07/17] timer: Retrieve next expiry of pinned/non-pinned timers seperately
Date: Tue, 25 Oct 2022 15:58:40 +0200	[thread overview]
Message-ID: <20221025135850.51044-8-anna-maria@linutronix.de> (raw)
In-Reply-To: <20221025135850.51044-1-anna-maria@linutronix.de>

For the conversion of the NOHZ timer placement to a pull at expiry time
model it's required to have seperate expiry times for the pinned and the
non-pinned (movable) timers. Therefore struct timer_events is introduced.

No functional change

Originally-by: Richard Cochran (linutronix GmbH) <richardcochran@gmail.com>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 kernel/time/tick-internal.h |  8 ++++-
 kernel/time/tick-sched.c    | 20 ++++++++----
 kernel/time/timer.c         | 65 ++++++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 649f2b48e8f0..fcb2d45c2934 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -8,6 +8,11 @@
 #include "timekeeping.h"
 #include "tick-sched.h"
 
+struct timer_events {
+	u64	local;
+	u64	global;
+};
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 # define TICK_DO_TIMER_NONE	-1
@@ -163,7 +168,8 @@ static inline void timers_update_nohz(void) { }
 
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
-extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+extern void get_next_timer_interrupt(unsigned long basej, u64 basem,
+				     struct timer_events *tevt);
 void timer_clear_idle(void);
 
 #define CLOCK_SET_WALL							\
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7ffdc7ba19b4..78f172d1f3d2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -784,7 +784,8 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, delta, expires;
+	struct timer_events tevt = { .local = KTIME_MAX, .global = KTIME_MAX };
+	u64 basemono, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -809,7 +810,11 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 */
 	if (rcu_needs_cpu() || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
-		next_tick = basemono + TICK_NSEC;
+		/*
+		 * If anyone needs the CPU, treat this as a local
+		 * timer expiring in a jiffy.
+		 */
+		tevt.local = basemono + TICK_NSEC;
 	} else {
 		/*
 		 * Get the next pending timer. If high resolution
@@ -818,17 +823,18 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tick = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tick;
+		get_next_timer_interrupt(basejiff, basemono, &tevt);
+		tevt.local = min_t(u64, tevt.local, tevt.global);
+		ts->next_timer = tevt.local;
 	}
 
 	/*
 	 * If the tick is due in the next period, keep it ticking or
 	 * force prod the timer.
 	 */
-	WARN_ON_ONCE(basemono > next_tick);
+	WARN_ON_ONCE(basemono > tevt.local);
 
-	delta = next_tick - basemono;
+	delta = tevt.local - basemono;
 	if (delta <= (u64)TICK_NSEC) {
 		/*
 		 * Tell the timer code that the base is not idle, i.e. undo
@@ -861,7 +867,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	else
 		expires = KTIME_MAX;
 
-	ts->timer_expires = min_t(u64, expires, next_tick);
+	ts->timer_expires = min_t(u64, expires, tevt.local);
 
 out:
 	return ts->timer_expires;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index b3eea90cb212..069478e65230 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1664,7 +1664,7 @@ static void __next_timer_interrupt(struct timer_base *base)
  * Check, if the next hrtimer event is before the next timer wheel
  * event:
  */
-static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
+static void cmp_next_hrtimer_event(u64 basem, struct timer_events *tevt)
 {
 	u64 nextevt = hrtimer_get_next_event();
 
@@ -1672,15 +1672,17 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 	 * If high resolution timers are enabled
 	 * hrtimer_get_next_event() returns KTIME_MAX.
 	 */
-	if (expires <= nextevt)
-		return expires;
+	if (tevt->local <= nextevt)
+		return;
 
 	/*
 	 * If the next timer is already expired, return the tick base
 	 * time so the tick is fired immediately.
 	 */
-	if (nextevt <= basem)
-		return basem;
+	if (nextevt <= basem) {
+		tevt->local = basem;
+		return;
+	}
 
 	/*
 	 * Round up to the next jiffie. High resolution timers are
@@ -1690,7 +1692,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 	 *
 	 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
 	 */
-	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
+	tevt->local = DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
 }
 
 
@@ -1703,26 +1705,31 @@ static unsigned long next_timer_interrupt(struct timer_base *base)
 }
 
 /**
- * get_next_timer_interrupt - return the time (clock mono) of the next timer
+ * get_next_timer_interrupt
  * @basej:	base time jiffies
  * @basem:	base time clock monotonic
+ * @tevt:	Pointer to the storage for the expiry values
  *
- * Returns the tick aligned clock monotonic time of the next pending
- * timer or KTIME_MAX if no timer is pending.
+ * Stores the next pending local and global timer expiry values in the
+ * struct pointed to by @tevt. If a queue is empty the corresponding field
+ * is set to KTIME_MAX.
  */
-u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
+void get_next_timer_interrupt(unsigned long basej, u64 basem,
+			      struct timer_events *tevt)
 {
 	unsigned long nextevt, nextevt_local, nextevt_global;
 	struct timer_base *base_local, *base_global;
 	bool local_first, is_idle;
-	u64 expires = KTIME_MAX;
+
+	/* Preset local / global events */
+	tevt->local = tevt->global = KTIME_MAX;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
 	 * Possible pending timers will be migrated later to an active cpu.
 	 */
 	if (cpu_is_offline(smp_processor_id()))
-		return expires;
+		return;
 
 	base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
 	base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]);
@@ -1779,16 +1786,44 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 	/* We need to mark both bases in sync */
 	base_local->is_idle = base_global->is_idle = is_idle;
 
-	if (base_local->timers_pending || base_global->timers_pending) {
+	/*
+	 * If the bases are not marked idle, i.e one of the events is at
+	 * max. one tick away, then the CPU can't go into a NOHZ idle
+	 * sleep. Use the earlier event of both and store it in the local
+	 * expiry value. The next global event is irrelevant in this case
+	 * and can be left as KTIME_MAX. CPU will wakeup on time.
+	 */
+	if (!is_idle) {
 		/* If we missed a tick already, force 0 delta */
 		if (time_before_eq(nextevt, basej))
 			nextevt = basej;
-		expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
+		tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
+		goto unlock;
 	}
+
+	/*
+	 * If the bases are marked idle, i.e. the next event on both the
+	 * local and the global queue are farther away than a tick,
+	 * evaluate both bases. No need to check whether one of the bases
+	 * has an already expired timer as this is caught by the !is_idle
+	 * condition above.
+	 */
+	if (base_local->timers_pending)
+		tevt->local = basem + (u64)(nextevt_local - basej) * TICK_NSEC;
+
+	/*
+	 * If the local queue expires first, then the global event can be
+	 * ignored. The CPU wakes up before that. If the global queue is
+	 * empty, nothing to do either.
+	 */
+	if (!local_first && base_global->timers_pending)
+		tevt->global = basem + (u64)(nextevt_global - basej) * TICK_NSEC;
+
+unlock:
 	raw_spin_unlock(&base_global->lock);
 	raw_spin_unlock(&base_local->lock);
 
-	return cmp_next_hrtimer_event(basem, expires);
+	cmp_next_hrtimer_event(basem, tevt);
 }
 
 /**
-- 
2.30.2


  parent reply	other threads:[~2022-10-25 14:00 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-25 13:58 [PATCH v3 00/17] timer: Move from a push remote at enqueue to a pull at expiry model Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 01/17] cpufreq: Prepare timer flags for hierarchical timer pull model Anna-Maria Behnsen
2022-10-26 13:54   ` Frederic Weisbecker
2022-10-31 15:22     ` Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 02/17] tick-sched: Warn when next tick seems to be in the past Anna-Maria Behnsen
2022-10-25 22:11   ` Frederic Weisbecker
2022-10-25 13:58 ` [PATCH v3 03/17] timer: Move store of next event into __next_timer_interrupt() Anna-Maria Behnsen
2022-10-26  9:33   ` Frederic Weisbecker
2022-10-25 13:58 ` [PATCH v3 04/17] timer: Split next timer interrupt logic Anna-Maria Behnsen
2022-10-26  9:40   ` Frederic Weisbecker
2022-10-25 13:58 ` [PATCH v3 05/17] timer: Rework idle logic Anna-Maria Behnsen
2022-10-26 13:27   ` Frederic Weisbecker
2022-10-25 13:58 ` [PATCH v3 06/17] timer: Keep the pinned timers separate from the others Anna-Maria Behnsen
2022-10-26 15:26   ` Frederic Weisbecker
2022-10-25 13:58 ` Anna-Maria Behnsen [this message]
2022-10-25 13:58 ` [PATCH v3 08/17] timer: Rename get_next_timer_interrupt() Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 09/17] timer: Split out "get next timer interrupt" functionality Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 10/17] timer: Add get next timer interrupt functionality for remote CPUs Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 11/17] timer: Restructure internal locking Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 12/17] timer: Check if timers base is handled already Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 13/17] tick/sched: Split out jiffies update helper function Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 14/17] timer: Implement the hierarchical pull model Anna-Maria Behnsen
2022-10-25 16:36   ` kernel test robot
2022-10-26  5:35   ` kernel test robot
2022-10-27  7:38   ` kernel test robot
2022-10-25 13:58 ` [PATCH v3 15/17] timer_migration: Add tracepoints Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 16/17] add_timer_on(): Make sure callers have TIMER_PINNED flag Anna-Maria Behnsen
2022-10-25 13:58 ` [PATCH v3 17/17] timer: Always queue timers on the local CPU Anna-Maria Behnsen
2022-10-27  7:22   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221025135850.51044-8-anna-maria@linutronix.de \
    --to=anna-maria@linutronix.de \
    --cc=arjan@infradead.org \
    --cc=edumazet@google.com \
    --cc=fweisbec@gmail.com \
    --cc=john.stultz@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=richardcochran@gmail.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.