From: Anna-Maria Behnsen <anna-maria@linutronix.de>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
John Stultz <jstultz@google.com>,
Thomas Gleixner <tglx@linutronix.de>,
Eric Dumazet <edumazet@google.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>,
Arjan van de Ven <arjan@infradead.org>,
"Paul E . McKenney" <paulmck@kernel.org>,
Frederic Weisbecker <fweisbec@gmail.com>,
Rik van Riel <riel@surriel.com>,
Anna-Maria Behnsen <anna-maria@linutronix.de>,
Richard Cochran <richardcochran@gmail.com>
Subject: [PATCH v4 07/16] timer: Retrieve next expiry of pinned/non-pinned timers separately
Date: Fri, 4 Nov 2022 15:57:28 +0100 [thread overview]
Message-ID: <20221104145737.71236-8-anna-maria@linutronix.de> (raw)
In-Reply-To: <20221104145737.71236-1-anna-maria@linutronix.de>
To convert the NOHZ timer placement to a pull-at-expiry-time model, it is
required to have separate expiry times for the pinned and the
non-pinned (movable) timers. Therefore struct timer_events is introduced.
No functional change.
Originally-by: Richard Cochran (linutronix GmbH) <richardcochran@gmail.com>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
kernel/time/tick-internal.h | 8 ++++-
kernel/time/tick-sched.c | 20 ++++++++----
kernel/time/timer.c | 65 ++++++++++++++++++++++++++++---------
3 files changed, 70 insertions(+), 23 deletions(-)
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 649f2b48e8f0..fcb2d45c2934 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -8,6 +8,11 @@
#include "timekeeping.h"
#include "tick-sched.h"
+struct timer_events {
+ u64 local;
+ u64 global;
+};
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS
# define TICK_DO_TIMER_NONE -1
@@ -163,7 +168,8 @@ static inline void timers_update_nohz(void) { }
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
-extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+extern void get_next_timer_interrupt(unsigned long basej, u64 basem,
+ struct timer_events *tevt);
void timer_clear_idle(void);
#define CLOCK_SET_WALL \
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7ffdc7ba19b4..78f172d1f3d2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -784,7 +784,8 @@ static inline bool local_timer_softirq_pending(void)
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
- u64 basemono, next_tick, delta, expires;
+ struct timer_events tevt = { .local = KTIME_MAX, .global = KTIME_MAX };
+ u64 basemono, delta, expires;
unsigned long basejiff;
unsigned int seq;
@@ -809,7 +810,11 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
*/
if (rcu_needs_cpu() || arch_needs_cpu() ||
irq_work_needs_cpu() || local_timer_softirq_pending()) {
- next_tick = basemono + TICK_NSEC;
+ /*
+ * If anyone needs the CPU, treat this as a local
+ * timer expiring in a jiffy.
+ */
+ tevt.local = basemono + TICK_NSEC;
} else {
/*
* Get the next pending timer. If high resolution
@@ -818,17 +823,18 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* disabled this also looks at the next expiring
* hrtimer.
*/
- next_tick = get_next_timer_interrupt(basejiff, basemono);
- ts->next_timer = next_tick;
+ get_next_timer_interrupt(basejiff, basemono, &tevt);
+ tevt.local = min_t(u64, tevt.local, tevt.global);
+ ts->next_timer = tevt.local;
}
/*
* If the tick is due in the next period, keep it ticking or
* force prod the timer.
*/
- WARN_ON_ONCE(basemono > next_tick);
+ WARN_ON_ONCE(basemono > tevt.local);
- delta = next_tick - basemono;
+ delta = tevt.local - basemono;
if (delta <= (u64)TICK_NSEC) {
/*
* Tell the timer code that the base is not idle, i.e. undo
@@ -861,7 +867,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
else
expires = KTIME_MAX;
- ts->timer_expires = min_t(u64, expires, next_tick);
+ ts->timer_expires = min_t(u64, expires, tevt.local);
out:
return ts->timer_expires;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 0e2abd906916..d272672935da 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1664,7 +1664,7 @@ static void next_expiry_recalc(struct timer_base *base)
* Check, if the next hrtimer event is before the next timer wheel
* event:
*/
-static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
+static void cmp_next_hrtimer_event(u64 basem, struct timer_events *tevt)
{
u64 nextevt = hrtimer_get_next_event();
@@ -1672,15 +1672,17 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
* If high resolution timers are enabled
* hrtimer_get_next_event() returns KTIME_MAX.
*/
- if (expires <= nextevt)
- return expires;
+ if (tevt->local <= nextevt)
+ return;
/*
* If the next timer is already expired, return the tick base
* time so the tick is fired immediately.
*/
- if (nextevt <= basem)
- return basem;
+ if (nextevt <= basem) {
+ tevt->local = basem;
+ return;
+ }
/*
* Round up to the next jiffie. High resolution timers are
@@ -1690,7 +1692,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
*
* Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
*/
- return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
+ tevt->local = DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
@@ -1718,26 +1720,31 @@ static void forward_base_clk(struct timer_base *base, unsigned long nextevt,
}
/**
- * get_next_timer_interrupt - return the time (clock mono) of the next timer
+ * get_next_timer_interrupt() - Retrieve the next local and global timer expiry
* @basej: base time jiffies
* @basem: base time clock monotonic
+ * @tevt: Pointer to the storage for the expiry values
*
- * Returns the tick aligned clock monotonic time of the next pending
- * timer or KTIME_MAX if no timer is pending.
+ * Stores the next pending local and global timer expiry values in the
+ * struct pointed to by @tevt. If a queue is empty the corresponding field
+ * is set to KTIME_MAX.
*/
-u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
+void get_next_timer_interrupt(unsigned long basej, u64 basem,
+ struct timer_events *tevt)
{
unsigned long nextevt, nextevt_local, nextevt_global;
struct timer_base *base_local, *base_global;
bool local_first, is_idle;
- u64 expires = KTIME_MAX;
+
+ /* Preset local / global events */
+ tevt->local = tevt->global = KTIME_MAX;
/*
* Pretend that there is no timer pending if the cpu is offline.
* Possible pending timers will be migrated later to an active cpu.
*/
if (cpu_is_offline(smp_processor_id()))
- return expires;
+ return;
base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]);
@@ -1782,16 +1789,44 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
/* We need to mark both bases in sync */
base_local->is_idle = base_global->is_idle = is_idle;
- if (base_local->timers_pending || base_global->timers_pending) {
+ /*
+ * If the bases are not marked idle, i.e. one of the events is at
+ * max. one tick away, then the CPU can't go into a NOHZ idle
+ * sleep. Use the earlier event of both and store it in the local
+ * expiry value. The next global event is irrelevant in this case
+ * and can be left as KTIME_MAX. The CPU will wake up on time.
+ */
+ if (!is_idle) {
/* If we missed a tick already, force 0 delta */
if (time_before(nextevt, basej))
nextevt = basej;
- expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
+ tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
+ goto unlock;
}
+
+ /*
+ * If the bases are marked idle, i.e. the next events on both the
+ * local and the global queue are farther away than a tick,
+ * evaluate both bases. No need to check whether one of the bases
+ * has an already expired timer as this is caught by the !is_idle
+ * condition above.
+ */
+ if (base_local->timers_pending)
+ tevt->local = basem + (u64)(nextevt_local - basej) * TICK_NSEC;
+
+ /*
+ * If the local queue expires first, then the global event can be
+ * ignored. The CPU wakes up before that. If the global queue is
+ * empty, nothing to do either.
+ */
+ if (!local_first && base_global->timers_pending)
+ tevt->global = basem + (u64)(nextevt_global - basej) * TICK_NSEC;
+
+unlock:
raw_spin_unlock(&base_global->lock);
raw_spin_unlock(&base_local->lock);
- return cmp_next_hrtimer_event(basem, expires);
+ cmp_next_hrtimer_event(basem, tevt);
}
/**
--
2.30.2
next prev parent reply other threads:[~2022-11-04 14:58 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-04 14:57 [PATCH v4 00/16] timer: Move from a push remote at enqueue to a pull at expiry model Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 01/16] tick-sched: Warn when next tick seems to be in the past Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 02/16] timer: Move store of next event into __next_timer_interrupt() Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 03/16] timer: Split next timer interrupt logic Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 04/16] timer: Rework idle logic Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 05/16] add_timer_on(): Make sure callers have TIMER_PINNED flag Anna-Maria Behnsen
2022-11-04 16:43 ` Frederic Weisbecker
2022-11-07 8:11 ` Anna-Maria Behnsen
2022-11-07 10:11 ` Frederic Weisbecker
2022-11-04 14:57 ` [PATCH v4 06/16] timer: Keep the pinned timers separate from the others Anna-Maria Behnsen
2022-11-04 14:57 ` Anna-Maria Behnsen [this message]
2022-11-07 11:58 ` [PATCH v4 07/16] timer: Retrieve next expiry of pinned/non-pinned timers separately Frederic Weisbecker
2022-11-04 14:57 ` [PATCH v4 08/16] timer: Rename get_next_timer_interrupt() Anna-Maria Behnsen
2022-11-07 12:13 ` Frederic Weisbecker
2022-11-04 14:57 ` [PATCH v4 09/16] timer: Split out "get next timer interrupt" functionality Anna-Maria Behnsen
2022-11-07 12:42 ` Frederic Weisbecker
2022-11-08 15:30 ` Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 10/16] timer: Add get next timer interrupt functionality for remote CPUs Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 11/16] timer: Restructure internal locking Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 12/16] timer: Check if timers base is handled already Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 13/16] tick/sched: Split out jiffies update helper function Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 14/16] timer: Implement the hierarchical pull model Anna-Maria Behnsen
2022-11-07 22:07 ` Frederic Weisbecker
2022-11-08 16:16 ` Anna-Maria Behnsen
2022-11-09 17:12 ` Frederic Weisbecker
2022-11-08 10:47 ` Frederic Weisbecker
2022-11-08 17:02 ` Anna-Maria Behnsen
2022-11-09 17:13 ` Frederic Weisbecker
2022-11-08 11:48 ` Frederic Weisbecker
2022-11-09 16:39 ` Frederic Weisbecker
2022-11-10 6:34 ` Anna-Maria Behnsen
2022-11-14 13:09 ` Frederic Weisbecker
2022-11-15 11:31 ` Frederic Weisbecker
2022-11-24 7:47 ` Anna-Maria Behnsen
2022-11-28 16:20 ` Frederic Weisbecker
2022-11-29 10:30 ` Frederic Weisbecker
2022-11-16 13:40 ` Frederic Weisbecker
2022-11-04 14:57 ` [PATCH v4 15/16] timer_migration: Add tracepoints Anna-Maria Behnsen
2022-11-04 14:57 ` [PATCH v4 16/16] timer: Always queue timers on the local CPU Anna-Maria Behnsen
2022-11-08 4:37 ` [PATCH v4 00/16] timer: Move from a push remote at enqueue to a pull at expiry model Pavan Kondeti
2022-11-08 15:06 ` Anna-Maria Behnsen
2022-11-08 16:04 ` Pavan Kondeti
2022-11-08 17:39 ` Anna-Maria Behnsen
2022-11-08 18:48 ` Pavan Kondeti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221104145737.71236-8-anna-maria@linutronix.de \
--to=anna-maria@linutronix.de \
--cc=arjan@infradead.org \
--cc=edumazet@google.com \
--cc=fweisbec@gmail.com \
--cc=jstultz@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rafael.j.wysocki@intel.com \
--cc=richardcochran@gmail.com \
--cc=riel@surriel.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).