From: Marcelo Tosatti <mtosatti@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>,
linux-kernel@vger.kernel.org,
Frederic Weisbecker <frederic@kernel.org>,
Peter Xu <peterx@redhat.com>,
Nitesh Narayan Lal <nitesh@redhat.com>,
Alex Belits <abelits@marvell.com>
Subject: [PATCH v6] hrtimer: avoid retrigger_next_event IPI
Date: Mon, 19 Apr 2021 16:39:02 -0300 [thread overview]
Message-ID: <20210419193902.GB57245@fuller.cnet> (raw)
In-Reply-To: <20210416160023.GA6187@fuller.cnet>
Setting the realtime clock triggers an IPI to all CPUs to reprogram
the clock event device.
However, only realtime and TAI clocks have their offsets updated
(and therefore potentially require a reprogram).
Instead of sending an IPI unconditionally, check whether each per-CPU
hrtimer base has active timers in the CLOCK_REALTIME, CLOCK_TAI or
CLOCK_BOOTTIME bases. If it does not, update that CPU's base offsets
remotely and skip the IPI. This ensures that any timers subsequently armed
on those clocks are evaluated with the correct offsets.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
v6:
- Do not take softirq_raised into account (Peter Xu).
- Include BOOTTIME as base that requires IPI (Thomas).
- Reprogram unconditionally on the resume path, since there is
nothing to gain from skipping the IPI there anyway.
v5:
- Add missing hrtimer_update_base (Peter Xu).
v4:
- Drop unused code (Thomas).
v3:
- Nicer changelog (Thomas).
- Code style fixes (Thomas).
- Compilation warning with CONFIG_HIGH_RES_TIMERS=n (Thomas).
- Shrink preemption disabled section (Thomas).
v2:
- Only REALTIME and TAI bases are affected by offset-to-monotonic changes (Thomas).
- Don't special case nohz_full CPUs (Thomas).
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..14a6e449b221 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,7 +318,7 @@ struct clock_event_device;
extern void hrtimer_interrupt(struct clock_event_device *dev);
-extern void clock_was_set_delayed(void);
+extern void clock_was_set_delayed(bool force_reprogram);
extern unsigned int hrtimer_resolution;
@@ -326,7 +326,7 @@ extern unsigned int hrtimer_resolution;
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
-static inline void clock_was_set_delayed(void) { }
+static inline void clock_was_set_delayed(bool force_reprogram) { }
#endif
@@ -351,7 +351,7 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
timer->base->get_time());
}
-extern void clock_was_set(void);
+extern void clock_was_set(bool);
#ifdef CONFIG_TIMERFD
extern void timerfd_clock_was_set(void);
#else
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5c9d968187ae..2258782fd714 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -758,9 +758,17 @@ static void hrtimer_switch_to_hres(void)
retrigger_next_event(NULL);
}
+static void clock_was_set_force_reprogram_work(struct work_struct *work)
+{
+ clock_was_set(true);
+}
+
+static DECLARE_WORK(hrtimer_force_reprogram_work, clock_was_set_force_reprogram_work);
+
+
static void clock_was_set_work(struct work_struct *work)
{
- clock_was_set();
+ clock_was_set(false);
}
static DECLARE_WORK(hrtimer_work, clock_was_set_work);
@@ -769,9 +777,12 @@ static DECLARE_WORK(hrtimer_work, clock_was_set_work);
* Called from timekeeping and resume code to reprogram the hrtimer
* interrupt device on all cpus.
*/
-void clock_was_set_delayed(void)
+void clock_was_set_delayed(bool force_reprogram)
{
- schedule_work(&hrtimer_work);
+ if (force_reprogram)
+ schedule_work(&hrtimer_force_reprogram_work);
+ else
+ schedule_work(&hrtimer_work);
}
#else
@@ -871,6 +882,18 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
tick_program_event(expires, 1);
}
+#define CLOCK_SET_BASES ((1U << HRTIMER_BASE_REALTIME) | \
+ (1U << HRTIMER_BASE_REALTIME_SOFT) | \
+ (1U << HRTIMER_BASE_TAI) | \
+ (1U << HRTIMER_BASE_TAI_SOFT) | \
+ (1U << HRTIMER_BASE_BOOTTIME) | \
+ (1U << HRTIMER_BASE_BOOTTIME_SOFT))
+
+static bool need_reprogram_timer(struct hrtimer_cpu_base *cpu_base)
+{
+ return (cpu_base->active_bases & CLOCK_SET_BASES) != 0;
+}
+
/*
* Clock realtime was set
*
@@ -882,11 +905,42 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
* resolution timer interrupts. On UP we just disable interrupts and
* call the high resolution interrupt code.
*/
-void clock_was_set(void)
+void clock_was_set(bool force_reprogram)
{
#ifdef CONFIG_HIGH_RES_TIMERS
- /* Retrigger the CPU local events everywhere */
- on_each_cpu(retrigger_next_event, NULL, 1);
+ cpumask_var_t mask;
+ int cpu;
+
+ if (force_reprogram == true) {
+ on_each_cpu(retrigger_next_event, NULL, 1);
+ goto set_timerfd;
+ }
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+ on_each_cpu(retrigger_next_event, NULL, 1);
+ goto set_timerfd;
+ }
+
+ /* Avoid interrupting CPUs if possible */
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ unsigned long flags;
+ struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
+
+ raw_spin_lock_irqsave(&cpu_base->lock, flags);
+ if (need_reprogram_timer(cpu_base))
+ cpumask_set_cpu(cpu, mask);
+ else
+ hrtimer_update_base(cpu_base);
+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ }
+
+ preempt_disable();
+ smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+ preempt_enable();
+ cpus_read_unlock();
+ free_cpumask_var(mask);
+set_timerfd:
#endif
timerfd_clock_was_set();
}
@@ -903,7 +957,7 @@ void hrtimers_resume(void)
/* Retrigger on the local CPU */
retrigger_next_event(NULL);
/* And schedule a retrigger for all others */
- clock_was_set_delayed();
+ clock_was_set_delayed(true);
}
/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6aee5768c86f..3fef237267bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1323,7 +1323,7 @@ int do_settimeofday64(const struct timespec64 *ts)
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* signal hrtimers about time change */
- clock_was_set();
+ clock_was_set(false);
if (!ret)
audit_tk_injoffset(ts_delta);
@@ -1371,7 +1371,7 @@ static int timekeeping_inject_offset(const struct timespec64 *ts)
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* signal hrtimers about time change */
- clock_was_set();
+ clock_was_set(false);
return ret;
}
@@ -1736,7 +1736,7 @@ void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* signal hrtimers about time change */
- clock_was_set();
+ clock_was_set(true);
}
#endif
@@ -2187,7 +2187,7 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode)
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
if (clock_set)
/* Have to call _delayed version, since in irq context*/
- clock_was_set_delayed();
+ clock_was_set_delayed(false);
}
/**
@@ -2425,7 +2425,7 @@ int do_adjtimex(struct __kernel_timex *txc)
timekeeping_advance(TK_ADV_FREQ);
if (tai != orig_tai)
- clock_was_set();
+ clock_was_set(false);
ntp_notify_cmos_timer();
next prev parent reply other threads:[~2021-04-19 19:40 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-07 13:53 [PATCH] hrtimer: avoid retrigger_next_event IPI Marcelo Tosatti
2021-04-07 19:28 ` kernel test robot
2021-04-07 22:14 ` Frederic Weisbecker
2021-04-08 12:27 ` Marcelo Tosatti
2021-04-09 14:15 ` Thomas Gleixner
2021-04-09 16:51 ` Marcelo Tosatti
2021-04-10 7:53 ` Thomas Gleixner
2021-04-13 17:04 ` [PATCH v2] " Marcelo Tosatti
2021-04-14 17:19 ` Thomas Gleixner
2021-04-15 15:39 ` [PATCH v3] " Marcelo Tosatti
2021-04-15 18:59 ` Thomas Gleixner
2021-04-15 20:40 ` [PATCH v4] " Marcelo Tosatti
2021-04-16 16:00 ` [PATCH v5] " Marcelo Tosatti
2021-04-16 17:13 ` Peter Xu
2021-04-17 16:24 ` Thomas Gleixner
2021-04-17 16:51 ` Thomas Gleixner
2021-04-19 18:56 ` Marcelo Tosatti
2021-04-19 19:39 ` Marcelo Tosatti [this message]
2021-04-19 20:52 ` [PATCH v6] " Thomas Gleixner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210419193902.GB57245@fuller.cnet \
--to=mtosatti@redhat.com \
--cc=abelits@marvell.com \
--cc=anna-maria@linutronix.de \
--cc=frederic@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=nitesh@redhat.com \
--cc=peterx@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).