linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>,
	linux-kernel@vger.kernel.org,
	Frederic Weisbecker <frederic@kernel.org>,
	Peter Xu <peterx@redhat.com>,
	Nitesh Narayan Lal <nitesh@redhat.com>,
	Alex Belits <abelits@marvell.com>
Subject: [PATCH v6] hrtimer: avoid retrigger_next_event IPI
Date: Mon, 19 Apr 2021 16:39:02 -0300	[thread overview]
Message-ID: <20210419193902.GB57245@fuller.cnet> (raw)
In-Reply-To: <20210416160023.GA6187@fuller.cnet>


Setting the realtime clock triggers an IPI to all CPUs to reprogram
the clock event device.

However, only realtime and TAI clocks have their offsets updated
(and therefore potentially require a reprogram).

Instead of sending an IPI unconditionally, check each per CPU hrtimer base
whether it has active timers in the CLOCK_REALTIME and CLOCK_TAI bases. If
that's not the case, update the realtime and TAI base offsets remotely and
skip the IPI. This ensures that any subsequently armed timers on
CLOCK_REALTIME and CLOCK_TAI are evaluated with the correct offsets.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---

v6:
  - Do not take softirq_raised into account (Peter Xu).
  - Include BOOTTIME as base that requires IPI (Thomas).
  - Unconditional reprogram on resume path, since there is
    nothing to gain in such path anyway.

v5:
  - Add missing hrtimer_update_base (Peter Xu).

v4:
   - Drop unused code (Thomas).

v3:
   - Nicer changelog  (Thomas).
   - Code style fixes (Thomas).
   - Compilation warning with CONFIG_HIGH_RES_TIMERS=n (Thomas).
   - Shrink preemption disabled section (Thomas).

v2:
   - Only REALTIME and TAI bases are affected by offset-to-monotonic changes (Thomas).
   - Don't special case nohz_full CPUs (Thomas).


diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..14a6e449b221 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,7 +318,7 @@ struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern void clock_was_set_delayed(void);
+extern void clock_was_set_delayed(bool force_reprogram);
 
 extern unsigned int hrtimer_resolution;
 
@@ -326,7 +326,7 @@ extern unsigned int hrtimer_resolution;
 
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
-static inline void clock_was_set_delayed(void) { }
+static inline void clock_was_set_delayed(bool force_reprogram) { }
 
 #endif
 
@@ -351,7 +351,7 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
 						    timer->base->get_time());
 }
 
-extern void clock_was_set(void);
+extern void clock_was_set(bool);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
 #else
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5c9d968187ae..2258782fd714 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -758,9 +758,17 @@ static void hrtimer_switch_to_hres(void)
 	retrigger_next_event(NULL);
 }
 
+static void clock_was_set_force_reprogram_work(struct work_struct *work)
+{
+	clock_was_set(true);
+}
+
+static DECLARE_WORK(hrtimer_force_reprogram_work, clock_was_set_force_reprogram_work);
+
+
 static void clock_was_set_work(struct work_struct *work)
 {
-	clock_was_set();
+	clock_was_set(false);
 }
 
 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
@@ -769,9 +777,12 @@ static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  * Called from timekeeping and resume code to reprogram the hrtimer
  * interrupt device on all cpus.
  */
-void clock_was_set_delayed(void)
+void clock_was_set_delayed(bool force_reprogram)
 {
-	schedule_work(&hrtimer_work);
+	if (force_reprogram)
+		schedule_work(&hrtimer_force_reprogram_work);
+	else
+		schedule_work(&hrtimer_work);
 }
 
 #else
@@ -871,6 +882,18 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 	tick_program_event(expires, 1);
 }
 
+#define CLOCK_SET_BASES ((1U << HRTIMER_BASE_REALTIME) |	\
+			 (1U << HRTIMER_BASE_REALTIME_SOFT) |	\
+			 (1U << HRTIMER_BASE_TAI) |		\
+			 (1U << HRTIMER_BASE_TAI_SOFT) |	\
+			 (1U << HRTIMER_BASE_BOOTTIME) |	\
+			 (1U << HRTIMER_BASE_BOOTTIME_SOFT))
+
+static bool need_reprogram_timer(struct hrtimer_cpu_base *cpu_base)
+{
+	return (cpu_base->active_bases & CLOCK_SET_BASES) != 0;
+}
+
 /*
  * Clock realtime was set
  *
@@ -882,11 +905,42 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
  * resolution timer interrupts. On UP we just disable interrupts and
  * call the high resolution interrupt code.
  */
-void clock_was_set(void)
+void clock_was_set(bool force_reprogram)
 {
 #ifdef CONFIG_HIGH_RES_TIMERS
-	/* Retrigger the CPU local events everywhere */
-	on_each_cpu(retrigger_next_event, NULL, 1);
+	cpumask_var_t mask;
+	int cpu;
+
+	if (force_reprogram == true) {
+		on_each_cpu(retrigger_next_event, NULL, 1);
+		goto set_timerfd;
+	}
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+		on_each_cpu(retrigger_next_event, NULL, 1);
+		goto set_timerfd;
+	}
+
+	/* Avoid interrupting CPUs if possible */
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		unsigned long flags;
+		struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
+
+		raw_spin_lock_irqsave(&cpu_base->lock, flags);
+		if (need_reprogram_timer(cpu_base))
+			cpumask_set_cpu(cpu, mask);
+		else
+			hrtimer_update_base(cpu_base);
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	}
+
+	preempt_disable();
+	smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+	preempt_enable();
+	cpus_read_unlock();
+	free_cpumask_var(mask);
+set_timerfd:
 #endif
 	timerfd_clock_was_set();
 }
@@ -903,7 +957,7 @@ void hrtimers_resume(void)
 	/* Retrigger on the local CPU */
 	retrigger_next_event(NULL);
 	/* And schedule a retrigger for all others */
-	clock_was_set_delayed();
+	clock_was_set_delayed(true);
 }
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6aee5768c86f..3fef237267bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1323,7 +1323,7 @@ int do_settimeofday64(const struct timespec64 *ts)
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
-	clock_was_set();
+	clock_was_set(false);
 
 	if (!ret)
 		audit_tk_injoffset(ts_delta);
@@ -1371,7 +1371,7 @@ static int timekeeping_inject_offset(const struct timespec64 *ts)
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
-	clock_was_set();
+	clock_was_set(false);
 
 	return ret;
 }
@@ -1736,7 +1736,7 @@ void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
-	clock_was_set();
+	clock_was_set(true);
 }
 #endif
 
@@ -2187,7 +2187,7 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode)
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 	if (clock_set)
 		/* Have to call _delayed version, since in irq context*/
-		clock_was_set_delayed();
+		clock_was_set_delayed(false);
 }
 
 /**
@@ -2425,7 +2425,7 @@ int do_adjtimex(struct __kernel_timex *txc)
 		timekeeping_advance(TK_ADV_FREQ);
 
 	if (tai != orig_tai)
-		clock_was_set();
+		clock_was_set(false);
 
 	ntp_notify_cmos_timer();
 


  parent reply	other threads:[~2021-04-19 19:40 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-07 13:53 [PATCH] hrtimer: avoid retrigger_next_event IPI Marcelo Tosatti
2021-04-07 19:28 ` kernel test robot
2021-04-07 22:14 ` Frederic Weisbecker
2021-04-08 12:27   ` Marcelo Tosatti
2021-04-09 14:15 ` Thomas Gleixner
2021-04-09 16:51   ` Marcelo Tosatti
2021-04-10  7:53     ` Thomas Gleixner
2021-04-13 17:04       ` [PATCH v2] " Marcelo Tosatti
2021-04-14 17:19         ` Thomas Gleixner
2021-04-15 15:39         ` [PATCH v3] " Marcelo Tosatti
2021-04-15 18:59           ` Thomas Gleixner
2021-04-15 20:40             ` [PATCH v4] " Marcelo Tosatti
2021-04-16 16:00               ` [PATCH v5] " Marcelo Tosatti
2021-04-16 17:13                 ` Peter Xu
2021-04-17 16:24                   ` Thomas Gleixner
2021-04-17 16:51                     ` Thomas Gleixner
2021-04-19 18:56                       ` Marcelo Tosatti
2021-04-19 19:39                 ` Marcelo Tosatti [this message]
2021-04-19 20:52                   ` [PATCH v6] " Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210419193902.GB57245@fuller.cnet \
    --to=mtosatti@redhat.com \
    --cc=abelits@marvell.com \
    --cc=anna-maria@linutronix.de \
    --cc=frederic@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nitesh@redhat.com \
    --cc=peterx@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).