LKML Archive on lore.kernel.org
 help / color / Atom feed
* [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support
@ 2019-07-26 18:30 Thomas Gleixner
  2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
                   ` (12 more replies)
  0 siblings, 13 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

The following series brings the bulk of PREEMPT_RT specific changes for the
(hr)timer code:

  - Handle timer deletion correctly under RT to avoid priority inversion
    and live locks

    This mechanism might be useful for VMs as well when a vCPU
    executing a timer callback gets scheduled out and on another vCPU
    del_timer_sync() or hrtimer_cancel() is invoked.

    The mitigation would only work when paravirt spinlocks are
    enabled. I've not implemented that, as I don't know whether this is a
    real world issue. I just noticed that it is basically the same
    problem. Adding it would be trivial.

  - Prepare for moving most hrtimer callbacks into softirq context and mark
    timers which need to expire in hard interrupt context even on RT so
    they don't get moved.

The timerwheel still needs some special handling for IRQSAFE timers (grrrr)
which I'm still working on to find a less fugly solution.

Thanks,

	tglx




^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper()
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-26 19:57   ` Steven Rostedt
  2019-07-30 22:07   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
                   ` (11 subsequent siblings)
  12 siblings, 2 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

All callers hand in 'current' and that's the only task pointer which
actually makes sense. Remove the task argument and set current in the
function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 block/blk-mq.c                 |    2 +-
 drivers/staging/android/vsoc.c |    2 +-
 include/linux/hrtimer.h        |    3 +--
 include/linux/wait.h           |    2 +-
 kernel/futex.c                 |    2 +-
 kernel/time/hrtimer.c          |    8 ++++----
 net/core/pktgen.c              |    2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3418,7 +3418,7 @@ static bool blk_mq_poll_hybrid_sleep(str
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
+	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -442,7 +442,7 @@ static int handle_vsoc_cond_wait(struct
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
 
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const stru
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-				 struct task_struct *tsk);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
 
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 						const enum hrtimer_mode mode);
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,7 +489,7 @@ do {										\
 	struct hrtimer_sleeper __t;						\
 										\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t, current);					\
+	hrtimer_init_sleeper(&__t);						\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -490,7 +490,7 @@ futex_setup_timer(ktime_t *time, struct
 	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
 			      CLOCK_REALTIME : CLOCK_MONOTONIC,
 			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout, current);
+	hrtimer_init_sleeper(timeout);
 
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wake
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
 {
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
@@ -1669,7 +1669,7 @@ static int __sched do_nanosleep(struct h
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
+	hrtimer_init_sleeper(t);
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1930,7 +1930,7 @@ schedule_hrtimeout_range_clock(ktime_t *
 	hrtimer_init_on_stack(&t.timer, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
-	hrtimer_init_sleeper(&t, current);
+	hrtimer_init_sleeper(&t);
 
 	hrtimer_start_expires(&t.timer, mode);
 
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2171,7 +2171,7 @@ static void spin(struct pktgen_dev *pkt_
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
+		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
  2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:08   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:49   ` tip-bot for Sebastian Andrzej Siewior
  2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
                   ` (10 subsequent siblings)
  12 siblings, 2 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

hrtimer_init_sleeper() calls require prior initialisation of the hrtimer
object which is embedded into the hrtimer_sleeper.

Combine the initialization and spare a function call. Fixup all call sites.

This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper
specific initializations of the embedded hrtimer without modifying any of
the call sites.

No functional change.

[ anna-maria: Minor cleanups ]
[ tglx: Adapted to the removal of the task argument of
  	hrtimer_init_sleeper() and trivial polishing ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
v2..v3: Update to current version

v1..v2: Fix missing call site in drivers/staging/android/vsoc.c

 block/blk-mq.c                 |    3 --
 drivers/staging/android/vsoc.c |    6 +----
 include/linux/hrtimer.h        |   17 +++++++++++++---
 include/linux/wait.h           |    4 +--
 kernel/futex.c                 |    8 ++-----
 kernel/time/hrtimer.c          |   43 ++++++++++++++++++++++++++++++-----------
 net/core/pktgen.c              |    4 ---
 7 files changed, 55 insertions(+), 30 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3415,10 +3415,9 @@ static bool blk_mq_poll_hybrid_sleep(str
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(&to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+						 clockid_t clock_id,
+						 enum hrtimer_mode mode)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
@@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const stru
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
-						const enum hrtimer_mode mode);
+				    const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
 					  u64 delta,
 					  const enum hrtimer_mode mode,
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -488,8 +488,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t);						\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				      HRTIMER_MODE_REL);			\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct
 	if (!time)
 		return NULL;
 
-	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
-			      CLOCK_REALTIME : CLOCK_MONOTONIC,
-			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout);
-
+	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
 	 * effectively the same as calling hrtimer_set_expires().
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -427,6 +427,17 @@ void hrtimer_init_on_stack(struct hrtime
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1639,11 +1650,27 @@ static enum hrtimer_restart hrtimer_wake
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;
 }
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
@@ -1669,8 +1696,6 @@ static int __sched do_nanosleep(struct h
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 		hrtimer_start_expires(&t->timer, mode);
@@ -1707,10 +1732,9 @@ static long __sched hrtimer_nanosleep_re
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1752,7 @@ long hrtimer_nanosleep(const struct time
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1927,11 +1951,8 @@ schedule_hrtimeout_range_clock(ktime_t *
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t);
-
 	hrtimer_start_expires(&t.timer, mode);
 
 	if (likely(t.task))
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2170,8 +2170,6 @@ static void spin(struct pktgen_dev *pkt_
 			end_time = ktime_get();
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
-		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 03/12] hrtimer: Introduce HARD expiry mode
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
  2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
  2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:10   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:52   ` tip-bot for Sebastian Andrzej Siewior
  2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
                   ` (9 subsequent siblings)
  12 siblings, 2 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into soft interrupt context.

But there are hrtimers which must be expired in hard interrupt context even
when PREEMPT_RT is enabled:

  - hrtimers which must expire in hard interrupt context, e.g. scheduler,
    perf, watchdog related hrtimers

  - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer

Add a new mode flag HRTIMER_MODE_HARD which allows marking these timers so
PREEMPT_RT will not move them into softirq expiry mode.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h |    6 ++++++
 1 file changed, 6 insertions(+)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -38,6 +38,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL	= 0x01,
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
+	HRTIMER_MODE_HARD	= 0x08,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -48,6 +49,11 @@ enum hrtimer_mode {
 	HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
 	HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
 
+	HRTIMER_MODE_ABS_HARD	= HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_HARD	= HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+	HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
 /*



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (2 preceding siblings ...)
  2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:11   ` [tip:timers/core] " tip-bot for Thomas Gleixner
                     ` (2 more replies)
  2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
                   ` (8 subsequent siblings)
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Thomas Gleixner <tglx@linutronix.de>

The scheduler related hrtimers need to expire in hard interrupt context
even on PREEMPT_RT enabled kernels. Mark them as such.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c     |    2 +-
 kernel/sched/deadline.c |    2 +-
 kernel/sched/fair.c     |    6 ++++--
 kernel/sched/rt.c       |    4 ++--
 4 files changed, 8 insertions(+), 6 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq
 	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	rq->hrtick_timer.function = hrtick;
 }
 #else	/* CONFIG_SCHED_HRTICK */
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = dl_task_timer;
 }
 
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4972,9 +4972,11 @@ void init_cfs_bandwidth(struct cfs_bandw
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
-	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_ABS_PINNED_HARD);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
-	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 	cfs_b->distribute_running = 0;
 	cfs_b->slack_started = false;
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwid
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 05/12] perf/core: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (3 preceding siblings ...)
  2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:12   ` [tip:timers/core] " tip-bot for Thomas Gleixner
                     ` (2 more replies)
  2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
                   ` (7 subsequent siblings)
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Thomas Gleixner <tglx@linutronix.de>

To guarantee that the multiplexing mechanism and the hrtimer driven events
work on PREEMPT_RT enabled kernels it's required that the related hrtimers
expire in hard interrupt context. Mark them so PREEMPT_RT kernels won't
defer them to soft interrupt context.

No functional change.

[ tglx: Split out of larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
---
 kernel/events/core.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(stru
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
 	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(stru
 	if (!cpuctx->hrtimer_active) {
 		cpuctx->hrtimer_active = 1;
 		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 06/12] watchdog: Mark watchdog_hrtimer to expire in hard interrupt context
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (4 preceding siblings ...)
  2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:13   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
                     ` (2 more replies)
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
                   ` (6 subsequent siblings)
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

The watchdog hrtimer must expire in hard interrupt context even on
PREEMPT_RT=y kernels as otherwise the hard/softlockup detection logic would
not work.

No functional change.

[ tglx: Split out from larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/watchdog.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -490,7 +490,7 @@ static void watchdog_enable(unsigned int
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
 	 */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer->function = watchdog_timer_fn;
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
 		      HRTIMER_MODE_REL_PINNED);



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 07/12] KVM: LAPIC: Mark hrtimer to expire in hard interrupt context
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (5 preceding siblings ...)
  2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-26 19:41   ` Paolo Bonzini
                     ` (3 more replies)
  2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
                   ` (5 subsequent siblings)
  12 siblings, 4 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Paolo Bonzini, Arnaldo Carvalho de Melo, Jiri Olsa,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

On PREEMPT_RT enabled kernels unmarked hrtimers are moved into soft
interrupt expiry mode by default.

While that's not a functional requirement for the KVM local APIC timer
emulation, it's a latency issue which can be avoided by marking the timer
so hard interrupt context expiry is enforced.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2302,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 08/12] tick: Mark tick related hrtimers to expiry in hard interrupt context
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (6 preceding siblings ...)
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
                     ` (2 more replies)
  2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
                   ` (4 subsequent siblings)
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

The tick related hrtimers, which drive the scheduler tick and hrtimer based
broadcasting are required to expire in hard interrupt context for obvious
reasons.

Mark them so PREEMPT_RT kernels won't move them to soft interrupt expiry.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-broadcast-hrtimer.c |    2 +-
 kernel/time/tick-sched.c             |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -104,7 +104,7 @@ static enum hrtimer_restart bc_handler(s
 
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	bctimer.function = bc_handler;
 	clockevents_register_device(&ce_broadcast_hrtimer);
 }
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1327,7 +1327,7 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (7 preceding siblings ...)
  2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-30 22:15   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
                     ` (2 more replies)
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
                   ` (3 subsequent siblings)
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into softirq context.

hrtimers marked with HRTIMER_MODE_HARD must be kept in hard interrupt
context expiry mode. Add the required logic.

No functional change for PREEMPT_RT=n kernels.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/hrtimer.c |   11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1271,8 +1271,16 @@ static void __hrtimer_init(struct hrtime
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1286,6 +1294,7 @@ static void __hrtimer_init(struct hrtime
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
 	timer->base = &cpu_base->clock_base[base];



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (8 preceding siblings ...)
  2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-26 20:44   ` Steven Rostedt
                     ` (4 more replies)
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
                   ` (2 subsequent siblings)
  12 siblings, 5 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

On PREEMPT_RT enabled kernels hrtimers which are not explicitly marked for
hard interrupt expiry mode are moved into soft interrupt context either for
latency reasons or because the hrtimer callback takes regular spinlocks or
invokes other functions which are not suitable for hard interrupt context
on PREEMPT_RT.

The hrtimer_sleeper callback is RT compatible in hard interrupt context,
but there is a latency concern: Untrusted userspace can spawn many threads
which arm timers for the same expiry time on the same CPU. On expiry that
causes a latency spike due to the wakeup of a gazillion threads.

OTOH, privileged real-time user space applications rely on the low latency
of hard interrupt wakeups. These syscall related wakeups are all based on
hrtimer sleepers.

If the current task is in a real-time scheduling class, mark the mode for
hard interrupt expiry.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/hrtimer.c |   24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1662,6 +1662,30 @@ static enum hrtimer_restart hrtimer_wake
 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, privileged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
 	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 11/12] hrtimer: Prepare support for PREEMPT_RT
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (9 preceding siblings ...)
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
@ 2019-07-26 18:30 ` Thomas Gleixner
  2019-07-28  9:06   ` Juergen Gross
                     ` (3 more replies)
  2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
  2019-07-29 19:45 ` [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Peter Zijlstra
  12 siblings, 4 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:30 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Juergen Gross, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini

From: Anna-Maria Gleixner <anna-maria@linutronix.de>

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Juergen Gross <jgross@suse.com>
---
 include/linux/hrtimer.h |    8 ++++
 kernel/time/hrtimer.c   |   96 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 98 insertions(+), 6 deletions(-)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -189,6 +189,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimers are
+ *			 expired
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
+ *			callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -216,6 +220,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
+#endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1158,6 +1158,81 @@ int hrtimer_try_to_cancel(struct hrtimer
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->softirq_expiry_lock, then it was
+ * waiting for the timer callback to finish. Drop the lock and reacquire it.
+ * That allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a livelock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base)
+		return;
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1168,13 +1243,17 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel)
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0) {
+			hrtimer_cancel_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1469,6 +1548,8 @@ static void __hrtimer_run_queues(struct
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1479,6 +1560,7 @@ static __latent_entropy void hrtimer_run
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1488,6 +1570,7 @@ static __latent_entropy void hrtimer_run
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1866,6 +1949,7 @@ int hrtimers_prepare_cpu(unsigned int cp
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 



^ permalink raw reply	[flat|nested] 61+ messages in thread

* [patch 12/12] timers: Prepare support for PREEMPT_RT
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (10 preceding siblings ...)
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
@ 2019-07-26 18:31 ` Thomas Gleixner
  2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
                     ` (2 more replies)
  2019-07-29 19:45 ` [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Peter Zijlstra
  12 siblings, 3 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 18:31 UTC (permalink / raw)
  To: LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Juergen Gross, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini

From: Anna-Maria Gleixner <anna-maria@linutronix.de>

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If del_timer_sync() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

This mechanism is not used for timers which are marked IRQSAFE as for those
preemption is disabled across the callback and therefore this situation
cannot happen. The callbacks for such timers need to be individually
audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
del_timer_sync() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant
of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Juergen Gross <jgross@suse.com>
---
 include/linux/timer.h |    2 
 kernel/time/timer.c   |  103 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 9 deletions(-)

--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
   extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)		del_timer(t)
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_l
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reacquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a livelock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;
 
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *ti
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_b
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct t
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct t
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cp
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
 



^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 07/12] KVM: LAPIC: Mark hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
@ 2019-07-26 19:41   ` Paolo Bonzini
  2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 61+ messages in thread
From: Paolo Bonzini @ 2019-07-26 19:41 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Juergen Gross

On 26/07/19 20:30, Thomas Gleixner wrote:
> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> On PREEMPT_RT enabled kernels unmarked hrtimers are moved into soft
> interrupt expiry mode by default.
> 
> While that's not a functional requirement for the KVM local APIC timer
> emulation, it's a latency issue which can be avoided by marking the timer
> so hard interrupt context expiry is enforced.
> 
> No functional change.
> 
> [ tglx: Split out from larger combo patch. Add changelog. ]
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: kvm@vger.kernel.org
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/lapic.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2302,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
>  	apic->vcpu = vcpu;
>  
>  	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
> -		     HRTIMER_MODE_ABS);
> +		     HRTIMER_MODE_ABS_HARD);
>  	apic->lapic_timer.timer.function = apic_timer_fn;
>  	if (timer_advance_ns == -1) {
>  		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
> 
> 

Acked-by: Paolo Bonzini <pbonzini@redhat.com>

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper()
  2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
@ 2019-07-26 19:57   ` Steven Rostedt
  2019-07-26 20:01     ` Thomas Gleixner
  2019-07-30 22:07   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  1 sibling, 1 reply; 61+ messages in thread
From: Steven Rostedt @ 2019-07-26 19:57 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

On Fri, 26 Jul 2019 20:30:49 +0200
Thomas Gleixner <tglx@linutronix.de> wrote:

> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wake
>  	return HRTIMER_NORESTART;
>  }
>  

Not related to the change of this patch, but I'm surprised that a
global function like this doesn't contain any kerneldoc information.

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve

> -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
> +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
>  {
>  	sl->timer.function = hrtimer_wakeup;
> -	sl->task = task;
> +	sl->task = current;
>  }
>  EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
>  

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper()
  2019-07-26 19:57   ` Steven Rostedt
@ 2019-07-26 20:01     ` Thomas Gleixner
  0 siblings, 0 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 20:01 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, x86, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

On Fri, 26 Jul 2019, Steven Rostedt wrote:

> On Fri, 26 Jul 2019 20:30:49 +0200
> Thomas Gleixner <tglx@linutronix.de> wrote:
> 
> > --- a/kernel/time/hrtimer.c
> > +++ b/kernel/time/hrtimer.c
> > @@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wake
> >  	return HRTIMER_NORESTART;
> >  }
> >  
> 
> Not related to the change of this patch, but I'm surprised that a
> global function like this doesn't contain any kerneldoc information.

Indeed, but it gets it in the next patch


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
@ 2019-07-26 20:44   ` Steven Rostedt
  2019-07-26 20:52     ` Thomas Gleixner
  2019-07-26 21:16   ` Julia Cartwright
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 61+ messages in thread
From: Steven Rostedt @ 2019-07-26 20:44 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

On Fri, 26 Jul 2019 20:30:58 +0200
Thomas Gleixner <tglx@linutronix.de> wrote:

> +++ b/kernel/time/hrtimer.c
> @@ -1662,6 +1662,30 @@ static enum hrtimer_restart hrtimer_wake
>  static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
>  				   clockid_t clock_id, enum hrtimer_mode mode)
>  {
> +	/*
> +	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
> +	 * marked for hard interrupt expiry mode are moved into soft
> +	 * interrupt context either for latency reasons or because the
> +	 * hrtimer callback takes regular spinlocks or invokes other
> +	 * functions which are not suitable for hard interrupt context on
> +	 * PREEMPT_RT.

Have we marked all timer handlers that have normal spin_locks as
HRTIMER_MODE_SOFT? Otherwise, can't we switch one to hard below and
having their handler grab a spin_lock/mutex in hard interrupt context
in RT?

-- Steve


> +	 *
> +	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
> +	 * context, but there is a latency concern: Untrusted userspace can
> +	 * spawn many threads which arm timers for the same expiry time on
> +	 * the same CPU. That causes a latency spike due to the wakeup of
> +	 * a gazillion threads.
> +	 *
> +	 * OTOH, priviledged real-time user space applications rely on the
> +	 * low latency of hard interrupt wakeups. If the current task is in
> +	 * a real-time scheduling class, mark the mode for hard interrupt
> +	 * expiry.
> +	 */
> +	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> +		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
> +			mode |= HRTIMER_MODE_HARD;
> +	}
> +
>  	__hrtimer_init(&sl->timer, clock_id, mode);
>  	sl->timer.function = hrtimer_wakeup;
>  	sl->task = current;


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 20:44   ` Steven Rostedt
@ 2019-07-26 20:52     ` Thomas Gleixner
  2019-07-26 20:56       ` Steven Rostedt
  0 siblings, 1 reply; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 20:52 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, x86, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

On Fri, 26 Jul 2019, Steven Rostedt wrote:

> On Fri, 26 Jul 2019 20:30:58 +0200
> Thomas Gleixner <tglx@linutronix.de> wrote:
> 
> > +++ b/kernel/time/hrtimer.c
> > @@ -1662,6 +1662,30 @@ static enum hrtimer_restart hrtimer_wake
> >  static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
> >  				   clockid_t clock_id, enum hrtimer_mode mode)
> >  {
> > +	/*
> > +	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
> > +	 * marked for hard interrupt expiry mode are moved into soft
> > +	 * interrupt context either for latency reasons or because the
> > +	 * hrtimer callback takes regular spinlocks or invokes other
> > +	 * functions which are not suitable for hard interrupt context on
> > +	 * PREEMPT_RT.
> 
> Have we marked all timer handlers that have normal spin_locks as
> HRTIMER_MODE_SOFT? Otherwise, can't we switch one to hard below and
> having their handler grab a spin_lock/mutex in hard interrupt context
> in RT?

See patch 09/12. We move all timers into soft mode which are not marked
MODE_HARD.

> > +	 *
> > +	 * The hrtimer_sleeper callback is RT compatible in hard interrupt

                                           ^^^^^^^^^^^^^^
> > +	 * context, but there is a latency concern: Untrusted userspace can
> > +	 * spawn many threads which arm timers for the same expiry time on
> > +	 * the same CPU. That causes a latency spike due to the wakeup of
> > +	 * a gazillion threads.
> > +	 *
> > +	 * OTOH, priviledged real-time user space applications rely on the
> > +	 * low latency of hard interrupt wakeups. If the current task is in
> > +	 * a real-time scheduling class, mark the mode for hard interrupt
> > +	 * expiry.
> > +	 */
> > +	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> > +		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
> > +			mode |= HRTIMER_MODE_HARD;
> > +	}
> > +
> >  	__hrtimer_init(&sl->timer, clock_id, mode);
> >  	sl->timer.function = hrtimer_wakeup;

It's the wakeup function and nothing is supposed to override that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 20:52     ` Thomas Gleixner
@ 2019-07-26 20:56       ` Steven Rostedt
  0 siblings, 0 replies; 61+ messages in thread
From: Steven Rostedt @ 2019-07-26 20:56 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini,
	Juergen Gross

On Fri, 26 Jul 2019 22:52:18 +0200 (CEST)
Thomas Gleixner <tglx@linutronix.de> wrote:
> > Have we marked all timer handlers that have normal spin_locks as
> > HRTIMER_MODE_SOFT? Otherwise, can't we switch one to hard below and
> > having their handler grab a spin_lock/mutex in hard interrupt context
> > in RT?  
> 
> See patch 09/12. We move all timers into soft mode which are not marked
> MODE_HARD.
> 


> > >  	sl->timer.function = hrtimer_wakeup;  
> 
> It's the wakeup function and nothing is supposed to override that.

Ah, that makes sense. Not the actual handler then.

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
  2019-07-26 20:44   ` Steven Rostedt
@ 2019-07-26 21:16   ` Julia Cartwright
  2019-07-26 21:30     ` Steven Rostedt
  2019-07-26 21:35     ` Thomas Gleixner
  2019-07-30 22:16   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
                     ` (2 subsequent siblings)
  4 siblings, 2 replies; 61+ messages in thread
From: Julia Cartwright @ 2019-07-26 21:16 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Steven Rostedt, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini, Juergen Gross

On Fri, Jul 26, 2019 at 08:30:58PM +0200, Thomas Gleixner wrote:
> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> On PREEMPT_RT enabled kernels hrtimers which are not explicitly marked for
> hard interrupt expiry mode are moved into soft interrupt context either for
> latency reasons or because the hrtimer callback takes regular spinlocks or
> invokes other functions which are not suitable for hard interrupt context
> on PREEMPT_RT.
> 
> The hrtimer_sleeper callback is RT compatible in hard interrupt context,
> but there is a latency concern: Untrusted userspace can spawn many threads
> which arm timers for the same expiry time on the same CPU. On expiry that
> causes a latency spike due to the wakeup of a gazillion threads.
> 
> OTOH, privileged real-time user space applications rely on the low latency
> of hard interrupt wakeups. These syscall related wakeups are all based on
> hrtimer sleepers.
> 
> If the current task is in a real-time scheduling class, mark the mode for
> hard interrupt expiry.
> 
> [ tglx: Split out of a larger combo patch. Added changelog ]
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  kernel/time/hrtimer.c |   24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -1662,6 +1662,30 @@ static enum hrtimer_restart hrtimer_wake
>  static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
>  				   clockid_t clock_id, enum hrtimer_mode mode)
>  {
> +	/*
> +	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
> +	 * marked for hard interrupt expiry mode are moved into soft
> +	 * interrupt context either for latency reasons or because the
> +	 * hrtimer callback takes regular spinlocks or invokes other
> +	 * functions which are not suitable for hard interrupt context on
> +	 * PREEMPT_RT.
> +	 *
> +	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
> +	 * context, but there is a latency concern: Untrusted userspace can
> +	 * spawn many threads which arm timers for the same expiry time on
> +	 * the same CPU. That causes a latency spike due to the wakeup of
> +	 * a gazillion threads.
> +	 *
> +	 * OTOH, privileged real-time user space applications rely on the
> +	 * low latency of hard interrupt wakeups. If the current task is in
> +	 * a real-time scheduling class, mark the mode for hard interrupt
> +	 * expiry.
> +	 */
> +	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> +		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
> +			mode |= HRTIMER_MODE_HARD;

Because this ends up sampling the task's scheduling parameters only at
the time of enqueue, it doesn't take into consideration whether or not
the task may be holding a PI lock and later be boosted if contended by an
RT thread.

Am I correct in assuming there is an induced inversion here in this
case, because the deferred wakeup mechanism isn't part of the PI chain?

If so, is this just to be an accepted limitation at this point?  Is the
intent to argue this away as bad RT application design? :)

   Julia

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 21:16   ` Julia Cartwright
@ 2019-07-26 21:30     ` Steven Rostedt
  2019-07-26 21:35     ` Thomas Gleixner
  1 sibling, 0 replies; 61+ messages in thread
From: Steven Rostedt @ 2019-07-26 21:30 UTC (permalink / raw)
  To: Julia Cartwright
  Cc: Thomas Gleixner, LKML, x86, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini, Juergen Gross

On Fri, 26 Jul 2019 21:16:24 +0000
Julia Cartwright <julia@ni.com> wrote:

> > +	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> > +		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
> > +			mode |= HRTIMER_MODE_HARD;  
> 
> Because this ends up sampling the task's scheduling parameters only at
> the time of enqueue, it doesn't take into consideration whether or not
> the task may be holding a PI lock and later be boosted if contended by an
> RT thread.
> 
> Am I correct in assuming there is an induced inversion here in this
> case, because the deferred wakeup mechanism isn't part of the PI chain?
> 
> If so, is this just to be an accepted limitation at this point?  Is the
> intent to argue this away as bad RT application design? :)
> 

Well, it shouldn't be holding any kernel PI locks (aka spin_lock) when
it sleeps, but may be holding a PI futex. In which case, I would say it is
a bad RT application, to have a thread sleep on a non RT timer while
holding a lock that an RT Task might take.

-- Steve

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 21:16   ` Julia Cartwright
  2019-07-26 21:30     ` Steven Rostedt
@ 2019-07-26 21:35     ` Thomas Gleixner
  1 sibling, 0 replies; 61+ messages in thread
From: Thomas Gleixner @ 2019-07-26 21:35 UTC (permalink / raw)
  To: Julia Cartwright
  Cc: LKML, x86, Steven Rostedt, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini, Juergen Gross

Julia,

On Fri, 26 Jul 2019, Julia Cartwright wrote:
> On Fri, Jul 26, 2019 at 08:30:58PM +0200, Thomas Gleixner wrote:
> > +	 * OTOH, privileged real-time user space applications rely on the
> > +	 * low latency of hard interrupt wakeups. If the current task is in
> > +	 * a real-time scheduling class, mark the mode for hard interrupt
> > +	 * expiry.
> > +	 */
> > +	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> > +		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
> > +			mode |= HRTIMER_MODE_HARD;
> 
> Because this ends up sampling the task's scheduling parameters only at
> the time of enqueue, it doesn't take into consideration whether or not
> the task may be holding a PI lock and later be boosted if contended by an
> RT thread.
>
> Am I correct in assuming there is an induced inversion here in this
> case, because the deferred wakeup mechanism isn't part of the PI chain?
>
> If so, is this just to be an accepted limitation at this point?  Is the
> intent to argue this away as bad RT application design? :)

This would bring us back to the point where we moved the hrtimers
which were not marked for hardirq expiry onto the separate softirq expiry
list. That caused horrible latencies in some scenarios.

The separation of the bases into hard and soft expiry mode solved that
nicely and I haven't heard a complaint since we changed that in 4.14-rt.

So yes I'd argue it's an application issue. Holding a lock while doing
e.g. a nanosleep is not the most brilliant idea.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 11/12] hrtimer: Prepare support for PREEMPT_RT
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
@ 2019-07-28  9:06   ` Juergen Gross
  2019-07-29 15:08     ` Steven Rostedt
  2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 61+ messages in thread
From: Juergen Gross @ 2019-07-28  9:06 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Steven Rostedt, Sebastian Siewior, Anna-Maria Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Paolo Bonzini

On 26.07.19 20:30, Thomas Gleixner wrote:
> From: Anna-Maria Gleixner <anna-maria@linutronix.de>
> 
> When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
> the soft interrupt thread is preempted in the middle of a timer callback,
> then calling hrtimer_cancel() can lead to two issues:
> 
>    - If the caller is on a remote CPU then it has to spin wait for the timer
>      handler to complete. This can result in unbounded priority inversion.
> 
>    - If the caller originates from the task which preempted the timer
>      handler on the same CPU, then spin waiting for the timer handler to
>      complete is never going to end.
> 
> To avoid these issues, add a new lock to the timer base which is held
> around the execution of the timer callbacks. If hrtimer_cancel() detects
> that the timer callback is currently running, it blocks on the expiry
> lock. When the callback is finished, the expiry lock is dropped by the
> softirq thread which wakes up the waiter and the system makes progress.
> 
> This addresses both the priority inversion and the livelock issues.
> 
> The same issue can happen in virtual machines when the vCPU which runs a
> timer callback is scheduled out. If a second vCPU of the same guest calls
> hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
> in. The expiry lock mechanism would avoid that. It'd be trivial to enable
> this when paravirt spinlocks are enabled in a guest, but it's not clear
> whether this is an actual problem in the wild, so for now it's an RT only
> mechanism.

As in virtual machines the soft interrupt thread preemption should not
be an issue, I guess the spinning is "just" sub-optimal (similar to not
using paravirt spinlocks).

In case we'd want to change that I'd rather not special case timers, but
apply a more general solution to the quite large amount of similar
cases: I assume the majority of cpu_relax() uses are affected, so adding
a paravirt op cpu_relax() might be appropriate.

That could be put under CONFIG_PARAVIRT_SPINLOCK. If called in a guest
it could ask the hypervisor to give up the physical cpu voluntarily
(in Xen this would be a "yield" hypercall).


Juergen

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 11/12] hrtimer: Prepare support for PREEMPT_RT
  2019-07-28  9:06   ` Juergen Gross
@ 2019-07-29 15:08     ` Steven Rostedt
  2019-07-29 17:30       ` Paolo Bonzini
  0 siblings, 1 reply; 61+ messages in thread
From: Steven Rostedt @ 2019-07-29 15:08 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Thomas Gleixner, LKML, x86, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini

On Sun, 28 Jul 2019 11:06:50 +0200
Juergen Gross <jgross@suse.com> wrote:

> In case we'd want to change that I'd rather not special case timers, but
> apply a more general solution to the quite large amount of similar
> cases: I assume the majority of cpu_relax() uses are affected, so adding
> a paravirt op cpu_relax() might be appropriate.
> 
> That could be put under CONFIG_PARAVIRT_SPINLOCK. If called in a guest
> it could ask the hypervisor to give up the physical cpu voluntarily
> (in Xen this would be a "yield" hypercall).

Seems paravirt wants our cpu_chill() ;-)

-- Steve

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 11/12] hrtimer: Prepare support for PREEMPT_RT
  2019-07-29 15:08     ` Steven Rostedt
@ 2019-07-29 17:30       ` Paolo Bonzini
  2019-07-31  8:45         ` Juergen Gross
  0 siblings, 1 reply; 61+ messages in thread
From: Paolo Bonzini @ 2019-07-29 17:30 UTC (permalink / raw)
  To: Steven Rostedt, Juergen Gross
  Cc: Thomas Gleixner, LKML, x86, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa

On 29/07/19 17:08, Steven Rostedt wrote:
> On Sun, 28 Jul 2019 11:06:50 +0200
> Juergen Gross <jgross@suse.com> wrote:
> 
>> In case we'd want to change that I'd rather not special case timers, but
>> apply a more general solution to the quite large amount of similar
>> cases: I assume the majority of cpu_relax() uses are affected, so adding
>> a paravirt op cpu_relax() might be appropriate.
>>
>> That could be put under CONFIG_PARAVIRT_SPINLOCK. If called in a guest
>> it could ask the hypervisor to give up the physical cpu voluntarily
>> (in Xen this would be a "yield" hypercall).
> 
> Seems paravirt wants our cpu_chill() ;-)

Actually that is not really a joke! :)

Paolo


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support
  2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
                   ` (11 preceding siblings ...)
  2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
@ 2019-07-29 19:45 ` Peter Zijlstra
  12 siblings, 0 replies; 61+ messages in thread
From: Peter Zijlstra @ 2019-07-29 19:45 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Steven Rostedt, Sebastian Siewior,
	Anna-Maria Gleixner, Arnaldo Carvalho de Melo, Jiri Olsa,
	Paolo Bonzini, Juergen Gross

On Fri, Jul 26, 2019 at 08:30:48PM +0200, Thomas Gleixner wrote:
> The following series brings the bulk of PREEMPT_RT specific changes for the
> (hr)timer code:
> 
>   - Handle timer deletion correctly under RT to avoid priority inversion
>     and livelocks
> 
>     This mechanism might be useful for VMs as well when a vCPU
>     executing a timer callback gets scheduled out and on another vCPU
>     del_timer_sync() or hrtimer_cancel() is invoked.
> 
>     The mitigation would only work when paravirt spinlocks are
>     enabled. I've not implemented that, as I don't know whether this is a
>     real world issue. I just noticed that it is basically the same
>     problem. Adding it would be trivial.
> 
>   - Prepare for moving most hrtimer callbacks into softirq context and mark
>     timers which need to expire in hard interrupt context even on RT so
>     they don't get moved.
> 

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Remove task argument from hrtimer_init_sleeper()
  2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
  2019-07-26 19:57   ` Steven Rostedt
@ 2019-07-30 22:07   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 61+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-07-30 22:07 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, peterz, rostedt, hpa, tglx, linux-kernel

Commit-ID:  b74494872555d1f7888dfd9225700a363f4a84fc
Gitweb:     https://git.kernel.org/tip/b74494872555d1f7888dfd9225700a363f4a84fc
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:49 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:51 +0200

hrtimer: Remove task argument from hrtimer_init_sleeper()

All callers hand in 'current' and that's the only task pointer which
actually makes sense. Remove the task argument and set current in the
function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.791885290@linutronix.de

---
 block/blk-mq.c                 | 2 +-
 drivers/staging/android/vsoc.c | 2 +-
 include/linux/hrtimer.h        | 3 +--
 include/linux/wait.h           | 2 +-
 kernel/futex.c                 | 2 +-
 kernel/time/hrtimer.c          | 8 ++++----
 net/core/pktgen.c              | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..5f647cb8c695 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3418,7 +3418,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
+	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 00a1ec7b9154..ce480bcf20d2 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -442,7 +442,7 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
 
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4971100a8cab..3c74f89367c4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-				 struct task_struct *tsk);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
 
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 						const enum hrtimer_mode mode);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..d57832774ca6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,7 +489,7 @@ do {										\
 	struct hrtimer_sleeper __t;						\
 										\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t, current);					\
+	hrtimer_init_sleeper(&__t);						\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d50728ef2e7..5e9842ea4012 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -490,7 +490,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
 			      CLOCK_REALTIME : CLOCK_MONOTONIC,
 			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout, current);
+	hrtimer_init_sleeper(timeout);
 
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..de895d86800c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
 {
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
@@ -1669,7 +1669,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
+	hrtimer_init_sleeper(t);
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1930,7 +1930,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 	hrtimer_init_on_stack(&t.timer, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
-	hrtimer_init_sleeper(&t, current);
+	hrtimer_init_sleeper(&t);
 
 	hrtimer_start_expires(&t.timer, mode);
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bb9915291644..7f3cf2381f27 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2171,7 +2171,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
+		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls
  2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
@ 2019-07-30 22:08   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:49   ` tip-bot for Sebastian Andrzej Siewior
  1 sibling, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:08 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, tglx, bigeasy, anna-maria, peterz

Commit-ID:  82e18bace3dd3fe4d594b9a2915035f09a3deb55
Gitweb:     https://git.kernel.org/tip/82e18bace3dd3fe4d594b9a2915035f09a3deb55
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:50 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:51 +0200

hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls

hrtimer_init_sleeper() calls require prior initialisation of the hrtimer
object which is embedded into the hrtimer_sleeper.

Combine the initialization and spare a function call. Fixup all call sites.

This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper
specific initializations of the embedded hrtimer without modifying any of
the call sites.

No functional change.

[ anna-maria: Minor cleanups ]
[ tglx: Adapted to the removal of the task argument of
  	hrtimer_init_sleeper() and trivial polishing ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.887468908@linutronix.de
---
 block/blk-mq.c                 |  3 +--
 drivers/staging/android/vsoc.c |  6 ++----
 include/linux/hrtimer.h        | 17 ++++++++++++++---
 include/linux/wait.h           |  4 ++--
 kernel/futex.c                 |  8 +++-----
 kernel/time/hrtimer.c          | 43 +++++++++++++++++++++++++++++++-----------
 net/core/pktgen.c              |  4 +---
 7 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5f647cb8c695..df3fafbfe9a9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3415,10 +3415,9 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index ce480bcf20d2..628c06096dfd 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(&to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3c74f89367c4..0df373bed3d7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer,
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+						 clockid_t clock_id,
+						 enum hrtimer_mode mode)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
@@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
-						const enum hrtimer_mode mode);
+				    const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
 					  u64 delta,
 					  const enum hrtimer_mode mode,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index d57832774ca6..4707543ef575 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -488,8 +488,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t);						\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				      HRTIMER_MODE_REL);			\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
diff --git a/kernel/futex.c b/kernel/futex.c
index 5e9842ea4012..c8561aa5338e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	if (!time)
 		return NULL;
 
-	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
-			      CLOCK_REALTIME : CLOCK_MONOTONIC,
-			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout);
-
+	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
 	 * effectively the same as calling hrtimer_set_expires().
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index de895d86800c..bb55d62f631e 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -427,6 +427,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1639,11 +1650,27 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;
 }
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
@@ -1669,8 +1696,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 		hrtimer_start_expires(&t->timer, mode);
@@ -1707,10 +1732,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1752,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1927,11 +1951,8 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t);
-
 	hrtimer_start_expires(&t.timer, mode);
 
 	if (likely(t.task))
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7f3cf2381f27..a5905975bc12 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2170,8 +2170,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			end_time = ktime_get();
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
-		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Introduce HARD expiry mode
  2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
@ 2019-07-30 22:10   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:52   ` tip-bot for Sebastian Andrzej Siewior
  1 sibling, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:10 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, peterz, bigeasy, tglx, mingo

Commit-ID:  b04b3857625d7d91fd11fcc39da138d7602bfadd
Gitweb:     https://git.kernel.org/tip/b04b3857625d7d91fd11fcc39da138d7602bfadd
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:51 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:53 +0200

hrtimer: Introduce HARD expiry mode

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into soft interrupt context.

But there are hrtimers which must be expired in hard interrupt context even
when PREEMPT_RT is enabled:

  - hrtimers which must expire in hard interrupt context, e.g. scheduler,
    perf, watchdog related hrtimers

  - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer

Add a new mode flag HRTIMER_MODE_HARD which allows to mark these timers so
PREEMPT_RT will not move them into softirq expiry mode.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.981398465@linutronix.de

---
 include/linux/hrtimer.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 24072a0942c0..15c2ba6b6316 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -38,6 +38,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL	= 0x01,
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
+	HRTIMER_MODE_HARD	= 0x08,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -48,6 +49,11 @@ enum hrtimer_mode {
 	HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
 	HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
 
+	HRTIMER_MODE_ABS_HARD	= HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_HARD	= HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+	HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
 /*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] sched: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
@ 2019-07-30 22:11   ` tip-bot for Thomas Gleixner
  2019-08-01 15:53   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 18:58   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-07-30 22:11 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, peterz, mingo, bigeasy, linux-kernel, hpa

Commit-ID:  b78b1e6b5b77b265a94e3027e6f0dcaad33faf9f
Gitweb:     https://git.kernel.org/tip/b78b1e6b5b77b265a94e3027e6f0dcaad33faf9f
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:52 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:54 +0200

sched: Mark hrtimers to expire in hard interrupt context

The scheduler related hrtimers need to expire in hard interrupt context
even on PREEMPT_RT enabled kernels. Mark them as such.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.077004842@linutronix.de

---
 kernel/sched/core.c     | 6 +++---
 kernel/sched/deadline.c | 4 ++--
 kernel/sched/rt.c       | 7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..389e0993fbb4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -255,7 +255,7 @@ static void __hrtick_restart(struct rq *rq)
 {
 	struct hrtimer *timer = &rq->hrtick_timer;
 
-	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 }
 
 /*
@@ -314,7 +314,7 @@ void hrtick_start(struct rq *rq, u64 delay)
 	 */
 	delay = max_t(u64, delay, 10000LL);
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 #endif /* CONFIG_SMP */
 
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq)
 	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	rq->hrtick_timer.function = hrtick;
 }
 #else	/* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0359612d5443 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -923,7 +923,7 @@ static int start_dl_timer(struct task_struct *p)
 	 */
 	if (!hrtimer_is_queued(timer)) {
 		get_task_struct(p);
-		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
 	}
 
 	return 1;
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = dl_task_timer;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a532558a5176..da3e85e61013 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -67,7 +67,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		 * to update the period.
 		 */
 		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&rt_b->rt_period_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] perf/core: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
@ 2019-07-30 22:12   ` tip-bot for Thomas Gleixner
  2019-08-01 15:54   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 18:59   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-07-30 22:12 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: hpa, mingo, linux-kernel, tglx, bigeasy, peterz

Commit-ID:  c23a8bd3ac02df2ca5e77396df1dee247db3d49f
Gitweb:     https://git.kernel.org/tip/c23a8bd3ac02df2ca5e77396df1dee247db3d49f
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:53 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:54 +0200

perf/core: Mark hrtimers to expire in hard interrupt context

To guarantee that the multiplexing mechanism and the hrtimer driven events
work on PREEMPT_RT enabled kernels it's required that the related hrtimers
expire in hard interrupt context. Mark them so PREEMPT_RT kernels won't
defer them to soft interrupt context.

No functional change.

[ tglx: Split out of larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.169509224@linutronix.de

---
 kernel/events/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..9d623e257a51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
 	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 	if (!cpuctx->hrtimer_active) {
 		cpuctx->hrtimer_active = 1;
 		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
@@ -9491,7 +9491,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
 		period = max_t(u64, 10000, hwc->sample_period);
 	}
 	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -9513,7 +9513,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
 	if (!is_sampling_event(event))
 		return;
 
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
 
 	/*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] watchdog: Mark watchdog_hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
@ 2019-07-30 22:13   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:00   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:13 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, linux-kernel, bigeasy, mingo, hpa, peterz

Commit-ID:  2c6db53c4b4a52012e644f1f50bcc958c87f046a
Gitweb:     https://git.kernel.org/tip/2c6db53c4b4a52012e644f1f50bcc958c87f046a
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:54 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:55 +0200

watchdog: Mark watchdog_hrtimer to expire in hard interrupt context

The watchdog hrtimer must expire in hard interrupt context even on
PREEMPT_RT=y kernels as otherwise the hard/softlockup detection logic would
not work.

No functional change.

[ tglx: Split out from larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.262895510@linutronix.de

---
 kernel/watchdog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9e7b9306fe..f41334ef0971 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -490,10 +490,10 @@ static void watchdog_enable(unsigned int cpu)
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
 	 */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer->function = watchdog_timer_fn;
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 
 	/* Initialize timestamp */
 	__touch_watchdog();

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] KVM: LAPIC: Mark hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
  2019-07-26 19:41   ` Paolo Bonzini
@ 2019-07-30 22:14   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  3 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:14 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, bigeasy, pbonzini, tglx, hpa, mingo, peterz

Commit-ID:  899ad4bce00d10433b64647a37f6488dd8b582c9
Gitweb:     https://git.kernel.org/tip/899ad4bce00d10433b64647a37f6488dd8b582c9
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:55 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:55 +0200

KVM: LAPIC: Mark hrtimer to expire in hard interrupt context

On PREEMPT_RT enabled kernels unmarked hrtimers are moved into soft
interrupt expiry mode by default.

While that's not a functional requirement for the KVM local APIC timer
emulation, it's a latency issue which can be avoided by marking the timer
so hard interrupt context expiry is enforced.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.363363474@linutronix.de

---
 arch/x86/kvm/lapic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0aa158657f20..b9e516099d07 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
 	} else
 		apic_timer_expired(apic);
 
@@ -2302,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2487,7 +2487,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
 }
 
 /*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] tick: Mark tick related hrtimers to expiry in hard interrupt context
  2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
@ 2019-07-30 22:14   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:56   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:14 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, mingo, peterz, hpa, bigeasy, tglx

Commit-ID:  fe2db1c61da8176954e84d2e1f42863a945b1a21
Gitweb:     https://git.kernel.org/tip/fe2db1c61da8176954e84d2e1f42863a945b1a21
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:56 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:56 +0200

tick: Mark tick related hrtimers to expiry in hard interrupt context

The tick related hrtimers, which drive the scheduler tick and hrtimer based
broadcasting, are required to expire in hard interrupt context for obvious
reasons.

Mark them so PREEMPT_RT kernels won't move them to soft interrupt expiry.

Make the horribly formatted RCU_NONIDLE bracket maze readable while at it.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.459144407@linutronix.de

---
 kernel/time/tick-broadcast-hrtimer.c | 13 +++++++++----
 kernel/time/tick-sched.c             | 15 +++++++++------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 5be6154e2fd2..c1f5bb590b5e 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -59,11 +59,16 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
 	 * hrtimer_{start/cancel} functions call into tracing,
 	 * calls to these functions must be bound within RCU_NONIDLE.
 	 */
-	RCU_NONIDLE({
+	RCU_NONIDLE(
+		{
 			bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
-			if (bc_moved)
+			if (bc_moved) {
 				hrtimer_start(&bctimer, expires,
-					      HRTIMER_MODE_ABS_PINNED);});
+					      HRTIMER_MODE_ABS_PINNED_HARD);
+			}
+		}
+	);
+
 	if (bc_moved) {
 		/* Bind the "device" to the cpu */
 		bc->bound_on = smp_processor_id();
@@ -104,7 +109,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	bctimer.function = bc_handler;
 	clockevents_register_device(&ce_broadcast_hrtimer);
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index be9707f68024..01ff32a02af2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -634,10 +634,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 	/* Forward the time to expire in the future */
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
 
-	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-	else
+	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
+	} else {
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+	}
 
 	/*
 	 * Reset to make sure next tick stop doesn't get fooled by past
@@ -802,7 +804,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	}
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ts->sched_timer, tick,
+			      HRTIMER_MODE_ABS_PINNED_HARD);
 	} else {
 		hrtimer_set_expires(&ts->sched_timer, tick);
 		tick_program_event(tick, 1);
@@ -1327,7 +1330,7 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */
@@ -1342,7 +1345,7 @@ void tick_setup_sched_timer(void)
 	}
 
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
 #endif /* HIGH_RES_TIMERS */

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT
  2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
@ 2019-07-30 22:15   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:57   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:02   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:15 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: bigeasy, hpa, peterz, tglx, linux-kernel, mingo

Commit-ID:  f4f9a0e3f4c01a11043aca77006532c3c889c9ba
Gitweb:     https://git.kernel.org/tip/f4f9a0e3f4c01a11043aca77006532c3c889c9ba
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:57 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:56 +0200

hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into softirq context.

hrtimers marked with HRTIMER_MODE_HARD must be kept in hard interrupt
context expiry mode. Add the required logic.

No functional change for PREEMPT_RT=n kernels.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.551967692@linutronix.de

---
 kernel/time/hrtimer.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 0ace301a56f4..90dcc4d95e91 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1275,8 +1275,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1290,6 +1299,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
 	timer->is_hard = !softtimer;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
  2019-07-26 20:44   ` Steven Rostedt
  2019-07-26 21:16   ` Julia Cartwright
@ 2019-07-30 22:16   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:58   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:03   ` tip-bot for Sebastian Andrzej Siewior
  4 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-07-30 22:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bigeasy, rostedt, linux-kernel, peterz, tglx, hpa, mingo

Commit-ID:  a6bc84e64a7dbfdeaae02f434ad1b296f2f9cd1e
Gitweb:     https://git.kernel.org/tip/a6bc84e64a7dbfdeaae02f434ad1b296f2f9cd1e
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:58 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:57 +0200

hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT

On PREEMPT_RT enabled kernels hrtimers which are not explicitly marked for
hard interrupt expiry mode are moved into soft interrupt context either for
latency reasons or because the hrtimer callback takes regular spinlocks or
invokes other functions which are not suitable for hard interrupt context
on PREEMPT_RT.

The hrtimer_sleeper callback is RT compatible in hard interrupt context,
but there is a latency concern: Untrusted userspace can spawn many threads
which arm timers for the same expiry time on the same CPU. On expiry that
causes a latency spike due to the wakeup of a gazillion threads.

OTOH, privileged real-time user space applications rely on the low latency
of hard interrupt wakeups. These syscall related wakeups are all based on
hrtimer sleepers.

If the current task is in a real-time scheduling class, mark the mode for
hard interrupt expiry.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.645792403@linutronix.de

---
 kernel/time/hrtimer.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 90dcc4d95e91..c101f88ae8aa 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1676,6 +1676,16 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
 				   enum hrtimer_mode mode)
 {
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
 	hrtimer_start_expires(&sl->timer, mode);
 }
 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
@@ -1683,6 +1693,30 @@ EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, priviledged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
 	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Prepare support for PREEMPT_RT
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
  2019-07-28  9:06   ` Juergen Gross
@ 2019-07-30 22:17   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 15:58   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  3 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-07-30 22:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, mingo, linux-kernel, hpa, peterz, bigeasy, anna-maria

Commit-ID:  10521d890c650472e49bbbb4cf415f0fa6c29d4f
Gitweb:     https://git.kernel.org/tip/10521d890c650472e49bbbb4cf415f0fa6c29d4f
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:59 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:57 +0200

hrtimer: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de

---
 include/linux/hrtimer.h | 16 +++++++++
 kernel/time/hrtimer.c   | 95 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7d0d0a36a8f4..5df4bcff96d5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -192,6 +192,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
+ *			callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_retries;
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+#ifdef CONFIG_PREEMPT_RT
+void hrtimer_cancel_wait_running(const struct hrtimer *timer);
+#else
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
+{
+	cpu_relax();
+}
+#endif
 
 /* Exported timer functions: */
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c101f88ae8aa..499122752649 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] timers: Prepare support for PREEMPT_RT
  2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
@ 2019-07-30 22:17   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 15:59   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-07-30 22:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, mingo, hpa, anna-maria, tglx, bigeasy, peterz

Commit-ID:  51503daaaacd6118d627a0c1b5829191d4fa6f16
Gitweb:     https://git.kernel.org/tip/51503daaaacd6118d627a0c1b5829191d4fa6f16
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:31:00 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:57 +0200

timers: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If del_timer_sync() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the live lock issues.

This mechanism is not used for timers which are marked IRQSAFE as for those
preemption is disabled across the callback and therefore this situation
cannot happen. The callbacks for such timers need to be individually
audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
del_timer_sync() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant
of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.832418500@linutronix.de

---
 include/linux/timer.h |   2 +-
 kernel/time/timer.c   | 103 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 282e4f2a532a..1e6650ed066d 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
   extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)		del_timer(t)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba33b1c..673c6a0f0c45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reacquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a live lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;
 
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base)
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [patch 11/12] hrtimer: Prepare support for PREEMPT_RT
  2019-07-29 17:30       ` Paolo Bonzini
@ 2019-07-31  8:45         ` Juergen Gross
  0 siblings, 0 replies; 61+ messages in thread
From: Juergen Gross @ 2019-07-31  8:45 UTC (permalink / raw)
  To: Paolo Bonzini, Steven Rostedt
  Cc: Arnaldo Carvalho de Melo, x86, Anna-Maria Gleixner,
	Sebastian Siewior, Thomas Gleixner, Jiri Olsa, LKML

On 29.07.19 19:30, Paolo Bonzini wrote:
> On 29/07/19 17:08, Steven Rostedt wrote:
>> On Sun, 28 Jul 2019 11:06:50 +0200
>> Juergen Gross <jgross@suse.com> wrote:
>>
>>> In case we'd want to change that I'd rather not special case timers, but
>>> apply a more general solution to the quite large amount of similar
>>> cases: I assume the majority of cpu_relax() uses are affected, so adding
>>> a paravirt op cpu_relax() might be appropriate.
>>>
>>> That could be put under CONFIG_PARAVIRT_SPINLOCK. If called in a guest
>>> it could ask the hypervisor to give up the physical cpu voluntarily
>>> (in Xen this would be a "yield" hypercall).
>>
>> Seems paravirt wants our cpu_chill() ;-)
> 
> Actually that is not really a joke! :)

As CONFIG_PARAVIRT is no longer as massively intrusive as it was some time
ago, it might really be worth thinking about.

 From Xen perspective I'd really like a way to give up the vcpu instead
of doing a busy wait. And having another user and (even better) already
some patches addressing some (or all?) callsites sounds like a win-win
situation for me.

So +1 from me for cpu_chill() via a new paravirt op.


Juergen

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls
  2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
  2019-07-30 22:08   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:49   ` tip-bot for Sebastian Andrzej Siewior
  1 sibling, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bigeasy, hpa, tglx, peterz, mingo, anna-maria, linux-kernel

Commit-ID:  dbc1625fc9deefb352f6ff26a575ae4b3ddef23a
Gitweb:     https://git.kernel.org/tip/dbc1625fc9deefb352f6ff26a575ae4b3ddef23a
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:50 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:15 +0200

hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls

hrtimer_init_sleeper() calls require prior initialisation of the hrtimer
object which is embedded into the hrtimer_sleeper.

Combine the initialization and spare a function call. Fixup all call sites.

This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper
specific initializations of the embedded hrtimer without modifying any of
the call sites.

No functional change.

[ anna-maria: Minor cleanups ]
[ tglx: Adapted to the removal of the task argument of
  	hrtimer_init_sleeper() and trivial polishing.
	Folded a fix from Stephen Rothwell for the vsoc code ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.887468908@linutronix.de

---
 block/blk-mq.c                 |  3 +--
 drivers/staging/android/vsoc.c |  6 ++----
 include/linux/hrtimer.h        | 17 ++++++++++++++---
 include/linux/wait.h           |  4 ++--
 kernel/futex.c                 |  8 +++-----
 kernel/time/hrtimer.c          | 43 +++++++++++++++++++++++++++++++-----------
 net/core/pktgen.c              |  4 +---
 7 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5f647cb8c695..df3fafbfe9a9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3415,10 +3415,9 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index ce480bcf20d2..2d6b3981afb8 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3c74f89367c4..0df373bed3d7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer,
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+						 clockid_t clock_id,
+						 enum hrtimer_mode mode)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
@@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
-						const enum hrtimer_mode mode);
+				    const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
 					  u64 delta,
 					  const enum hrtimer_mode mode,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index d57832774ca6..4707543ef575 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -488,8 +488,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t);						\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				      HRTIMER_MODE_REL);			\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
diff --git a/kernel/futex.c b/kernel/futex.c
index 5e9842ea4012..c8561aa5338e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	if (!time)
 		return NULL;
 
-	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
-			      CLOCK_REALTIME : CLOCK_MONOTONIC,
-			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout);
-
+	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
 	 * effectively the same as calling hrtimer_set_expires().
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index de895d86800c..bb55d62f631e 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -427,6 +427,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1639,11 +1650,27 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;
 }
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
@@ -1669,8 +1696,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 		hrtimer_start_expires(&t->timer, mode);
@@ -1707,10 +1732,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1752,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1927,11 +1951,8 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t);
-
 	hrtimer_start_expires(&t.timer, mode);
 
 	if (likely(t.task))
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7f3cf2381f27..a5905975bc12 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2170,8 +2170,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			end_time = ktime_get();
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
-		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Introduce HARD expiry mode
  2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
  2019-07-30 22:10   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:52   ` tip-bot for Sebastian Andrzej Siewior
  1 sibling, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:52 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, peterz, bigeasy, tglx, mingo, hpa

Commit-ID:  ae6683d815895c2be1e60e1942630fa99488055b
Gitweb:     https://git.kernel.org/tip/ae6683d815895c2be1e60e1942630fa99488055b
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:51 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:16 +0200

hrtimer: Introduce HARD expiry mode

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into soft interrupt context.

But there are hrtimers which must be expired in hard interrupt context even
when PREEMPT_RT is enabled:

  - hrtimers which must expire in hard interrupt context, e.g. scheduler,
    perf, watchdog related hrtimers

  - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer

Add a new mode flag HRTIMER_MODE_HARD which allows marking these timers so
PREEMPT_RT will not move them into softirq expiry mode.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.981398465@linutronix.de


---
 include/linux/hrtimer.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 24072a0942c0..15c2ba6b6316 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -38,6 +38,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL	= 0x01,
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
+	HRTIMER_MODE_HARD	= 0x08,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -48,6 +49,11 @@ enum hrtimer_mode {
 	HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
 	HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
 
+	HRTIMER_MODE_ABS_HARD	= HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_HARD	= HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+	HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
 /*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] sched: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
  2019-07-30 22:11   ` [tip:timers/core] " tip-bot for Thomas Gleixner
@ 2019-08-01 15:53   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 18:58   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:53 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, peterz, bigeasy, linux-kernel, hpa, tglx

Commit-ID:  8b74569a24cb61ed5406668a4e7b3cbdccba25f7
Gitweb:     https://git.kernel.org/tip/8b74569a24cb61ed5406668a4e7b3cbdccba25f7
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:52 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:17 +0200

sched: Mark hrtimers to expire in hard interrupt context

The scheduler related hrtimers need to expire in hard interrupt context
even on PREEMPT_RT enabled kernels. Mark them as such.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.077004842@linutronix.de


---
 kernel/sched/core.c     | 6 +++---
 kernel/sched/deadline.c | 4 ++--
 kernel/sched/rt.c       | 7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..389e0993fbb4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -255,7 +255,7 @@ static void __hrtick_restart(struct rq *rq)
 {
 	struct hrtimer *timer = &rq->hrtick_timer;
 
-	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 }
 
 /*
@@ -314,7 +314,7 @@ void hrtick_start(struct rq *rq, u64 delay)
 	 */
 	delay = max_t(u64, delay, 10000LL);
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 #endif /* CONFIG_SMP */
 
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq)
 	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	rq->hrtick_timer.function = hrtick;
 }
 #else	/* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0359612d5443 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -923,7 +923,7 @@ static int start_dl_timer(struct task_struct *p)
 	 */
 	if (!hrtimer_is_queued(timer)) {
 		get_task_struct(p);
-		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
 	}
 
 	return 1;
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = dl_task_timer;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a532558a5176..da3e85e61013 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -67,7 +67,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		 * to update the period.
 		 */
 		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&rt_b->rt_period_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] perf/core: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
  2019-07-30 22:12   ` [tip:timers/core] " tip-bot for Thomas Gleixner
@ 2019-08-01 15:54   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 18:59   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:54 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: hpa, peterz, mingo, tglx, bigeasy, linux-kernel

Commit-ID:  ec0c297b253874917d7d6407117b94ef3358bf74
Gitweb:     https://git.kernel.org/tip/ec0c297b253874917d7d6407117b94ef3358bf74
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:53 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:17 +0200

perf/core: Mark hrtimers to expire in hard interrupt context

To guarantee that the multiplexing mechanism and the hrtimer driven events
work on PREEMPT_RT enabled kernels it's required that the related hrtimers
expire in hard interrupt context. Mark them so PREEMPT_RT kernels won't
defer them to soft interrupt context.

No functional change.

[ tglx: Split out of larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.169509224@linutronix.de


---
 kernel/events/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..9d623e257a51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
 	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 	if (!cpuctx->hrtimer_active) {
 		cpuctx->hrtimer_active = 1;
 		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
@@ -9491,7 +9491,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
 		period = max_t(u64, 10000, hwc->sample_period);
 	}
 	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -9513,7 +9513,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
 	if (!is_sampling_event(event))
 		return;
 
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
 
 	/*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] watchdog: Mark watchdog_hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
  2019-07-30 22:13   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:00   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:55 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: peterz, hpa, tglx, linux-kernel, bigeasy, mingo

Commit-ID:  854bdb3cbde94bb953cd49942762b03e69ac3842
Gitweb:     https://git.kernel.org/tip/854bdb3cbde94bb953cd49942762b03e69ac3842
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:54 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:18 +0200

watchdog: Mark watchdog_hrtimer to expire in hard interrupt context

The watchdog hrtimer must expire in hard interrupt context even on
PREEMPT_RT=y kernels as otherwise the hard/softlockup detection logic would
not work.

No functional change.

[ tglx: Split out from larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.262895510@linutronix.de


---
 kernel/watchdog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9e7b9306fe..f41334ef0971 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -490,10 +490,10 @@ static void watchdog_enable(unsigned int cpu)
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
 	 */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer->function = watchdog_timer_fn;
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 
 	/* Initialize timestamp */
 	__touch_watchdog();

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] KVM: LAPIC: Mark hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
  2019-07-26 19:41   ` Paolo Bonzini
  2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  3 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:55 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, peterz, bigeasy, linux-kernel, hpa, tglx, pbonzini

Commit-ID:  4ca8f0af37956ca39f65c8c4d500e4fb93590433
Gitweb:     https://git.kernel.org/tip/4ca8f0af37956ca39f65c8c4d500e4fb93590433
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:55 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:18 +0200

KVM: LAPIC: Mark hrtimer to expire in hard interrupt context

On PREEMPT_RT enabled kernels unmarked hrtimers are moved into soft
interrupt expiry mode by default.

While that's not a functional requirement for the KVM local APIC timer
emulation, it's a latency issue which can be avoided by marking the timer
so hard interrupt context expiry is enforced.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.363363474@linutronix.de


---
 arch/x86/kvm/lapic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0aa158657f20..b9e516099d07 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
 	} else
 		apic_timer_expired(apic);
 
@@ -2302,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2487,7 +2487,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
 }
 
 /*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] tick: Mark tick related hrtimers to expiry in hard interrupt context
  2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
  2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:56   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:56 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, peterz, bigeasy, hpa, mingo, linux-kernel

Commit-ID:  8dd1382c4f8562ee7395c030047a8cc2bc853042
Gitweb:     https://git.kernel.org/tip/8dd1382c4f8562ee7395c030047a8cc2bc853042
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:56 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:18 +0200

tick: Mark tick related hrtimers to expiry in hard interrupt context

The tick related hrtimers, which drive the scheduler tick and hrtimer based
broadcasting, are required to expire in hard interrupt context for obvious
reasons.

Mark them so PREEMPT_RT kernels won't move them to soft interrupt expiry.

Make the horribly formatted RCU_NONIDLE bracket maze readable while at it.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.459144407@linutronix.de


---
 kernel/time/tick-broadcast-hrtimer.c | 13 +++++++++----
 kernel/time/tick-sched.c             | 15 +++++++++------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 5be6154e2fd2..c1f5bb590b5e 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -59,11 +59,16 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
 	 * hrtimer_{start/cancel} functions call into tracing,
 	 * calls to these functions must be bound within RCU_NONIDLE.
 	 */
-	RCU_NONIDLE({
+	RCU_NONIDLE(
+		{
 			bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
-			if (bc_moved)
+			if (bc_moved) {
 				hrtimer_start(&bctimer, expires,
-					      HRTIMER_MODE_ABS_PINNED);});
+					      HRTIMER_MODE_ABS_PINNED_HARD);
+			}
+		}
+	);
+
 	if (bc_moved) {
 		/* Bind the "device" to the cpu */
 		bc->bound_on = smp_processor_id();
@@ -104,7 +109,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	bctimer.function = bc_handler;
 	clockevents_register_device(&ce_broadcast_hrtimer);
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index be9707f68024..01ff32a02af2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -634,10 +634,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 	/* Forward the time to expire in the future */
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
 
-	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-	else
+	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
+	} else {
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+	}
 
 	/*
 	 * Reset to make sure next tick stop doesn't get fooled by past
@@ -802,7 +804,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	}
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ts->sched_timer, tick,
+			      HRTIMER_MODE_ABS_PINNED_HARD);
 	} else {
 		hrtimer_set_expires(&ts->sched_timer, tick);
 		tick_program_event(tick, 1);
@@ -1327,7 +1330,7 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */
@@ -1342,7 +1345,7 @@ void tick_setup_sched_timer(void)
 	}
 
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
 #endif /* HIGH_RES_TIMERS */

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT
  2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
  2019-07-30 22:15   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:57   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:02   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:57 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: peterz, hpa, mingo, linux-kernel, tglx, bigeasy

Commit-ID:  edd2f987491fb47949a9612743435d6d0f61f614
Gitweb:     https://git.kernel.org/tip/edd2f987491fb47949a9612743435d6d0f61f614
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:57 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:19 +0200

hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into softirq context.

hrtimers marked with HRTIMER_MODE_HARD must be kept in hard interrupt
context expiry mode. Add the required logic.

No functional change for PREEMPT_RT=n kernels.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.551967692@linutronix.de


---
 kernel/time/hrtimer.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 0ace301a56f4..90dcc4d95e91 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1275,8 +1275,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1290,6 +1299,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
 	timer->is_hard = !softtimer;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
                     ` (2 preceding siblings ...)
  2019-07-30 22:16   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 15:58   ` tip-bot for Sebastian Andrzej Siewior
  2019-08-01 19:03   ` tip-bot for Sebastian Andrzej Siewior
  4 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 15:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, bigeasy, peterz, linux-kernel, tglx, rostedt, hpa

Commit-ID:  876f28e7bdf152da7514a28c79f83e61e0c6d30e
Gitweb:     https://git.kernel.org/tip/876f28e7bdf152da7514a28c79f83e61e0c6d30e
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:58 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:19 +0200

hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT

On PREEMPT_RT enabled kernels hrtimers which are not explicitly marked for
hard interrupt expiry mode are moved into soft interrupt context either for
latency reasons or because the hrtimer callback takes regular spinlocks or
invokes other functions which are not suitable for hard interrupt context
on PREEMPT_RT.

The hrtimer_sleeper callback is RT compatible in hard interrupt context,
but there is a latency concern: Untrusted userspace can spawn many threads
which arm timers for the same expiry time on the same CPU. On expiry that
causes a latency spike due to the wakeup of a gazillion threads.

OTOH, privileged real-time user space applications rely on the low latency
of hard interrupt wakeups. These syscall related wakeups are all based on
hrtimer sleepers.

If the current task is in a real-time scheduling class, mark the mode for
hard interrupt expiry.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.645792403@linutronix.de


---
 kernel/time/hrtimer.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 90dcc4d95e91..c101f88ae8aa 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1676,6 +1676,16 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
 				   enum hrtimer_mode mode)
 {
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
 	hrtimer_start_expires(&sl->timer, mode);
 }
 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
@@ -1683,6 +1693,30 @@ EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, priviledged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
 	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Prepare support for PREEMPT_RT
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
  2019-07-28  9:06   ` Juergen Gross
  2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
@ 2019-08-01 15:58   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  3 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-08-01 15:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, tglx, linux-kernel, anna-maria, mingo, hpa, bigeasy

Commit-ID:  37226a1807c5f41537190462362e3e2739e22f13
Gitweb:     https://git.kernel.org/tip/37226a1807c5f41537190462362e3e2739e22f13
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:59 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:19 +0200

hrtimer: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de


---
 include/linux/hrtimer.h | 16 +++++++++
 kernel/time/hrtimer.c   | 95 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7d0d0a36a8f4..5df4bcff96d5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -192,6 +192,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
+ *			callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_retries;
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+#ifdef CONFIG_PREEMPT_RT
+void hrtimer_cancel_wait_running(const struct hrtimer *timer);
+#else
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
+{
+	cpu_relax();
+}
+#endif
 
 /* Exported timer functions: */
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c101f88ae8aa..499122752649 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] timers: Prepare support for PREEMPT_RT
  2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
  2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
@ 2019-08-01 15:59   ` tip-bot for Anna-Maria Gleixner
  2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-08-01 15:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, linux-kernel, anna-maria, bigeasy, mingo, hpa, peterz

Commit-ID:  1c2df8ac9292ea1fe6c958c198bf6bc5c768acf5
Gitweb:     https://git.kernel.org/tip/1c2df8ac9292ea1fe6c958c198bf6bc5c768acf5
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:31:00 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 17:43:20 +0200

timers: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If del_timer_sync() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

This mechanism is not used for timers which are marked IRQSAFE as for those
preemption is disabled across the callback and therefore this situation
cannot happen. The callbacks for such timers need to be individually
audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
del_timer_sync() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant
of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.832418500@linutronix.de


---
 include/linux/timer.h |   2 +-
 kernel/time/timer.c   | 103 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 282e4f2a532a..1e6650ed066d 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
   extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)		del_timer(t)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba33b1c..673c6a0f0c45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reaquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;
 
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base)
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] sched: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
  2019-07-30 22:11   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2019-08-01 15:53   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 18:58   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 18:58 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: bigeasy, tglx, mingo, hpa, linux-kernel, peterz

Commit-ID:  d5096aa65acd0ef2d18ac8247260ab4481ade399
Gitweb:     https://git.kernel.org/tip/d5096aa65acd0ef2d18ac8247260ab4481ade399
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:52 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:19 +0200

sched: Mark hrtimers to expire in hard interrupt context

The scheduler related hrtimers need to expire in hard interrupt context
even on PREEMPT_RT enabled kernels. Mark them as such.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.077004842@linutronix.de



---
 kernel/sched/core.c     | 6 +++---
 kernel/sched/deadline.c | 4 ++--
 kernel/sched/rt.c       | 7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..389e0993fbb4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -255,7 +255,7 @@ static void __hrtick_restart(struct rq *rq)
 {
 	struct hrtimer *timer = &rq->hrtick_timer;
 
-	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 }
 
 /*
@@ -314,7 +314,7 @@ void hrtick_start(struct rq *rq, u64 delay)
 	 */
 	delay = max_t(u64, delay, 10000LL);
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 #endif /* CONFIG_SMP */
 
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq)
 	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	rq->hrtick_timer.function = hrtick;
 }
 #else	/* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0359612d5443 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -923,7 +923,7 @@ static int start_dl_timer(struct task_struct *p)
 	 */
 	if (!hrtimer_is_queued(timer)) {
 		get_task_struct(p);
-		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
 	}
 
 	return 1;
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = dl_task_timer;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a532558a5176..da3e85e61013 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -67,7 +67,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		 * to update the period.
 		 */
 		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&rt_b->rt_period_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] perf/core: Mark hrtimers to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
  2019-07-30 22:12   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2019-08-01 15:54   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 18:59   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 18:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, bigeasy, tglx, hpa, peterz, mingo

Commit-ID:  30f9028b6c43fd17c006550594ea3dbb87afbf80
Gitweb:     https://git.kernel.org/tip/30f9028b6c43fd17c006550594ea3dbb87afbf80
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:53 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:20 +0200

perf/core: Mark hrtimers to expire in hard interrupt context

To guarantee that the multiplexing mechanism and the hrtimer driven events
work on PREEMPT_RT enabled kernels it's required that the related hrtimers
expire in hard interrupt context. Mark them so PREEMPT_RT kernels won't
defer them to soft interrupt context.

No functional change.

[ tglx: Split out of larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.169509224@linutronix.de



---
 kernel/events/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..9d623e257a51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
 	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 	if (!cpuctx->hrtimer_active) {
 		cpuctx->hrtimer_active = 1;
 		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
@@ -9491,7 +9491,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
 		period = max_t(u64, 10000, hwc->sample_period);
 	}
 	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -9513,7 +9513,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
 	if (!is_sampling_event(event))
 		return;
 
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
 
 	/*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] watchdog: Mark watchdog_hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
  2019-07-30 22:13   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 19:00   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 19:00 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, bigeasy, mingo, hpa, peterz, tglx

Commit-ID:  d2ab4cf4943576fb060b8a69341d9e0c2a952ba7
Gitweb:     https://git.kernel.org/tip/d2ab4cf4943576fb060b8a69341d9e0c2a952ba7
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:54 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:20 +0200

watchdog: Mark watchdog_hrtimer to expire in hard interrupt context

The watchdog hrtimer must expire in hard interrupt context even on
PREEMPT_RT=y kernels as otherwise the hard/softlockup detection logic would
not work.

No functional change.

[ tglx: Split out from larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.262895510@linutronix.de



---
 kernel/watchdog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9e7b9306fe..f41334ef0971 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -490,10 +490,10 @@ static void watchdog_enable(unsigned int cpu)
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
 	 */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer->function = watchdog_timer_fn;
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 
 	/* Initialize timestamp */
 	__touch_watchdog();

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] KVM: LAPIC: Mark hrtimer to expire in hard interrupt context
  2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
                     ` (2 preceding siblings ...)
  2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  3 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 19:01 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bigeasy, hpa, pbonzini, mingo, peterz, tglx, linux-kernel

Commit-ID:  2c0d278f3293fc59da0d183075415ca1e8c93b40
Gitweb:     https://git.kernel.org/tip/2c0d278f3293fc59da0d183075415ca1e8c93b40
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:55 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:20 +0200

KVM: LAPIC: Mark hrtimer to expire in hard interrupt context

On PREEMPT_RT enabled kernels unmarked hrtimers are moved into soft
interrupt expiry mode by default.

While that's not a functional requirement for the KVM local APIC timer
emulation, it's a latency issue which can be avoided by marking the timer
so hard interrupt context expiry is enforced.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.363363474@linutronix.de



---
 arch/x86/kvm/lapic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0aa158657f20..b9e516099d07 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
 	} else
 		apic_timer_expired(apic);
 
@@ -2302,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2487,7 +2487,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
 }
 
 /*

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] tick: Mark tick related hrtimers to expiry in hard interrupt context
  2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
  2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:56   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 19:01 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, linux-kernel, tglx, peterz, bigeasy, hpa

Commit-ID:  902a9f9c509053161e987778dc5836d2628f53b7
Gitweb:     https://git.kernel.org/tip/902a9f9c509053161e987778dc5836d2628f53b7
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:56 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:21 +0200

tick: Mark tick related hrtimers to expiry in hard interrupt context

The tick related hrtimers, which drive the scheduler tick and hrtimer based
broadcasting are required to expire in hard interrupt context for obvious
reasons.

Mark them so PREEMPT_RT kernels won't move them to soft interrupt expiry.

Make the horribly formatted RCU_NONIDLE bracket maze readable while at it.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.459144407@linutronix.de



---
 kernel/time/tick-broadcast-hrtimer.c | 13 +++++++++----
 kernel/time/tick-sched.c             | 15 +++++++++------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 5be6154e2fd2..c1f5bb590b5e 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -59,11 +59,16 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
 	 * hrtimer_{start/cancel} functions call into tracing,
 	 * calls to these functions must be bound within RCU_NONIDLE.
 	 */
-	RCU_NONIDLE({
+	RCU_NONIDLE(
+		{
 			bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
-			if (bc_moved)
+			if (bc_moved) {
 				hrtimer_start(&bctimer, expires,
-					      HRTIMER_MODE_ABS_PINNED);});
+					      HRTIMER_MODE_ABS_PINNED_HARD);
+			}
+		}
+	);
+
 	if (bc_moved) {
 		/* Bind the "device" to the cpu */
 		bc->bound_on = smp_processor_id();
@@ -104,7 +109,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	bctimer.function = bc_handler;
 	clockevents_register_device(&ce_broadcast_hrtimer);
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index be9707f68024..01ff32a02af2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -634,10 +634,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 	/* Forward the time to expire in the future */
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
 
-	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-	else
+	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
+	} else {
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+	}
 
 	/*
 	 * Reset to make sure next tick stop doesn't get fooled by past
@@ -802,7 +804,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	}
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ts->sched_timer, tick,
+			      HRTIMER_MODE_ABS_PINNED_HARD);
 	} else {
 		hrtimer_set_expires(&ts->sched_timer, tick);
 		tick_program_event(tick, 1);
@@ -1327,7 +1330,7 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */
@@ -1342,7 +1345,7 @@ void tick_setup_sched_timer(void)
 	}
 
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
 #endif /* HIGH_RES_TIMERS */

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT
  2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
  2019-07-30 22:15   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
  2019-08-01 15:57   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 19:02   ` tip-bot for Sebastian Andrzej Siewior
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 19:02 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: bigeasy, hpa, linux-kernel, mingo, peterz, tglx

Commit-ID:  f5c2f0215e36d76fbb9605283dd7535af09f5770
Gitweb:     https://git.kernel.org/tip/f5c2f0215e36d76fbb9605283dd7535af09f5770
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:57 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:21 +0200

hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into softirq context.

hrtimers marked with HRTIMER_MODE_HARD must be kept in hard interrupt
context expiry mode. Add the required logic.

No functional change for PREEMPT_RT=n kernels.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.551967692@linutronix.de



---
 kernel/time/hrtimer.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 0ace301a56f4..90dcc4d95e91 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1275,8 +1275,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1290,6 +1299,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
 	timer->is_hard = !softtimer;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
  2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
                     ` (3 preceding siblings ...)
  2019-08-01 15:58   ` tip-bot for Sebastian Andrzej Siewior
@ 2019-08-01 19:03   ` tip-bot for Sebastian Andrzej Siewior
  4 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Sebastian Andrzej Siewior @ 2019-08-01 19:03 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, mingo, tglx, rostedt, linux-kernel, bigeasy, peterz

Commit-ID:  1842f5a427f5323f5c19ab99b55d09b3ab5172a5
Gitweb:     https://git.kernel.org/tip/1842f5a427f5323f5c19ab99b55d09b3ab5172a5
Author:     Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:58 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:22 +0200

hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT

On PREEMPT_RT enabled kernels hrtimers which are not explicitly marked for
hard interrupt expiry mode are moved into soft interrupt context either for
latency reasons or because the hrtimer callback takes regular spinlocks or
invokes other functions which are not suitable for hard interrupt context
on PREEMPT_RT.

The hrtimer_sleeper callback is RT compatible in hard interrupt context,
but there is a latency concern: Untrusted userspace can spawn many threads
which arm timers for the same expiry time on the same CPU. On expiry that
causes a latency spike due to the wakeup of a gazillion threads.

OTOH, privileged real-time user space applications rely on the low latency
of hard interrupt wakeups. These syscall related wakeups are all based on
hrtimer sleepers.

If the current task is in a real-time scheduling class, mark the mode for
hard interrupt expiry.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.645792403@linutronix.de



---
 kernel/time/hrtimer.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 90dcc4d95e91..c101f88ae8aa 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1676,6 +1676,16 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
 				   enum hrtimer_mode mode)
 {
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
 	hrtimer_start_expires(&sl->timer, mode);
 }
 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
@@ -1683,6 +1693,30 @@ EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, priviledged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
 	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] hrtimer: Prepare support for PREEMPT_RT
  2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
                     ` (2 preceding siblings ...)
  2019-08-01 15:58   ` tip-bot for Anna-Maria Gleixner
@ 2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  2019-08-20 13:26     ` Frederic Weisbecker
  3 siblings, 1 reply; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-08-01 19:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bigeasy, peterz, linux-kernel, anna-maria, tglx, hpa, mingo

Commit-ID:  f61eff83cec9cfab31fd30a2ca8856be379cdcd5
Gitweb:     https://git.kernel.org/tip/f61eff83cec9cfab31fd30a2ca8856be379cdcd5
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:59 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:22 +0200

hrtimer: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de



---
 include/linux/hrtimer.h | 16 +++++++++
 kernel/time/hrtimer.c   | 95 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7d0d0a36a8f4..5df4bcff96d5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -192,6 +192,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
+ *			callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_retries;
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+#ifdef CONFIG_PREEMPT_RT
+void hrtimer_cancel_wait_running(const struct hrtimer *timer);
+#else
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
+{
+	cpu_relax();
+}
+#endif
 
 /* Exported timer functions: */
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c101f88ae8aa..499122752649 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip:timers/core] timers: Prepare support for PREEMPT_RT
  2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
  2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
  2019-08-01 15:59   ` tip-bot for Anna-Maria Gleixner
@ 2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
  2 siblings, 0 replies; 61+ messages in thread
From: tip-bot for Anna-Maria Gleixner @ 2019-08-01 19:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bigeasy, tglx, hpa, mingo, linux-kernel, peterz, anna-maria

Commit-ID:  030dcdd197d77374879bb5603d091eee7d8aba80
Gitweb:     https://git.kernel.org/tip/030dcdd197d77374879bb5603d091eee7d8aba80
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:31:00 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 1 Aug 2019 20:51:22 +0200

timers: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbound priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If del_timer_sync() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the live lock issues.

This mechanism is not used for timers which are marked IRQSAFE as for those
preemption is disabled across the callback and therefore this situation
cannot happen. The callbacks for such timers need to be individually
audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
del_timer_sync() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant
of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.832418500@linutronix.de



---
 include/linux/timer.h |   2 +-
 kernel/time/timer.c   | 103 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 282e4f2a532a..1e6650ed066d 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
   extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)		del_timer(t)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba33b1c..673c6a0f0c45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reaquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;
 
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base)
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [tip:timers/core] hrtimer: Prepare support for PREEMPT_RT
  2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
@ 2019-08-20 13:26     ` Frederic Weisbecker
  2019-08-23  2:12       ` [tip: timers/core] hrtimer: Improve comments on handling priority inversion against softirq kthread tip-bot2 for Frederic Weisbecker
  0 siblings, 1 reply; 61+ messages in thread
From: Frederic Weisbecker @ 2019-08-20 13:26 UTC (permalink / raw)
  To: bigeasy, peterz, mingo, linux-kernel, anna-maria, tglx, hpa
  Cc: linux-tip-commits

On Thu, Aug 01, 2019 at 12:04:03PM -0700, tip-bot for Anna-Maria Gleixner wrote:
> Commit-ID:  f61eff83cec9cfab31fd30a2ca8856be379cdcd5
> Gitweb:     https://git.kernel.org/tip/f61eff83cec9cfab31fd30a2ca8856be379cdcd5
> Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
> AuthorDate: Fri, 26 Jul 2019 20:30:59 +0200
> Committer:  Thomas Gleixner <tglx@linutronix.de>
> CommitDate: Thu, 1 Aug 2019 20:51:22 +0200
> 
> hrtimer: Prepare support for PREEMPT_RT
> 
> When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
> the soft interrupt thread is preempted in the middle of a timer callback,
> then calling hrtimer_cancel() can lead to two issues:
> 
>   - If the caller is on a remote CPU then it has to spin wait for the timer
>     handler to complete. This can result in unbound priority inversion.
> 
>   - If the caller originates from the task which preempted the timer
>     handler on the same CPU, then spin waiting for the timer handler to
>     complete is never going to end.

[...]
> +/*
> + * This function is called on PREEMPT_RT kernels when the fast path
> + * deletion of a timer failed because the timer callback function was
> + * running.
> + *
> + * This prevents priority inversion, if the softirq thread on a remote CPU
> + * got preempted, and it prevents a life lock when the task which tries to
> + * delete a timer preempted the softirq thread running the timer callback
> + * function.
> + */
> +void hrtimer_cancel_wait_running(const struct hrtimer *timer)
> +{
> +	struct hrtimer_clock_base *base = timer->base;
> +
> +	if (!timer->is_soft || !base || !base->cpu_base) {
> +		cpu_relax();
> +		return;
> +	}
> +
> +	/*
> +	 * Mark the base as contended and grab the expiry lock, which is
> +	 * held by the softirq across the timer callback. Drop the lock
> +	 * immediately so the softirq can expire the next timer. In theory
> +	 * the timer could already be running again, but that's more than
> +	 * unlikely and just causes another wait loop.
> +	 */
> +	atomic_inc(&base->cpu_base->timer_waiters);
> +	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
> +	atomic_dec(&base->cpu_base->timer_waiters);
> +	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
> +}

So, while reviewing the posix timers series, I stumbled upon timer_wait_running() which
lacked any explanation, which led me to hrtimer_cancel_wait_running() that was
a bit more helpful but still had a blurry explanation.

In the end I found the appropriate information in this commit changelog.
It might be helpful for future reviewers to apply this:

---
From ef9a4d87b6e7c43899248c376c5959f4e0bcd309 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 20 Aug 2019 15:12:23 +0200
Subject: [PATCH] hrtimer: Improve comments on handling priority inversion
 against softirq kthread

The handling of a priority inversion between timer cancelling and a
not well defined possible preemption of softirq kthread is not very
clear. Especially in the posix timers side where we don't even know why
there is a specific RT wait callback.

All the nice explanations can be found in the initial changelog of
f61eff83cec9cfab31fd30a2ca8856be379cdcd5
("hrtimer: Prepare support for PREEMPT_RT"). So let's extract the detailed
information from there.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/time/hrtimer.c      | 14 ++++++++++----
 kernel/time/posix-timers.c |  5 +++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 499122752649..833353732554 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1201,10 +1201,16 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
  * deletion of a timer failed because the timer callback function was
  * running.
  *
- * This prevents priority inversion, if the softirq thread on a remote CPU
- * got preempted, and it prevents a life lock when the task which tries to
- * delete a timer preempted the softirq thread running the timer callback
- * function.
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ *  - If the caller is on a remote CPU then it has to spin wait for the timer
+ *    handler to complete. This can result in unbound priority inversion.
+ *
+ *  - If the caller originates from the task which preempted the timer
+ *    handler on the same CPU, then spin waiting for the timer handler to
+ *    complete is never going to end.
  */
 void hrtimer_cancel_wait_running(const struct hrtimer *timer)
 {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a71c1aab071c..f6713a41e4e0 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -806,6 +806,11 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
 }
 
 #ifdef CONFIG_PREEMPT_RT
+/*
+ * Prevent from priority inversion against softirq kthread in case
+ * it gets preempted while executing an htimer callback. See
+ * comments in hrtimer_cancel_wait_running.
+ */
 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
 					   unsigned long *flags)
 {
-- 
2.21.0




^ permalink raw reply	[flat|nested] 61+ messages in thread

* [tip: timers/core] hrtimer: Improve comments on handling priority inversion against softirq kthread
  2019-08-20 13:26     ` Frederic Weisbecker
@ 2019-08-23  2:12       ` tip-bot2 for Frederic Weisbecker
  0 siblings, 0 replies; 61+ messages in thread
From: tip-bot2 for Frederic Weisbecker @ 2019-08-23  2:12 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, Thomas Gleixner, Frederic Weisbecker

The following commit has been merged into the timers/core branch of tip:

Commit-ID:     0bee3b601b77dbe7981b5474ae8758d6bf60177a
Gitweb:        https://git.kernel.org/tip/0bee3b601b77dbe7981b5474ae8758d6bf60177a
Author:        Frederic Weisbecker <frederic@kernel.org>
AuthorDate:    Tue, 20 Aug 2019 15:12:23 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 20 Aug 2019 22:05:46 +02:00

hrtimer: Improve comments on handling priority inversion against softirq kthread

The handling of a priority inversion between timer cancelling and a not
well defined possible preemption of softirq kthread is not very clear.

Especially in the posix timers side it's unclear why there is a specific RT
wait callback.

All the nice explanations can be found in the initial changelog of
f61eff83cec9 ("hrtimer: Prepare support for PREEMPT_RT").

Extract the detailed information from there and put it into comments.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190820132656.GC2093@lenoir
---
 kernel/time/hrtimer.c      | 14 ++++++++++----
 kernel/time/posix-timers.c |  6 ++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 4991227..8333537 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1201,10 +1201,16 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
  * deletion of a timer failed because the timer callback function was
  * running.
  *
- * This prevents priority inversion, if the softirq thread on a remote CPU
- * got preempted, and it prevents a life lock when the task which tries to
- * delete a timer preempted the softirq thread running the timer callback
- * function.
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ *  - If the caller is on a remote CPU then it has to spin wait for the timer
+ *    handler to complete. This can result in unbound priority inversion.
+ *
+ *  - If the caller originates from the task which preempted the timer
+ *    handler on the same CPU, then spin waiting for the timer handler to
+ *    complete is never going to end.
  */
 void hrtimer_cancel_wait_running(const struct hrtimer *timer)
 {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 9e37783..0ec5b7a 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -810,6 +810,12 @@ static void common_timer_wait_running(struct k_itimer *timer)
 	hrtimer_cancel_wait_running(&timer->it.real.timer);
 }
 
+/*
+ * On PREEMPT_RT this prevent priority inversion against softirq kthread in
+ * case it gets preempted while executing a timer callback. See comments in
+ * hrtimer_cancel_wait_running. For PREEMPT_RT=n this just results in a
+ * cpu_relax().
+ */
 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
 					   unsigned long *flags)
 {

^ permalink raw reply	[flat|nested] 61+ messages in thread

end of thread, back to index

Thread overview: 61+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
2019-07-26 19:57   ` Steven Rostedt
2019-07-26 20:01     ` Thomas Gleixner
2019-07-30 22:07   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
2019-07-30 22:08   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:49   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
2019-07-30 22:10   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:52   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
2019-07-30 22:11   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-08-01 15:53   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 18:58   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
2019-07-30 22:12   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-08-01 15:54   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 18:59   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
2019-07-30 22:13   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:00   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
2019-07-26 19:41   ` Paolo Bonzini
2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:56   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
2019-07-30 22:15   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:57   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:02   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
2019-07-26 20:44   ` Steven Rostedt
2019-07-26 20:52     ` Thomas Gleixner
2019-07-26 20:56       ` Steven Rostedt
2019-07-26 21:16   ` Julia Cartwright
2019-07-26 21:30     ` Steven Rostedt
2019-07-26 21:35     ` Thomas Gleixner
2019-07-30 22:16   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:58   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:03   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
2019-07-28  9:06   ` Juergen Gross
2019-07-29 15:08     ` Steven Rostedt
2019-07-29 17:30       ` Paolo Bonzini
2019-07-31  8:45         ` Juergen Gross
2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
2019-08-01 15:58   ` tip-bot for Anna-Maria Gleixner
2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
2019-08-20 13:26     ` Frederic Weisbecker
2019-08-23  2:12       ` [tip: timers/core] hrtimer: Improve comments on handling priority inversion against softirq kthread tip-bot2 for Frederic Weisbecker
2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
2019-08-01 15:59   ` tip-bot for Anna-Maria Gleixner
2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
2019-07-29 19:45 ` [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Peter Zijlstra

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git