LKML Archive on lore.kernel.org
 help / color / Atom feed
From: tip-bot for Anna-Maria Gleixner <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: tglx@linutronix.de, mingo@kernel.org,
	linux-kernel@vger.kernel.org, hpa@zytor.com,
	peterz@infradead.org, bigeasy@linutronix.de,
	anna-maria@linutronix.de
Subject: [tip:timers/core] hrtimer: Prepare support for PREEMPT_RT
Date: Tue, 30 Jul 2019 15:17:12 -0700
Message-ID: <tip-10521d890c650472e49bbbb4cf415f0fa6c29d4f@git.kernel.org> (raw)
In-Reply-To: <20190726185753.737767218@linutronix.de>

Commit-ID:  10521d890c650472e49bbbb4cf415f0fa6c29d4f
Gitweb:     https://git.kernel.org/tip/10521d890c650472e49bbbb4cf415f0fa6c29d4f
Author:     Anna-Maria Gleixner <anna-maria@linutronix.de>
AuthorDate: Fri, 26 Jul 2019 20:30:59 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 30 Jul 2019 23:57:57 +0200

hrtimer: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de

---
 include/linux/hrtimer.h | 16 +++++++++
 kernel/time/hrtimer.c   | 95 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7d0d0a36a8f4..5df4bcff96d5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -192,6 +192,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimers are
+ *			 expired
+ * @timer_waiters:	Number of hrtimer_cancel() invocations waiting for the
+ *			timer callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_retries;
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+#ifdef CONFIG_PREEMPT_RT
+void hrtimer_cancel_wait_running(const struct hrtimer *timer);
+#else
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
+{
+	cpu_relax();
+}
+#endif
 
 /* Exported timer functions: */
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c101f88ae8aa..499122752649 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->softirq_expiry_lock, then it was waiting
+ * for the timer callback to finish. Drop the lock and reacquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a livelock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 

  parent reply index

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-26 18:30 [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Thomas Gleixner
2019-07-26 18:30 ` [patch 01/12] hrtimer: Remove task argument from hrtimer_init_sleeper() Thomas Gleixner
2019-07-26 19:57   ` Steven Rostedt
2019-07-26 20:01     ` Thomas Gleixner
2019-07-30 22:07   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-07-26 18:30 ` [patch 02/12] hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls Thomas Gleixner
2019-07-30 22:08   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:49   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 03/12] hrtimer: Introduce HARD expiry mode Thomas Gleixner
2019-07-30 22:10   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:52   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 04/12] sched: Mark hrtimers to expire in hard interrupt context Thomas Gleixner
2019-07-30 22:11   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-08-01 15:53   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 18:58   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 05/12] perf/core: " Thomas Gleixner
2019-07-30 22:12   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2019-08-01 15:54   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 18:59   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 06/12] watchdog: Mark watchdog_hrtimer " Thomas Gleixner
2019-07-30 22:13   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:00   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 07/12] KVM: LAPIC: Mark hrtimer " Thomas Gleixner
2019-07-26 19:41   ` Paolo Bonzini
2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:55   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 08/12] tick: Mark tick related hrtimers to expiry " Thomas Gleixner
2019-07-30 22:14   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:56   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:01   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 09/12] hrtimer: Move unmarked hrtimers to soft interrupt expiry on RT Thomas Gleixner
2019-07-30 22:15   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:57   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:02   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 10/12] hrtimer: Determine hard/soft expiry mode for hrtimer sleepers " Thomas Gleixner
2019-07-26 20:44   ` Steven Rostedt
2019-07-26 20:52     ` Thomas Gleixner
2019-07-26 20:56       ` Steven Rostedt
2019-07-26 21:16   ` Julia Cartwright
2019-07-26 21:30     ` Steven Rostedt
2019-07-26 21:35     ` Thomas Gleixner
2019-07-30 22:16   ` [tip:timers/core] " tip-bot for Sebastian Andrzej Siewior
2019-08-01 15:58   ` tip-bot for Sebastian Andrzej Siewior
2019-08-01 19:03   ` tip-bot for Sebastian Andrzej Siewior
2019-07-26 18:30 ` [patch 11/12] hrtimer: Prepare support for PREEMPT_RT Thomas Gleixner
2019-07-28  9:06   ` Juergen Gross
2019-07-29 15:08     ` Steven Rostedt
2019-07-29 17:30       ` Paolo Bonzini
2019-07-31  8:45         ` Juergen Gross
2019-07-30 22:17   ` tip-bot for Anna-Maria Gleixner [this message]
2019-08-01 15:58   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
2019-08-20 13:26     ` Frederic Weisbecker
2019-08-23  2:12       ` [tip: timers/core] hrtimer: Improve comments on handling priority inversion against softirq kthread tip-bot2 for Frederic Weisbecker
2019-07-26 18:31 ` [patch 12/12] timers: Prepare support for PREEMPT_RT Thomas Gleixner
2019-07-30 22:17   ` [tip:timers/core] " tip-bot for Anna-Maria Gleixner
2019-08-01 15:59   ` tip-bot for Anna-Maria Gleixner
2019-08-01 19:04   ` tip-bot for Anna-Maria Gleixner
2019-07-29 19:45 ` [patch 00/12] (hr)timers: Prepare for PREEMPT_RT support Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-10521d890c650472e49bbbb4cf415f0fa6c29d4f@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=anna-maria@linutronix.de \
    --cc=bigeasy@linutronix.de \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git