linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Will Deacon <will@kernel.org>, Waiman Long <longman@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Davidlohr Bueso <dave@stgolabs.net>
Subject: [patch 03/63] sched: Prepare for RT sleeping spin/rwlocks
Date: Fri, 30 Jul 2021 15:50:10 +0200	[thread overview]
Message-ID: <20210730135205.317820700@linutronix.de> (raw)
In-Reply-To: <20210730135007.155909613@linutronix.de>

From: Thomas Gleixner <tglx@linutronix.de>

Waiting for spinlocks and rwlocks on non RT enabled kernels is task::state
preserving. Any wakeup which matches the state is valid.

RT enabled kernels substitutes them with 'sleeping' spinlocks. This creates
an issue vs. task::state.

In order to block on the lock the task has to overwrite task::state and a
consecutive wakeup issued by the unlocker sets the state back to
TASK_RUNNING. As a consequence the task loses the state which was set
before the lock acquire and also any regular wakeup targeted at the task
while it is blocked on the lock.

To handle this gracefully add a 'saved_state' member to task_struct which
is used in the following way:

 1) When a task blocks on a 'sleeping' spinlock, the current state is saved
    in task::saved_state before it is set to TASK_RTLOCK_WAIT.

 2) When the task unblocks and after acquiring the lock, it restores the saved
    state.

 3) When a regular wakeup happens for a task while it is blocked then the
    state change of that wakeup is redirected to operate on task::saved_state.

    This is also required when the task state is running because the task
    might have been woken up from the lock wait and has not yet restored
    the saved state.

To make it complete provide the necessary helpers to save and restore the
saved state along with the necessary documentation how the RT lock blocking
is supposed to work.

For non-RT kernels there is no functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c   |   33 +++++++++++++++++++++++
 2 files changed, 103 insertions(+)
---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -155,6 +155,27 @@ struct task_group;
 		WRITE_ONCE(current->__state, (state_value));		\
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
 	} while (0)
+
+
+#define current_save_and_set_rtlock_wait_state()			\
+	do {								\
+		raw_spin_lock(&current->pi_lock);			\
+		current->saved_state = current->__state;		\
+		current->saved_state_change = current->task_state_change;\
+		current->task_state_change = _THIS_IP_;			\
+		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
+#define current_restore_rtlock_saved_state()				\
+	do {								\
+		raw_spin_lock(&current->pi_lock);			\
+		current->task_state_change = current->saved_state_change;\
+		WRITE_ONCE(current->__state, current->saved_state);	\
+		current->saved_state = TASK_RUNNING;			\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -213,6 +234,47 @@ struct task_group;
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
 	} while (0)
 
+/*
+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
+ *
+ * RT's spin/rwlock substitutions are state preserving. The state of the
+ * task when blocking on the lock is saved in task_struct::saved_state and
+ * restored after the lock has been acquired.  These operations are
+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
+ * lock related wakeups while the task is blocked on the lock are
+ * redirected to operate on task_struct::saved_state to ensure that these
+ * are not dropped. On restore task_struct::saved_state is set to
+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
+ *
+ * The lock operation looks like this:
+ *
+ *	current_save_and_set_rtlock_wait_state();
+ *	for (;;) {
+ *		if (try_lock())
+ *			break;
+ *		raw_spin_unlock_irq(&lock->wait_lock);
+ *		schedule_rtlock();
+ *		raw_spin_lock_irq(&lock->wait_lock);
+ *		set_current_state(TASK_RTLOCK_WAIT);
+ *	}
+ *	current_restore_rtlock_saved_state();
+ */
+#define current_save_and_set_rtlock_wait_state()			\
+	do {								\
+		raw_spin_lock(&current->pi_lock);			\
+		current->saved_state = current->__state;		\
+		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
+#define current_restore_rtlock_saved_state()				\
+	do {								\
+		raw_spin_lock(&current->pi_lock);			\
+		WRITE_ONCE(current->__state, current->saved_state);	\
+		current->saved_state = TASK_RUNNING;			\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
 #endif
 
 #define get_current_state()	READ_ONCE(current->__state)
@@ -670,6 +732,11 @@ struct task_struct {
 #endif
 	unsigned int			__state;
 
+#ifdef CONFIG_PREEMPT_RT
+	/* saved state for "spinlock sleepers" */
+	unsigned int			saved_state;
+#endif
+
 	/*
 	 * This begins the randomizable portion of task_struct. Only
 	 * scheduling-critical items should be added above here.
@@ -1359,6 +1426,9 @@ struct task_struct {
 	struct kmap_ctrl		kmap_ctrl;
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long			task_state_change;
+# ifdef CONFIG_PREEMPT_RT
+	unsigned long			saved_state_change;
+# endif
 #endif
 	int				pagefault_disabled;
 #ifdef CONFIG_MMU
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3568,14 +3568,47 @@ static void ttwu_queue(struct task_struc
  *
  * The caller holds p::pi_lock if p != current or has preemption
  * disabled when p == current.
+ *
+ * The rules of PREEMPT_RT saved_state:
+ *
+ *   The related locking code always holds p::pi_lock when updating
+ *   p::saved_state, which means the code is fully serialized in both cases.
+ *
+ *   The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other
+ *   bits set. This allows to distinguish all wakeup scenarios.
  */
 static __always_inline
 bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 {
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
+		WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
+			     state != TASK_RTLOCK_WAIT);
+	}
+
 	if (READ_ONCE(p->__state) & state) {
 		*success = 1;
 		return true;
 	}
+
+#ifdef CONFIG_PREEMPT_RT
+	/*
+	 * Saved state preserves the task state accross blocking on
+	 * a RT lock.  If the state matches, set p::saved_state to
+	 * TASK_RUNNING, but do not wake the task because it waits
+	 * for a lock wakeup. Also indicate success because from
+	 * the regular waker's point of view this has succeeded.
+	 *
+	 * After acquiring the lock the task will restore p::state
+	 * from p::saved_state which ensures that the regular
+	 * wakeup is not lost. The restore will also set
+	 * p::saved_state to TASK_RUNNING so any further tests will
+	 * not result in false positives vs. @success
+	 */
+	if (p->saved_state & state) {
+		p->saved_state = TASK_RUNNING;
+		*success = 1;
+	}
+#endif
 	return false;
 }
 


  parent reply	other threads:[~2021-07-30 14:19 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-30 13:50 [patch 00/63] locking, sched: The PREEMPT-RT locking infrastructure Thomas Gleixner
2021-07-30 13:50 ` [patch 01/63] sched: Split out the wakeup state check Thomas Gleixner
2021-07-30 13:50 ` [patch 02/63] sched: Introduce TASK_RTLOCK_WAIT Thomas Gleixner
2021-07-30 13:50 ` Thomas Gleixner [this message]
2021-08-01 15:30   ` [patch 03/63] sched: Prepare for RT sleeping spin/rwlocks Mike Galbraith
2021-08-03  9:48     ` Peter Zijlstra
2021-08-03 14:04       ` Thomas Gleixner
2021-08-03 14:51         ` Peter Zijlstra
2021-08-03 20:11           ` Thomas Gleixner
2021-07-30 13:50 ` [patch 04/63] sched: Rework the __schedule() preempt argument Thomas Gleixner
2021-07-30 13:50 ` [patch 05/63] sched: Provide schedule point for RT locks Thomas Gleixner
2021-07-30 13:50 ` [patch 06/63] sched/wake_q: Provide WAKE_Q_HEAD_INITIALIZER Thomas Gleixner
2021-07-30 13:50 ` [patch 07/63] media/atomisp: Use lockdep instead of *mutex_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 08/63] rtmutex: Remove rt_mutex_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 09/63] rtmutex: Convert macros to inlines Thomas Gleixner
2021-07-30 13:50 ` [patch 10/63] rtmutex: Switch to try_cmpxchg() Thomas Gleixner
2021-07-30 13:50 ` [patch 11/63] rtmutex: Split API and implementation Thomas Gleixner
2021-07-30 13:50 ` [patch 12/63] rtmutex: Split out the inner parts of struct rtmutex Thomas Gleixner
2021-07-30 13:50 ` [patch 13/63] locking/rtmutex: Provide rt_mutex_slowlock_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 14/63] rtmutex: Provide rt_mutex_base_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 15/63] locking: Add base code for RT rw_semaphore and rwlock Thomas Gleixner
2021-08-04 19:37   ` Waiman Long
2021-08-05  9:04     ` Thomas Gleixner
2021-08-05 14:59       ` Waiman Long
2021-07-30 13:50 ` [patch 16/63] locking/rwsem: Add rtmutex based R/W semaphore implementation Thomas Gleixner
2021-07-30 13:50 ` [patch 17/63] locking/rtmutex: Add wake_state to rt_mutex_waiter Thomas Gleixner
2021-07-30 13:50 ` [patch 18/63] locking/rtmutex: Provide rt_wake_q and helpers Thomas Gleixner
2021-07-30 13:50 ` [patch 19/63] locking/rtmutex: Use rt_mutex_wake_q_head Thomas Gleixner
2021-07-30 13:50 ` [patch 20/63] locking/rtmutex: Prepare RT rt_mutex_wake_q for RT locks Thomas Gleixner
2021-07-30 13:50 ` [patch 21/63] locking/rtmutex: Guard regular sleeping locks specific functions Thomas Gleixner
2021-07-30 13:50 ` [patch 22/63] locking/spinlock: Split the lock types header Thomas Gleixner
2021-08-04 21:17   ` Waiman Long
2021-08-05  8:54     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 23/63] locking/rtmutex: Prevent future include recursion hell Thomas Gleixner
2021-07-30 13:50 ` [patch 24/63] locking/lockdep: Reduce includes in debug_locks.h Thomas Gleixner
2021-07-30 13:50 ` [patch 25/63] rbtree: Split out the rbtree type definitions Thomas Gleixner
2021-07-30 13:50 ` [patch 26/63] locking/rtmutex: Include only rbtree types Thomas Gleixner
2021-07-30 13:50 ` [patch 27/63] locking/spinlock: Provide RT specific spinlock type Thomas Gleixner
2021-07-30 13:50 ` [patch 28/63] locking/spinlock: Provide RT variant header Thomas Gleixner
2021-07-30 13:50 ` [patch 29/63] locking/rtmutex: Provide the spin/rwlock core lock function Thomas Gleixner
2021-07-30 13:50 ` [patch 30/63] locking/spinlock: Provide RT variant Thomas Gleixner
2021-08-04 23:34   ` Waiman Long
2021-08-05  8:54     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 31/63] locking/rwlock: " Thomas Gleixner
2021-07-30 13:50 ` [patch 32/63] locking/mutex: Consolidate core headers Thomas Gleixner
2021-07-30 13:50 ` [patch 33/63] locking/mutex: Move waiter to core header Thomas Gleixner
2021-07-30 13:50 ` [patch 34/63] locking/ww_mutex: Move ww_mutex declarations into ww_mutex.h Thomas Gleixner
2021-07-30 13:50 ` [patch 35/63] locking/mutex: Make mutex::wait_lock raw Thomas Gleixner
2021-07-30 13:50 ` [patch 36/63] locking/ww_mutex: Simplify lockdep annotation Thomas Gleixner
2021-07-30 13:50 ` [patch 37/63] locking/ww_mutex: Gather mutex_waiter initialization Thomas Gleixner
2021-07-30 13:50 ` [patch 38/63] locking/ww_mutex: Split up ww_mutex_unlock() Thomas Gleixner
2021-07-30 13:50 ` [patch 39/63] locking/ww_mutex: Split W/W implementation logic Thomas Gleixner
2021-07-30 13:50 ` [patch 40/63] locking/ww_mutex: Remove __sched annotation Thomas Gleixner
2021-07-30 13:50 ` [patch 41/63] locking/ww_mutex: Abstract waiter iteration Thomas Gleixner
2021-07-30 13:50 ` [patch 42/63] locking/ww_mutex: Abstract waiter enqueueing Thomas Gleixner
2021-07-30 13:50 ` [patch 43/63] locking/ww_mutex: Abstract mutex accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 44/63] locking/ww_mutex: Abstract mutex types Thomas Gleixner
2021-07-30 13:50 ` [patch 45/63] locking/ww_mutex: Abstract internal lock access Thomas Gleixner
2021-07-30 13:50 ` [patch 46/63] locking/ww_mutex: Implement rt_mutex accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 47/63] locking/ww_mutex: Add RT priority to W/W order Thomas Gleixner
2021-07-30 13:50 ` [patch 48/63] locking/ww_mutex: Add rt_mutex based lock type and accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 49/63] locking/rtmutex: Extend the rtmutex core to support ww_mutex Thomas Gleixner
2021-07-30 13:50 ` [patch 50/63] locking/ww_mutex: Implement rtmutex based ww_mutex API functions Thomas Gleixner
2021-07-31 13:26   ` Mike Galbraith
2021-08-01 21:18     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 51/63] locking/rtmutex: Add mutex variant for RT Thomas Gleixner
2021-07-30 13:50 ` [patch 52/63] lib/test_lockup: Adapt to changed variables Thomas Gleixner
2021-07-30 13:51 ` [patch 53/63] futex: Validate waiter correctly in futex_proxy_trylock_atomic() Thomas Gleixner
2021-07-30 13:51 ` [patch 54/63] futex: Cleanup stale comments Thomas Gleixner
2021-07-30 13:51 ` [patch 55/63] futex: Correct the number of requeued waiters for PI Thomas Gleixner
2021-07-30 13:51 ` [patch 56/63] futex: Restructure futex_requeue() Thomas Gleixner
2021-07-30 13:51 ` [patch 57/63] futex: Clarify comment in futex_requeue() Thomas Gleixner
2021-07-30 13:51 ` [patch 58/63] futex: Prevent requeue_pi() lock nesting issue on RT Thomas Gleixner
2021-08-02 12:56   ` Peter Zijlstra
2021-08-02 13:10     ` Peter Zijlstra
2021-08-02 14:35       ` Thomas Gleixner
2021-08-02 14:34     ` Thomas Gleixner
2021-08-03 10:28     ` Peter Zijlstra
2021-08-03 21:10       ` Thomas Gleixner
2021-08-03 10:07   ` Peter Zijlstra
2021-08-03 21:10     ` Thomas Gleixner
2021-08-03 11:20   ` Peter Zijlstra
2021-08-03 21:22     ` Thomas Gleixner
2021-07-30 13:51 ` [patch 59/63] rtmutex: Prevent lockdep false positive with PI futexes Thomas Gleixner
2021-07-30 13:51 ` [patch 60/63] preempt: Adjust PREEMPT_LOCK_OFFSET for RT Thomas Gleixner
2021-07-30 13:51 ` [patch 61/63] locking/rtmutex: Implement equal priority lock stealing Thomas Gleixner
2021-07-30 13:51 ` [patch 62/63] locking/rtmutex: Add adaptive spinwait mechanism Thomas Gleixner
2021-08-04 12:30   ` Peter Zijlstra
2021-08-04 17:49     ` Thomas Gleixner
2021-07-30 13:51 ` [patch 63/63] locking/rtmutex: Use adaptive spinwait for all rtmutex based locks Thomas Gleixner
     [not found] ` <20210803063217.2325-1-hdanton@sina.com>
2021-08-03  9:10   ` [patch 30/63] locking/spinlock: Provide RT variant Thomas Gleixner
2021-08-03 12:37 ` [patch 00/63] locking, sched: The PREEMPT-RT locking infrastructure Daniel Bristot de Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210730135205.317820700@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=bigeasy@linutronix.de \
    --cc=boqun.feng@gmail.com \
    --cc=bristot@redhat.com \
    --cc=dave@stgolabs.net \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=will@kernel.org \
    --subject='Re: [patch 03/63] sched: Prepare for RT sleeping spin/rwlocks' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).