linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Will Deacon <will@kernel.org>, Waiman Long <longman@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Davidlohr Bueso <dave@stgolabs.net>
Subject: [patch 16/63] locking/rwsem: Add rtmutex based R/W semaphore implementation
Date: Fri, 30 Jul 2021 15:50:23 +0200	[thread overview]
Message-ID: <20210730135206.074657252@linutronix.de> (raw)
In-Reply-To: <20210730135007.155909613@linutronix.de>

From: Thomas Gleixner <tglx@linutronix.de>

The RT specific R/W semaphore implementation used to restrict the number of
readers to one because a writer cannot block on multiple readers and have
all of them inherit its priority or budget.

The single reader restriction was painful in various ways:

 - Performance bottleneck for multi-threaded applications in the page fault
   path (mmap sem)

 - Progress blocker for drivers which are carefully crafted to avoid the
   potential reader/writer deadlock in mainline.

The analysis of the writer code paths shows that properly written RT tasks
should not take them. Syscalls like mmap() or file access which take mmap sem
write locked have unbounded latencies which are completely unrelated to mmap
sem. Other R/W sem users like graphics drivers are not suitable for RT tasks
either.

So there is little risk of hurting RT tasks when the RT rwsem implementation
is done in the following way:

 - Allow concurrent readers

 - Make writers block until the last reader left the critical section. This
   blocking is not subject to priority/budget inheritance.

 - A writer inherits the priority/budget of the readers blocked on it in the
   normal way.

There is a drawback with this scheme: R/W semaphores become writer unfair.
However, the applications which have triggered writer starvation (mostly on
mmap_sem) in the past are not really the typical workloads running on an RT
system. So while it's unlikely to hit writer starvation, it's possible. If
there are unexpected workloads on RT systems triggering it, the problem
has to be revisited.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2: Fix indent fail (Peter Z)
---
 include/linux/rwsem.h  |   58 ++++++++++++++++++++++++++
 kernel/locking/rwsem.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
---
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,9 @@
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 #include <linux/err.h>
+
+#ifndef CONFIG_PREEMPT_RT
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 #include <linux/osq_lock.h>
 #endif
@@ -119,6 +122,61 @@ static inline int rwsem_is_contended(str
 	return !list_empty(&sem->wait_list);
 }
 
+#else /* !CONFIG_PREEMPT_RT */
+
+#include <linux/rwbase_rt.h>
+
+struct rw_semaphore {
+	struct rwbase_rt	rwbase;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
+};
+
+#define __RWSEM_INITIALIZER(name)				\
+	{							\
+		.rwbase = __RWBASE_INITIALIZER(name),		\
+		RW_DEP_MAP_INIT(name)				\
+	}
+
+#define DECLARE_RWSEM(lockname) \
+	struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void  __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+			  struct lock_class_key *key);
+#else
+static inline void  __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+				 struct lock_class_key *key)
+{
+}
+#endif
+
+#define init_rwsem(sem)						\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	init_rwbase_rt(&(sem)->rwbase);			\
+	__rwsem_init((sem), #sem, &__key);			\
+} while (0)
+
+static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return rw_base_is_locked(&sem->rwbase);
+}
+
+static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
+{
+	return rw_base_is_contended(&sem->rwbase);
+}
+
+#endif /* CONFIG_PREEMPT_RT */
+
+/*
+ * The functions below are the same for all rwsem implementations including
+ * the RT specific variant.
+ */
+
 /*
  * lock for reading
  */
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -28,6 +28,7 @@
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
 
+#ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
 
 /*
@@ -1344,6 +1345,113 @@ static inline void __downgrade_write(str
 		rwsem_downgrade_wake(sem);
 }
 
+#else /* !CONFIG_PREEMPT_RT */
+
+#include "rtmutex.c"
+
+#define rwbase_set_and_save_current_state(state)	\
+	set_current_state(state)
+
+#define rwbase_restore_current_state()			\
+	__set_current_state(TASK_RUNNING)
+
+#define rwbase_rtmutex_lock_state(rtm, state)		\
+	__rt_mutex_lock(rtm, state)
+
+#define rwbase_rtmutex_slowlock_locked(rtm, state)	\
+	__rt_mutex_slowlock_locked(rtm, state)
+
+#define rwbase_rtmutex_unlock(rtm)			\
+	__rt_mutex_unlock(rtm)
+
+#define rwbase_rtmutex_trylock(rtm)			\
+	__rt_mutex_trylock(rtm)
+
+#define rwbase_signal_pending_state(state, current)	\
+	signal_pending_state(state, current)
+
+#define rwbase_schedule()				\
+	schedule()
+
+#include "rwbase_rt.c"
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __rwsem_init(struct rw_semaphore *sem, const char *name,
+		  struct lock_class_key *key)
+{
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key, 0);
+}
+EXPORT_SYMBOL(__rwsem_init);
+#endif
+
+static inline void __down_read(struct rw_semaphore *sem)
+{
+	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
+}
+
+static inline int __down_read_interruptible(struct rw_semaphore *sem)
+{
+	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
+}
+
+static inline int __down_read_killable(struct rw_semaphore *sem)
+{
+	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
+}
+
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+	return rwbase_read_trylock(&sem->rwbase);
+}
+
+static inline void __up_read(struct rw_semaphore *sem)
+{
+	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
+}
+
+static inline void __sched __down_write(struct rw_semaphore *sem)
+{
+	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
+}
+
+static inline int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
+}
+
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+	return rwbase_write_trylock(&sem->rwbase);
+}
+
+static inline void __up_write(struct rw_semaphore *sem)
+{
+	rwbase_write_unlock(&sem->rwbase);
+}
+
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+	rwbase_write_downgrade(&sem->rwbase);
+}
+
+/* Debug stubs for the common API */
+#define DEBUG_RWSEMS_WARN_ON(c, sem)
+
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+					    struct task_struct *owner)
+{
+}
+
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
+{
+	int count = atomic_read(&sem->rwbase.readers);
+
+	return count < 0 && count != READER_BIAS;
+}
+
+#endif /* CONFIG_PREEMPT_RT */
+
 /*
  * lock for reading
  */


  parent reply	other threads:[~2021-07-30 14:23 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-30 13:50 [patch 00/63] locking, sched: The PREEMPT-RT locking infrastructure Thomas Gleixner
2021-07-30 13:50 ` [patch 01/63] sched: Split out the wakeup state check Thomas Gleixner
2021-07-30 13:50 ` [patch 02/63] sched: Introduce TASK_RTLOCK_WAIT Thomas Gleixner
2021-07-30 13:50 ` [patch 03/63] sched: Prepare for RT sleeping spin/rwlocks Thomas Gleixner
2021-08-01 15:30   ` Mike Galbraith
2021-08-03  9:48     ` Peter Zijlstra
2021-08-03 14:04       ` Thomas Gleixner
2021-08-03 14:51         ` Peter Zijlstra
2021-08-03 20:11           ` Thomas Gleixner
2021-07-30 13:50 ` [patch 04/63] sched: Rework the __schedule() preempt argument Thomas Gleixner
2021-07-30 13:50 ` [patch 05/63] sched: Provide schedule point for RT locks Thomas Gleixner
2021-07-30 13:50 ` [patch 06/63] sched/wake_q: Provide WAKE_Q_HEAD_INITIALIZER Thomas Gleixner
2021-07-30 13:50 ` [patch 07/63] media/atomisp: Use lockdep instead of *mutex_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 08/63] rtmutex: Remove rt_mutex_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 09/63] rtmutex: Convert macros to inlines Thomas Gleixner
2021-07-30 13:50 ` [patch 10/63] rtmutex: Switch to try_cmpxchg() Thomas Gleixner
2021-07-30 13:50 ` [patch 11/63] rtmutex: Split API and implementation Thomas Gleixner
2021-07-30 13:50 ` [patch 12/63] rtmutex: Split out the inner parts of struct rtmutex Thomas Gleixner
2021-07-30 13:50 ` [patch 13/63] locking/rtmutex: Provide rt_mutex_slowlock_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 14/63] rtmutex: Provide rt_mutex_base_is_locked() Thomas Gleixner
2021-07-30 13:50 ` [patch 15/63] locking: Add base code for RT rw_semaphore and rwlock Thomas Gleixner
2021-08-04 19:37   ` Waiman Long
2021-08-05  9:04     ` Thomas Gleixner
2021-08-05 14:59       ` Waiman Long
2021-07-30 13:50 ` Thomas Gleixner [this message]
2021-07-30 13:50 ` [patch 17/63] locking/rtmutex: Add wake_state to rt_mutex_waiter Thomas Gleixner
2021-07-30 13:50 ` [patch 18/63] locking/rtmutex: Provide rt_wake_q and helpers Thomas Gleixner
2021-07-30 13:50 ` [patch 19/63] locking/rtmutex: Use rt_mutex_wake_q_head Thomas Gleixner
2021-07-30 13:50 ` [patch 20/63] locking/rtmutex: Prepare RT rt_mutex_wake_q for RT locks Thomas Gleixner
2021-07-30 13:50 ` [patch 21/63] locking/rtmutex: Guard regular sleeping locks specific functions Thomas Gleixner
2021-07-30 13:50 ` [patch 22/63] locking/spinlock: Split the lock types header Thomas Gleixner
2021-08-04 21:17   ` Waiman Long
2021-08-05  8:54     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 23/63] locking/rtmutex: Prevent future include recursion hell Thomas Gleixner
2021-07-30 13:50 ` [patch 24/63] locking/lockdep: Reduce includes in debug_locks.h Thomas Gleixner
2021-07-30 13:50 ` [patch 25/63] rbtree: Split out the rbtree type definitions Thomas Gleixner
2021-07-30 13:50 ` [patch 26/63] locking/rtmutex: Include only rbtree types Thomas Gleixner
2021-07-30 13:50 ` [patch 27/63] locking/spinlock: Provide RT specific spinlock type Thomas Gleixner
2021-07-30 13:50 ` [patch 28/63] locking/spinlock: Provide RT variant header Thomas Gleixner
2021-07-30 13:50 ` [patch 29/63] locking/rtmutex: Provide the spin/rwlock core lock function Thomas Gleixner
2021-07-30 13:50 ` [patch 30/63] locking/spinlock: Provide RT variant Thomas Gleixner
2021-08-04 23:34   ` Waiman Long
2021-08-05  8:54     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 31/63] locking/rwlock: " Thomas Gleixner
2021-07-30 13:50 ` [patch 32/63] locking/mutex: Consolidate core headers Thomas Gleixner
2021-07-30 13:50 ` [patch 33/63] locking/mutex: Move waiter to core header Thomas Gleixner
2021-07-30 13:50 ` [patch 34/63] locking/ww_mutex: Move ww_mutex declarations into ww_mutex.h Thomas Gleixner
2021-07-30 13:50 ` [patch 35/63] locking/mutex: Make mutex::wait_lock raw Thomas Gleixner
2021-07-30 13:50 ` [patch 36/63] locking/ww_mutex: Simplify lockdep annotation Thomas Gleixner
2021-07-30 13:50 ` [patch 37/63] locking/ww_mutex: Gather mutex_waiter initialization Thomas Gleixner
2021-07-30 13:50 ` [patch 38/63] locking/ww_mutex: Split up ww_mutex_unlock() Thomas Gleixner
2021-07-30 13:50 ` [patch 39/63] locking/ww_mutex: Split W/W implementation logic Thomas Gleixner
2021-07-30 13:50 ` [patch 40/63] locking/ww_mutex: Remove __sched annotation Thomas Gleixner
2021-07-30 13:50 ` [patch 41/63] locking/ww_mutex: Abstract waiter iteration Thomas Gleixner
2021-07-30 13:50 ` [patch 42/63] locking/ww_mutex: Abstract waiter enqueueing Thomas Gleixner
2021-07-30 13:50 ` [patch 43/63] locking/ww_mutex: Abstract mutex accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 44/63] locking/ww_mutex: Abstract mutex types Thomas Gleixner
2021-07-30 13:50 ` [patch 45/63] locking/ww_mutex: Abstract internal lock access Thomas Gleixner
2021-07-30 13:50 ` [patch 46/63] locking/ww_mutex: Implement rt_mutex accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 47/63] locking/ww_mutex: Add RT priority to W/W order Thomas Gleixner
2021-07-30 13:50 ` [patch 48/63] locking/ww_mutex: Add rt_mutex based lock type and accessors Thomas Gleixner
2021-07-30 13:50 ` [patch 49/63] locking/rtmutex: Extend the rtmutex core to support ww_mutex Thomas Gleixner
2021-07-30 13:50 ` [patch 50/63] locking/ww_mutex: Implement rtmutex based ww_mutex API functions Thomas Gleixner
2021-07-31 13:26   ` Mike Galbraith
2021-08-01 21:18     ` Thomas Gleixner
2021-07-30 13:50 ` [patch 51/63] locking/rtmutex: Add mutex variant for RT Thomas Gleixner
2021-07-30 13:50 ` [patch 52/63] lib/test_lockup: Adapt to changed variables Thomas Gleixner
2021-07-30 13:51 ` [patch 53/63] futex: Validate waiter correctly in futex_proxy_trylock_atomic() Thomas Gleixner
2021-07-30 13:51 ` [patch 54/63] futex: Cleanup stale comments Thomas Gleixner
2021-07-30 13:51 ` [patch 55/63] futex: Correct the number of requeued waiters for PI Thomas Gleixner
2021-07-30 13:51 ` [patch 56/63] futex: Restructure futex_requeue() Thomas Gleixner
2021-07-30 13:51 ` [patch 57/63] futex: Clarify comment in futex_requeue() Thomas Gleixner
2021-07-30 13:51 ` [patch 58/63] futex: Prevent requeue_pi() lock nesting issue on RT Thomas Gleixner
2021-08-02 12:56   ` Peter Zijlstra
2021-08-02 13:10     ` Peter Zijlstra
2021-08-02 14:35       ` Thomas Gleixner
2021-08-02 14:34     ` Thomas Gleixner
2021-08-03 10:28     ` Peter Zijlstra
2021-08-03 21:10       ` Thomas Gleixner
2021-08-03 10:07   ` Peter Zijlstra
2021-08-03 21:10     ` Thomas Gleixner
2021-08-03 11:20   ` Peter Zijlstra
2021-08-03 21:22     ` Thomas Gleixner
2021-07-30 13:51 ` [patch 59/63] rtmutex: Prevent lockdep false positive with PI futexes Thomas Gleixner
2021-07-30 13:51 ` [patch 60/63] preempt: Adjust PREEMPT_LOCK_OFFSET for RT Thomas Gleixner
2021-07-30 13:51 ` [patch 61/63] locking/rtmutex: Implement equal priority lock stealing Thomas Gleixner
2021-07-30 13:51 ` [patch 62/63] locking/rtmutex: Add adaptive spinwait mechanism Thomas Gleixner
2021-08-04 12:30   ` Peter Zijlstra
2021-08-04 17:49     ` Thomas Gleixner
2021-07-30 13:51 ` [patch 63/63] locking/rtmutex: Use adaptive spinwait for all rtmutex based locks Thomas Gleixner
     [not found] ` <20210803063217.2325-1-hdanton@sina.com>
2021-08-03  9:10   ` [patch 30/63] locking/spinlock: Provide RT variant Thomas Gleixner
2021-08-03 12:37 ` [patch 00/63] locking, sched: The PREEMPT-RT locking infrastructure Daniel Bristot de Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210730135206.074657252@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=bigeasy@linutronix.de \
    --cc=boqun.feng@gmail.com \
    --cc=bristot@redhat.com \
    --cc=dave@stgolabs.net \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=will@kernel.org \
    --subject='Re: [patch 16/63] locking/rwsem: Add rtmutex based R/W semaphore implementation' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).