* [PATCH 0/5] locking: Percpu-rwsem rewrite
@ 2019-11-13 10:21 Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map Peter Zijlstra
                   ` (5 more replies)
  0 siblings, 6 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

Yet another version of the percpu-rwsem rewrite..

This one (ab)uses the waitqueue in an entirely different and unique way, but no
longer shares it like it did. It retains the use of rcuwait for the
writer-waiting-for-readers-to-complete condition.

This one should be FIFO fair with writer-stealing.

It seems to pass locktorture torture_type=percpu_rwsem_lock. But as always,
this stuff is tricky, please look carefully.
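
For orientation, the user-facing API is unchanged by this series; a minimal
usage sketch (the semaphore and function names below are just placeholders):

	#include <linux/percpu-rwsem.h>

	DEFINE_STATIC_PERCPU_RWSEM(example_sem);

	static void example_reader(void)
	{
		percpu_down_read(&example_sem);	/* fast path: per-cpu increment */
		/* ... read-side critical section ... */
		percpu_up_read(&example_sem);
	}

	static void example_writer(void)
	{
		/* excludes all readers and other writers; may block */
		percpu_down_write(&example_sem);
		/* ... write-side critical section ... */
		percpu_up_write(&example_sem);
	}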



* [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
@ 2019-11-13 10:21 ` Peter Zijlstra
  2019-11-15 20:39   ` Davidlohr Bueso
  2019-11-13 10:21 ` [PATCH 2/5] locking/percpu-rwsem: Convert to bool Peter Zijlstra
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

As preparation for replacing the embedded rwsem, give percpu-rwsem its
own lockdep_map.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/percpu-rwsem.h  |   29 +++++++++++++++++++----------
 kernel/cpu.c                  |    4 ++--
 kernel/locking/percpu-rwsem.c |   14 +++++++++++---
 kernel/locking/rwsem.c        |    4 ++--
 kernel/locking/rwsem.h        |    2 ++
 5 files changed, 36 insertions(+), 17 deletions(-)

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -15,8 +15,17 @@ struct percpu_rw_semaphore {
 	struct rw_semaphore	rw_sem; /* slowpath */
 	struct rcuwait          writer; /* blocked writer */
 	int			readers_block;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname },
+#else
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
 #define __DEFINE_PERCPU_RWSEM(name, is_static)				\
 static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name);		\
 is_static struct percpu_rw_semaphore name = {				\
@@ -24,7 +33,9 @@ is_static struct percpu_rw_semaphore nam
 	.read_count = &__percpu_rwsem_rc_##name,			\
 	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
 	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
+	__PERCPU_RWSEM_DEP_MAP_INIT(name)				\
 }
+
 #define DEFINE_PERCPU_RWSEM(name)		\
 	__DEFINE_PERCPU_RWSEM(name, /* not static */)
 #define DEFINE_STATIC_PERCPU_RWSEM(name)	\
@@ -37,7 +48,7 @@ static inline void percpu_down_read(stru
 {
 	might_sleep();
 
-	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
 
 	preempt_disable();
 	/*
@@ -76,13 +87,15 @@ static inline int percpu_down_read_trylo
 	 */
 
 	if (ret)
-		rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
 
 	return ret;
 }
 
 static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
+	rwsem_release(&sem->dep_map, _RET_IP_);
+
 	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
@@ -92,8 +105,6 @@ static inline void percpu_up_read(struct
 	else
 		__percpu_up_read(sem); /* Unconditional memory barrier */
 	preempt_enable();
-
-	rwsem_release(&sem->rw_sem.dep_map, _RET_IP_);
 }
 
 extern void percpu_down_write(struct percpu_rw_semaphore *);
@@ -110,15 +121,13 @@ extern void percpu_free_rwsem(struct per
 	__percpu_init_rwsem(sem, #sem, &rwsem_key);		\
 })
 
-#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
-
-#define percpu_rwsem_assert_held(sem)				\
-	lockdep_assert_held(&(sem)->rw_sem)
+#define percpu_rwsem_is_held(sem)	lockdep_is_held(sem)
+#define percpu_rwsem_assert_held(sem)	lockdep_assert_held(sem)
 
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
-	lock_release(&sem->rw_sem.dep_map, ip);
+	lock_release(&sem->dep_map, ip);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 	if (!read)
 		atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
@@ -128,7 +137,7 @@ static inline void percpu_rwsem_release(
 static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
-	lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
+	lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 	if (!read)
 		atomic_long_set(&sem->rw_sem.owner, (long)current);
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
 
 static void lockdep_acquire_cpus_lock(void)
 {
-	rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
 }
 
 static void lockdep_release_cpus_lock(void)
 {
-	rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
+	rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
 }
 
 /*
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -11,7 +11,7 @@
 #include "rwsem.h"
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
-			const char *name, struct lock_class_key *rwsem_key)
+			const char *name, struct lock_class_key *key)
 {
 	sem->read_count = alloc_percpu(int);
 	if (unlikely(!sem->read_count))
@@ -22,6 +22,10 @@ int __percpu_init_rwsem(struct percpu_rw
 	__init_rwsem(&sem->rw_sem, name, rwsem_key);
 	rcuwait_init(&sem->writer);
 	sem->readers_block = 0;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -142,10 +146,12 @@ static bool readers_active_check(struct
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
 
-	down_write(&sem->rw_sem);
+	__down_write(&sem->rw_sem);
 
 	/*
 	 * Notify new readers to block; up until now, and thus throughout the
@@ -168,6 +174,8 @@ EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
+	rwsem_release(&sem->dep_map, _RET_IP_);
+
 	/*
 	 * Signal the writer is done, no fast path yet.
 	 *
@@ -183,7 +191,7 @@ void percpu_up_write(struct percpu_rw_se
 	/*
 	 * Release the write lock, this will allow readers back in the game.
 	 */
-	up_write(&sem->rw_sem);
+	__up_write(&sem->rw_sem);
 
 	/*
 	 * Once this completes (at least one RCU-sched grace period hence) the
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1383,7 +1383,7 @@ static inline int __down_read_trylock(st
 /*
  * lock for writing
  */
-static inline void __down_write(struct rw_semaphore *sem)
+inline void __down_write(struct rw_semaphore *sem)
 {
 	long tmp = RWSEM_UNLOCKED_VALUE;
 
@@ -1446,7 +1446,7 @@ inline void __up_read(struct rw_semaphor
 /*
  * unlock after writing
  */
-static inline void __up_write(struct rw_semaphore *sem)
+inline void __up_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -6,5 +6,7 @@
 
 extern void __down_read(struct rw_semaphore *sem);
 extern void __up_read(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+extern void __up_write(struct rw_semaphore *sem);
 
 #endif /* __INTERNAL_RWSEM_H */




* [PATCH 2/5] locking/percpu-rwsem: Convert to bool
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map Peter Zijlstra
@ 2019-11-13 10:21 ` Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 3/5] locking/percpu-rwsem: Move __this_cpu_inc() into the slowpath Peter Zijlstra
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

Use bool where possible.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/percpu-rwsem.h  |    6 +++---
 kernel/locking/percpu-rwsem.c |    8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -41,7 +41,7 @@ is_static struct percpu_rw_semaphore nam
 #define DEFINE_STATIC_PERCPU_RWSEM(name)	\
 	__DEFINE_PERCPU_RWSEM(name, static)
 
-extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
 extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
@@ -69,9 +69,9 @@ static inline void percpu_down_read(stru
 	preempt_enable();
 }
 
-static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
-	int ret = 1;
+	bool ret = true;
 
 	preempt_disable();
 	/*
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 {
 	/*
 	 * Due to having preemption disabled the decrement happens on
@@ -69,7 +69,7 @@ int __percpu_down_read(struct percpu_rw_
 	 * release in percpu_up_write().
 	 */
 	if (likely(!smp_load_acquire(&sem->readers_block)))
-		return 1;
+		return true;
 
 	/*
 	 * Per the above comment; we still have preemption disabled and
@@ -78,7 +78,7 @@ int __percpu_down_read(struct percpu_rw_
 	__percpu_up_read(sem);
 
 	if (try)
-		return 0;
+		return false;
 
 	/*
 	 * We either call schedule() in the wait, or we'll fall through
@@ -94,7 +94,7 @@ int __percpu_down_read(struct percpu_rw_
 	__up_read(&sem->rw_sem);
 
 	preempt_disable();
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL_GPL(__percpu_down_read);
 




* [PATCH 3/5] locking/percpu-rwsem: Move __this_cpu_inc() into the slowpath
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 2/5] locking/percpu-rwsem: Convert to bool Peter Zijlstra
@ 2019-11-13 10:21 ` Peter Zijlstra
  2019-11-13 10:21 ` [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock() Peter Zijlstra
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

As preparation to rework __percpu_down_read() move the
__this_cpu_inc() into it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/percpu-rwsem.h  |   10 ++++++----
 kernel/locking/percpu-rwsem.c |    2 ++
 2 files changed, 8 insertions(+), 4 deletions(-)

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -59,8 +59,9 @@ static inline void percpu_down_read(stru
 	 * and that once the synchronize_rcu() is done, the writer will see
 	 * anything we did within this RCU-sched read-size critical section.
 	 */
-	__this_cpu_inc(*sem->read_count);
-	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_inc(*sem->read_count);
+	else
 		__percpu_down_read(sem, false); /* Unconditional memory barrier */
 	/*
 	 * The preempt_enable() prevents the compiler from
@@ -77,8 +78,9 @@ static inline bool percpu_down_read_tryl
 	/*
 	 * Same as in percpu_down_read().
 	 */
-	__this_cpu_inc(*sem->read_count);
-	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_inc(*sem->read_count);
+	else
 		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
 	preempt_enable();
 	/*
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -47,6 +47,8 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
 bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 {
+	__this_cpu_inc(*sem->read_count);
+
 	/*
 	 * Due to having preemption disabled the decrement happens on
 	 * the same CPU as the increment, avoiding the




* [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock()
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
                   ` (2 preceding siblings ...)
  2019-11-13 10:21 ` [PATCH 3/5] locking/percpu-rwsem: Move __this_cpu_inc() into the slowpath Peter Zijlstra
@ 2019-11-13 10:21 ` Peter Zijlstra
  2019-11-18 16:28   ` Oleg Nesterov
  2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
  2019-11-15 17:14 ` [PATCH 0/5] locking: Percpu-rwsem rewrite Juri Lelli
  5 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

In preparation for removing the embedded rwsem and building a custom
lock, extract the read-trylock primitive.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/locking/percpu-rwsem.c |   21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
 	__this_cpu_inc(*sem->read_count);
 
@@ -70,14 +70,21 @@ bool __percpu_down_read(struct percpu_rw
 	 * If !readers_block the critical section starts here, matched by the
 	 * release in percpu_up_write().
 	 */
-	if (likely(!smp_load_acquire(&sem->readers_block)))
+	if (likely(!atomic_read_acquire(&sem->readers_block)))
 		return true;
 
-	/*
-	 * Per the above comment; we still have preemption disabled and
-	 * will thus decrement on the same CPU as we incremented.
-	 */
-	__percpu_up_read(sem);
+	__this_cpu_dec(*sem->read_count);
+
+	/* Prod writer to re-evaluate readers_active_check() */
+	rcuwait_wake_up(&sem->writer);
+
+	return false;
+}
+
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+{
+	if (__percpu_down_read_trylock(sem))
+		return true;
 
 	if (try)
 		return false;




* [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
                   ` (3 preceding siblings ...)
  2019-11-13 10:21 ` [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock() Peter Zijlstra
@ 2019-11-13 10:21 ` Peter Zijlstra
  2019-11-18 19:53   ` Davidlohr Bueso
                     ` (2 more replies)
  2019-11-15 17:14 ` [PATCH 0/5] locking: Percpu-rwsem rewrite Juri Lelli
  5 siblings, 3 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-11-13 10:21 UTC (permalink / raw)
  To: peterz, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	longman, dave, jack

The filesystem freezer uses percpu-rwsem in a way that is effectively
write_non_owner() and achieves this with a few horrible hacks that
rely on the rwsem (!percpu) implementation.

When PREEMPT_RT replaces the rwsem implementation with a PI aware
variant this comes apart.

Remove the embedded rwsem and implement it using a waitqueue and an
atomic_t.

 - make readers_block an atomic, and use it, with the waitqueue
   for a blocking test-and-set write-side.

 - have the read-side wait for the 'lock' state to clear.

Have the waiters use FIFO queueing and mark them (reader/writer) with
a new WQ_FLAG. Use a custom wake_function to wake either a single
writer or all readers until a writer.
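
In outline, the resulting write-side acquire reads as below (condensed from
the diff, comments abbreviated):

	void percpu_down_write(struct percpu_rw_semaphore *sem)
	{
		rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

		/* Notify readers to take the slow path. */
		rcu_sync_enter(&sem->rss);

		/* Writer-writer exclusion: test-and-set sem->block, 0 -> 1. */
		if (!__percpu_down_write_trylock(sem))
			percpu_rwsem_wait(sem, /* .reader = */ false);

		/* Wait for all active readers to drain. */
		rcuwait_wait_event(&sem->writer, readers_active_check(sem));
	}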

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/percpu-rwsem.h  |   19 +----
 include/linux/wait.h          |    1 
 kernel/locking/percpu-rwsem.c |  140 +++++++++++++++++++++++++++++-------------
 kernel/locking/rwsem.c        |    9 +-
 kernel/locking/rwsem.h        |   12 ---
 5 files changed, 110 insertions(+), 71 deletions(-)

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -3,18 +3,18 @@
 #define _LINUX_PERCPU_RWSEM_H
 
 #include <linux/atomic.h>
-#include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/rcuwait.h>
+#include <linux/wait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
-	struct rw_semaphore	rw_sem; /* slowpath */
-	struct rcuwait          writer; /* blocked writer */
-	int			readers_block;
+	struct rcuwait		writer;
+	wait_queue_head_t	waiters;
+	atomic_t		block;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -31,8 +31,9 @@ static DEFINE_PER_CPU(unsigned int, __pe
 is_static struct percpu_rw_semaphore name = {				\
 	.rss = __RCU_SYNC_INITIALIZER(name.rss),			\
 	.read_count = &__percpu_rwsem_rc_##name,			\
-	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
 	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
+	.waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters),		\
+	.block = ATOMIC_INIT(0),					\
 	__PERCPU_RWSEM_DEP_MAP_INIT(name)				\
 }
 
@@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(
 					bool read, unsigned long ip)
 {
 	lock_release(&sem->dep_map, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-	if (!read)
-		atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
-#endif
 }
 
 static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
 	lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-	if (!read)
-		atomic_long_set(&sem->rw_sem.owner, (long)current);
-#endif
 }
 
 #endif
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -20,6 +20,7 @@ int default_wake_function(struct wait_qu
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
 #define WQ_FLAG_BOOKMARK	0x04
+#define WQ_FLAG_CUSTOM		0x08
 
 /*
  * A single wait-queue entry structure:
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,15 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/atomic.h>
-#include <linux/rwsem.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/sched/task.h>
 #include <linux/errno.h>
 
-#include "rwsem.h"
-
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *key)
 {
@@ -17,11 +16,10 @@ int __percpu_init_rwsem(struct percpu_rw
 	if (unlikely(!sem->read_count))
 		return -ENOMEM;
 
-	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	rcu_sync_init(&sem->rss);
-	__init_rwsem(&sem->rw_sem, name, rwsem_key);
 	rcuwait_init(&sem->writer);
-	sem->readers_block = 0;
+	init_waitqueue_head(&sem->waiters);
+	atomic_set(&sem->block, 0);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map(&sem->dep_map, name, key, 0);
@@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s
 	 * the same CPU as the increment, avoiding the
 	 * increment-on-one-CPU-and-decrement-on-another problem.
 	 *
-	 * If the reader misses the writer's assignment of readers_block, then
-	 * the writer is guaranteed to see the reader's increment.
+	 * If the reader misses the writer's assignment of sem->block, then the
+	 * writer is guaranteed to see the reader's increment.
 	 *
 	 * Conversely, any readers that increment their sem->read_count after
-	 * the writer looks are guaranteed to see the readers_block value,
-	 * which in turn means that they are guaranteed to immediately
-	 * decrement their sem->read_count, so that it doesn't matter that the
-	 * writer missed them.
+	 * the writer looks are guaranteed to see the sem->block value, which
+	 * in turn means that they are guaranteed to immediately decrement
+	 * their sem->read_count, so that it doesn't matter that the writer
+	 * missed them.
 	 */
 
 	smp_mb(); /* A matches D */
 
 	/*
-	 * If !readers_block the critical section starts here, matched by the
+	 * If !sem->block the critical section starts here, matched by the
 	 * release in percpu_up_write().
 	 */
-	if (likely(!atomic_read_acquire(&sem->readers_block)))
+	if (likely(!atomic_read_acquire(&sem->block)))
 		return true;
 
 	__this_cpu_dec(*sem->read_count);
@@ -81,6 +79,75 @@ static bool __percpu_down_read_trylock(s
 	return false;
 }
 
+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
+{
+	if (atomic_read(&sem->block))
+		return false;
+
+	return atomic_xchg(&sem->block, 1) == 0;
+}
+
+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
+{
+	if (reader) {
+		bool ret;
+
+		preempt_disable();
+		ret = __percpu_down_read_trylock(sem);
+		preempt_enable();
+
+		return ret;
+	}
+	return __percpu_down_write_trylock(sem);
+}
+
+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
+				      unsigned int mode, int wake_flags,
+				      void *key)
+{
+	struct task_struct *p = get_task_struct(wq_entry->private);
+	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
+	struct percpu_rw_semaphore *sem = key;
+
+	/* concurrent against percpu_down_write(), can get stolen */
+	if (!__percpu_rwsem_trylock(sem, reader))
+		return 1;
+
+	list_del_init(&wq_entry->entry);
+	smp_store_release(&wq_entry->private, NULL);
+
+	wake_up_process(p);
+	put_task_struct(p);
+
+	return !reader; /* wake 'all' readers and 1 writer */
+}
+
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+{
+	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
+	bool wait;
+
+	spin_lock_irq(&sem->waiters.lock);
+	/*
+	 * Serialize against the wakeup in percpu_up_write(), if we fail
+	 * the trylock, the wakeup must see us on the list.
+	 */
+	wait = !__percpu_rwsem_trylock(sem, reader);
+	if (wait) {
+		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
+		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
+	}
+	spin_unlock_irq(&sem->waiters.lock);
+
+	while (wait) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!smp_load_acquire(&wq_entry.private))
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
 bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 {
 	if (__percpu_down_read_trylock(sem))
@@ -89,20 +156,10 @@ bool __percpu_down_read(struct percpu_rw
 	if (try)
 		return false;
 
-	/*
-	 * We either call schedule() in the wait, or we'll fall through
-	 * and reschedule on the preempt_enable() in percpu_down_read().
-	 */
-	preempt_enable_no_resched();
-
-	/*
-	 * Avoid lockdep for the down/up_read() we already have them.
-	 */
-	__down_read(&sem->rw_sem);
-	this_cpu_inc(*sem->read_count);
-	__up_read(&sem->rw_sem);
-
+	preempt_enable();
+	percpu_rwsem_wait(sem, /* .reader = */ true );
 	preempt_disable();
+
 	return true;
 }
 EXPORT_SYMBOL_GPL(__percpu_down_read);
@@ -117,7 +174,7 @@ void __percpu_up_read(struct percpu_rw_s
 	 */
 	__this_cpu_dec(*sem->read_count);
 
-	/* Prod writer to recheck readers_active */
+	/* Prod writer to re-evaluate readers_active_check() */
 	rcuwait_wake_up(&sem->writer);
 }
 EXPORT_SYMBOL_GPL(__percpu_up_read);
@@ -137,6 +194,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
  * zero.  If this sum is zero, then it is stable due to the fact that if any
  * newly arriving readers increment a given counter, they will immediately
  * decrement that same counter.
+ *
+ * Assumes sem->block is set.
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
@@ -160,23 +219,22 @@ void percpu_down_write(struct percpu_rw_
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
 
-	__down_write(&sem->rw_sem);
-
 	/*
-	 * Notify new readers to block; up until now, and thus throughout the
-	 * longish rcu_sync_enter() above, new readers could still come in.
+	 * Try set sem->block; this provides writer-writer exclusion.
+	 * Having sem->block set makes new readers block.
 	 */
-	WRITE_ONCE(sem->readers_block, 1);
+	if (!__percpu_down_write_trylock(sem))
+		percpu_rwsem_wait(sem, /* .reader = */ false);
 
-	smp_mb(); /* D matches A */
+	/* smp_mb() implied by __percpu_down_writer_trylock() on success -- D matches A */
 
 	/*
-	 * If they don't see our writer of readers_block, then we are
-	 * guaranteed to see their sem->read_count increment, and therefore
-	 * will wait for them.
+	 * If they don't see our store of sem->block, then we are guaranteed to
+	 * see their sem->read_count increment, and therefore will wait for
+	 * them.
 	 */
 
-	/* Wait for all now active readers to complete. */
+	/* Wait for all active readers to complete. */
 	rcuwait_wait_event(&sem->writer, readers_active_check(sem));
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
@@ -195,12 +253,12 @@ void percpu_up_write(struct percpu_rw_se
 	 * Therefore we force it through the slow path which guarantees an
 	 * acquire and thereby guarantees the critical section's consistency.
 	 */
-	smp_store_release(&sem->readers_block, 0);
+	atomic_set_release(&sem->block, 0);
 
 	/*
-	 * Release the write lock, this will allow readers back in the game.
+	 * Prod any pending reader/writer to make progress.
 	 */
-	__up_write(&sem->rw_sem);
+	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
 
 	/*
 	 * Once this completes (at least one RCU-sched grace period hence) the
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -28,7 +28,6 @@
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
 
-#include "rwsem.h"
 #include "lock_events.h"
 
 /*
@@ -1338,7 +1337,7 @@ static struct rw_semaphore *rwsem_downgr
 /*
  * lock for reading
  */
-inline void __down_read(struct rw_semaphore *sem)
+static inline void __down_read(struct rw_semaphore *sem)
 {
 	if (!rwsem_read_trylock(sem)) {
 		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
@@ -1383,7 +1382,7 @@ static inline int __down_read_trylock(st
 /*
  * lock for writing
  */
-inline void __down_write(struct rw_semaphore *sem)
+static inline void __down_write(struct rw_semaphore *sem)
 {
 	long tmp = RWSEM_UNLOCKED_VALUE;
 
@@ -1426,7 +1425,7 @@ static inline int __down_write_trylock(s
 /*
  * unlock after reading
  */
-inline void __up_read(struct rw_semaphore *sem)
+static inline void __up_read(struct rw_semaphore *sem)
 {
 	long tmp;
 
@@ -1446,7 +1445,7 @@ inline void __up_read(struct rw_semaphor
 /*
  * unlock after writing
  */
-inline void __up_write(struct rw_semaphore *sem)
+static inline void __up_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __INTERNAL_RWSEM_H
-#define __INTERNAL_RWSEM_H
-#include <linux/rwsem.h>
-
-extern void __down_read(struct rw_semaphore *sem);
-extern void __up_read(struct rw_semaphore *sem);
-extern void __down_write(struct rw_semaphore *sem);
-extern void __up_write(struct rw_semaphore *sem);
-
-#endif /* __INTERNAL_RWSEM_H */




* Re: [PATCH 0/5] locking: Percpu-rwsem rewrite
  2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
                   ` (4 preceding siblings ...)
  2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
@ 2019-11-15 17:14 ` Juri Lelli
  5 siblings, 0 replies; 23+ messages in thread
From: Juri Lelli @ 2019-11-15 17:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, williams,
	bristot, longman, dave, jack

Hi,

On 13/11/19 11:21, Peter Zijlstra wrote:
> Yet another version of the percpu-rwsem rewrite..
> 
> This one (ab)uses the waitqueue in an entirely different and unique way, but no
> longer shares it like it did. It retains the use of rcuwait for the
> writer-waiting-for-readers-to-complete condition.
> 
> This one should be FIFO fair with writer-stealing.
> 
> It seems to pass locktorture torture_type=percpu_rwsem_lock. But as always,
> this stuff is tricky, please look carefully.

Backported this series to v5.2.21-rt13.

locktorture looks good (running for several hours) and the DEBUG_LOCKS splat
[1] is not reproducible anymore.

Tested-by: Juri Lelli <juri.lelli@redhat.com>

Thanks!

Juri

1 - https://lore.kernel.org/lkml/20190326093421.GA29508@localhost.localdomain/



* Re: [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map
  2019-11-13 10:21 ` [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map Peter Zijlstra
@ 2019-11-15 20:39   ` Davidlohr Bueso
  2020-01-08  1:33     ` [PATCH] locking/percpu-rwsem: Add might_sleep() for writer locking Davidlohr Bueso
  0 siblings, 1 reply; 23+ messages in thread
From: Davidlohr Bueso @ 2019-11-15 20:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, longman, jack

On Wed, 13 Nov 2019, Peter Zijlstra wrote:
>@@ -37,7 +48,7 @@ static inline void percpu_down_read(stru
> {
> 	might_sleep();
>
>-	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
>+	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

Unrelated to this patch, but we also need a might_sleep() annotation
in percpu_down_write().
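
Something along these lines (sketch only, to go on top of this series):

	void percpu_down_write(struct percpu_rw_semaphore *sem)
	{
		might_sleep();

		rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
		...
	}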

Thanks,
Davidlohr


* Re: [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock()
  2019-11-13 10:21 ` [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock() Peter Zijlstra
@ 2019-11-18 16:28   ` Oleg Nesterov
  0 siblings, 0 replies; 23+ messages in thread
From: Oleg Nesterov @ 2019-11-18 16:28 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, tglx, linux-kernel, bigeasy, juri.lelli, williams,
	bristot, longman, dave, jack

Hi Peter, sorry for the delay.

I'll re-read this series tomorrow, but everything looks correct at first
glance...

Except one very minor problem in this patch, see below.

On 11/13, Peter Zijlstra wrote:
>
> -bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> +static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
>  {
>  	__this_cpu_inc(*sem->read_count);
>  
> @@ -70,14 +70,21 @@ bool __percpu_down_read(struct percpu_rw
>  	 * If !readers_block the critical section starts here, matched by the
>  	 * release in percpu_up_write().
>  	 */
> -	if (likely(!smp_load_acquire(&sem->readers_block)))
> +	if (likely(!atomic_read_acquire(&sem->readers_block)))

I don't think this can be compiled ;) ->readers_block is "int" until the next
patch makes it atomic_t and renames to ->block.

And. I think __percpu_down_read_trylock() should do

	if (atomic_read(&sem->block))
		return false;

at the start, before __this_cpu_inc(read_count).

Suppose that the pending writer sleeps in rcuwait_wait_event(readers_active_check).
If a new reader comes in, it is better not to wake up that writer.
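
That is, by the end of the series the trylock would look something like
this (sketch, untested):

	static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
	{
		/* Don't disturb a writer already waiting for readers to drain. */
		if (atomic_read(&sem->block))
			return false;

		__this_cpu_inc(*sem->read_count);

		smp_mb(); /* A matches D */

		if (likely(!atomic_read_acquire(&sem->block)))
			return true;

		__this_cpu_dec(*sem->read_count);

		/* Prod writer to re-evaluate readers_active_check() */
		rcuwait_wake_up(&sem->writer);

		return false;
	}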

Oleg.



* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
@ 2019-11-18 19:53   ` Davidlohr Bueso
  2019-11-18 23:19     ` Davidlohr Bueso
  2019-12-17 10:35     ` Peter Zijlstra
  2019-11-18 21:52   ` Waiman Long
  2019-11-19 13:50   ` Waiman Long
  2 siblings, 2 replies; 23+ messages in thread
From: Davidlohr Bueso @ 2019-11-18 19:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, longman, jack

On Wed, 13 Nov 2019, Peter Zijlstra wrote:
>@@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s
> 	 * the same CPU as the increment, avoiding the
> 	 * increment-on-one-CPU-and-decrement-on-another problem.

Nit: Now that you've made read_count more symmetric, maybe this first
paragraph can be moved down to __percpu_rwsem_trylock() reader side,
as such:

	/*
	 * Due to having preemption disabled the decrement happens on
	 * the same CPU as the increment, avoiding the
	 * increment-on-one-CPU-and-decrement-on-another problem.
	 */
	preempt_disable();
	ret = __percpu_down_read_trylock(sem);
	preempt_enable();

> 	 *
>-	 * If the reader misses the writer's assignment of readers_block, then
>-	 * the writer is guaranteed to see the reader's increment.
>+	 * If the reader misses the writer's assignment of sem->block, then the
>+	 * writer is guaranteed to see the reader's increment.

...

> bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> {
> 	if (__percpu_down_read_trylock(sem))
>@@ -89,20 +156,10 @@ bool __percpu_down_read(struct percpu_rw
> 	if (try)
> 		return false;
>
>-	/*
>-	 * We either call schedule() in the wait, or we'll fall through
>-	 * and reschedule on the preempt_enable() in percpu_down_read().
>-	 */
>-	preempt_enable_no_resched();
>-
>-	/*
>-	 * Avoid lockdep for the down/up_read() we already have them.
>-	 */
>-	__down_read(&sem->rw_sem);
>-	this_cpu_inc(*sem->read_count);
>-	__up_read(&sem->rw_sem);
>-
>+	preempt_enable();
>+	percpu_rwsem_wait(sem, /* .reader = */ true );
> 	preempt_disable();
>+
> 	return true;
> }
> EXPORT_SYMBOL_GPL(__percpu_down_read);

Do we really need to export symbol here? This function is only called
from percpu-rwsem.h.

>@@ -117,7 +174,7 @@ void __percpu_up_read(struct percpu_rw_s
> 	 */
> 	__this_cpu_dec(*sem->read_count);
>
>-	/* Prod writer to recheck readers_active */
>+	/* Prod writer to re-evaluate readers_active_check() */
> 	rcuwait_wake_up(&sem->writer);
> }
> EXPORT_SYMBOL_GPL(__percpu_up_read);
>@@ -137,6 +194,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
>  * zero.  If this sum is zero, then it is stable due to the fact that if any
>  * newly arriving readers increment a given counter, they will immediately
>  * decrement that same counter.
>+ *
>+ * Assumes sem->block is set.
>  */
> static bool readers_active_check(struct percpu_rw_semaphore *sem)
> {
>@@ -160,23 +219,22 @@ void percpu_down_write(struct percpu_rw_
> 	/* Notify readers to take the slow path. */
> 	rcu_sync_enter(&sem->rss);
>
>-	__down_write(&sem->rw_sem);
>-
> 	/*
>-	 * Notify new readers to block; up until now, and thus throughout the
>-	 * longish rcu_sync_enter() above, new readers could still come in.
>+	 * Try set sem->block; this provides writer-writer exclusion.
>+	 * Having sem->block set makes new readers block.
> 	 */
>-	WRITE_ONCE(sem->readers_block, 1);
>+	if (!__percpu_down_write_trylock(sem))
>+		percpu_rwsem_wait(sem, /* .reader = */ false);
>
>-	smp_mb(); /* D matches A */
>+	/* smp_mb() implied by __percpu_down_writer_trylock() on success -- D matches A */
                                               ^^^
					       write
...

>--- a/kernel/locking/rwsem.h
>+++ b/kernel/locking/rwsem.h
>@@ -1,12 +0,0 @@
>-/* SPDX-License-Identifier: GPL-2.0 */
>-
>-#ifndef __INTERNAL_RWSEM_H
>-#define __INTERNAL_RWSEM_H
>-#include <linux/rwsem.h>
>-
>-extern void __down_read(struct rw_semaphore *sem);
>-extern void __up_read(struct rw_semaphore *sem);
>-extern void __down_write(struct rw_semaphore *sem);
>-extern void __up_write(struct rw_semaphore *sem);

This is a nice side effect.

Thanks,
Davidlohr


* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
  2019-11-18 19:53   ` Davidlohr Bueso
@ 2019-11-18 21:52   ` Waiman Long
  2019-12-17 10:28     ` Peter Zijlstra
  2019-11-19 13:50   ` Waiman Long
  2 siblings, 1 reply; 23+ messages in thread
From: Waiman Long @ 2019-11-18 21:52 UTC (permalink / raw)
  To: Peter Zijlstra, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	dave, jack

On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> @@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(
>  					bool read, unsigned long ip)
>  {
>  	lock_release(&sem->dep_map, ip);
> -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
> -	if (!read)
> -		atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
> -#endif
>  }
>  

This is the only place that sets RWSEM_OWNER_UNKNOWN. So you may as well
remove all references to it:

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 00d6054687dd..8a418d9eeb7a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -53,12 +53,6 @@ struct rw_semaphore {
 #endif
 };
 
-/*
- * Setting all bits of the owner field except bit 0 will indicate
- * that the rwsem is writer-owned with an unknown owner.
- */
-#define RWSEM_OWNER_UNKNOWN    (-2L)
-
 /* In all implementations count != 0 means locked */
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index eef04551eae7..622842754c73 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -660,8 +660,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semapho
        unsigned long flags;
        bool ret = true;
 
-       BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
-
        if (need_resched()) {
                lockevent_inc(rwsem_opt_fail);
                return false;

Cheers,
Longman



* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-18 19:53   ` Davidlohr Bueso
@ 2019-11-18 23:19     ` Davidlohr Bueso
  2019-12-17 10:45       ` Peter Zijlstra
  2019-12-17 10:35     ` Peter Zijlstra
  1 sibling, 1 reply; 23+ messages in thread
From: Davidlohr Bueso @ 2019-11-18 23:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, longman, jack

On Mon, 18 Nov 2019, Davidlohr Bueso wrote:

>On Wed, 13 Nov 2019, Peter Zijlstra wrote:
>>bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
>>{
>>	if (__percpu_down_read_trylock(sem))
>>@@ -89,20 +156,10 @@ bool __percpu_down_read(struct percpu_rw
>>	if (try)
>>		return false;
>>
>>-	/*
>>-	 * We either call schedule() in the wait, or we'll fall through
>>-	 * and reschedule on the preempt_enable() in percpu_down_read().
>>-	 */
>>-	preempt_enable_no_resched();
>>-
>>-	/*
>>-	 * Avoid lockdep for the down/up_read() we already have them.
>>-	 */
>>-	__down_read(&sem->rw_sem);
>>-	this_cpu_inc(*sem->read_count);
>>-	__up_read(&sem->rw_sem);
>>-
>>+	preempt_enable();
>>+	percpu_rwsem_wait(sem, /* .reader = */ true );
>>	preempt_disable();
>>+
>>	return true;
>>}
>>EXPORT_SYMBOL_GPL(__percpu_down_read);
>
>Do we really need to export symbol here? This function is only called
>from percpu-rwsem.h.

Similarly, afaict we can get rid of __percpu_up_read() and put the
slowpath all into percpu_up_read(). Also explicitly mention the
single task nature of the writer (which is a better comment for
the rcuwait_wake_up()).

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index f5ecf6a8a1dd..eda545f42fb8 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -43,7 +43,6 @@ is_static struct percpu_rw_semaphore name = {				\
 	__DEFINE_PERCPU_RWSEM(name, static)
 
 extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
-extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 {
@@ -103,10 +102,23 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 	/*
 	 * Same as in percpu_down_read().
 	 */
-	if (likely(rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss))) {
 		__this_cpu_dec(*sem->read_count);
-	else
-		__percpu_up_read(sem); /* Unconditional memory barrier */
+		goto done;
+	}
+
+	/*
+	 * slowpath; reader will only ever wake a single blocked writer.
+	 */
+	smp_mb(); /* B matches C */
+	/*
+	 * In other words, if they see our decrement (presumably to
+	 * aggregate zero, as that is the only time it matters) they
+	 * will also see our critical section.
+	 */
+	__this_cpu_dec(*sem->read_count);
+	rcuwait_wake_up(&sem->writer);
+done:
 	preempt_enable();
 }
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 851038468efb..a5150a876626 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -164,21 +164,6 @@ bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 }
 EXPORT_SYMBOL_GPL(__percpu_down_read);
 
-void __percpu_up_read(struct percpu_rw_semaphore *sem)
-{
-	smp_mb(); /* B matches C */
-	/*
-	 * In other words, if they see our decrement (presumably to aggregate
-	 * zero, as that is the only time it matters) they will also see our
-	 * critical section.
-	 */
-	__this_cpu_dec(*sem->read_count);
-
-	/* Prod writer to re-evaluate readers_active_check() */
-	rcuwait_wake_up(&sem->writer);
-}
-EXPORT_SYMBOL_GPL(__percpu_up_read);
-
 #define per_cpu_sum(var)						\
 ({									\
 	typeof(var) __sum = 0;						\


* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
  2019-11-18 19:53   ` Davidlohr Bueso
  2019-11-18 21:52   ` Waiman Long
@ 2019-11-19 13:50   ` Waiman Long
  2019-11-19 15:58     ` Oleg Nesterov
  2 siblings, 1 reply; 23+ messages in thread
From: Waiman Long @ 2019-11-19 13:50 UTC (permalink / raw)
  To: Peter Zijlstra, mingo, will
  Cc: oleg, tglx, linux-kernel, bigeasy, juri.lelli, williams, bristot,
	dave, jack

On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
> +				      unsigned int mode, int wake_flags,
> +				      void *key)
> +{
> +	struct task_struct *p = get_task_struct(wq_entry->private);
> +	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
> +	struct percpu_rw_semaphore *sem = key;
> +
> +	/* concurrent against percpu_down_write(), can get stolen */
> +	if (!__percpu_rwsem_trylock(sem, reader))
> +		return 1;
> +
> +	list_del_init(&wq_entry->entry);
> +	smp_store_release(&wq_entry->private, NULL);
> +
> +	wake_up_process(p);
> +	put_task_struct(p);
> +
> +	return !reader; /* wake 'all' readers and 1 writer */
> +}
> +
> +static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
> +{
> +	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
> +	bool wait;
> +
> +	spin_lock_irq(&sem->waiters.lock);
> +	/*
> +	 * Serialize against the wakeup in percpu_up_write(), if we fail
> +	 * the trylock, the wakeup must see us on the list.
> +	 */
> +	wait = !__percpu_rwsem_trylock(sem, reader);
> +	if (wait) {
> +		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
> +		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
> +	}
> +	spin_unlock_irq(&sem->waiters.lock);
> +
> +	while (wait) {
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> +		if (!smp_load_acquire(&wq_entry.private))
> +			break;
> +		schedule();
> +	}

If I read the function correctly, you are setting the WQ_FLAG_EXCLUSIVE
for both readers and writers and __wake_up() is called with an exclusive
count of one. So only one reader or writer is woken up each time.
However, the comment above said we wake 'all' readers and 1 writer. That
doesn't match the actual code, IMO. To match the comments, you should
have set WQ_FLAG_EXCLUSIVE flag only on writer. In this case, you
probably don't need WQ_FLAG_CUSTOM to differentiate between readers and
writers.

Cheers,
Longman



* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-19 13:50   ` Waiman Long
@ 2019-11-19 15:58     ` Oleg Nesterov
  2019-11-19 16:28       ` Waiman Long
  2019-12-17 10:26       ` Peter Zijlstra
  0 siblings, 2 replies; 23+ messages in thread
From: Oleg Nesterov @ 2019-11-19 15:58 UTC (permalink / raw)
  To: Waiman Long
  Cc: Peter Zijlstra, mingo, will, tglx, linux-kernel, bigeasy,
	juri.lelli, williams, bristot, dave, jack

On 11/19, Waiman Long wrote:
>
> On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> > +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
> > +				      unsigned int mode, int wake_flags,
> > +				      void *key)
> > +{
> > +	struct task_struct *p = get_task_struct(wq_entry->private);
> > +	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
> > +	struct percpu_rw_semaphore *sem = key;
> > +
> > +	/* concurrent against percpu_down_write(), can get stolen */
> > +	if (!__percpu_rwsem_trylock(sem, reader))
> > +		return 1;
> > +
> > +	list_del_init(&wq_entry->entry);
> > +	smp_store_release(&wq_entry->private, NULL);
> > +
> > +	wake_up_process(p);
> > +	put_task_struct(p);
> > +
> > +	return !reader; /* wake 'all' readers and 1 writer */
> > +}
> > +
>
> If I read the function correctly, you are setting the WQ_FLAG_EXCLUSIVE
> for both readers and writers and __wake_up() is called with an exclusive
> count of one. So only one reader or writer is woken up each time.

This depends on what percpu_rwsem_wake_function() returns. If it returns 1,
__wake_up_common() stops, exactly because all waiters have WQ_FLAG_EXCLUSIVE.
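
For reference, the core of the waker loop in __wake_up_common() is roughly
(paraphrased from kernel/sched/wait.c, bookmark handling left out):

	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret = curr->func(curr, mode, wake_flags, key);

		if (ret < 0)
			break;
		/* a woken EXCLUSIVE waiter consumes one of @nr_exclusive */
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}

With __wake_up(&sem->waiters, TASK_NORMAL, 1, sem) passing nr_exclusive == 1
and every waiter marked EXCLUSIVE, the walk stops at the first entry whose
wake function returns nonzero: a woken writer, or any failed trylock. Woken
readers return 0 and keep the walk going.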

> However, the comment above said we wake 'all' readers and 1 writer. That
> doesn't match the actual code, IMO.

Well, "'all' readers" probably means "all readers before writer",

> To match the comments, you should
> have set WQ_FLAG_EXCLUSIVE flag only on writer. In this case, you
> probably don't need WQ_FLAG_CUSTOM to differentiate between readers and
> writers.

See above...

note also the

	if (!__percpu_rwsem_trylock(sem, reader))
		return 1;

at the start of percpu_rwsem_wake_function(). We want to stop wake_up_common()
as soon as percpu_rwsem_trylock() fails. Because we know that if it fails once
it can't succeed later. Although iiuc this can only happen if another (new)
writer races with __wake_up(&sem->waiters).


I guess WQ_FLAG_CUSTOM can be avoided, percpu_rwsem_wait() could do

	if (read)
		__add_wait_queue_entry_tail(...);
	else {
		wq_entry.flags |= WQ_FLAG_EXCLUSIVE;
		__add_wait_queue(...);
	}

but this is "unfair".

Oleg.



* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-19 15:58     ` Oleg Nesterov
@ 2019-11-19 16:28       ` Waiman Long
  2019-12-17 10:26       ` Peter Zijlstra
  1 sibling, 0 replies; 23+ messages in thread
From: Waiman Long @ 2019-11-19 16:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Peter Zijlstra, mingo, will, tglx, linux-kernel, bigeasy,
	juri.lelli, williams, bristot, dave, jack

On 11/19/19 10:58 AM, Oleg Nesterov wrote:
> On 11/19, Waiman Long wrote:
>> On 11/13/19 5:21 AM, Peter Zijlstra wrote:
>>> +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
>>> +				      unsigned int mode, int wake_flags,
>>> +				      void *key)
>>> +{
>>> +	struct task_struct *p = get_task_struct(wq_entry->private);
>>> +	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
>>> +	struct percpu_rw_semaphore *sem = key;
>>> +
>>> +	/* concurrent against percpu_down_write(), can get stolen */
>>> +	if (!__percpu_rwsem_trylock(sem, reader))
>>> +		return 1;
>>> +
>>> +	list_del_init(&wq_entry->entry);
>>> +	smp_store_release(&wq_entry->private, NULL);
>>> +
>>> +	wake_up_process(p);
>>> +	put_task_struct(p);
>>> +
>>> +	return !reader; /* wake 'all' readers and 1 writer */
>>> +}
>>> +
>> If I read the function correctly, you are setting the WQ_FLAG_EXCLUSIVE
>> for both readers and writers and __wake_up() is called with an exclusive
>> count of one. So only one reader or writer is woken up each time.
> This depends on what percpu_rwsem_wake_function() returns. If it returns 1,
> __wake_up_common() stops, exactly because all waiters have WQ_FLAG_EXCLUSIVE.
>
>> However, the comment above said we wake 'all' readers and 1 writer. That
>> doesn't match the actual code, IMO.
> Well, "'all' readers" probably means "all readers before writer",
>
>> To match the comments, you should
>> have set WQ_FLAG_EXCLUSIVE flag only on writer. In this case, you
>> probably don't need WQ_FLAG_CUSTOM to differentiate between readers and
>> writers.
> See above...
>
> note also the
>
> 	if (!__percpu_rwsem_trylock(sem, reader))
> 		return 1;
>
> at the start of percpu_rwsem_wake_function(). We want to stop wake_up_common()
> as soon as percpu_rwsem_trylock() fails. Because we know that if it fails once
> it can't succeed later. Although iiuc this can only happen if another (new)
> writer races with __wake_up(&sem->waiters).
>
>
> I guess WQ_FLAG_CUSTOM can be avoided, percpu_rwsem_wait() could do
>
> 	if (read)
> 		__add_wait_queue_entry_tail(...);
> 	else {
> 		wq_entry.flags |= WQ_FLAG_EXCLUSIVE;
> 		__add_wait_queue(...);
> 	}
>
> but this is "unfair".

Thanks for the explanation. That clarifies my understanding of the patch.

Cheers,
Longman



* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-19 15:58     ` Oleg Nesterov
  2019-11-19 16:28       ` Waiman Long
@ 2019-12-17 10:26       ` Peter Zijlstra
  2019-12-17 10:28         ` Peter Zijlstra
  1 sibling, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2019-12-17 10:26 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Waiman Long, mingo, will, tglx, linux-kernel, bigeasy,
	juri.lelli, williams, bristot, dave, jack

On Tue, Nov 19, 2019 at 04:58:26PM +0100, Oleg Nesterov wrote:
> On 11/19, Waiman Long wrote:
> >
> > On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> > > +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
> > > +				      unsigned int mode, int wake_flags,
> > > +				      void *key)
> > > +{
> > > +	struct task_struct *p = get_task_struct(wq_entry->private);
> > > +	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
> > > +	struct percpu_rw_semaphore *sem = key;
> > > +
> > > +	/* concurrent against percpu_down_write(), can get stolen */
> > > +	if (!__percpu_rwsem_trylock(sem, reader))
> > > +		return 1;
> > > +
> > > +	list_del_init(&wq_entry->entry);
> > > +	smp_store_release(&wq_entry->private, NULL);
> > > +
> > > +	wake_up_process(p);
> > > +	put_task_struct(p);
> > > +
> > > +	return !reader; /* wake 'all' readers and 1 writer */
> > > +}
> > > +
> >
> > If I read the function correctly, you are setting the WQ_FLAG_EXCLUSIVE
> > for both readers and writers and __wake_up() is called with an exclusive
> > count of one. So only one reader or writer is woken up each time.
> 
> This depends on what percpu_rwsem_wake_function() returns. If it returns 1,
> __wake_up_common() stops, exactly because all waiters have WQ_FLAG_EXCLUSIVE.

Indeed, let me see if I can clarify that somehow.

> > However, the comment above said we wake 'all' readers and 1 writer. That
> > doesn't match the actual code, IMO.
> 
> Well, "'all' readers" probably means "all readers before writer",

Correct.

> > To match the comments, you should
> > have set WQ_FLAG_EXCLUSIVE flag only on writer. In this case, you
> > probably don't need WQ_FLAG_CUSTOM to differentiate between readers and
> > writers.
> 
> See above...
> 
> note also the
> 
> 	if (!__percpu_rwsem_trylock(sem, reader))
> 		return 1;
> 
> at the start of percpu_rwsem_wake_function(). We want to stop wake_up_common()
> as soon as percpu_rwsem_trylock() fails. Because we know that if it fails once
> it can't succeed later. Although iiuc this can only happen if another (new)
> writer races with __wake_up(&sem->waiters).

Yes, writer-writer stealing can cause that. I even put a comment in
there :-)

> I guess WQ_FLAG_CUSTOM can be avoided, percpu_rwsem_wait() could do
> 
> 	if (read)
> 		__add_wait_queue_entry_tail(...);
> 	else {
> 		wq_entry.flags |= WQ_FLAG_EXCLUSIVE;
> 		__add_wait_queue(...);
> 	}
> 
> but this is "unfair".

Yes, I could not make it fair without that extra bit, and I figured we
have plenty of bits there to play with, so why not.


* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-12-17 10:26       ` Peter Zijlstra
@ 2019-12-17 10:28         ` Peter Zijlstra
  0 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-12-17 10:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Waiman Long, mingo, will, tglx, linux-kernel, bigeasy,
	juri.lelli, williams, bristot, dave, jack

On Tue, Dec 17, 2019 at 11:26:54AM +0100, Peter Zijlstra wrote:
> On Tue, Nov 19, 2019 at 04:58:26PM +0100, Oleg Nesterov wrote:
> > On 11/19, Waiman Long wrote:
> > >
> > > On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> > > > +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
> > > > +				      unsigned int mode, int wake_flags,
> > > > +				      void *key)
> > > > +{
> > > > +	struct task_struct *p = get_task_struct(wq_entry->private);
> > > > +	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
> > > > +	struct percpu_rw_semaphore *sem = key;
> > > > +
> > > > +	/* concurrent against percpu_down_write(), can get stolen */
> > > > +	if (!__percpu_rwsem_trylock(sem, reader))
> > > > +		return 1;
> > > > +
> > > > +	list_del_init(&wq_entry->entry);
> > > > +	smp_store_release(&wq_entry->private, NULL);
> > > > +
> > > > +	wake_up_process(p);
> > > > +	put_task_struct(p);
> > > > +
> > > > +	return !reader; /* wake 'all' readers and 1 writer */
> > > > +}
> > > > +
> > >
> > > If I read the function correctly, you are setting the WQ_FLAG_EXCLUSIVE
> > > for both readers and writers and __wake_up() is called with an exclusive
> > > count of one. So only one reader or writer is woken up each time.
> > 
> > This depends on what percpu_rwsem_wake_function() returns. If it returns 1,
> > __wake_up_common() stops, exactly because all waiters have WQ_FLAG_EXCLUSIVE.
> 
> Indeed, let me see if I can clarify that somehow.
> 
> > > However, the comment above said we wake 'all' readers and 1 writer. That
> > > doesn't match the actual code, IMO.
> > 
> > Well, "'all' readers" probably means "all readers before writer",
> 
> Correct.

Does this clarify?

--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -101,6 +101,19 @@ static bool __percpu_rwsem_trylock(struc
 	return __percpu_down_write_trylock(sem);
 }
 
+/*
+ * The return value of wait_queue_entry::func means:
+ *
+ *  <0 - error, wakeup is terminated and the error is returned
+ *   0 - no wakeup, a next waiter is tried
+ *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
+ * 
+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
+ * and play games with the return value to allow waking multiple readers.
+ *
+ * Specifically, we wake readers until we've woken a single writer, or until a
+ * trylock fails.
+ */
 static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
 				      unsigned int mode, int wake_flags,
 				      void *key)
@@ -119,7 +132,7 @@ static int percpu_rwsem_wake_function(st
 	wake_up_process(p);
 	put_task_struct(p);
 
-	return !reader; /* wake 'all' readers and 1 writer */
+	return !reader; /* wake (readers until) 1 writer */
 }
 
 static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)


* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-18 21:52   ` Waiman Long
@ 2019-12-17 10:28     ` Peter Zijlstra
  0 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-12-17 10:28 UTC (permalink / raw)
  To: Waiman Long
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, dave, jack

On Mon, Nov 18, 2019 at 04:52:23PM -0500, Waiman Long wrote:
> On 11/13/19 5:21 AM, Peter Zijlstra wrote:
> > @@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(
> >  					bool read, unsigned long ip)
> >  {
> >  	lock_release(&sem->dep_map, ip);
> > -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
> > -	if (!read)
> > -		atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
> > -#endif
> >  }
> >  
> 
> This is the only place that sets RWSEM_OWNER_UNKNOWN. So you may as well
> remove all references to it:

Done, thanks!

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-18 19:53   ` Davidlohr Bueso
  2019-11-18 23:19     ` Davidlohr Bueso
@ 2019-12-17 10:35     ` Peter Zijlstra
  1 sibling, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-12-17 10:35 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, longman, jack

On Mon, Nov 18, 2019 at 11:53:04AM -0800, Davidlohr Bueso wrote:
> On Wed, 13 Nov 2019, Peter Zijlstra wrote:
> > @@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s
> > 	 * the same CPU as the increment, avoiding the
> > 	 * increment-on-one-CPU-and-decrement-on-another problem.
> 
> Nit: Now that you've made read_count more symmetric, maybe this first
> paragraph can be moved down to __percpu_rwsem_trylock() reader side,
> as such:
> 
> 	/*
> 	 * Due to having preemption disabled the decrement happens on
> 	 * the same CPU as the increment, avoiding the
> 	 * increment-on-one-CPU-and-decrement-on-another problem.
> 	 */
> 	preempt_disable();
> 	ret = __percpu_down_read_trylock(sem);
> 	preempt_enable();

There's another callsite for that function too, so I think the current
place still works best.
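
For context (an editor's sketch, not from the thread): the comment under
discussion documents the reader fast path; the writer-side check it pairs
with is, conceptually, a sum of every CPU's read_count.  The function form
and the name below are invented for illustration -- the kernel implements
this as a macro.

#include <linux/percpu.h>

/*
 * Conceptual version of the writer's "are all readers gone?" check:
 * percpu_down_write() waits until this sum reaches zero, i.e. until every
 * fast-path increment has been matched by the corresponding decrement.
 */
static unsigned int readers_sum(unsigned int __percpu *read_count)
{
	unsigned int sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(read_count, cpu);

	return sum;
}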


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem
  2019-11-18 23:19     ` Davidlohr Bueso
@ 2019-12-17 10:45       ` Peter Zijlstra
  0 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2019-12-17 10:45 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, will, oleg, tglx, linux-kernel, bigeasy, juri.lelli,
	williams, bristot, longman, jack

On Mon, Nov 18, 2019 at 03:19:35PM -0800, Davidlohr Bueso wrote:
> Similarly, afaict we can get rid of __percpu_up_read() and put the
> slowpath all into percpu_up_read(). Also explicitly mention the
> single task nature of the writer (which is a better comment for
> the rcuwait_wake_up()).

> static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
> {
> @@ -103,10 +102,23 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
> 	/*
> 	 * Same as in percpu_down_read().
> 	 */
> +	if (likely(rcu_sync_is_idle(&sem->rss))) {
> 		__this_cpu_dec(*sem->read_count);
> +		goto done;
> +	}
> +
> +	/*
> +	 * slowpath; reader will only ever wake a single blocked writer.
> +	 */
> +	smp_mb(); /* B matches C */
> +	/*
> +	 * In other words, if they see our decrement (presumably to
> +	 * aggregate zero, as that is the only time it matters) they
> +	 * will also see our critical section.
> +	 */
> +	__this_cpu_dec(*sem->read_count);
> +	rcuwait_wake_up(&sem->writer);
> +done:
> 	preempt_enable();
> }

Let me write that as a normal if () { } else { }.

But yes, that's small enough I suppose.
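
For illustration (not shown in the thread): the if/else form mentioned above
could look roughly like the sketch below, built from the quoted snippet; the
lockdep release and preempt_disable() are assumed from the existing fast
path, and the merged code may differ in detail.

static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);

	preempt_disable();
	/*
	 * Same as in percpu_down_read().
	 */
	if (likely(rcu_sync_is_idle(&sem->rss))) {
		__this_cpu_dec(*sem->read_count);
	} else {
		/*
		 * slowpath; reader will only ever wake a single blocked
		 * writer.
		 */
		smp_mb(); /* B matches C */
		/*
		 * In other words, if they see our decrement (presumably to
		 * aggregate zero, as that is the only time it matters) they
		 * will also see our critical section.
		 */
		__this_cpu_dec(*sem->read_count);
		rcuwait_wake_up(&sem->writer);
	}
	preempt_enable();
}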

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH] locking/percpu-rwsem: Add might_sleep() for writer locking
  2019-11-15 20:39   ` Davidlohr Bueso
@ 2020-01-08  1:33     ` Davidlohr Bueso
  2020-01-08  1:33       ` Davidlohr Bueso
  2020-02-11 12:48       ` [tip: locking/core] " tip-bot2 for Davidlohr Bueso
  0 siblings, 2 replies; 23+ messages in thread
From: Davidlohr Bueso @ 2020-01-08  1:33 UTC (permalink / raw)
  To: dave
  Cc: bigeasy, bristot, jack, juri.lelli, linux-kernel, longman, mingo,
	oleg, peterz, tglx, will, williams, Davidlohr Bueso

We are missing this annotation in percpu_down_write(). Correct
this.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---

Applies against your queue tree.

 kernel/locking/percpu-rwsem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 4cbeeada8d09..08db2abfa00d 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -212,6 +212,7 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
+	might_sleep();
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	/* Notify readers to take the slow path. */
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread
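
For illustration (a hypothetical example, not from the thread): the value of
the annotation is that misuse becomes visible even when the writer fast path
would not actually block.  All names below are made up; the warning assumes
CONFIG_DEBUG_ATOMIC_SLEEP is enabled.

#include <linux/percpu-rwsem.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);		/* made-up example lock      */
DEFINE_STATIC_PERCPU_RWSEM(example_sem);	/* made-up example semaphore */

static void broken_example(void)
{
	spin_lock(&example_lock);		/* atomic context from here on       */
	percpu_down_write(&example_sem);	/* might_sleep() now warns on every  */
						/* call, even if no reader is active */
						/* and nothing would actually block  */
	percpu_up_write(&example_sem);
	spin_unlock(&example_lock);
}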

* [tip: locking/core] locking/percpu-rwsem: Add might_sleep() for writer locking
  2020-01-08  1:33     ` [PATCH] locking/percpu-rwsem: Add might_sleep() for writer locking Davidlohr Bueso
  2020-01-08  1:33       ` Davidlohr Bueso
@ 2020-02-11 12:48       ` tip-bot2 for Davidlohr Bueso
  1 sibling, 0 replies; 23+ messages in thread
From: tip-bot2 for Davidlohr Bueso @ 2020-02-11 12:48 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Davidlohr Bueso, Peter Zijlstra (Intel),
	Ingo Molnar, Will Deacon, Waiman Long, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     41f0e29190ac9e38099a37abd1a8a4cb1dc21233
Gitweb:        https://git.kernel.org/tip/41f0e29190ac9e38099a37abd1a8a4cb1dc21233
Author:        Davidlohr Bueso <dave@stgolabs.net>
AuthorDate:    Tue, 07 Jan 2020 17:33:04 -08:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Tue, 11 Feb 2020 13:11:02 +01:00

locking/percpu-rwsem: Add might_sleep() for writer locking

We are missing this annotation in percpu_down_write(). Correct
this.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Waiman Long <longman@redhat.com>
Link: https://lkml.kernel.org/r/20200108013305.7732-1-dave@stgolabs.net
---
 kernel/locking/percpu-rwsem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 8048a9a..183a3aa 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -212,6 +212,7 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
+	might_sleep();
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	/* Notify readers to take the slow path. */

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-02-11 12:49 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-13 10:21 [PATCH 0/5] locking: Percpu-rwsem rewrite Peter Zijlstra
2019-11-13 10:21 ` [PATCH 1/5] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map Peter Zijlstra
2019-11-15 20:39   ` Davidlohr Bueso
2020-01-08  1:33     ` [PATCH] locking/percpu-rwsem: Add might_sleep() for writer locking Davidlohr Bueso
2020-01-08  1:33       ` Davidlohr Bueso
2020-02-11 12:48       ` [tip: locking/core] " tip-bot2 for Davidlohr Bueso
2019-11-13 10:21 ` [PATCH 2/5] locking/percpu-rwsem: Convert to bool Peter Zijlstra
2019-11-13 10:21 ` [PATCH 3/5] locking/percpu-rwsem: Move __this_cpu_inc() into the slowpath Peter Zijlstra
2019-11-13 10:21 ` [PATCH 4/5] locking/percpu-rwsem: Extract __percpu_down_read_trylock() Peter Zijlstra
2019-11-18 16:28   ` Oleg Nesterov
2019-11-13 10:21 ` [PATCH 5/5] locking/percpu-rwsem: Remove the embedded rwsem Peter Zijlstra
2019-11-18 19:53   ` Davidlohr Bueso
2019-11-18 23:19     ` Davidlohr Bueso
2019-12-17 10:45       ` Peter Zijlstra
2019-12-17 10:35     ` Peter Zijlstra
2019-11-18 21:52   ` Waiman Long
2019-12-17 10:28     ` Peter Zijlstra
2019-11-19 13:50   ` Waiman Long
2019-11-19 15:58     ` Oleg Nesterov
2019-11-19 16:28       ` Waiman Long
2019-12-17 10:26       ` Peter Zijlstra
2019-12-17 10:28         ` Peter Zijlstra
2019-11-15 17:14 ` [PATCH 0/5] locking: Percpu-rwsem rewrite Juri Lelli
