linux-kernel.vger.kernel.org archive mirror
* [PATCH 1/2] locking/rtmutex: Delete save_state member of struct rt_mutex
@ 2018-04-10 16:27 Davidlohr Bueso
  2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
  2018-04-20 15:25 ` [PATCH 3/2] rtmutex: Use waiter debug init,free magic numbers Davidlohr Bueso
  0 siblings, 2 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-04-10 16:27 UTC (permalink / raw)
  To: peterz, tglx, mingo; +Cc: longman, dave, linux-kernel, Davidlohr Bueso

This (debug-only) field is unused. Furthermore, it looks like a
leftover from porting rtmutex from -rt into upstream, where it serves
to determine whether a wakeup is for a task blocked on a "sleeping
spinlock", iow whether this is a regular rt_mutex_lock() or a
rt_spin_lock().

Of course, upstream we only have regular rt_mutexes, thus we can
safely assume nothing will ever be done with this field. There is also
the fact that it only exists under CONFIG_DEBUG_RT_MUTEXES.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 include/linux/rtmutex.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1b92a28dd672..572606acd700 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -32,7 +32,6 @@ struct rt_mutex {
 	struct rb_root_cached   waiters;
 	struct task_struct	*owner;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
-	int			save_state;
 	const char		*name, *file;
 	int			line;
 	void			*magic;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-10 16:27 [PATCH 1/2] locking/rtmutex: Delete save_state member of struct rt_mutex Davidlohr Bueso
@ 2018-04-10 16:27 ` Davidlohr Bueso
  2018-04-11 12:51   ` kbuild test robot
                     ` (2 more replies)
  2018-04-20 15:25 ` [PATCH 3/2] rtmutex: Use waiter debug init,free magic numbers Davidlohr Bueso
  1 sibling, 3 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-04-10 16:27 UTC (permalink / raw)
  To: peterz, tglx, mingo; +Cc: longman, dave, linux-kernel, Davidlohr Bueso

By applying well known spin-on-lock-owner techniques, we can avoid the
blocking overhead while a task is trying to take the rtmutex. The idea
is that as long as the owner is running, there is a fair chance it will
release the lock soon, and thus a task trying to acquire the rtmutex
will be better off spinning instead of blocking immediately after the
fastpath. This is similar to what we use for other locks, borrowed from
-rt. The main difference (due to the obvious realtime constraints) is
that top-waiter spinning must account for any new higher priority
waiter, and therefore cannot steal the lock and avoid any pi-dance. As
such, there will be at most one spinning waiter on a contended lock.

Conditions to stop spinning and block are simple:

(1) Upon need_resched()
(2) Current lock owner blocks
(3) The top-waiter has changed while spinning.

The unlock side remains unchanged, as wake_up_process() can safely deal
with calls where the task is not actually blocked (TASK_NORMAL). The
only cost is some unnecessary wake_q overhead, but in return no wakeup
can be missed between the spinning step and the unlock side.
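
For reference, below is a rough sketch of the (unchanged) slow-unlock
side this relies on; it is paraphrased from memory rather than copied,
so treat the exact helper names and locking variant as assumptions.

/*
 * Sketch only, not a verbatim copy of rt_mutex_slowunlock(); the
 * helpers (mark_wakeup_next_waiter, wake_up_q) and the _irq locking
 * variant are assumed here for illustration.
 */
static void slowunlock_sketch(struct rt_mutex *lock)
{
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irq(&lock->wait_lock);
	/* The top waiter is picked under wait_lock, so it cannot change here. */
	mark_wakeup_next_waiter(&wake_q, lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * If the top waiter is still spinning it never called schedule(),
	 * so this wakeup merely flips its task state back to TASK_RUNNING;
	 * if it already blocked, this is the wakeup it needs. No wakeup is
	 * lost in between, because the waiter re-checks the lock and sets
	 * its task state under wait_lock before it ever calls schedule().
	 */
	wake_up_q(&wake_q);
}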

Passes running the pi_stress program with increasing thread-group counts.

Signed-off-by: Davidlohr Bueso <dbues@suse.de>
---
 kernel/Kconfig.locks     |  6 ++++-
 kernel/locking/rtmutex.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..42d330e0557f 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -227,13 +227,17 @@ config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
 
+config RT_MUTEX_SPIN_ON_OWNER
+	def_bool y
+	depends on SMP && RT_MUTEXES && !DEBUG_RT_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
+
 config RWSEM_SPIN_ON_OWNER
        def_bool y
        depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
 
 config LOCK_SPIN_ON_OWNER
        def_bool y
-       depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
+       depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER || RT_MUTEX_SPIN_ON_OWNER
 
 config ARCH_USE_QUEUED_SPINLOCKS
 	bool
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4f014be7a4b8..772ca39e67e7 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1154,6 +1154,55 @@ void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
 	waiter->task = NULL;
 }
 
+#ifdef CONFIG_RT_MUTEX_SPIN_ON_OWNER
+static bool rt_mutex_spin_on_owner(struct rt_mutex *lock,
+				   struct rt_mutex_waiter *waiter,
+				   struct task_struct *owner)
+{
+	bool ret = true;
+
+	/*
+	 * The last owner could have just released the lock,
+	 * immediately try taking it again.
+	 */
+	if (!owner)
+		goto done;
+
+	rcu_read_lock();
+	while (rt_mutex_owner(lock) == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking lock->owner still matches owner. If that fails,
+		 * owner might point to freed memory. If it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 *
+		 * Also account for changes in the lock's top-waiter, if it's
+		 * not us, it was updated while busy waiting.
+		 */
+		barrier();
+
+		if (!owner->on_cpu || need_resched() ||
+		    waiter != rt_mutex_top_waiter(lock)) {
+			ret = false;
+			break;
+		}
+
+		cpu_relax();
+	}
+	rcu_read_unlock();
+done:
+	return ret;
+}
+
+#else
+static bool rt_mutex_spin_on_owner(struct rt_mutex *lock,
+				   struct rt_mutex_waiter *waiter,
+				   struct task_struct *owner)
+{
+	return false;
+}
+#endif
+
 /**
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:		 the rt_mutex to take
@@ -1172,6 +1221,8 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	int ret = 0;
 
 	for (;;) {
+		struct rt_mutex_waiter *top_waiter = NULL;
+
 		/* Try to acquire the lock: */
 		if (try_to_take_rt_mutex(lock, current, waiter))
 			break;
@@ -1190,11 +1241,20 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 				break;
 		}
 
+		top_waiter = rt_mutex_top_waiter(lock);
 		raw_spin_unlock_irq(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
 
-		schedule();
+		/*
+		 * At this point the PI-dance is done, and, as the top waiter,
+		 * we are next in line for the lock. Try to spin on the current
+		 * owner for a while, in the hope that the lock will be released
+		 * soon. Otherwise fallback and block.
+		 */
+		if (top_waiter != waiter ||
+		    !rt_mutex_spin_on_owner(lock, waiter, rt_mutex_owner(lock)))
+			schedule();
 
 		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(state);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
@ 2018-04-11 12:51   ` kbuild test robot
  2018-04-17 16:52     ` Davidlohr Bueso
  2018-04-20 15:50   ` Peter Zijlstra
  2018-06-17 17:26   ` Davidlohr Bueso
  2 siblings, 1 reply; 9+ messages in thread
From: kbuild test robot @ 2018-04-11 12:51 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: kbuild-all, peterz, tglx, mingo, longman, dave, linux-kernel,
	Davidlohr Bueso

Hi Davidlohr,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.16 next-20180411]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Davidlohr-Bueso/locking-rtmutex-Delete-save_state-member-of-struct-rt_mutex/20180411-155733
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> kernel/locking/rtmutex_common.h:62:9: sparse: context imbalance in '__rt_mutex_slowlock' - unexpected unlock

vim +/__rt_mutex_slowlock +62 kernel/locking/rtmutex_common.h

23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  51  
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  52  static inline struct rt_mutex_waiter *
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  53  rt_mutex_top_waiter(struct rt_mutex *lock)
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  54  {
c28d62cf5 kernel/locking/rtmutex_common.h Peter Zijlstra 2018-03-27  55  	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
c28d62cf5 kernel/locking/rtmutex_common.h Peter Zijlstra 2018-03-27  56  	struct rt_mutex_waiter *w = NULL;
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  57  
c28d62cf5 kernel/locking/rtmutex_common.h Peter Zijlstra 2018-03-27  58  	if (leftmost) {
c28d62cf5 kernel/locking/rtmutex_common.h Peter Zijlstra 2018-03-27  59  		w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  60  		BUG_ON(w->lock != lock);
c28d62cf5 kernel/locking/rtmutex_common.h Peter Zijlstra 2018-03-27  61  	}
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27 @62  	return w;
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  63  }
23f78d4a0 kernel/rtmutex_common.h         Ingo Molnar    2006-06-27  64  

:::::: The code at line 62 was first introduced by commit
:::::: 23f78d4a03c53cbd75d87a795378ea540aa08c86 [PATCH] pi-futex: rt mutex core

:::::: TO: Ingo Molnar <mingo@elte.hu>
:::::: CC: Linus Torvalds <torvalds@g5.osdl.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-11 12:51   ` kbuild test robot
@ 2018-04-17 16:52     ` Davidlohr Bueso
  0 siblings, 0 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-04-17 16:52 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, peterz, tglx, mingo, longman, linux-kernel, Davidlohr Bueso

On Wed, 11 Apr 2018, kbuild test robot wrote:

>Hi Davidlohr,
>
>I love your patch! Perhaps something to improve:
>
>[auto build test WARNING on linus/master]
>[also build test WARNING on v4.16 next-20180411]
>[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
>url:    https://github.com/0day-ci/linux/commits/Davidlohr-Bueso/locking-rtmutex-Delete-save_state-member-of-struct-rt_mutex/20180411-155733
>reproduce:
>        # apt-get install sparse
>        make ARCH=x86_64 allmodconfig
>        make C=1 CF=-D__CHECK_ENDIAN__

Ok.

>
>
>sparse warnings: (new ones prefixed by >>)
>
>>> kernel/locking/rtmutex_common.h:62:9: sparse: context imbalance in '__rt_mutex_slowlock' - unexpected unlock

I did:

make C=1 CF=-D__CHECK_ENDIAN__ kernel/locking/rtmutex.o

But I cannot trigger this message. There are, however, a ton of other
sparse issues with 'indirect_branch', e.g.:

./arch/x86/include/asm/nospec-branch.h:144:38: warning: Unknown escape '@'
./include/linux/init.h:134:6: error: attribute 'indirect_branch': unknown attribute
./include/linux/init.h:135:5: error: attribute 'indirect_branch': unknown attribute

More importantly, I don't follow where this "unexpected unlock" would come from
considering that rtmutex doesn't use __acquires/__releases annotations. Nor do
I understand why this patch would produce a new context imbalance.
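
That said, for anyone else staring at this, below is roughly how
sparse's context tracking produces this class of warning. It is a toy,
self-contained example: the macro definitions mirror what the kernel
does under __CHECKER__, but are spelled out here (as an assumption) so
that the snippet stands alone. "sparse toy.c" should flag
drop_and_retake() with an "unexpected unlock" style context imbalance,
while gcc compiles it silently.

#ifdef __CHECKER__
# define __acquires(x)	__attribute__((context(x, 0, 1)))
# define __releases(x)	__attribute__((context(x, 1, 0)))
# define __acquire(x)	__context__(x, 1)
# define __release(x)	__context__(x, -1)
#else
# define __acquires(x)
# define __releases(x)
# define __acquire(x)	(void)0
# define __release(x)	(void)0
#endif

struct fake_lock { int dummy; };

/* Annotated like the kernel's lock primitives. */
static void fake_lock_acquire(struct fake_lock *l) __acquires(l)
{
	__acquire(l);
}

static void fake_lock_release(struct fake_lock *l) __releases(l)
{
	__release(l);
}

/*
 * Entered with the lock held, drops it and re-takes it -- the same
 * shape as __rt_mutex_slowlock() dropping wait_lock around schedule().
 * Without an annotation telling sparse that the lock is held on entry,
 * the tracked context goes negative at the release and sparse reports
 * a context imbalance.
 */
static void drop_and_retake(struct fake_lock *l)
{
	fake_lock_release(l);
	/* ... do work without the lock held ... */
	fake_lock_acquire(l);
}

int main(void)
{
	struct fake_lock l = { 0 };

	fake_lock_acquire(&l);
	drop_and_retake(&l);
	fake_lock_release(&l);
	return 0;
}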

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/2] rtmutex: Use waiter debug init,free magic numbers
  2018-04-10 16:27 [PATCH 1/2] locking/rtmutex: Delete save_state member of struct rt_mutex Davidlohr Bueso
  2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
@ 2018-04-20 15:25 ` Davidlohr Bueso
  1 sibling, 0 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-04-20 15:25 UTC (permalink / raw)
  To: peterz, tglx, mingo; +Cc: longman, linux-kernel, Davidlohr Bueso

... we already use these poison values for regular mutexes; rtmutex
can use them as well. While at it, rename them, since they are
specific to waiters.
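
As a side note on why the byte patterns are useful at all: poisoning
the waiter on init and on free makes a use-before-init or a
use-after-free stand out immediately in an oops. Below is a toy
user-space illustration of the idea; only the two values match the
patch, the struct and helpers are made up for the demo.

#include <stdio.h>
#include <string.h>

/* Values as proposed above; everything else is illustrative only. */
#define MUTEX_WAITER_DEBUG_INIT	0x11
#define MUTEX_WAITER_DEBUG_FREE	0x22

struct toy_waiter {
	void *task;
	int prio;
};

static void toy_debug_init_waiter(struct toy_waiter *w)
{
	/* Poison first, so any field never explicitly set up stands out. */
	memset(w, MUTEX_WAITER_DEBUG_INIT, sizeof(*w));
}

static void toy_debug_free_waiter(struct toy_waiter *w)
{
	/* Poison on teardown, so a stale pointer dereference is obvious. */
	memset(w, MUTEX_WAITER_DEBUG_FREE, sizeof(*w));
}

int main(void)
{
	struct toy_waiter w;

	toy_debug_init_waiter(&w);
	printf("after init: prio=%#x task=%p\n", w.prio, w.task);

	toy_debug_free_waiter(&w);
	printf("after free: prio=%#x task=%p\n", w.prio, w.task);

	/*
	 * Distinct 0x1111.../0x2222... patterns in a register or pointer
	 * dump immediately identify which kind of bug was hit, which is
	 * the whole point of having two different magic numbers.
	 */
	return 0;
}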

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 include/linux/poison.h         | 4 ++--
 kernel/locking/mutex-debug.c   | 4 ++--
 kernel/locking/rtmutex-debug.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 15927ebc22f2..99119d34857e 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -79,8 +79,8 @@
 #define ATM_POISON		0xdeadbeef
 
 /********** kernel/mutexes **********/
-#define MUTEX_DEBUG_INIT	0x11
-#define MUTEX_DEBUG_FREE	0x22
+#define MUTEX_WAITER_DEBUG_INIT	0x11
+#define MUTEX_WAITER_DEBUG_FREE	0x22
 #define MUTEX_POISON_WW_CTX	((void *) 0x500 + POISON_POINTER_DELTA)
 
 /********** lib/flex_array.c **********/
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 9aa713629387..79b71421d763 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -29,7 +29,7 @@
  */
 void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 {
-	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
+	memset(waiter, MUTEX_WAITER_DEBUG_INIT, sizeof(*waiter));
 	waiter->magic = waiter;
 	INIT_LIST_HEAD(&waiter->list);
 }
@@ -45,7 +45,7 @@ void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
 void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 {
 	DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
-	memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
+	memset(waiter, MUTEX_WAITER_DEBUG_FREE, sizeof(*waiter));
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index fd4fe1f5b458..95e9d8efce60 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -157,14 +157,14 @@ void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
 
 void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
 {
-	memset(waiter, 0x11, sizeof(*waiter));
+	memset(waiter, MUTEX_WAITER_DEBUG_INIT, sizeof(*waiter));
 	waiter->deadlock_task_pid = NULL;
 }
 
 void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 {
 	put_pid(waiter->deadlock_task_pid);
-	memset(waiter, 0x22, sizeof(*waiter));
+	memset(waiter, MUTEX_WAITER_DEBUG_FREE, sizeof(*waiter));
 }
 
 void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key)
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
  2018-04-11 12:51   ` kbuild test robot
@ 2018-04-20 15:50   ` Peter Zijlstra
  2018-04-20 16:48     ` Mike Galbraith
  2018-06-17 17:26   ` Davidlohr Bueso
  2 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2018-04-20 15:50 UTC (permalink / raw)
  To: Davidlohr Bueso; +Cc: tglx, mingo, longman, linux-kernel, Davidlohr Bueso

On Tue, Apr 10, 2018 at 09:27:50AM -0700, Davidlohr Bueso wrote:
> By applying well known spin-on-lock-owner techniques, we can avoid the
> blocking overhead during the process of when the task is trying to take
> the rtmutex. The idea is that as long as the owner is running, there is a
> fair chance it'll release the lock soon, and thus a task trying to acquire
> the rtmutex will better off spinning instead of blocking immediately after
> the fastpath. This is similar to what we use for other locks, borrowed
> from -rt. The main difference (due to the obvious realtime constraints)
> is that top-waiter spinning must account for any new higher priority waiter,
> and therefore cannot steal the lock and avoid any pi-dance. As such there
> will be at most only one spinner waiter upon contended lock.
> 
> Conditions to stop spinning and block are simple:
> 
> (1) Upon need_resched()
> (2) Current lock owner blocks
> (3) The top-waiter has changed while spinning.
> 
> The unlock side remains unchanged as wake_up_process can safely deal with
> calls where the task is not actually blocked (TASK_NORMAL). As such, there
> is only unnecessary overhead dealing with the wake_q, but this allows us not
> to miss any wakeups between the spinning step and the unlocking side.
> 
> Passes running the pi_stress program with increasing thread-group counts.

Is this similar to what we have in RT (which, IIRC, has an optimistic
spinning implementation as well)?

ISTR there being some contention over the exact semantics of (3) many
years ago. IIRC the question was whether an equal priority task was
allowed to steal, because lock stealing can lead to fairness issues.
One would expect two FIFO-50 tasks to be 'fair' wrt lock acquisition
and not starve one another.

Therefore I think we only allowed higher prio tasks to steal and kept
FIFO order for equal priority tasks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-20 15:50   ` Peter Zijlstra
@ 2018-04-20 16:48     ` Mike Galbraith
  2018-04-22  2:39       ` Davidlohr Bueso
  0 siblings, 1 reply; 9+ messages in thread
From: Mike Galbraith @ 2018-04-20 16:48 UTC (permalink / raw)
  To: Peter Zijlstra, Davidlohr Bueso
  Cc: tglx, mingo, longman, linux-kernel, Davidlohr Bueso

On Fri, 2018-04-20 at 17:50 +0200, Peter Zijlstra wrote:
> On Tue, Apr 10, 2018 at 09:27:50AM -0700, Davidlohr Bueso wrote:
> > By applying well known spin-on-lock-owner techniques, we can avoid the
> > blocking overhead during the process of when the task is trying to take
> > the rtmutex. The idea is that as long as the owner is running, there is a
> > fair chance it'll release the lock soon, and thus a task trying to acquire
> > the rtmutex will better off spinning instead of blocking immediately after
> > the fastpath. This is similar to what we use for other locks, borrowed
> > from -rt. The main difference (due to the obvious realtime constraints)
> > is that top-waiter spinning must account for any new higher priority waiter,
> > and therefore cannot steal the lock and avoid any pi-dance. As such there
> > will be at most only one spinner waiter upon contended lock.
> > 
> > Conditions to stop spinning and block are simple:
> > 
> > (1) Upon need_resched()
> > (2) Current lock owner blocks
> > (3) The top-waiter has changed while spinning.
> > 
> > The unlock side remains unchanged as wake_up_process can safely deal with
> > calls where the task is not actually blocked (TASK_NORMAL). As such, there
> > is only unnecessary overhead dealing with the wake_q, but this allows us not
> > to miss any wakeups between the spinning step and the unlocking side.
> > 
> > Passes running the pi_stress program with increasing thread-group counts.
> 
> Is this similar to what we have in RT (which, IIRC, has an optimistic
> spinning implementation as well)?

For the RT spinlock replacement, the top waiter can spin.

> ISTR there being some contention over the exact semantics of (3) many
> years ago. IIRC the question was if an equal priority task was allowed
> to steal; because lock stealing can lead to fairness issues. One would
> expect two FIFO-50 tasks to be 'fair' wrt lock acquisition and not
> starve one another.
> 
> Therefore I think we only allowed higher prio tasks to steal and kept
> FIFO order for equal prioty tasks.

Yup, lateral steal is expressly forbidden for RT classes.

+#define STEAL_NORMAL  0
+#define STEAL_LATERAL 1
+
+static inline int
+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
+{
+       struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
+
+       if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
+               return 1;
+
+       /*
+        * Note that RT tasks are excluded from lateral-steals
+        * to prevent the introduction of an unbounded latency.
+        */
+       if (mode == STEAL_NORMAL || rt_task(waiter->task))
+               return 0;
+
+       return rt_mutex_waiter_equal(waiter, top_waiter);
+}
+

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-20 16:48     ` Mike Galbraith
@ 2018-04-22  2:39       ` Davidlohr Bueso
  0 siblings, 0 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-04-22  2:39 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Peter Zijlstra, tglx, mingo, longman, linux-kernel, Davidlohr Bueso

On Fri, 20 Apr 2018, Mike Galbraith wrote:

>On Fri, 2018-04-20 at 17:50 +0200, Peter Zijlstra wrote:
>> Is this similar to what we have in RT (which, IIRC, has an optimistic
>> spinning implementation as well)?
>
>For the RT spinlock replacement, the top waiter can spin.

Yeah, and the differences between the rt-spinlock and this patch are
points (1) and (3). Probably worth mentioning, and, at least at first
thought, the rt version might also benefit from these conditions for
breaking out of the spin loop; lateral or normal, I doubt the
rt-spinlock wants to adaptive_wait() if the top_waiter has changed.

>
>> ISTR there being some contention over the exact semantics of (3) many
>> years ago. IIRC the question was if an equal priority task was allowed
>> to steal; because lock stealing can lead to fairness issues. One would
>> expect two FIFO-50 tasks to be 'fair' wrt lock acquisition and not
>> starve one another.

Indeed.

>>
>> Therefore I think we only allowed higher prio tasks to steal and kept
>> FIFO order for equal prioty tasks.

Right. In fact this patch is a very limited version of optimistic
spinning because we have little room to break fairness and rt
constraints (ie no osq, etc).

So say waiter task A is spinning for the rtmutex when task B (with
equal prio) comes in and tries to take it as well. Because B, being of
equal priority, does not satisfy the strictly-less check in
rt_mutex_waiter_less() when it is enqueued, it goes rb_right. As such
the top-waiter pointer is not updated, and therefore B blocks while A
keeps spinning and hopefully takes the lock without blocking. And if
we do block, we're still the top-waiter, so fifo wrt waiter B all the
way.
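
If it helps, below is a toy user-space model of that enqueue ordering:
a sorted list instead of the rbtree, using the same strictly-less
priority rule as rt_mutex_waiter_less() (deadline handling ignored);
the names and values are made up for the demo.

#include <stdio.h>

/* Toy stand-in for a waiter; lower prio value == higher priority. */
struct toy_waiter {
	const char *name;
	int prio;
	struct toy_waiter *next;
};

/* Strictly-less rule: equal priority is NOT "less". */
static int toy_waiter_less(struct toy_waiter *a, struct toy_waiter *b)
{
	return a->prio < b->prio;
}

/*
 * Enqueue like the waiter tree does: keep walking while the new waiter
 * is not strictly less than the current one, so an equal-priority
 * waiter lands behind the existing ones (FIFO) and only a strictly
 * higher priority waiter becomes the new top waiter.
 */
static void toy_enqueue(struct toy_waiter **head, struct toy_waiter *w)
{
	while (*head && !toy_waiter_less(w, *head))
		head = &(*head)->next;
	w->next = *head;
	*head = w;
}

int main(void)
{
	struct toy_waiter a = { "A", 50, NULL };	/* spinning top waiter */
	struct toy_waiter b = { "B", 50, NULL };	/* equal prio, arrives later */
	struct toy_waiter c = { "C", 40, NULL };	/* higher priority */
	struct toy_waiter *top = NULL, *w;

	toy_enqueue(&top, &a);
	toy_enqueue(&top, &b);	/* behind A: A keeps spinning, B blocks */
	toy_enqueue(&top, &c);	/* new top waiter: A must stop spinning */

	for (w = top; w; w = w->next)
		printf("%s (prio %d)\n", w->name, w->prio);
	/* Prints: C (prio 40), A (prio 50), B (prio 50). */
	return 0;
}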

>
>Yup, lateral steal is expressly forbidden for RT classes.

Only rt-spinlocks allow lateral stealing; this patch uses the same
normal stealing semantics that rtmutex already provides. And either
way I don't see how rt_mutex_spin_on_owner() would influence the
current rtmutex semantics, as all that changes is not calling
schedule(), and we are already accounted for and queued in the waiter
tree.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock
  2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
  2018-04-11 12:51   ` kbuild test robot
  2018-04-20 15:50   ` Peter Zijlstra
@ 2018-06-17 17:26   ` Davidlohr Bueso
  2 siblings, 0 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2018-06-17 17:26 UTC (permalink / raw)
  To: peterz, tglx, mingo; +Cc: longman, linux-kernel, Davidlohr Bueso

If there are no objections, now that the merge window has closed, could
this be considered for v4.19?

Thanks,
Davidlohr

On Tue, 10 Apr 2018, Davidlohr Bueso wrote:

>By applying well known spin-on-lock-owner techniques, we can avoid the
>blocking overhead during the process of when the task is trying to take
>the rtmutex. The idea is that as long as the owner is running, there is a
>fair chance it'll release the lock soon, and thus a task trying to acquire
>the rtmutex will better off spinning instead of blocking immediately after
>the fastpath. This is similar to what we use for other locks, borrowed
>from -rt. The main difference (due to the obvious realtime constraints)
>is that top-waiter spinning must account for any new higher priority waiter,
>and therefore cannot steal the lock and avoid any pi-dance. As such there
>will be at most only one spinner waiter upon contended lock.
>
>Conditions to stop spinning and block are simple:
>
>(1) Upon need_resched()
>(2) Current lock owner blocks
>(3) The top-waiter has changed while spinning.
>
>The unlock side remains unchanged as wake_up_process can safely deal with
>calls where the task is not actually blocked (TASK_NORMAL). As such, there
>is only unnecessary overhead dealing with the wake_q, but this allows us not
>to miss any wakeups between the spinning step and the unlocking side.
>
>Passes running the pi_stress program with increasing thread-group counts.
>
>Signed-off-by: Davidlohr Bueso <dbues@suse.de>
>---
> kernel/Kconfig.locks     |  6 ++++-
> kernel/locking/rtmutex.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 66 insertions(+), 2 deletions(-)
>
>diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
>index 84d882f3e299..42d330e0557f 100644
>--- a/kernel/Kconfig.locks
>+++ b/kernel/Kconfig.locks
>@@ -227,13 +227,17 @@ config MUTEX_SPIN_ON_OWNER
> 	def_bool y
> 	depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
>
>+config RT_MUTEX_SPIN_ON_OWNER
>+	def_bool y
>+	depends on SMP && RT_MUTEXES && !DEBUG_RT_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
>+
> config RWSEM_SPIN_ON_OWNER
>        def_bool y
>        depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
>
> config LOCK_SPIN_ON_OWNER
>        def_bool y
>-       depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
>+       depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER || RT_MUTEX_SPIN_ON_OWNER
>
> config ARCH_USE_QUEUED_SPINLOCKS
> 	bool
>diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
>index 4f014be7a4b8..772ca39e67e7 100644
>--- a/kernel/locking/rtmutex.c
>+++ b/kernel/locking/rtmutex.c
>@@ -1154,6 +1154,55 @@ void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
> 	waiter->task = NULL;
> }
>
>+#ifdef CONFIG_RT_MUTEX_SPIN_ON_OWNER
>+static bool rt_mutex_spin_on_owner(struct rt_mutex *lock,
>+				   struct rt_mutex_waiter *waiter,
>+				   struct task_struct *owner)
>+{
>+	bool ret = true;
>+
>+	/*
>+	 * The last owner could have just released the lock,
>+	 * immediately try taking it again.
>+	 */
>+	if (!owner)
>+		goto done;
>+
>+	rcu_read_lock();
>+	while (rt_mutex_owner(lock) == owner) {
>+		/*
>+		 * Ensure we emit the owner->on_cpu, dereference _after_
>+		 * checking lock->owner still matches owner. If that fails,
>+		 * owner might point to freed memory. If it still matches,
>+		 * the rcu_read_lock() ensures the memory stays valid.
>+		 *
>+		 * Also account for changes in the lock's top-waiter, if it's
>+		 * not us, it was updated while busy waiting.
>+		 */
>+		barrier();
>+
>+		if (!owner->on_cpu || need_resched() ||
>+		    waiter != rt_mutex_top_waiter(lock)) {
>+			ret = false;
>+			break;
>+		}
>+
>+		cpu_relax();
>+	}
>+	rcu_read_unlock();
>+done:
>+	return ret;
>+}
>+
>+#else
>+static bool rt_mutex_spin_on_owner(struct rt_mutex *lock,
>+				   struct rt_mutex_waiter *waiter,
>+				   struct task_struct *owner)
>+{
>+	return false;
>+}
>+#endif
>+
> /**
>  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
>  * @lock:		 the rt_mutex to take
>@@ -1172,6 +1221,8 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
> 	int ret = 0;
>
> 	for (;;) {
>+		struct rt_mutex_waiter *top_waiter = NULL;
>+
> 		/* Try to acquire the lock: */
> 		if (try_to_take_rt_mutex(lock, current, waiter))
> 			break;
>@@ -1190,11 +1241,20 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
> 				break;
> 		}
>
>+		top_waiter = rt_mutex_top_waiter(lock);
> 		raw_spin_unlock_irq(&lock->wait_lock);
>
> 		debug_rt_mutex_print_deadlock(waiter);
>
>-		schedule();
>+		/*
>+		 * At this point the PI-dance is done, and, as the top waiter,
>+		 * we are next in line for the lock. Try to spin on the current
>+		 * owner for a while, in the hope that the lock will be released
>+		 * soon. Otherwise fallback and block.
>+		 */
>+		if (top_waiter != waiter ||
>+		    !rt_mutex_spin_on_owner(lock, waiter, rt_mutex_owner(lock)))
>+			schedule();
>
> 		raw_spin_lock_irq(&lock->wait_lock);
> 		set_current_state(state);
>-- 
>2.13.6
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-06-17 17:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-10 16:27 [PATCH 1/2] locking/rtmutex: Delete save_state member of struct rt_mutex Davidlohr Bueso
2018-04-10 16:27 ` [PATCH 2/2] rtmutex: Reduce top-waiter blocking on a lock Davidlohr Bueso
2018-04-11 12:51   ` kbuild test robot
2018-04-17 16:52     ` Davidlohr Bueso
2018-04-20 15:50   ` Peter Zijlstra
2018-04-20 16:48     ` Mike Galbraith
2018-04-22  2:39       ` Davidlohr Bueso
2018-06-17 17:26   ` Davidlohr Bueso
2018-04-20 15:25 ` [PATCH 3/2] rtmutex: Use waiter debug init,free magic numbers Davidlohr Bueso
