* [RFC] locking/mutex: Fix starvation of sleeping waiters
@ 2016-07-18 16:16 Imre Deak
  2016-07-18 17:15 ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Imre Deak @ 2016-07-18 16:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Chris Wilson, Daniel Vetter,
	Ville Syrjälä

Currently a thread sleeping on a mutex wait queue can be delayed
indefinitely by other threads managing to steal the lock, that is
acquiring the lock out-of-order before the sleepers. I noticed this via
a testcase (see the Reference: below) where one CPU was unlocking /
relocking a mutex in a tight loop while another CPU was delayed
indefinitely trying to wake up and get the lock but losing out to the
first CPU and going back to sleep:

CPU0:                        CPU1:
mutex_lock->acquire
                             mutex_lock->sleep
mutex_unlock->wake CPU1
                             wakeup
mutex_lock->acquire
                             trylock fail->sleep
mutex_unlock->wake CPU1
                             wakeup
mutex_lock->acquire
                             trylock fail->sleep
...			     ...

To fix this we can make sure that CPU1 makes progress by avoiding the
fastpath locking, optimistic spinning and trylocking if there is any
waiter on the list.  The corresponding check can be done without holding
wait_lock, since the goal is only to make sure sleepers make progress
and not to guarantee that the locking will happen in FIFO order.
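
For illustration, a minimal reproducer along the lines of the scenario
above (a sketch only, not the actual testcase from the referenced bug
report) could look like:

	#include <linux/kthread.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(test_lock);

	/* Bound to CPU0: relocks the mutex in a tight loop. */
	static int hog_fn(void *arg)
	{
		while (!kthread_should_stop()) {
			mutex_lock(&test_lock);
			/* short critical section */
			mutex_unlock(&test_lock);
			/* relocks immediately, stealing the lock before
			 * the sleeper on CPU1 has a chance to take it */
		}
		return 0;
	}

	/* Bound to CPU1: can be delayed indefinitely. */
	static int victim_fn(void *arg)
	{
		mutex_lock(&test_lock);
		mutex_unlock(&test_lock);
		return 0;
	}

	/* e.g. started with kthread_create_on_cpu(hog_fn, NULL, 0, "hog")
	 * and kthread_create_on_cpu(victim_fn, NULL, 1, "victim") */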

CC: Peter Zijlstra <peterz@infradead.org>
CC: Ingo Molnar <mingo@redhat.com>
CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Daniel Vetter <daniel.vetter@intel.com>
CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reference: https://bugs.freedesktop.org/show_bug.cgi?id=96701
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 include/linux/mutex.h  |  5 +++++
 kernel/locking/mutex.c | 33 +++++++++++++++++++++------------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2cb7531..562dfa8 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -130,6 +130,11 @@ static inline int mutex_is_locked(struct mutex *lock)
 	return atomic_read(&lock->count) != 1;
 }
 
+static inline int mutex_has_waiters(struct mutex *lock)
+{
+	return !list_empty(&lock->wait_list);
+}
+
 /*
  * See kernel/locking/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/locking/mutex-design.txt.
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a70b90d..d18b531 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -276,7 +276,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
  */
 static inline bool mutex_try_to_acquire(struct mutex *lock)
 {
-	return !mutex_is_locked(lock) &&
+	return !mutex_is_locked(lock) && !mutex_has_waiters(lock) &&
 		(atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
 }
 
@@ -520,7 +520,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-	if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
+	if (!mutex_has_waiters(lock) &&
+	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
 		/* got the lock, yay! */
 		preempt_enable();
 		return 0;
@@ -532,7 +533,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 * Once more, try to acquire the lock. Only try-lock the mutex if
 	 * it is unlocked to reduce unnecessary xchg() operations.
 	 */
-	if (!mutex_is_locked(lock) &&
+	if (!mutex_is_locked(lock) && !mutex_has_waiters(lock) &&
 	    (atomic_xchg_acquire(&lock->count, 0) == 1))
 		goto skip_wait;
 
@@ -556,7 +557,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * other waiters. We only attempt the xchg if the count is
 		 * non-negative in order to avoid unnecessary xchg operations:
 		 */
-		if (atomic_read(&lock->count) >= 0 &&
+		if (lock->wait_list.next == &waiter.list &&
+		    atomic_read(&lock->count) >= 0 &&
 		    (atomic_xchg_acquire(&lock->count, -1) == 1))
 			break;
 
@@ -789,10 +791,11 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-	int ret;
+	int ret = -1;
 
 	might_sleep();
-	ret =  __mutex_fastpath_lock_retval(&lock->count);
+	if (!mutex_has_waiters(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->count);
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -804,10 +807,11 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-	int ret;
+	int ret = -1;
 
 	might_sleep();
-	ret = __mutex_fastpath_lock_retval(&lock->count);
+	if (!mutex_has_waiters(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->count);
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -905,6 +909,9 @@ int __sched mutex_trylock(struct mutex *lock)
 {
 	int ret;
 
+	if (mutex_has_waiters(lock))
+	       return 0;
+
 	ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
 	if (ret)
 		mutex_set_owner(lock);
@@ -917,11 +924,12 @@ EXPORT_SYMBOL(mutex_trylock);
 int __sched
 __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
+	int ret = -1;
 
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
+	if (!mutex_has_waiters(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->base.count);
 
 	if (likely(!ret)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
@@ -935,11 +943,12 @@ EXPORT_SYMBOL(__ww_mutex_lock);
 int __sched
 __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
+	int ret = -1;
 
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
+	if (!mutex_has_waiters(lock))
+	      ret = __mutex_fastpath_lock_retval(&lock->base.count);
 
 	if (likely(!ret)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [RFC] locking/mutex: Fix starvation of sleeping waiters
  2016-07-18 16:16 [RFC] locking/mutex: Fix starvation of sleeping waiters Imre Deak
@ 2016-07-18 17:15 ` Peter Zijlstra
  2016-07-18 17:47   ` Jason Low
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2016-07-18 17:15 UTC (permalink / raw)
  To: Imre Deak
  Cc: linux-kernel, Ingo Molnar, Chris Wilson, Daniel Vetter,
	Ville Syrjälä,
	Waiman Long, Davidlohr Bueso, Jason Low

On Mon, Jul 18, 2016 at 07:16:47PM +0300, Imre Deak wrote:
> Currently a thread sleeping on a mutex wait queue can be delayed
> indefinitely by other threads managing to steal the lock, that is
> acquiring the lock out-of-order before the sleepers. I noticed this via
> a testcase (see the Reference: below) where one CPU was unlocking /
> relocking a mutex in a tight loop while another CPU was delayed
> indefinitely trying to wake up and get the lock but losing out to the
> first CPU and going back to sleep:
> 
> CPU0:                        CPU1:
> mutex_lock->acquire
>                              mutex_lock->sleep
> mutex_unlock->wake CPU1
>                              wakeup
> mutex_lock->acquire
>                              trylock fail->sleep
> mutex_unlock->wake CPU1
>                              wakeup
> mutex_lock->acquire
>                              trylock fail->sleep
> ...			     ...
> 
> To fix this we can make sure that CPU1 makes progress by avoiding the
> fastpath locking, optimistic spinning and trylocking if there is any
> waiter on the list.  The corresponding check can be done without holding
> wait_lock, since the goal is only to make sure sleepers make progress
> and not to guarantee that the locking will happen in FIFO order.

I think we went over this before, that will also completely destroy
performance under a number of workloads.

I'd have to go dig up that thread, but let's Cc more people first.


> 
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: Chris Wilson <chris@chris-wilson.co.uk>
> CC: Daniel Vetter <daniel.vetter@intel.com>
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Reference: https://bugs.freedesktop.org/show_bug.cgi?id=96701
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  include/linux/mutex.h  |  5 +++++
>  kernel/locking/mutex.c | 33 +++++++++++++++++++++------------
>  2 files changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index 2cb7531..562dfa8 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -130,6 +130,11 @@ static inline int mutex_is_locked(struct mutex *lock)
>  	return atomic_read(&lock->count) != 1;
>  }
>  
> +static inline int mutex_has_waiters(struct mutex *lock)
> +{
> +	return !list_empty(&lock->wait_list);
> +}
> +
>  /*
>   * See kernel/locking/mutex.c for detailed documentation of these APIs.
>   * Also see Documentation/locking/mutex-design.txt.
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index a70b90d..d18b531 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -276,7 +276,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
>   */
>  static inline bool mutex_try_to_acquire(struct mutex *lock)
>  {
> -	return !mutex_is_locked(lock) &&
> +	return !mutex_is_locked(lock) && !mutex_has_waiters(lock) &&
>  		(atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
>  }
>  
> @@ -520,7 +520,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	preempt_disable();
>  	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
>  
> -	if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
> +	if (!mutex_has_waiters(lock) &&
> +	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
>  		/* got the lock, yay! */
>  		preempt_enable();
>  		return 0;
> @@ -532,7 +533,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	 * Once more, try to acquire the lock. Only try-lock the mutex if
>  	 * it is unlocked to reduce unnecessary xchg() operations.
>  	 */
> -	if (!mutex_is_locked(lock) &&
> +	if (!mutex_is_locked(lock) && !mutex_has_waiters(lock) &&
>  	    (atomic_xchg_acquire(&lock->count, 0) == 1))
>  		goto skip_wait;
>  
> @@ -556,7 +557,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		 * other waiters. We only attempt the xchg if the count is
>  		 * non-negative in order to avoid unnecessary xchg operations:
>  		 */
> -		if (atomic_read(&lock->count) >= 0 &&
> +		if (lock->wait_list.next == &waiter.list &&
> +		    atomic_read(&lock->count) >= 0 &&
>  		    (atomic_xchg_acquire(&lock->count, -1) == 1))
>  			break;
>  
> @@ -789,10 +791,11 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
>   */
>  int __sched mutex_lock_interruptible(struct mutex *lock)
>  {
> -	int ret;
> +	int ret = -1;
>  
>  	might_sleep();
> -	ret =  __mutex_fastpath_lock_retval(&lock->count);
> +	if (!mutex_has_waiters(lock))
> +		ret = __mutex_fastpath_lock_retval(&lock->count);
>  	if (likely(!ret)) {
>  		mutex_set_owner(lock);
>  		return 0;
> @@ -804,10 +807,11 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
>  
>  int __sched mutex_lock_killable(struct mutex *lock)
>  {
> -	int ret;
> +	int ret = -1;
>  
>  	might_sleep();
> -	ret = __mutex_fastpath_lock_retval(&lock->count);
> +	if (!mutex_has_waiters(lock))
> +		ret = __mutex_fastpath_lock_retval(&lock->count);
>  	if (likely(!ret)) {
>  		mutex_set_owner(lock);
>  		return 0;
> @@ -905,6 +909,9 @@ int __sched mutex_trylock(struct mutex *lock)
>  {
>  	int ret;
>  
> +	if (mutex_has_waiters(lock))
> +	       return 0;
> +
>  	ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
>  	if (ret)
>  		mutex_set_owner(lock);
> @@ -917,11 +924,12 @@ EXPORT_SYMBOL(mutex_trylock);
>  int __sched
>  __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>  {
> -	int ret;
> +	int ret = -1;
>  
>  	might_sleep();
>  
> -	ret = __mutex_fastpath_lock_retval(&lock->base.count);
> +	if (!mutex_has_waiters(lock))
> +		ret = __mutex_fastpath_lock_retval(&lock->base.count);
>  
>  	if (likely(!ret)) {
>  		ww_mutex_set_context_fastpath(lock, ctx);
> @@ -935,11 +943,12 @@ EXPORT_SYMBOL(__ww_mutex_lock);
>  int __sched
>  __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>  {
> -	int ret;
> +	int ret = -1;
>  
>  	might_sleep();
>  
> -	ret = __mutex_fastpath_lock_retval(&lock->base.count);
> +	if (!mutex_has_waiters(lock))
> +	      ret = __mutex_fastpath_lock_retval(&lock->base.count);
>  
>  	if (likely(!ret)) {
>  		ww_mutex_set_context_fastpath(lock, ctx);
> -- 
> 2.5.0
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] locking/mutex: Fix starvation of sleeping waiters
  2016-07-18 17:15 ` Peter Zijlstra
@ 2016-07-18 17:47   ` Jason Low
  2016-07-19 16:53     ` Imre Deak
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Low @ 2016-07-18 17:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: jason.low2, Imre Deak, linux-kernel, Ingo Molnar, Chris Wilson,
	Daniel Vetter, Ville Syrjälä,
	Waiman Long, Davidlohr Bueso, jason.low2

On Mon, 2016-07-18 at 19:15 +0200, Peter Zijlstra wrote:
> On Mon, Jul 18, 2016 at 07:16:47PM +0300, Imre Deak wrote:
> > Currently a thread sleeping on a mutex wait queue can be delayed
> > indefinitely by other threads managing to steal the lock, that is
> > acquiring the lock out-of-order before the sleepers. I noticed this via
> > a testcase (see the Reference: below) where one CPU was unlocking /
> > relocking a mutex in a tight loop while another CPU was delayed
> > indefinitely trying to wake up and get the lock but losing out to the
> > first CPU and going back to sleep:
> > 
> > CPU0:                        CPU1:
> > mutex_lock->acquire
> >                              mutex_lock->sleep
> > mutex_unlock->wake CPU1
> >                              wakeup
> > mutex_lock->acquire
> >                              trylock fail->sleep
> > mutex_unlock->wake CPU1
> >                              wakeup
> > mutex_lock->acquire
> >                              trylock fail->sleep
> > ...			     ...
> > 
> > To fix this we can make sure that CPU1 makes progress by avoiding the
> > fastpath locking, optimistic spinning and trylocking if there is any
> > waiter on the list.  The corresponding check can be done without holding
> > wait_lock, since the goal is only to make sure sleepers make progress
> > and not to guarantee that the locking will happen in FIFO order.
> 
> I think we went over this before, that will also completely destroy
> performance under a number of workloads.

Yup, once a thread becomes a waiter, all other threads will need to
follow suit, so this change would effectively disable optimistic
spinning in some workloads.

A few months ago, we worked on patches that allow the waiter to return
to optimistic spinning to help reduce starvation. Longman sent out a
version 3 patch set, and it sounded like we were fine with the concept.

Jason

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] locking/mutex: Fix starvation of sleeping waiters
  2016-07-18 17:47   ` Jason Low
@ 2016-07-19 16:53     ` Imre Deak
  2016-07-19 22:57       ` Jason Low
  2016-07-19 23:04       ` [RFC] Avoid mutex starvation when optimistic spinning is disabled Jason Low
  0 siblings, 2 replies; 19+ messages in thread
From: Imre Deak @ 2016-07-19 16:53 UTC (permalink / raw)
  To: Jason Low, Peter Zijlstra
  Cc: linux-kernel, Ingo Molnar, Chris Wilson, Daniel Vetter,
	Ville Syrjälä,
	Waiman Long, Davidlohr Bueso, jason.low2

On Mon, 2016-07-18 at 10:47 -0700, Jason Low wrote:
> On Mon, 2016-07-18 at 19:15 +0200, Peter Zijlstra wrote:
> > On Mon, Jul 18, 2016 at 07:16:47PM +0300, Imre Deak wrote:
> > > Currently a thread sleeping on a mutex wait queue can be delayed
> > > indefinitely by other threads managing to steal the lock, that is
> > > acquiring the lock out-of-order before the sleepers. I noticed
> > > this via
> > > a testcase (see the Reference: below) where one CPU was unlocking
> > > /
> > > relocking a mutex in a tight loop while another CPU was delayed
> > > indefinitely trying to wake up and get the lock but losing out to
> > > the
> > > first CPU and going back to sleep:
> > > 
> > > CPU0:                        CPU1:
> > > mutex_lock->acquire
> > >                              mutex_lock->sleep
> > > mutex_unlock->wake CPU1
> > >                              wakeup
> > > mutex_lock->acquire
> > >                              trylock fail->sleep
> > > mutex_unlock->wake CPU1
> > >                              wakeup
> > > mutex_lock->acquire
> > >                              trylock fail->sleep
> > > ...			     ...
> > > 
> > > To fix this we can make sure that CPU1 makes progress by avoiding
> > > the
> > > fastpath locking, optimistic spinning and trylocking if there is
> > > any
> > > waiter on the list.  The corresponding check can be done without
> > > holding
> > > wait_lock, since the goal is only to make sure sleepers make
> > > progress
> > > and not to guarantee that the locking will happen in FIFO order.
> > 
> > I think we went over this before, that will also completely destroy
> > performance under a number of workloads.
> 
> Yup, once a thread becomes a waiter, all other threads will need to
> follow suit, so this change would effectively disable optimistic
> spinning in some workloads.
> 
> A few months ago, we worked on patches that allow the waiter to
> return
> to optimistic spinning to help reduce starvation. Longman sent out a
> version 3 patch set, and it sounded like we were fine with the
> concept.

Thanks, with v4 he just sent I couldn't trigger the above problem.

However, this only works if mutex spinning is enabled; if it's disabled
I still hit the problem due to the other forms of lock stealing. So
could we prevent these if mutex spinning is disabled anyway?

--Imre

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] locking/mutex: Fix starvation of sleeping waiters
  2016-07-19 16:53     ` Imre Deak
@ 2016-07-19 22:57       ` Jason Low
  2016-07-19 23:04       ` [RFC] Avoid mutex starvation when optimistic spinning is disabled Jason Low
  1 sibling, 0 replies; 19+ messages in thread
From: Jason Low @ 2016-07-19 22:57 UTC (permalink / raw)
  To: imre.deak
  Cc: jason.low2, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Ville Syrjälä,
	Waiman Long, Davidlohr Bueso

On Tue, 2016-07-19 at 19:53 +0300, Imre Deak wrote:
> On ma, 2016-07-18 at 10:47 -0700, Jason Low wrote:
> > On Mon, 2016-07-18 at 19:15 +0200, Peter Zijlstra wrote:

> > > I think we went over this before, that will also completely destroy
> > > performance under a number of workloads.
> > 
> > Yup, once a thread becomes a waiter, all other threads will need to
> > follow suit, so this change would effectively disable optimistic
> > spinning in some workloads.
> > 
> > A few months ago, we worked on patches that allow the waiter to
> > return
> > to optimistic spinning to help reduce starvation. Longman sent out a
> > version 3 patch set, and it sounded like we were fine with the
> > concept.
> 
> Thanks, with v4 he just sent I couldn't trigger the above problem.
> 
> However, this only works if mutex spinning is enabled; if it's disabled
> I still hit the problem due to the other forms of lock stealing. So
> could we prevent these if mutex spinning is disabled anyway?

Good point, when optimistic spinning is disabled, waiters could still
get starved because other threads could steal the lock in the fastpath
and the waiter wouldn't be able to spin for the lock.

One option to address this is to enforce a ceiling on the amount of
"time" a waiter needs to wait on the lock to avoid starvation when
optimistic spinning is disabled. This would be better than just
unconditionally disabling the fastpath whenever there is a waiter,
because that could reduce performance by quite a bit.

Instead, we can still allow threads to acquire the lock in the fastpath
even when there are waiters, but yield the lock to a waiter if that
waiter has looped too many times waiting for the lock in the slowpath,
in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
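
Roughly, the idea would look something like the sketch below (the helper
and field names are just placeholders here; the actual patch may differ
in details):

	/* Sketch only: a per-mutex flag set by a starved waiter. */
	#define MUTEX_MAX_WAIT	16	/* arbitrary loop threshold */

	/* Checked before the fastpath and the lock-stealing paths. */
	static inline bool need_yield_to_waiter(struct mutex *lock)
	{
		return lock->yield_to_waiter;
	}

	/* Called from the slowpath wait loop after each wakeup. */
	static inline void do_yield_to_waiter(struct mutex *lock, int loops)
	{
		if (loops >= MUTEX_MAX_WAIT)
			lock->yield_to_waiter = true;
	}

	/* The flag is cleared again once a waiter acquires the lock in
	 * the slowpath, re-enabling the fastpath for everybody. */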

I can send out an initial patch for this.

Jason

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-19 16:53     ` Imre Deak
  2016-07-19 22:57       ` Jason Low
@ 2016-07-19 23:04       ` Jason Low
  2016-07-20  4:39         ` Jason Low
  1 sibling, 1 reply; 19+ messages in thread
From: Jason Low @ 2016-07-19 23:04 UTC (permalink / raw)
  To: imre.deak
  Cc: jason.low2, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Waiman Long, Davidlohr Bueso,
	jason.low2

Hi Imre,

Here is a patch which prevents a thread from spending too much "time"
waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.

Would you like to try this out and see if this addresses the mutex
starvation issue you are seeing in your workload when optimistic
spinning is disabled?

Thanks.

---
Signed-off-by: Jason Low <jason.low2@hpe.com>
---
 include/linux/mutex.h  |  2 ++
 kernel/locking/mutex.c | 61 +++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2cb7531..c1ca68d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -57,6 +57,8 @@ struct mutex {
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
+#else
+	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a70b90d..fec78a7 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -55,6 +55,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
+#else
+	lock->yield_to_waiter = false;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -71,6 +73,9 @@ EXPORT_SYMBOL(__mutex_init);
  */
 __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 
+
+static inline bool need_yield_to_waiter(struct mutex *lock);
+
 /**
  * mutex_lock - acquire the mutex
  * @lock: the mutex to be acquired
@@ -95,11 +100,15 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 void __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
+
 	/*
 	 * The locking fastpath is the 1->0 transition from
 	 * 'unlocked' into 'locked' state.
 	 */
-	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	if (!need_yield_to_waiter(lock))
+		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	else
+		__mutex_lock_slowpath(&lock->count);
 	mutex_set_owner(lock);
 }
 
@@ -398,12 +407,39 @@ done:
 
 	return false;
 }
+
+static inline void do_yield_to_waiter(struct mutex *lock, int loops)
+{
+	return;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return false;
+}
+
 #else
 static bool mutex_optimistic_spin(struct mutex *lock,
 				  struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
 	return false;
 }
+
+#define MUTEX_MAX_WAIT 16
+
+static inline void do_yield_to_waiter(struct mutex *lock, int loops)
+{
+	if (loops < MUTEX_MAX_WAIT)
+		return;
+
+	if (lock->yield_to_waiter != true)
+		lock->yield_to_waiter =true;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return lock->yield_to_waiter;
+}
 #endif
 
 __visible __used noinline
@@ -510,6 +546,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	struct mutex_waiter waiter;
 	unsigned long flags;
 	int ret;
+	int loop = 0;
 
 	if (use_ww_ctx) {
 		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -546,6 +583,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
+		loop++;
+
 		/*
 		 * Lets try to take the lock again - this is needed even if
 		 * we get here for the first time (shortly after failing to
@@ -580,6 +619,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		/* didn't get the lock, go to sleep: */
 		spin_unlock_mutex(&lock->wait_lock, flags);
 		schedule_preempt_disabled();
+		do_yield_to_waiter(lock, loop);
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
 	__set_task_state(task, TASK_RUNNING);
@@ -590,6 +630,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		atomic_set(&lock->count, 0);
 	debug_mutex_free_waiter(&waiter);
 
+#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
+	if (lock->yield_to_waiter)
+		lock->yield_to_waiter = false;
+#endif
+
 skip_wait:
 	/* got the lock - cleanup and rejoice! */
 	lock_acquired(&lock->dep_map, ip);
@@ -789,10 +834,13 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret =  __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret =  __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -804,10 +852,13 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret = __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-19 23:04       ` [RFC] Avoid mutex starvation when optimistic spinning is disabled Jason Low
@ 2016-07-20  4:39         ` Jason Low
  2016-07-20 13:29           ` Imre Deak
  2016-07-20 18:37           ` Waiman Long
  0 siblings, 2 replies; 19+ messages in thread
From: Jason Low @ 2016-07-20  4:39 UTC (permalink / raw)
  To: imre.deak
  Cc: jason.low2, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Waiman Long, Davidlohr Bueso,
	jason.low2

On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> Hi Imre,
> 
> Here is a patch which prevents a thread from spending too much "time"
> waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> 
> Would you like to try this out and see if this addresses the mutex
> starvation issue you are seeing in your workload when optimistic
> spinning is disabled?

Although it looks like it didn't take care of the 'lock stealing' case
in the slowpath. Here is the updated fixed version:

---
Signed-off-by: Jason Low <jason.low2@hpe.com>
---
 include/linux/mutex.h  |  2 ++
 kernel/locking/mutex.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2cb7531..c1ca68d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -57,6 +57,8 @@ struct mutex {
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
+#else
+	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index a70b90d..6c915ca 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -55,6 +55,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
+#else
+	lock->yield_to_waiter = false;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -71,6 +73,9 @@ EXPORT_SYMBOL(__mutex_init);
  */
 __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 
+
+static inline bool need_yield_to_waiter(struct mutex *lock);
+
 /**
  * mutex_lock - acquire the mutex
  * @lock: the mutex to be acquired
@@ -95,11 +100,15 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 void __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
+
 	/*
 	 * The locking fastpath is the 1->0 transition from
 	 * 'unlocked' into 'locked' state.
 	 */
-	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	if (!need_yield_to_waiter(lock))
+		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	else
+		__mutex_lock_slowpath(&lock->count);
 	mutex_set_owner(lock);
 }
 
@@ -398,12 +407,39 @@ done:
 
 	return false;
 }
+
+static inline void do_yield_to_waiter(struct mutex *lock, int loops)
+{
+	return;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return false;
+}
+
 #else
 static bool mutex_optimistic_spin(struct mutex *lock,
 				  struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
 	return false;
 }
+
+#define MUTEX_MAX_WAIT 32
+
+static inline void do_yield_to_waiter(struct mutex *lock, int loops)
+{
+	if (loops < MUTEX_MAX_WAIT)
+		return;
+
+	if (lock->yield_to_waiter != true)
+		lock->yield_to_waiter =true;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return lock->yield_to_waiter;
+}
 #endif
 
 __visible __used noinline
@@ -510,6 +546,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	struct mutex_waiter waiter;
 	unsigned long flags;
 	int ret;
+	int loop = 0;
 
 	if (use_ww_ctx) {
 		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -532,7 +569,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 * Once more, try to acquire the lock. Only try-lock the mutex if
 	 * it is unlocked to reduce unnecessary xchg() operations.
 	 */
-	if (!mutex_is_locked(lock) &&
+	if (!need_yield_to_waiter(lock) && !mutex_is_locked(lock) &&
 	    (atomic_xchg_acquire(&lock->count, 0) == 1))
 		goto skip_wait;
 
@@ -546,6 +583,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
+		loop++;
+
 		/*
 		 * Lets try to take the lock again - this is needed even if
 		 * we get here for the first time (shortly after failing to
@@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * other waiters. We only attempt the xchg if the count is
 		 * non-negative in order to avoid unnecessary xchg operations:
 		 */
-		if (atomic_read(&lock->count) >= 0 &&
+		if ((!need_yield_to_waiter(lock) || loop > 1) &&
+		    atomic_read(&lock->count) >= 0 &&
 		    (atomic_xchg_acquire(&lock->count, -1) == 1))
 			break;
 
@@ -581,6 +621,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		spin_unlock_mutex(&lock->wait_lock, flags);
 		schedule_preempt_disabled();
 		spin_lock_mutex(&lock->wait_lock, flags);
+		do_yield_to_waiter(lock, loop);
 	}
 	__set_task_state(task, TASK_RUNNING);
 
@@ -590,6 +631,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		atomic_set(&lock->count, 0);
 	debug_mutex_free_waiter(&waiter);
 
+#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
+	lock->yield_to_waiter = false;
+#endif
+
 skip_wait:
 	/* got the lock - cleanup and rejoice! */
 	lock_acquired(&lock->dep_map, ip);
@@ -789,10 +834,13 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret =  __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret =  __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -804,10 +852,13 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret = __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-20  4:39         ` Jason Low
@ 2016-07-20 13:29           ` Imre Deak
  2016-07-21 20:57             ` Jason Low
  2016-07-20 18:37           ` Waiman Long
  1 sibling, 1 reply; 19+ messages in thread
From: Imre Deak @ 2016-07-20 13:29 UTC (permalink / raw)
  To: Jason Low
  Cc: Peter Zijlstra, linux-kernel, Ingo Molnar, Chris Wilson,
	Daniel Vetter, Waiman Long, Davidlohr Bueso, jason.low2

On Tue, 2016-07-19 at 21:39 -0700, Jason Low wrote:
> On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> > Hi Imre,
> > 
> > Here is a patch which prevents a thread from spending too much "time"
> > waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> > 
> > Would you like to try this out and see if this addresses the mutex
> > starvation issue you are seeing in your workload when optimistic
> > spinning is disabled?
> 
> Although it looks like it didn't take care of the 'lock stealing' case
> in the slowpath. Here is the updated fixed version:

This also got rid of the problem, I only needed to change the ww
functions accordingly. Also, imo mutex_trylock() needs the same
handling and lock->yield_to_waiter should be reset only in the waiter
thread that has set it.

--Imre

> ---
> Signed-off-by: Jason Low <jason.low2@hpe.com>
> ---
>  include/linux/mutex.h  |  2 ++
>  kernel/locking/mutex.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 60 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index 2cb7531..c1ca68d 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -57,6 +57,8 @@ struct mutex {
>  #endif
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>  	struct optimistic_spin_queue osq; /* Spinner MCS lock */
> +#else
> +	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
>  #endif
>  #ifdef CONFIG_DEBUG_MUTEXES
>  	void			*magic;
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index a70b90d..6c915ca 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -55,6 +55,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
>  	mutex_clear_owner(lock);
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>  	osq_lock_init(&lock->osq);
> +#else
> +	lock->yield_to_waiter = false;
>  #endif
>  
>  	debug_mutex_init(lock, name, key);
> @@ -71,6 +73,9 @@ EXPORT_SYMBOL(__mutex_init);
>   */
>  __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>  
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock);
> +
>  /**
>   * mutex_lock - acquire the mutex
>   * @lock: the mutex to be acquired
> @@ -95,11 +100,15 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>  void __sched mutex_lock(struct mutex *lock)
>  {
>  	might_sleep();
> +
>  	/*
>  	 * The locking fastpath is the 1->0 transition from
>  	 * 'unlocked' into 'locked' state.
>  	 */
> -	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	if (!need_yield_to_waiter(lock))
> +		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	else
> +		__mutex_lock_slowpath(&lock->count);
>  	mutex_set_owner(lock);
>  }
>  
> @@ -398,12 +407,39 @@ done:
>  
>  	return false;
>  }
> +
> +static inline void do_yield_to_waiter(struct mutex *lock, int loops)
> +{
> +	return;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return false;
> +}
> +
>  #else
>  static bool mutex_optimistic_spin(struct mutex *lock,
>  				  struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
>  {
>  	return false;
>  }
> +
> +#define MUTEX_MAX_WAIT 32
> +
> +static inline void do_yield_to_waiter(struct mutex *lock, int loops)
> +{
> +	if (loops < MUTEX_MAX_WAIT)
> +		return;
> +
> +	if (lock->yield_to_waiter != true)
> +		lock->yield_to_waiter =true;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return lock->yield_to_waiter;
> +}
>  #endif
>  
>  __visible __used noinline
> @@ -510,6 +546,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	struct mutex_waiter waiter;
>  	unsigned long flags;
>  	int ret;
> +	int loop = 0;
>  
>  	if (use_ww_ctx) {
>  		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> @@ -532,7 +569,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	 * Once more, try to acquire the lock. Only try-lock the mutex if
>  	 * it is unlocked to reduce unnecessary xchg() operations.
>  	 */
> -	if (!mutex_is_locked(lock) &&
> +	if (!need_yield_to_waiter(lock) && !mutex_is_locked(lock) &&
>  	    (atomic_xchg_acquire(&lock->count, 0) == 1))
>  		goto skip_wait;
>  
> @@ -546,6 +583,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	lock_contended(&lock->dep_map, ip);
>  
>  	for (;;) {
> +		loop++;
> +
>  		/*
>  		 * Lets try to take the lock again - this is needed even if
>  		 * we get here for the first time (shortly after failing to
> @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		 * other waiters. We only attempt the xchg if the count is
>  		 * non-negative in order to avoid unnecessary xchg operations:
>  		 */
> -		if (atomic_read(&lock->count) >= 0 &&
> +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
> +		    atomic_read(&lock->count) >= 0 &&
>  		    (atomic_xchg_acquire(&lock->count, -1) == 1))
>  			break;
>  
> @@ -581,6 +621,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		spin_unlock_mutex(&lock->wait_lock, flags);
>  		schedule_preempt_disabled();
>  		spin_lock_mutex(&lock->wait_lock, flags);
> +		do_yield_to_waiter(lock, loop);
>  	}
>  	__set_task_state(task, TASK_RUNNING);
>  
> @@ -590,6 +631,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		atomic_set(&lock->count, 0);
>  	debug_mutex_free_waiter(&waiter);
>  
> +#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
> +	lock->yield_to_waiter = false;
> +#endif
> +
>  skip_wait:
>  	/* got the lock - cleanup and rejoice! */
>  	lock_acquired(&lock->dep_map, ip);
> @@ -789,10 +834,13 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
>   */
>  int __sched mutex_lock_interruptible(struct mutex *lock)
>  {
> -	int ret;
> +	int ret = 1;
>  
>  	might_sleep();
> -	ret =  __mutex_fastpath_lock_retval(&lock->count);
> +
> +	if (!need_yield_to_waiter(lock))
> +		ret =  __mutex_fastpath_lock_retval(&lock->count);
> +
>  	if (likely(!ret)) {
>  		mutex_set_owner(lock);
>  		return 0;
> @@ -804,10 +852,13 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
>  
>  int __sched mutex_lock_killable(struct mutex *lock)
>  {
> -	int ret;
> +	int ret = 1;
>  
>  	might_sleep();
> -	ret = __mutex_fastpath_lock_retval(&lock->count);
> +
> +	if (!need_yield_to_waiter(lock))
> +		ret = __mutex_fastpath_lock_retval(&lock->count);
> +
>  	if (likely(!ret)) {
>  		mutex_set_owner(lock);
>  		return 0;

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-20  4:39         ` Jason Low
  2016-07-20 13:29           ` Imre Deak
@ 2016-07-20 18:37           ` Waiman Long
  2016-07-21 22:29             ` Jason Low
  1 sibling, 1 reply; 19+ messages in thread
From: Waiman Long @ 2016-07-20 18:37 UTC (permalink / raw)
  To: Jason Low
  Cc: imre.deak, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Davidlohr Bueso, jason.low2

On 07/20/2016 12:39 AM, Jason Low wrote:
> On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
>> Hi Imre,
>>
>> Here is a patch which prevents a thread from spending too much "time"
>> waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
>>
>> Would you like to try this out and see if this addresses the mutex
>> starvation issue you are seeing in your workload when optimistic
>> spinning is disabled?
> Although it looks like it didn't take care of the 'lock stealing' case
> in the slowpath. Here is the updated fixed version:
>
> ---
> Signed-off-by: Jason Low <jason.low2@hpe.com>
> ---
>   include/linux/mutex.h  |  2 ++
>   kernel/locking/mutex.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------
>   2 files changed, 60 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index 2cb7531..c1ca68d 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -57,6 +57,8 @@ struct mutex {
>   #endif
>   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>   	struct optimistic_spin_queue osq; /* Spinner MCS lock */
> +#else
> +	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
>   #endif
>   #ifdef CONFIG_DEBUG_MUTEXES
>   	void			*magic;

You don't need that on a non-SMP system. So maybe you should put it under
an #ifdef CONFIG_SMP block.

I actually have a similar boolean variable in my latest v4 mutex 
patchset. We could probably merge them together.

> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index a70b90d..6c915ca 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -55,6 +55,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
>   	mutex_clear_owner(lock);
>   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>   	osq_lock_init(&lock->osq);
> +#else
> +	lock->yield_to_waiter = false;
>   #endif
>
>   	debug_mutex_init(lock, name, key);
> @@ -71,6 +73,9 @@ EXPORT_SYMBOL(__mutex_init);
>    */
>   __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock);
> +
>   /**
>    * mutex_lock - acquire the mutex
>    * @lock: the mutex to be acquired
> @@ -95,11 +100,15 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>   void __sched mutex_lock(struct mutex *lock)
>   {
>   	might_sleep();
> +
>   	/*
>   	 * The locking fastpath is the 1->0 transition from
>   	 * 'unlocked' into 'locked' state.
>   	 */
> -	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	if (!need_yield_to_waiter(lock))
> +		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	else
> +		__mutex_lock_slowpath(&lock->count);
>   	mutex_set_owner(lock);
>   }
>
> @@ -398,12 +407,39 @@ done:
>
>   	return false;
>   }
> +
> +static inline void do_yield_to_waiter(struct mutex *lock, int loops)
> +{
> +	return;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return false;
> +}
> +
>   #else
>   static bool mutex_optimistic_spin(struct mutex *lock,
>   				  struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
>   {
>   	return false;
>   }
> +
> +#define MUTEX_MAX_WAIT 32
> +
> +static inline void do_yield_to_waiter(struct mutex *lock, int loops)
> +{
> +	if (loops < MUTEX_MAX_WAIT)
> +		return;
> +
> +	if (lock->yield_to_waiter != true)
> +		lock->yield_to_waiter =true;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return lock->yield_to_waiter;
> +}
>   #endif
>
>   __visible __used noinline
> @@ -510,6 +546,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	struct mutex_waiter waiter;
>   	unsigned long flags;
>   	int ret;
> +	int loop = 0;
>
>   	if (use_ww_ctx) {
>   		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> @@ -532,7 +569,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	 * Once more, try to acquire the lock. Only try-lock the mutex if
>   	 * it is unlocked to reduce unnecessary xchg() operations.
>   	 */
> -	if (!mutex_is_locked(lock) &&
> +	if (!need_yield_to_waiter(lock) && !mutex_is_locked(lock) &&
>  	    (atomic_xchg_acquire(&lock->count, 0) == 1))
>   		goto skip_wait;
>
> @@ -546,6 +583,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	lock_contended(&lock->dep_map, ip);
>
>   	for (;;) {
> +		loop++;
> +
>   		/*
>   		 * Lets try to take the lock again - this is needed even if
>   		 * we get here for the first time (shortly after failing to
> @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   		 * other waiters. We only attempt the xchg if the count is
>   		 * non-negative in order to avoid unnecessary xchg operations:
>   		 */
> -		if (atomic_read(&lock->count) >= 0 &&
> +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
> +		    atomic_read(&lock->count) >= 0 &&
>  		    (atomic_xchg_acquire(&lock->count, -1) == 1))

I think you need to reset the yield_to_waiter variable here when loop > 
1 instead of at the end of the loop.

> 		break;
>
> @@ -581,6 +621,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   		spin_unlock_mutex(&lock->wait_lock, flags);
>   		schedule_preempt_disabled();
>   		spin_lock_mutex(&lock->wait_lock, flags);
> +		do_yield_to_waiter(lock, loop);
>   	}
>   	__set_task_state(task, TASK_RUNNING);
>
> @@ -590,6 +631,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   		atomic_set(&lock->count, 0);
>   	debug_mutex_free_waiter(&waiter);
>
> +#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
> +	lock->yield_to_waiter = false;
> +#endif
> +

Maybe you should do the reset in an inline function instead.

>   skip_wait:
>   	/* got the lock - cleanup and rejoice! */
>   	lock_acquired(&lock->dep_map, ip);
> @@ -789,10 +834,13 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
>    */
>   int __sched mutex_lock_interruptible(struct mutex *lock)
>   {
> -	int ret;
> +	int ret = 1;
>
>   	might_sleep();
> -	ret =  __mutex_fastpath_lock_retval(&lock->count);
> +
> +	if (!need_yield_to_waiter(lock))
> +		ret =  __mutex_fastpath_lock_retval(&lock->count);
> +
>   	if (likely(!ret)) {
>   		mutex_set_owner(lock);
>   		return 0;
> @@ -804,10 +852,13 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
>
>   int __sched mutex_lock_killable(struct mutex *lock)
>   {
> -	int ret;
> +	int ret = 1;
>
>   	might_sleep();
> -	ret = __mutex_fastpath_lock_retval(&lock->count);
> +
> +	if (!need_yield_to_waiter(lock))
> +		ret = __mutex_fastpath_lock_retval(&lock->count);
> +
>   	if (likely(!ret)) {
>   		mutex_set_owner(lock);
>   		return 0;

Cheers,
Longman

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-20 13:29           ` Imre Deak
@ 2016-07-21 20:57             ` Jason Low
  2016-07-22 17:55               ` Waiman Long
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Low @ 2016-07-21 20:57 UTC (permalink / raw)
  To: imre.deak
  Cc: jason.low2, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Waiman Long, Davidlohr Bueso

On Wed, 2016-07-20 at 16:29 +0300, Imre Deak wrote:
> On ti, 2016-07-19 at 21:39 -0700, Jason Low wrote:
> > On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> > > Hi Imre,
> > > 
> > > Here is a patch which prevents a thread from spending too much "time"
> > > waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> > > 
> > > Would you like to try this out and see if this addresses the mutex
> > > starvation issue you are seeing in your workload when optimistic
> > > spinning is disabled?
> > 
> > Although it looks like it didn't take care of the 'lock stealing' case
> > in the slowpath. Here is the updated fixed version:
> 
> This also got rid of the problem, I only needed to change the ww
> functions accordingly. Also, imo mutex_trylock() needs the same
> handling

Good point. I suppose mutex_trylock() may not be causing starvation
issues, but I agree that it makes sense for mutex_trylock() to fail too if
threads are supposed to yield to a waiter. I'll make the update.

Thanks,
Jason

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-20 18:37           ` Waiman Long
@ 2016-07-21 22:29             ` Jason Low
  2016-07-22  9:34               ` Imre Deak
  2016-07-22 18:01               ` Waiman Long
  0 siblings, 2 replies; 19+ messages in thread
From: Jason Low @ 2016-07-21 22:29 UTC (permalink / raw)
  To: Waiman Long
  Cc: jason.low2, imre.deak, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Davidlohr Bueso, jason.low2

On Wed, 2016-07-20 at 14:37 -0400, Waiman Long wrote:
> On 07/20/2016 12:39 AM, Jason Low wrote:
> > On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> >> Hi Imre,
> >>
> >> Here is a patch which prevents a thread from spending too much "time"
> >> waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> >>
> >> Would you like to try this out and see if this addresses the mutex
> >> starvation issue you are seeing in your workload when optimistic
> >> spinning is disabled?
> > Although it looks like it didn't take care of the 'lock stealing' case
> > in the slowpath. Here is the updated fixed version:
> >
> > ---
> > Signed-off-by: Jason Low <jason.low2@hpe.com>
> > ---
> >   include/linux/mutex.h  |  2 ++
> >   kernel/locking/mutex.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------
> >   2 files changed, 60 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > index 2cb7531..c1ca68d 100644
> > --- a/include/linux/mutex.h
> > +++ b/include/linux/mutex.h
> > @@ -57,6 +57,8 @@ struct mutex {
> >   #endif
> >   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> >   	struct optimistic_spin_queue osq; /* Spinner MCS lock */
> > +#else
> > +	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
> >   #endif
> >   #ifdef CONFIG_DEBUG_MUTEXES
> >   	void			*magic;
> 
> You don't need that on a non-SMP system. So maybe you should put it under
> an #ifdef CONFIG_SMP block.

Right, maybe something like:

    #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
	...
	...
    #elif !defined(CONFIG_SMP) /* If optimistic spinning disabled */
        bool yield_to_waiter;
    #endif

> > @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >   		 * other waiters. We only attempt the xchg if the count is
> >   		 * non-negative in order to avoid unnecessary xchg operations:
> >   		 */
> > -		if (atomic_read(&lock->count) >= 0 &&
> > +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
> > +		    atomic_read(&lock->count) >= 0 &&
> >  		    (atomic_xchg_acquire(&lock->count, -1) == 1))
> 
> I think you need to reset the yield_to_waiter variable here when loop > 
> 1 instead of at the end of the loop.

So I think in the current state, only the top waiter would be able to
both set and clear the yield_to_waiter variable anyway. However, I agree
that this detail is not obvious and it would be better to reset the
variable here when loop > 1 to make it more readable.

> > 		break;
> >
> > @@ -581,6 +621,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >   		spin_unlock_mutex(&lock->wait_lock, flags);
> >   		schedule_preempt_disabled();
> >   		spin_lock_mutex(&lock->wait_lock, flags);
> > +		do_yield_to_waiter(lock, loop);
> >   	}
> >   	__set_task_state(task, TASK_RUNNING);
> >
> > @@ -590,6 +631,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >   		atomic_set(&lock->count, 0);
> >   	debug_mutex_free_waiter(&waiter);
> >
> > +#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
> > +	lock->yield_to_waiter = false;
> > +#endif
> > +
> 
> Maybe you should do the reset in an inline function instead.

Yes, this should be abstracted into a function like we do with
do_yield_to_waiter().


Jason

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-21 22:29             ` Jason Low
@ 2016-07-22  9:34               ` Imre Deak
  2016-07-22 18:44                 ` Jason Low
  2016-07-22 18:01               ` Waiman Long
  1 sibling, 1 reply; 19+ messages in thread
From: Imre Deak @ 2016-07-22  9:34 UTC (permalink / raw)
  To: Jason Low, Waiman Long
  Cc: Peter Zijlstra, linux-kernel, Ingo Molnar, Chris Wilson,
	Daniel Vetter, Davidlohr Bueso, jason.low2

On Thu, 2016-07-21 at 15:29 -0700, Jason Low wrote:
> On Wed, 2016-07-20 at 14:37 -0400, Waiman Long wrote:
> > On 07/20/2016 12:39 AM, Jason Low wrote:
> > > On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> > > > Hi Imre,
> > > > 
> > > > Here is a patch which prevents a thread from spending too much
> > > > "time"
> > > > waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> > > > 
> > > > Would you like to try this out and see if this addresses the
> > > > mutex
> > > > starvation issue you are seeing in your workload when
> > > > optimistic
> > > > spinning is disabled?
> > > Although it looks like it didn't take care of the 'lock stealing'
> > > case
> > > in the slowpath. Here is the updated fixed version:
> > > 
> > > ---
> > > Signed-off-by: Jason Low <jason.low2@hpe.com>
> > > ---
> > >   include/linux/mutex.h  |  2 ++
> > >   kernel/locking/mutex.c | 65
> > > ++++++++++++++++++++++++++++++++++++++++++++------
> > >   2 files changed, 60 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > > index 2cb7531..c1ca68d 100644
> > > --- a/include/linux/mutex.h
> > > +++ b/include/linux/mutex.h
> > > @@ -57,6 +57,8 @@ struct mutex {
> > >   #endif
> > >   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > >   	struct optimistic_spin_queue osq; /* Spinner MCS lock
> > > */
> > > +#else
> > > +	bool yield_to_waiter; /* Prevent starvation when
> > > spinning disabled */
> > >   #endif
> > >   #ifdef CONFIG_DEBUG_MUTEXES
> > >   	void			*magic;
> > 
> > You don't need that on a non-SMP system. So maybe you should put it
> > under an #ifdef CONFIG_SMP block.
> 
> Right, maybe something like:
> 
>     #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> 	...
> 	...
>     #elif !defined(CONFIG_SMP) /* If optimistic spinning disabled */
>         bool yield_to_waiter;
>     #endif
> 
> > > @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long
> > > state, unsigned int subclass,
> > >   		 * other waiters. We only attempt the xchg if
> > > the count is
> > >   		 * non-negative in order to avoid unnecessary
> > > xchg operations:
> > >   		 */
> > > -		if (atomic_read(&lock->count) >= 0 &&
> > > +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
> > > +		    atomic_read(&lock->count) >= 0 &&
> > >  		    (atomic_xchg_acquire(&lock->count, -1) == 1))
> > 
> > I think you need to reset the yield_to_waiter variable here when
> > loop > 
> > 1 instead of at the end of the loop.
> 
> So I think in the current state, only the top waiter would be able to
> both set and clear the yield_to_waiter variable anyway. However, I
> agree
> that this detail is not obvious and it would be better to reset the
> variable here when loop > 1 to make it more readable.

AFAICS an interruptible waiter behind the top waiter receiving a signal
and grabbing the lock could also reset yield_to_waiter incorrectly in
that way, increasing the top waiter's delay arbitrarily.
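
One (untested) way around that would be to track which task requested
the yield instead of using a plain bool, so that only that task can
clear it again, e.g. a sketch along these lines:

	/* Sketch only: in struct mutex, instead of the bool:
	 *	struct task_struct *yield_to;	// NULL when unset
	 */

	static inline void do_yield_to_waiter(struct mutex *lock, int loops)
	{
		if (loops >= MUTEX_MAX_WAIT && !lock->yield_to)
			lock->yield_to = current;
	}

	static inline bool need_yield_to_waiter(struct mutex *lock)
	{
		return lock->yield_to != NULL;
	}

	/* Called by a waiter once it has acquired the lock: */
	static inline void clear_yield_to_waiter(struct mutex *lock)
	{
		if (lock->yield_to == current)
			lock->yield_to = NULL;
	}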

--Imre

> 
> > > 		break;
> > > 
> > > @@ -581,6 +621,7 @@ __mutex_lock_common(struct mutex *lock, long
> > > state, unsigned int subclass,
> > >   		spin_unlock_mutex(&lock->wait_lock, flags);
> > >   		schedule_preempt_disabled();
> > >   		spin_lock_mutex(&lock->wait_lock, flags);
> > > +		do_yield_to_waiter(lock, loop);
> > >   	}
> > >   	__set_task_state(task, TASK_RUNNING);
> > > 
> > > @@ -590,6 +631,10 @@ __mutex_lock_common(struct mutex *lock, long
> > > state, unsigned int subclass,
> > >   		atomic_set(&lock->count, 0);
> > >   	debug_mutex_free_waiter(&waiter);
> > > 
> > > +#ifndef CONFIG_MUTEX_SPIN_ON_OWNER
> > > +	lock->yield_to_waiter = false;
> > > +#endif
> > > +
> > 
> > Maybe you should do the reset in an inline function instead.
> 
> Yes, this should be abstracted into a function like we do with
> do_yield_to_waiter().
> 
> 
> Jason
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-21 20:57             ` Jason Low
@ 2016-07-22 17:55               ` Waiman Long
  2016-07-22 18:03                 ` Davidlohr Bueso
  0 siblings, 1 reply; 19+ messages in thread
From: Waiman Long @ 2016-07-22 17:55 UTC (permalink / raw)
  To: Jason Low
  Cc: imre.deak, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Davidlohr Bueso

On 07/21/2016 04:57 PM, Jason Low wrote:
> On Wed, 2016-07-20 at 16:29 +0300, Imre Deak wrote:
>> On ti, 2016-07-19 at 21:39 -0700, Jason Low wrote:
>>> On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
>>>> Hi Imre,
>>>>
>>>> Here is a patch which prevents a thread from spending too much "time"
>>>> waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
>>>>
>>>> Would you like to try this out and see if this addresses the mutex
>>>> starvation issue you are seeing in your workload when optimistic
>>>> spinning is disabled?
>>> Although it looks like it didn't take care of the 'lock stealing' case
>>> in the slowpath. Here is the updated fixed version:
>> This also got rid of the problem, I only needed to change the ww
>> functions accordingly. Also, imo mutex_trylock() needs the same
>> handling
> Good point. I suppose mutex_trylock() may not be causing starvation
> issues, but I agree that it makes sense for mutex_trylock() to fail too
> if threads are supposed to yield to a waiter. I'll make the update.
>
> Thanks,
> Jason
>

I think making mutex_trylock() fail may be going a bit too far. Do we
really have any real workload that causes a starvation problem because of
that? Code that does mutex_trylock() in a loop can certainly cause lock
starvation, but that is not how mutex_trylock() is supposed to be used. We
can't build in safeguards for all the possible abuses of the mutex APIs.
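
Purely as an illustration of the abuse pattern I mean (my_dev, its lock and
do_something() are made-up names, not from any real workload), something like
the following effectively busy-waits on the mutex and keeps stealing it from
sleeping waiters:

	static void poll_for_lock(struct my_dev *dev)
	{
		/* busy-wait until the trylock wins the race */
		while (!mutex_trylock(&dev->lock))
			cpu_relax();

		do_something(dev);
		mutex_unlock(&dev->lock);
	}

A caller that actually needs the lock should simply block in mutex_lock()
instead.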

Cheers,
Longman

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-21 22:29             ` Jason Low
  2016-07-22  9:34               ` Imre Deak
@ 2016-07-22 18:01               ` Waiman Long
  1 sibling, 0 replies; 19+ messages in thread
From: Waiman Long @ 2016-07-22 18:01 UTC (permalink / raw)
  To: Jason Low
  Cc: imre.deak, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter, Davidlohr Bueso, jason.low2

On 07/21/2016 06:29 PM, Jason Low wrote:
> On Wed, 2016-07-20 at 14:37 -0400, Waiman Long wrote:
>> On 07/20/2016 12:39 AM, Jason Low wrote:
>>> On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
>>>> Hi Imre,
>>>>
>>>> Here is a patch which prevents a thread from spending too much "time"
>>>> waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
>>>>
>>>> Would you like to try this out and see if this addresses the mutex
>>>> starvation issue you are seeing in your workload when optimistic
>>>> spinning is disabled?
>>> Although it looks like it didn't take care of the 'lock stealing' case
>>> in the slowpath. Here is the updated fixed version:
>>>
>>> ---
>>> Signed-off-by: Jason Low <jason.low2@hpe.com>
>>> ---
>>>    include/linux/mutex.h  |  2 ++
>>>    kernel/locking/mutex.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------
>>>    2 files changed, 60 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
>>> index 2cb7531..c1ca68d 100644
>>> --- a/include/linux/mutex.h
>>> +++ b/include/linux/mutex.h
>>> @@ -57,6 +57,8 @@ struct mutex {
>>>    #endif
>>>    #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>>>    	struct optimistic_spin_queue osq; /* Spinner MCS lock */
>>> +#else
>>> +	bool yield_to_waiter; /* Prevent starvation when spinning disabled */
>>>    #endif
>>>    #ifdef CONFIG_DEBUG_MUTEXES
>>>    	void			*magic;
>> You don't need that on a non-SMP system. So maybe you should put it under
>> an #ifdef CONFIG_SMP block.
> Right, maybe something like:
>
>      #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> 	...
> 	...
>      #elif defined(CONFIG_SMP) /* If optimistic spinning disabled */
>          bool yield_to_waiter;
>      #endif
>
>>> @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>>>    		 * other waiters. We only attempt the xchg if the count is
>>>    		 * non-negative in order to avoid unnecessary xchg operations:
>>>    		 */
>>> -		if (atomic_read(&lock->count) >= 0 &&
>>> +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
>>> +		    atomic_read(&lock->count) >= 0 &&
>>>    		(atomic_xchg_acquire(&lock->count, -1) == 1))
>>>    	
>> I think you need to reset the yield_to_waiter variable here when loop > 1
>> instead of at the end of the loop.
> So I think in the current state, only the top waiter would be able to
> both set and clear the yield_to_waiter variable anyway. However, I agree
> that this detail is not obvious and it would be better to reset the
> variable here when loop > 1 to make it more readable.

You should only reset the variable when loop > 1. You may need to check the
error exit path as well.
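
Roughly what I have in mind, as a sketch on top of your patch (where
clear_yield_to_waiter() is just an assumed inline helper wrapping the reset,
not something that exists yet):

		if ((!need_yield_to_waiter(lock) || loop > 1) &&
		    atomic_read(&lock->count) >= 0 &&
		    (atomic_xchg_acquire(&lock->count, -1) == 1)) {
			/* got the lock as the yielded-to waiter, drop the flag now */
			if (loop > 1)
				clear_yield_to_waiter(lock);
			break;
		}

plus presumably a similar call in the err: exit path, so a waiter that set
the flag and then bails out on a signal doesn't leave it set behind it.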

Cheers,
Longman

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-22 17:55               ` Waiman Long
@ 2016-07-22 18:03                 ` Davidlohr Bueso
  2016-07-22 18:29                   ` Imre Deak
  0 siblings, 1 reply; 19+ messages in thread
From: Davidlohr Bueso @ 2016-07-22 18:03 UTC (permalink / raw)
  To: Waiman Long
  Cc: Jason Low, imre.deak, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter

On Fri, 22 Jul 2016, Waiman Long wrote:

>I think making mutex_trylock() fail may be going a bit too far. Do we
>really have any real workload that causes a starvation problem because of
>that? Code that does mutex_trylock() in a loop can certainly cause lock
>starvation, but that is not how mutex_trylock() is supposed to be used.
>We can't build in safeguards for all the possible abuses of the mutex
>APIs.

True, and that's actually why I think that 'fixing' the !SPIN_ON_OWNER case
is a bit too far in the first place: most of the archs that will care about
this already have ARCH_SUPPORTS_ATOMIC_RMW. The extra code for dealing with
this is not worth it imo.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-22 18:03                 ` Davidlohr Bueso
@ 2016-07-22 18:29                   ` Imre Deak
  2016-07-22 19:26                     ` Davidlohr Bueso
  0 siblings, 1 reply; 19+ messages in thread
From: Imre Deak @ 2016-07-22 18:29 UTC (permalink / raw)
  To: Davidlohr Bueso, Waiman Long
  Cc: Jason Low, Peter Zijlstra, linux-kernel, Ingo Molnar,
	Chris Wilson, Daniel Vetter

On Fri, 2016-07-22 at 11:03 -0700, Davidlohr Bueso wrote:
> On Fri, 22 Jul 2016, Waiman Long wrote:
> 
> > I think making mutex_trylock() fail may be going a bit too far. Do we
> > really have any real workload that causes a starvation problem because
> > of that? Code that does mutex_trylock() in a loop can certainly cause
> > lock starvation, but that is not how mutex_trylock() is supposed to be
> > used. We can't build in safeguards for all the possible abuses of the
> > mutex APIs.
>
> True, and that's actually why I think that 'fixing' the
> !SPIN_ON_OWNER case
> is a bit too far in the first place: most of the archs that will care
> about
> this already have ARCH_SUPPORTS_ATOMIC_RMW. The extra code for
> dealing with
> this is not worth it imo.

SPIN_ON_OWNER is also disabled in case of DEBUG_MUTEXES, which is the
config where I wanted to avoid starvation in the first place.
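
For reference, the option is defined in kernel/Kconfig.locks along these
lines (quoting from memory, so the exact dependency list may differ):

	config MUTEX_SPIN_ON_OWNER
		def_bool y
		depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW

so any DEBUG_MUTEXES kernel ends up in the no-spinning slowpath regardless
of the architecture.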

--Imre

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-22  9:34               ` Imre Deak
@ 2016-07-22 18:44                 ` Jason Low
  0 siblings, 0 replies; 19+ messages in thread
From: Jason Low @ 2016-07-22 18:44 UTC (permalink / raw)
  To: imre.deak
  Cc: jason.low2, Waiman Long, Peter Zijlstra, linux-kernel,
	Ingo Molnar, Chris Wilson, Daniel Vetter, Davidlohr Bueso

On Fri, 2016-07-22 at 12:34 +0300, Imre Deak wrote:
> On to, 2016-07-21 at 15:29 -0700, Jason Low wrote:
> > On Wed, 2016-07-20 at 14:37 -0400, Waiman Long wrote:
> > > On 07/20/2016 12:39 AM, Jason Low wrote:
> > > > On Tue, 2016-07-19 at 16:04 -0700, Jason Low wrote:
> > > > > Hi Imre,
> > > > > 
> > > > > Here is a patch which prevents a thread from spending too much
> > > > > "time"
> > > > > waiting for a mutex in the !CONFIG_MUTEX_SPIN_ON_OWNER case.
> > > > > 
> > > > > Would you like to try this out and see if this addresses the
> > > > > mutex
> > > > > starvation issue you are seeing in your workload when
> > > > > optimistic
> > > > > spinning is disabled?
> > > > Although it looks like it didn't take care of the 'lock stealing'
> > > > case
> > > > in the slowpath. Here is the updated fixed version:
> > > > 
> > > > ---
> > > > Signed-off-by: Jason Low <jason.low2@hpe.com>
> > > > ---
> > > >   include/linux/mutex.h  |  2 ++
> > > >   kernel/locking/mutex.c | 65
> > > > ++++++++++++++++++++++++++++++++++++++++++++------
> > > >   2 files changed, 60 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > > > index 2cb7531..c1ca68d 100644
> > > > --- a/include/linux/mutex.h
> > > > +++ b/include/linux/mutex.h
> > > > @@ -57,6 +57,8 @@ struct mutex {
> > > >   #endif
> > > >   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > > >   	struct optimistic_spin_queue osq; /* Spinner MCS lock
> > > > */
> > > > +#else
> > > > +	bool yield_to_waiter; /* Prevent starvation when
> > > > spinning disabled */
> > > >   #endif
> > > >   #ifdef CONFIG_DEBUG_MUTEXES
> > > >   	void			*magic;
> > > 
> > > You don't need that on a non-SMP system. So maybe you should put it
> > > under an #ifdef CONFIG_SMP block.
> > 
> > Right, maybe something like:
> > 
> >     #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > 	...
> > 	...
> >     #elif defined(CONFIG_SMP) /* If optimistic spinning disabled */
> >         bool yield_to_waiter;
> >     #endif
> > 
> > > > @@ -556,7 +595,8 @@ __mutex_lock_common(struct mutex *lock, long
> > > > state, unsigned int subclass,
> > > >   		 * other waiters. We only attempt the xchg if
> > > > the count is
> > > >   		 * non-negative in order to avoid unnecessary
> > > > xchg operations:
> > > >   		 */
> > > > -		if (atomic_read(&lock->count) >= 0 &&
> > > > +		if ((!need_yield_to_waiter(lock) || loop > 1) &&
> > > > +		    atomic_read(&lock->count) >= 0 &&
> > > >   		(atomic_xchg_acquire(&lock->count, -1) == 1))
> > > >   	
> > > 
> > > I think you need to reset the yield_to_waiter variable here when
> > > loop > 
> > > 1 instead of at the end of the loop.
> > 
> > So I think in the current state, only the top waiter would be able to
> > both set and clear the yield_to_waiter variable anyway. However, I
> > agree
> > that this detail is not obvious and it would be better to reset the
> > variable here when loop > 1 to make it more readable.
> 
> AFAICS an interruptible waiter behind the top waiter receiving a signal
> and grabbing the lock could also reset yield_to_waiter incorrectly in
> that way, increasing the top waiter's delay arbitrarily.

Okay, fair enough :)

The reset will get moved so that only the waiter being yielded to can do it.
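
Something along these lines is what I have in mind (sketch only; using the
head of the wait list to identify the yielded-to waiter is just one option,
and clear_yield_to_waiter() is an assumed helper, not final code):

	static inline void clear_yield_to_waiter(struct mutex *lock,
						 struct mutex_waiter *waiter)
	{
	#if defined(CONFIG_SMP) && !defined(CONFIG_MUTEX_SPIN_ON_OWNER)
		/*
		 * Only the head of the wait list, i.e. the waiter the flag
		 * was set for, may clear it.  A waiter further back that
		 * happens to grab the lock (e.g. right after a signal wakes
		 * it up) leaves the flag alone.
		 */
		if (lock->wait_list.next == &waiter->list)
			lock->yield_to_waiter = false;
	#endif
	}

called from the acquisition path in __mutex_lock_common() while the waiter
is still on the wait list.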

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-22 18:29                   ` Imre Deak
@ 2016-07-22 19:26                     ` Davidlohr Bueso
  2016-07-22 19:53                       ` Imre Deak
  0 siblings, 1 reply; 19+ messages in thread
From: Davidlohr Bueso @ 2016-07-22 19:26 UTC (permalink / raw)
  To: Imre Deak
  Cc: Waiman Long, Jason Low, Peter Zijlstra, linux-kernel,
	Ingo Molnar, Chris Wilson, Daniel Vetter

On Fri, 22 Jul 2016, Imre Deak wrote:

>On Fri, 2016-07-22 at 11:03 -0700, Davidlohr Bueso wrote:
>> On Fri, 22 Jul 2016, Waiman Long wrote:
>>
>> > I think making mutex_trylock() fail may be going a bit too far. Do we
>> > really have any real workload that causes a starvation problem because
>> > of that? Code that does mutex_trylock() in a loop can certainly cause
>> > lock starvation, but that is not how mutex_trylock() is supposed to be
>> > used. We can't build in safeguards for all the possible abuses of the
>> > mutex APIs.
>>
>> True, and that's actually why I think that 'fixing' the
>> !SPIN_ON_OWNER case
>> is a bit too far in the first place: most of the archs that will care
>> about
>> this already have ARCH_SUPPORTS_ATOMIC_RMW. The extra code for
>> dealing with
>> this is not worth it imo.
>
>SPIN_ON_OWNER is also disabled in case of DEBUG_MUTEXES, which is the
>config where I wanted to avoid starvation in the first place.

Well yes, but note of course that that option is even less common than
archs with non-atomic RMW.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC] Avoid mutex starvation when optimistic spinning is disabled
  2016-07-22 19:26                     ` Davidlohr Bueso
@ 2016-07-22 19:53                       ` Imre Deak
  0 siblings, 0 replies; 19+ messages in thread
From: Imre Deak @ 2016-07-22 19:53 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Waiman Long, Jason Low, Peter Zijlstra, linux-kernel,
	Ingo Molnar, Chris Wilson, Daniel Vetter

On Fri, 2016-07-22 at 12:26 -0700, Davidlohr Bueso wrote:
> On Fri, 22 Jul 2016, Imre Deak wrote:
> 
> > On Fri, 2016-07-22 at 11:03 -0700, Davidlohr Bueso wrote:
> > > On Fri, 22 Jul 2016, Waiman Long wrote:
> > > 
> > > > I think making mutex_trylock() fail may be going a bit too far. Do
> > > > we really have any real workload that causes a starvation problem
> > > > because of that? Code that does mutex_trylock() in a loop can
> > > > certainly cause lock starvation, but that is not how mutex_trylock()
> > > > is supposed to be used. We can't build in safeguards for all the
> > > > possible abuses of the mutex APIs.
> > > 
> > > True, and that's actually why I think that 'fixing' the
> > > !SPIN_ON_OWNER case
> > > is a bit too far in the first place: most of the archs that will
> > > care
> > > about
> > > this already have ARCH_SUPPORTS_ATOMIC_RMW. The extra code for
> > > dealing with
> > > this is not worth it imo.
> > 
> > SPIN_ON_OWNER is also disabled in case of DEBUG_MUTEXES, which is
> > the
> > config where I wanted to avoid starvation in the first place.
> 
> Well yes, but note of course that that option is even less common than
> archs with non-atomic RMW.

The point is that it's broken atm.

--Imre

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2016-07-22 19:53 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-18 16:16 [RFC] locking/mutex: Fix starvation of sleeping waiters Imre Deak
2016-07-18 17:15 ` Peter Zijlstra
2016-07-18 17:47   ` Jason Low
2016-07-19 16:53     ` Imre Deak
2016-07-19 22:57       ` Jason Low
2016-07-19 23:04       ` [RFC] Avoid mutex starvation when optimistic spinning is disabled Jason Low
2016-07-20  4:39         ` Jason Low
2016-07-20 13:29           ` Imre Deak
2016-07-21 20:57             ` Jason Low
2016-07-22 17:55               ` Waiman Long
2016-07-22 18:03                 ` Davidlohr Bueso
2016-07-22 18:29                   ` Imre Deak
2016-07-22 19:26                     ` Davidlohr Bueso
2016-07-22 19:53                       ` Imre Deak
2016-07-20 18:37           ` Waiman Long
2016-07-21 22:29             ` Jason Low
2016-07-22  9:34               ` Imre Deak
2016-07-22 18:44                 ` Jason Low
2016-07-22 18:01               ` Waiman Long
