* [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path
@ 2016-06-01 18:58 Peter Zijlstra
2016-06-02 13:27 ` Paul E. McKenney
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Peter Zijlstra @ 2016-06-01 18:58 UTC (permalink / raw)
To: Ingo Molnar, Davidlohr Bueso, Waiman Long, Paul McKenney, viro
Cc: linux-kernel
A while back Viro posted a number of 'interesting' mutex_is_locked()
users on IRC, one of those was RCU.
RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the
regular load before modify pattern.
While the use isn't wrong per se, it's curious in that it's needed at all,
mutex_trylock() should be good enough on its own to avoid the pointless
cacheline bounces.
So fix those and remove the mutex_is_locked() (ab)use from RCU.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/ia64/include/asm/mutex.h | 2 +-
arch/powerpc/include/asm/mutex.h | 2 +-
arch/x86/include/asm/mutex_32.h | 2 +-
arch/x86/include/asm/mutex_64.h | 6 +++---
include/asm-generic/mutex-dec.h | 2 +-
include/asm-generic/mutex-xchg.h | 6 +++++-
kernel/rcu/tree.c | 1 -
7 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d65e31..28cb819e0ff9 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23e1f6c..078155fa1189 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda45a02..e9355a84fc67 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a44216e..d9850758464e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index fd694cfd678a..c54829d3de37 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index a6b4a7bd6ac9..3269ec4e195f 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- int prev = atomic_xchg_acquire(count, 0);
+ int prev;
+ if (atomic_read(count) != 1)
+ return 0;
+
+ prev = atomic_xchg_acquire(count, 0);
if (unlikely(prev < 0)) {
/*
* The lock was marked contended so we must restore that
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c7f1bc4f817c..b7326893221f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
(rnp == rnp_root ||
ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
- !mutex_is_locked(&rsp->exp_mutex) &&
mutex_trylock(&rsp->exp_mutex))
goto fastpath;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path
2016-06-01 18:58 [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path Peter Zijlstra
@ 2016-06-02 13:27 ` Paul E. McKenney
2016-06-02 14:26 ` Davidlohr Bueso
2016-06-08 14:26 ` [tip:locking/core] locking/mutex: " tip-bot for Peter Zijlstra
2 siblings, 0 replies; 4+ messages in thread
From: Paul E. McKenney @ 2016-06-02 13:27 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Davidlohr Bueso, Waiman Long, viro, linux-kernel
On Wed, Jun 01, 2016 at 08:58:15PM +0200, Peter Zijlstra wrote:
>
> A while back Viro posted a number of 'interesting' mutex_is_locked()
> users on IRC, one of those was RCU.
>
> RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the
> regular load before modify pattern.
Definitely -- the old versions of mutex_trylock() looked to take ownership
of the cacheline, which most definitely is not what you want if you have
a bunch of CPUs doing expedited grace periods concurrently.
> While the use isn't wrong per se, it's curious in that it's needed at all,
> mutex_trylock() should be good enough on its own to avoid the pointless
> cacheline bounces.
>
> So fix those and remove the mutex_is_locked() (ab)use from RCU.
Given the changes below that push the mutex_is_locked() check into
mutex_trylock():
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Or I could pull the changes into -rcu if you prefer.
Thanx, Paul
> Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> arch/ia64/include/asm/mutex.h | 2 +-
> arch/powerpc/include/asm/mutex.h | 2 +-
> arch/x86/include/asm/mutex_32.h | 2 +-
> arch/x86/include/asm/mutex_64.h | 6 +++---
> include/asm-generic/mutex-dec.h | 2 +-
> include/asm-generic/mutex-xchg.h | 6 +++++-
> kernel/rcu/tree.c | 1 -
> 7 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
> index f41e66d65e31..28cb819e0ff9 100644
> --- a/arch/ia64/include/asm/mutex.h
> +++ b/arch/ia64/include/asm/mutex.h
> @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
> static inline int
> __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
> {
> - if (cmpxchg_acq(count, 1, 0) == 1)
> + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
> return 1;
> return 0;
> }
> diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
> index 127ab23e1f6c..078155fa1189 100644
> --- a/arch/powerpc/include/asm/mutex.h
> +++ b/arch/powerpc/include/asm/mutex.h
> @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
> static inline int
> __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
> {
> - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
> + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
> return 1;
> return 0;
> }
> diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
> index 85e6cda45a02..e9355a84fc67 100644
> --- a/arch/x86/include/asm/mutex_32.h
> +++ b/arch/x86/include/asm/mutex_32.h
> @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
> int (*fail_fn)(atomic_t *))
> {
> /* cmpxchg because it never induces a false contention state. */
> - if (likely(atomic_cmpxchg(count, 1, 0) == 1))
> + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
> return 1;
>
> return 0;
> diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
> index 07537a44216e..d9850758464e 100644
> --- a/arch/x86/include/asm/mutex_64.h
> +++ b/arch/x86/include/asm/mutex_64.h
> @@ -118,10 +118,10 @@ do { \
> static inline int __mutex_fastpath_trylock(atomic_t *count,
> int (*fail_fn)(atomic_t *))
> {
> - if (likely(atomic_cmpxchg(count, 1, 0) == 1))
> + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
> return 1;
> - else
> - return 0;
> +
> + return 0;
> }
>
> #endif /* _ASM_X86_MUTEX_64_H */
> diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
> index fd694cfd678a..c54829d3de37 100644
> --- a/include/asm-generic/mutex-dec.h
> +++ b/include/asm-generic/mutex-dec.h
> @@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
> static inline int
> __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
> {
> - if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
> + if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
> return 1;
> return 0;
> }
> diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
> index a6b4a7bd6ac9..3269ec4e195f 100644
> --- a/include/asm-generic/mutex-xchg.h
> +++ b/include/asm-generic/mutex-xchg.h
> @@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
> static inline int
> __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
> {
> - int prev = atomic_xchg_acquire(count, 0);
> + int prev;
>
> + if (atomic_read(count) != 1)
> + return 0;
> +
> + prev = atomic_xchg_acquire(count, 0);
> if (unlikely(prev < 0)) {
> /*
> * The lock was marked contended so we must restore that
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index c7f1bc4f817c..b7326893221f 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
> if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
> (rnp == rnp_root ||
> ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
> - !mutex_is_locked(&rsp->exp_mutex) &&
> mutex_trylock(&rsp->exp_mutex))
> goto fastpath;
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path
2016-06-01 18:58 [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path Peter Zijlstra
2016-06-02 13:27 ` Paul E. McKenney
@ 2016-06-02 14:26 ` Davidlohr Bueso
2016-06-08 14:26 ` [tip:locking/core] locking/mutex: " tip-bot for Peter Zijlstra
2 siblings, 0 replies; 4+ messages in thread
From: Davidlohr Bueso @ 2016-06-02 14:26 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Waiman Long, Paul McKenney, viro, linux-kernel
On Wed, 01 Jun 2016, Peter Zijlstra wrote:
>
>A while back Viro posted a number of 'interesting' mutex_is_locked()
>users on IRC, one of those was RCU.
>
>RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the
>regular load before modify pattern.
>
>While the use isn't wrong per se, it's curious in that it's needed at all,
>mutex_trylock() should be good enough on its own to avoid the pointless
>cacheline bounces.
Yeah, and we use ccas just about everywhere else for mutexes.
>
>So fix those and remove the mutex_is_locked() (ab)use from RCU.
>
>Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
>Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
^ permalink raw reply [flat|nested] 4+ messages in thread
* [tip:locking/core] locking/mutex: Optimize mutex_trylock() fast-path
2016-06-01 18:58 [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path Peter Zijlstra
2016-06-02 13:27 ` Paul E. McKenney
2016-06-02 14:26 ` Davidlohr Bueso
@ 2016-06-08 14:26 ` tip-bot for Peter Zijlstra
2 siblings, 0 replies; 4+ messages in thread
From: tip-bot for Peter Zijlstra @ 2016-06-08 14:26 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, dave, viro, Waiman.Long, peterz, akpm, hpa, tglx,
torvalds, mingo, paulmck
Commit-ID: 6428671bae97caa7040e24e79e969fd87908f4f3
Gitweb: http://git.kernel.org/tip/6428671bae97caa7040e24e79e969fd87908f4f3
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 1 Jun 2016 20:58:15 +0200
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 8 Jun 2016 15:17:01 +0200
locking/mutex: Optimize mutex_trylock() fast-path
A while back Viro posted a number of 'interesting' mutex_is_locked()
users on IRC, one of those was RCU.
RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the
regular load before modify pattern.
While the use isn't wrong per se, it's curious in that it's needed at all,
mutex_trylock() should be good enough on its own to avoid the pointless
cacheline bounces.
So fix those and remove the mutex_is_locked() (ab)use from RCU.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hpe.com>
Link: http://lkml.kernel.org/r/20160601185815.GW3190@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/ia64/include/asm/mutex.h | 2 +-
arch/powerpc/include/asm/mutex.h | 2 +-
arch/x86/include/asm/mutex_32.h | 2 +-
arch/x86/include/asm/mutex_64.h | 6 +++---
include/asm-generic/mutex-dec.h | 2 +-
include/asm-generic/mutex-xchg.h | 6 +++++-
kernel/rcu/tree.c | 1 -
7 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d..28cb819 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23..078155f 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda..e9355a8 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a4..d985075 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index fd694cf..c54829d 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index a6b4a7b..3269ec4 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- int prev = atomic_xchg_acquire(count, 0);
+ int prev;
+ if (atomic_read(count) != 1)
+ return 0;
+
+ prev = atomic_xchg_acquire(count, 0);
if (unlikely(prev < 0)) {
/*
* The lock was marked contended so we must restore that
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c7f1bc4..b732689 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
(rnp == rnp_root ||
ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
- !mutex_is_locked(&rsp->exp_mutex) &&
mutex_trylock(&rsp->exp_mutex))
goto fastpath;
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-06-08 14:27 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-01 18:58 [RFC][PATCH] mutex: Optimize mutex_trylock() fast-path Peter Zijlstra
2016-06-02 13:27 ` Paul E. McKenney
2016-06-02 14:26 ` Davidlohr Bueso
2016-06-08 14:26 ` [tip:locking/core] locking/mutex: " tip-bot for Peter Zijlstra
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).