linux-kernel.vger.kernel.org archive mirror
* [PATCH] locking: Remove an insn from spin and write locks
@ 2018-08-20 15:06 Matthew Wilcox
  2018-08-20 15:14 ` Waiman Long
  0 siblings, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2018-08-20 15:06 UTC
  To: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Waiman Long, Thomas Gleixner
  Cc: Matthew Wilcox

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg()
will let the compiler know this is true.  Comparing before/after
disassemblies shows that the only effect is to remove this insn.
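
For readers unfamiliar with the try_cmpxchg() family, a rough sketch of
its contract is shown below (illustration only; the real
atomic_try_cmpxchg() comes from the per-architecture or generic atomic
headers, and sketch_try_cmpxchg() is a made-up name):

/* Sketch: on failure, hand the observed value back through @old. */
static inline bool sketch_try_cmpxchg(atomic_t *v, int *old, int new)
{
        int seen = atomic_cmpxchg(v, *old, new);

        if (seen == *old)
                return true;
        *old = seen;    /* caller's *old now holds the current value */
        return false;
}

The boolean result maps directly onto the Z flag already set by the
cmpxchg instruction, which is why no separate 'test' is needed.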

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/asm-generic/qrwlock.h   | 6 +++---
 include/asm-generic/qspinlock.h | 9 +++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..9a1beb7ad0de 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,7 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg(&lock->cnts, &cnts, _QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +95,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 95263e943fcc..d125f0c0b3d9 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -68,8 +68,10 @@ int queued_spin_is_contended(const struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
+	u32 val = 0;
+
 	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
 		return 1;
 	return 0;
 }
@@ -82,10 +84,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 	queued_spin_lock_slowpath(lock, val);
 }
-- 
2.18.0



* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:06 [PATCH] locking: Remove an insn from spin and write locks Matthew Wilcox
@ 2018-08-20 15:14 ` Waiman Long
  2018-08-20 15:50   ` Matthew Wilcox
  0 siblings, 1 reply; 8+ messages in thread
From: Waiman Long @ 2018-08-20 15:14 UTC
  To: Matthew Wilcox, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Peter Zijlstra, Will Deacon, Thomas Gleixner

On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> Both spin locks and write locks currently do:
>
>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>  85 c0                   test   %eax,%eax
>  75 05                   jne    [slowpath]
>
> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> will let the compiler know this is true.  Comparing before/after
> disassemblies show the only effect is to remove this insn.
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
> ---
>  include/asm-generic/qrwlock.h   | 6 +++---
>  include/asm-generic/qspinlock.h | 9 +++++----
>  2 files changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
> index 0f7062bd55e5..9a1beb7ad0de 100644
> --- a/include/asm-generic/qrwlock.h
> +++ b/include/asm-generic/qrwlock.h
> @@ -71,8 +71,7 @@ static inline int queued_write_trylock(struct qrwlock *lock)
>  	if (unlikely(cnts))
>  		return 0;
>  
> -	return likely(atomic_cmpxchg_acquire(&lock->cnts,
> -					     cnts, cnts | _QW_LOCKED) == cnts);
> +	return likely(atomic_try_cmpxchg(&lock->cnts, &cnts, _QW_LOCKED));
>  }
>  /**
>   * queued_read_lock - acquire read lock of a queue rwlock
> @@ -96,8 +95,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
>   */
>  static inline void queued_write_lock(struct qrwlock *lock)
>  {
> +	u32 cnts = 0;
>  	/* Optimize for the unfair lock case where the fair flag is 0. */
> -	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
> +	if (atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
>  		return;
>  
>  	queued_write_lock_slowpath(lock);
> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> index 95263e943fcc..d125f0c0b3d9 100644
> --- a/include/asm-generic/qspinlock.h
> +++ b/include/asm-generic/qspinlock.h
> @@ -68,8 +68,10 @@ int queued_spin_is_contended(const struct qspinlock *lock)
>   */
>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> +	u32 val = 0;
> +
>  	if (!atomic_read(&lock->val) &&
> -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))

Should you keep the _acquire suffix?

>  		return 1;
>  	return 0;
>  }
> @@ -82,10 +84,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
>   */
>  static __always_inline void queued_spin_lock(struct qspinlock *lock)
>  {
> -	u32 val;
> +	u32 val = 0;
>  
> -	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
> -	if (likely(val == 0))
> +	if (likely(atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
>  		return;
>  	queued_spin_lock_slowpath(lock, val);
>  }

Ditto.

BTW, qspinlock and qrwlock are now also used by AArch64, MIPS and SPARC.
Have you tried to see what the effect will be for those architectures?

-Longman



* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:14 ` Waiman Long
@ 2018-08-20 15:50   ` Matthew Wilcox
  2018-08-20 15:55     ` Waiman Long
  2018-08-20 15:56     ` Peter Zijlstra
  0 siblings, 2 replies; 8+ messages in thread
From: Matthew Wilcox @ 2018-08-20 15:50 UTC
  To: Waiman Long
  Cc: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
> On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> > Both spin locks and write locks currently do:
> >
> >  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
> >  85 c0                   test   %eax,%eax
> >  75 05                   jne    [slowpath]
> >
> > This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> > appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> > will let the compiler know this is true.  Comparing before/after
> > disassemblies show the only effect is to remove this insn.
...
> >  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> >  {
> > +	u32 val = 0;
> > +
> >  	if (!atomic_read(&lock->val) &&
> > -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> > +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
> 
> Should you keep the _acquire suffix?

I don't know ;-)  Probably.  Peter didn't include it as part of his
suggested fix, but on reviewing the documentation, it seems likely that
it should be retained.  I put them back in and (as expected) it changes
nothing on x86-64.

> BTW, qspinlock and qrwlock are now also used by AArch64, mips and sparc.
> Have you tried to see what the effect will be for those architecture?

Nope!  That's why I cc'd linux-arch, because I don't know who (other
than arm64 and x86) is using q-locks these days.


* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:50   ` Matthew Wilcox
@ 2018-08-20 15:55     ` Waiman Long
  2018-08-20 15:56     ` Peter Zijlstra
  1 sibling, 0 replies; 8+ messages in thread
From: Waiman Long @ 2018-08-20 15:55 UTC
  To: Matthew Wilcox
  Cc: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Thomas Gleixner

On 08/20/2018 11:50 AM, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
>> On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
>>> Both spin locks and write locks currently do:
>>>
>>>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>>>  85 c0                   test   %eax,%eax
>>>  75 05                   jne    [slowpath]
>>>
>>> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
>>> appropriately.  Peter pointed out that using atomic_try_cmpxchg()
>>> will let the compiler know this is true.  Comparing before/after
>>> disassemblies show the only effect is to remove this insn.
> ...
>>>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>>>  {
>>> +	u32 val = 0;
>>> +
>>>  	if (!atomic_read(&lock->val) &&
>>> -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
>>> +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
>> Should you keep the _acquire suffix?
> I don't know ;-)  Probably.  Peter didn't include it as part of his
> suggested fix, but on reviewing the documentation, it seems likely that
> it should be retained.  I put them back in and (as expected) it changes
> nothing on x86-64.

We will certainly need to keep the _acquire suffix or it will likely
regress performance for arm64.
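
As an illustration of why (a user-space C11 analogue, not the kernel
API): on x86 all of these compile to the same "lock cmpxchg", while on
arm64 an acquire-only CAS is allowed to be cheaper than a fully ordered
one.

#include <stdatomic.h>
#include <stdbool.h>

/* Acquire ordering: enough to keep the critical section after the lock. */
static bool trylock_acquire(atomic_uint *lock)
{
        unsigned int expected = 0;

        return atomic_compare_exchange_strong_explicit(lock, &expected, 1,
                        memory_order_acquire, memory_order_relaxed);
}

/* Fully ordered: stronger (and potentially slower) than the lock needs. */
static bool trylock_seq_cst(atomic_uint *lock)
{
        unsigned int expected = 0;

        return atomic_compare_exchange_strong(lock, &expected, 1);
}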

>> BTW, qspinlock and qrwlock are now also used by AArch64, mips and sparc.
>> Have you tried to see what the effect will be for those architecture?
> Nope!  That's why I cc'd linux-arch, because I don't know who (other
> than arm64 and x86) is using q-locks these days.

I think both SPARC and MIPS are using qlocks now, though those
architectures are not the ones I am interested in. I would like to
make sure that there will be no regression for arm64; Will should be
able to answer that.

Cheers,
Longman



* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:50   ` Matthew Wilcox
  2018-08-20 15:55     ` Waiman Long
@ 2018-08-20 15:56     ` Peter Zijlstra
  2018-08-20 16:26       ` Matthew Wilcox
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2018-08-20 15:56 UTC
  To: Matthew Wilcox
  Cc: Waiman Long, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 08:50:02AM -0700, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
> > On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> > > Both spin locks and write locks currently do:
> > >
> > >  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
> > >  85 c0                   test   %eax,%eax
> > >  75 05                   jne    [slowpath]
> > >
> > > This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> > > appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> > > will let the compiler know this is true.  Comparing before/after
> > > disassemblies show the only effect is to remove this insn.
> ...
> > >  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> > >  {
> > > +	u32 val = 0;
> > > +
> > >  	if (!atomic_read(&lock->val) &&
> > > -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> > > +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
> > 
> > Should you keep the _acquire suffix?
> 
> I don't know ;-)  Probably.  Peter didn't include it as part of his
> suggested fix, but on reviewing the documentation, it seems likely that
> it should be retained.  I put them back in and (as expected) it changes
> nothing on x86-64.

Yeah, _acquire should be retained; sorry about losing that. I'm
neck-deep in TLB invalidate stuff and wrote this without much thinking
involved.




* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:56     ` Peter Zijlstra
@ 2018-08-20 16:26       ` Matthew Wilcox
  2018-08-21  1:54         ` Will Deacon
  2018-10-02 10:07         ` [tip:locking/core] locking/spinlocks: Remove an instruction " tip-bot for Matthew Wilcox
  0 siblings, 2 replies; 8+ messages in thread
From: Matthew Wilcox @ 2018-08-20 16:26 UTC
  To: Peter Zijlstra
  Cc: Waiman Long, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 05:56:50PM +0200, Peter Zijlstra wrote:
> Yeah, _acquire should be retained; sorry about loosing that. I'm neck
> deep into tlb invalidate stuff and wrote this without much thinking
> involved.

NP.  Here's the current version I've got, with some updated likely()
hints.

From 337298a88266f7b21492f893c2bf05409a5392c8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 20 Aug 2018 10:19:14 -0400
Subject: [PATCH] locking: Remove an insn from spin and write locks

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
will let the compiler know this is true.  Comparing before/after
disassemblies shows that the only effect is to remove this insn.

Take this opportunity to make the spin & write lock code resemble each
other more closely and have similar likely() hints.
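
(For reference, the hints boil down to __builtin_expect() -- simplified
from include/linux/compiler.h below; they only steer branch layout and
static prediction, they do not change the generated cmpxchg.)

#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)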

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/asm-generic/qrwlock.h   |  7 ++++---
 include/asm-generic/qspinlock.h | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..36254d2da8e0 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+				_QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +96,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 95263e943fcc..24e7915eee56 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -68,10 +68,14 @@ int queued_spin_is_contended(const struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val;
+
+	val = atomic_read(&lock->val);
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val,
+				_Q_LOCKED_VAL));
 }
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -82,10 +86,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 	queued_spin_lock_slowpath(lock, val);
 }
-- 
2.18.0



* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 16:26       ` Matthew Wilcox
@ 2018-08-21  1:54         ` Will Deacon
  2018-10-02 10:07         ` [tip:locking/core] locking/spinlocks: Remove an instruction " tip-bot for Matthew Wilcox
  1 sibling, 0 replies; 8+ messages in thread
From: Will Deacon @ 2018-08-21  1:54 UTC
  To: Matthew Wilcox
  Cc: Peter Zijlstra, Waiman Long, Arnd Bergmann, linux-arch,
	linux-kernel, Ingo Molnar, Thomas Gleixner

On Mon, Aug 20, 2018 at 09:26:39AM -0700, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 05:56:50PM +0200, Peter Zijlstra wrote:
> > Yeah, _acquire should be retained; sorry about loosing that. I'm neck
> > deep into tlb invalidate stuff and wrote this without much thinking
> > involved.
> 
> NP.  Here's the current version I've got, with some updated likely()
> hints.
> 
> From 337298a88266f7b21492f893c2bf05409a5392c8 Mon Sep 17 00:00:00 2001
> From: Matthew Wilcox <willy@infradead.org>
> Date: Mon, 20 Aug 2018 10:19:14 -0400
> Subject: [PATCH] locking: Remove an insn from spin and write locks
> 
> Both spin locks and write locks currently do:
> 
>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>  85 c0                   test   %eax,%eax
>  75 05                   jne    [slowpath]
> 
> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
> will let the compiler know this is true.  Comparing before/after
> disassemblies show the only effect is to remove this insn.
> 
> Take this opportunity to make the spin & write lock code resemble each
> other more closely and have similar likely() hints.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
> ---
>  include/asm-generic/qrwlock.h   |  7 ++++---
>  include/asm-generic/qspinlock.h | 17 ++++++++++-------
>  2 files changed, 14 insertions(+), 10 deletions(-)

Shouldn't make any difference on arm64, so:

Acked-by: Will Deacon <will.deacon@arm.com>

Will


* [tip:locking/core] locking/spinlocks: Remove an instruction from spin and write locks
  2018-08-20 16:26       ` Matthew Wilcox
  2018-08-21  1:54         ` Will Deacon
@ 2018-10-02 10:07         ` tip-bot for Matthew Wilcox
  1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for Matthew Wilcox @ 2018-10-02 10:07 UTC
  To: linux-tip-commits
  Cc: peterz, will.deacon, tglx, linux-kernel, longman, torvalds,
	mingo, hpa, willy, arnd

Commit-ID:  27df89689e257cccb604fdf56c91a75a25aa554a
Gitweb:     https://git.kernel.org/tip/27df89689e257cccb604fdf56c91a75a25aa554a
Author:     Matthew Wilcox <willy@infradead.org>
AuthorDate: Mon, 20 Aug 2018 10:19:14 -0400
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 2 Oct 2018 09:49:42 +0200

locking/spinlocks: Remove an instruction from spin and write locks

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
will let the compiler know this is true.  Comparing before/after
disassemblies shows that the only effect is to remove this insn.

Take this opportunity to make the spin & write lock code resemble each
other more closely and have similar likely() hints.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Link: http://lkml.kernel.org/r/20180820162639.GC25153@bombadil.infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h   |  7 ++++---
 include/asm-generic/qspinlock.h | 16 +++++++++-------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..36254d2da8e0 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+				_QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +96,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 9cc457597ddf..7541fa707f5b 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -66,10 +66,12 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val = atomic_read(&lock->val);
+
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -80,11 +82,11 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
+
 	queued_spin_lock_slowpath(lock, val);
 }
 


