All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] locking: Remove an insn from spin and write locks
@ 2018-08-20 15:06 Matthew Wilcox
  2018-08-20 15:14 ` Waiman Long
  0 siblings, 1 reply; 9+ messages in thread
From: Matthew Wilcox @ 2018-08-20 15:06 UTC (permalink / raw)
  To: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Waiman Long, Thomas Gleixner
  Cc: Matthew Wilcox

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg()
will let the compiler know this is true.  Comparing before/after
disassemblies show the only effect is to remove this insn.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/asm-generic/qrwlock.h   | 6 +++---
 include/asm-generic/qspinlock.h | 9 +++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..9a1beb7ad0de 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,7 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg(&lock->cnts, &cnts, _QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +95,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 95263e943fcc..d125f0c0b3d9 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -68,8 +68,10 @@ int queued_spin_is_contended(const struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
+	u32 val = 0;
+
 	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
 		return 1;
 	return 0;
 }
@@ -82,10 +84,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 	queued_spin_lock_slowpath(lock, val);
 }
-- 
2.18.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:06 [PATCH] locking: Remove an insn from spin and write locks Matthew Wilcox
@ 2018-08-20 15:14 ` Waiman Long
  2018-08-20 15:50   ` Matthew Wilcox
  0 siblings, 1 reply; 9+ messages in thread
From: Waiman Long @ 2018-08-20 15:14 UTC (permalink / raw)
  To: Matthew Wilcox, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Peter Zijlstra, Will Deacon, Thomas Gleixner

On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> Both spin locks and write locks currently do:
>
>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>  85 c0                   test   %eax,%eax
>  75 05                   jne    [slowpath]
>
> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> will let the compiler know this is true.  Comparing before/after
> disassemblies show the only effect is to remove this insn.
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
> ---
>  include/asm-generic/qrwlock.h   | 6 +++---
>  include/asm-generic/qspinlock.h | 9 +++++----
>  2 files changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
> index 0f7062bd55e5..9a1beb7ad0de 100644
> --- a/include/asm-generic/qrwlock.h
> +++ b/include/asm-generic/qrwlock.h
> @@ -71,8 +71,7 @@ static inline int queued_write_trylock(struct qrwlock *lock)
>  	if (unlikely(cnts))
>  		return 0;
>  
> -	return likely(atomic_cmpxchg_acquire(&lock->cnts,
> -					     cnts, cnts | _QW_LOCKED) == cnts);
> +	return likely(atomic_try_cmpxchg(&lock->cnts, &cnts, _QW_LOCKED));
>  }
>  /**
>   * queued_read_lock - acquire read lock of a queue rwlock
> @@ -96,8 +95,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
>   */
>  static inline void queued_write_lock(struct qrwlock *lock)
>  {
> +	u32 cnts = 0;
>  	/* Optimize for the unfair lock case where the fair flag is 0. */
> -	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
> +	if (atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
>  		return;
>  
>  	queued_write_lock_slowpath(lock);
> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> index 95263e943fcc..d125f0c0b3d9 100644
> --- a/include/asm-generic/qspinlock.h
> +++ b/include/asm-generic/qspinlock.h
> @@ -68,8 +68,10 @@ int queued_spin_is_contended(const struct qspinlock *lock)
>   */
>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> +	u32 val = 0;
> +
>  	if (!atomic_read(&lock->val) &&
> -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))

Should you keep the _acquire suffix?

>  		return 1;
>  	return 0;
>  }
> @@ -82,10 +84,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
>   */
>  static __always_inline void queued_spin_lock(struct qspinlock *lock)
>  {
> -	u32 val;
> +	u32 val = 0;
>  
> -	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
> -	if (likely(val == 0))
> +	if (likely(atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
>  		return;
>  	queued_spin_lock_slowpath(lock, val);
>  }

Ditto.

BTW, qspinlock and qrwlock are now also used by AArch64, mips and sparc.
Have you tried to see what the effect will be for those architecture?

-Longman


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:14 ` Waiman Long
@ 2018-08-20 15:50   ` Matthew Wilcox
  2018-08-20 15:55     ` Waiman Long
  2018-08-20 15:56     ` Peter Zijlstra
  0 siblings, 2 replies; 9+ messages in thread
From: Matthew Wilcox @ 2018-08-20 15:50 UTC (permalink / raw)
  To: Waiman Long
  Cc: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
> On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> > Both spin locks and write locks currently do:
> >
> >  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
> >  85 c0                   test   %eax,%eax
> >  75 05                   jne    [slowpath]
> >
> > This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> > appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> > will let the compiler know this is true.  Comparing before/after
> > disassemblies show the only effect is to remove this insn.
...
> >  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> >  {
> > +	u32 val = 0;
> > +
> >  	if (!atomic_read(&lock->val) &&
> > -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> > +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
> 
> Should you keep the _acquire suffix?

I don't know ;-)  Probably.  Peter didn't include it as part of his
suggested fix, but on reviewing the documentation, it seems likely that
it should be retained.  I put them back in and (as expected) it changes
nothing on x86-64.

> BTW, qspinlock and qrwlock are now also used by AArch64, mips and sparc.
> Have you tried to see what the effect will be for those architecture?

Nope!  That's why I cc'd linux-arch, because I don't know who (other
than arm64 and x86) is using q-locks these days.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:50   ` Matthew Wilcox
@ 2018-08-20 15:55     ` Waiman Long
  2018-08-20 15:56     ` Peter Zijlstra
  1 sibling, 0 replies; 9+ messages in thread
From: Waiman Long @ 2018-08-20 15:55 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Arnd Bergmann, linux-arch, linux-kernel, Ingo Molnar,
	Peter Zijlstra, Will Deacon, Thomas Gleixner

On 08/20/2018 11:50 AM, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
>> On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
>>> Both spin locks and write locks currently do:
>>>
>>>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>>>  85 c0                   test   %eax,%eax
>>>  75 05                   jne    [slowpath]
>>>
>>> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
>>> appropriately.  Peter pointed out that using atomic_try_cmpxchg()
>>> will let the compiler know this is true.  Comparing before/after
>>> disassemblies show the only effect is to remove this insn.
> ...
>>>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>>>  {
>>> +	u32 val = 0;
>>> +
>>>  	if (!atomic_read(&lock->val) &&
>>> -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
>>> +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
>> Should you keep the _acquire suffix?
> I don't know ;-)  Probably.  Peter didn't include it as part of his
> suggested fix, but on reviewing the documentation, it seems likely that
> it should be retained.  I put them back in and (as expected) it changes
> nothing on x86-64.

We will certainly need to keep the _acquire suffix or it will likely
regress performance for arm64.

>> BTW, qspinlock and qrwlock are now also used by AArch64, mips and sparc.
>> Have you tried to see what the effect will be for those architecture?
> Nope!  That's why I cc'd linux-arch, because I don't know who (other
> than arm64 and x86) is using q-locks these days.

I think both Sparc and mips are using qlocks now, though these
architectures are not the ones that I am interested in. I do like to
make sure that there will be no regression for arm64. Will should be
able to answer that.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 15:50   ` Matthew Wilcox
  2018-08-20 15:55     ` Waiman Long
@ 2018-08-20 15:56     ` Peter Zijlstra
  2018-08-20 16:26         ` Matthew Wilcox
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2018-08-20 15:56 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Waiman Long, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 08:50:02AM -0700, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 11:14:04AM -0400, Waiman Long wrote:
> > On 08/20/2018 11:06 AM, Matthew Wilcox wrote:
> > > Both spin locks and write locks currently do:
> > >
> > >  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
> > >  85 c0                   test   %eax,%eax
> > >  75 05                   jne    [slowpath]
> > >
> > > This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> > > appropriately.  Peter pointed out that using atomic_try_cmpxchg()
> > > will let the compiler know this is true.  Comparing before/after
> > > disassemblies show the only effect is to remove this insn.
> ...
> > >  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> > >  {
> > > +	u32 val = 0;
> > > +
> > >  	if (!atomic_read(&lock->val) &&
> > > -	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
> > > +	    (atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)))
> > 
> > Should you keep the _acquire suffix?
> 
> I don't know ;-)  Probably.  Peter didn't include it as part of his
> suggested fix, but on reviewing the documentation, it seems likely that
> it should be retained.  I put them back in and (as expected) it changes
> nothing on x86-64.

Yeah, _acquire should be retained; sorry about losing that. I'm neck
deep into tlb invalidate stuff and wrote this without much thinking
involved.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
@ 2018-08-20 16:26         ` Matthew Wilcox
  0 siblings, 0 replies; 9+ messages in thread
From: Matthew Wilcox @ 2018-08-20 16:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Waiman Long, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 05:56:50PM +0200, Peter Zijlstra wrote:
> Yeah, _acquire should be retained; sorry about losing that. I'm neck
> deep into tlb invalidate stuff and wrote this without much thinking
> involved.

NP.  Here's the current version I've got, with some updated likely()
hints.

From 337298a88266f7b21492f893c2bf05409a5392c8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 20 Aug 2018 10:19:14 -0400
Subject: [PATCH] locking: Remove an insn from spin and write locks

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
will let the compiler know this is true.  Comparing before/after
disassemblies show the only effect is to remove this insn.

Take this opportunity to make the spin & write lock code resemble each
other more closely and have similar likely() hints.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/asm-generic/qrwlock.h   |  7 ++++---
 include/asm-generic/qspinlock.h | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..36254d2da8e0 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+				_QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +96,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 95263e943fcc..24e7915eee56 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -68,10 +68,14 @@ int queued_spin_is_contended(const struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val;
+
+	val = atomic_read(&lock->val);
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val,
+				_Q_LOCKED_VAL));
 }
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -82,10 +86,9 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 	queued_spin_lock_slowpath(lock, val);
 }
-- 
2.18.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
@ 2018-08-20 16:26         ` Matthew Wilcox
  0 siblings, 0 replies; 9+ messages in thread
From: Matthew Wilcox @ 2018-08-20 16:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Waiman Long, Arnd Bergmann, linux-arch, linux-kernel,
	Ingo Molnar, Will Deacon, Thomas Gleixner

On Mon, Aug 20, 2018 at 05:56:50PM +0200, Peter Zijlstra wrote:
> Yeah, _acquire should be retained; sorry about losing that. I'm neck
> deep into tlb invalidate stuff and wrote this without much thinking
> involved.

NP.  Here's the current version I've got, with some updated likely()
hints.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] locking: Remove an insn from spin and write locks
  2018-08-20 16:26         ` Matthew Wilcox
  (?)
@ 2018-08-21  1:54         ` Will Deacon
  -1 siblings, 0 replies; 9+ messages in thread
From: Will Deacon @ 2018-08-21  1:54 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Peter Zijlstra, Waiman Long, Arnd Bergmann, linux-arch,
	linux-kernel, Ingo Molnar, Thomas Gleixner

On Mon, Aug 20, 2018 at 09:26:39AM -0700, Matthew Wilcox wrote:
> On Mon, Aug 20, 2018 at 05:56:50PM +0200, Peter Zijlstra wrote:
> > Yeah, _acquire should be retained; sorry about losing that. I'm neck
> > deep into tlb invalidate stuff and wrote this without much thinking
> > involved.
> 
> NP.  Here's the current version I've got, with some updated likely()
> hints.
> 
> From 337298a88266f7b21492f893c2bf05409a5392c8 Mon Sep 17 00:00:00 2001
> From: Matthew Wilcox <willy@infradead.org>
> Date: Mon, 20 Aug 2018 10:19:14 -0400
> Subject: [PATCH] locking: Remove an insn from spin and write locks
> 
> Both spin locks and write locks currently do:
> 
>  f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>  85 c0                   test   %eax,%eax
>  75 05                   jne    [slowpath]
> 
> This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
> appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
> will let the compiler know this is true.  Comparing before/after
> disassemblies show the only effect is to remove this insn.
> 
> Take this opportunity to make the spin & write lock code resemble each
> other more closely and have similar likely() hints.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
> ---
>  include/asm-generic/qrwlock.h   |  7 ++++---
>  include/asm-generic/qspinlock.h | 17 ++++++++++-------
>  2 files changed, 14 insertions(+), 10 deletions(-)

Shouldn't make any difference on arm64, so:

Acked-by: Will Deacon <will.deacon@arm.com>

Will

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tip:locking/core] locking/spinlocks: Remove an instruction from spin and write locks
  2018-08-20 16:26         ` Matthew Wilcox
  (?)
  (?)
@ 2018-10-02 10:07         ` tip-bot for Matthew Wilcox
  -1 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Matthew Wilcox @ 2018-10-02 10:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, will.deacon, tglx, linux-kernel, longman, torvalds,
	mingo, hpa, willy, arnd

Commit-ID:  27df89689e257cccb604fdf56c91a75a25aa554a
Gitweb:     https://git.kernel.org/tip/27df89689e257cccb604fdf56c91a75a25aa554a
Author:     Matthew Wilcox <willy@infradead.org>
AuthorDate: Mon, 20 Aug 2018 10:19:14 -0400
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 2 Oct 2018 09:49:42 +0200

locking/spinlocks: Remove an instruction from spin and write locks

Both spin locks and write locks currently do:

 f0 0f b1 17             lock cmpxchg %edx,(%rdi)
 85 c0                   test   %eax,%eax
 75 05                   jne    [slowpath]

This 'test' insn is superfluous; the cmpxchg insn sets the Z flag
appropriately.  Peter pointed out that using atomic_try_cmpxchg_acquire()
will let the compiler know this is true.  Comparing before/after
disassemblies show the only effect is to remove this insn.

Take this opportunity to make the spin & write lock code resemble each
other more closely and have similar likely() hints.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Link: http://lkml.kernel.org/r/20180820162639.GC25153@bombadil.infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h   |  7 ++++---
 include/asm-generic/qspinlock.h | 16 +++++++++-------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 0f7062bd55e5..36254d2da8e0 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -71,8 +71,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+				_QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +96,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;
 
 	queued_write_lock_slowpath(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 9cc457597ddf..7541fa707f5b 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -66,10 +66,12 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val = atomic_read(&lock->val);
+
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -80,11 +82,11 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;
 
-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
+
 	queued_spin_lock_slowpath(lock, val);
 }
 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-10-02 10:08 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-20 15:06 [PATCH] locking: Remove an insn from spin and write locks Matthew Wilcox
2018-08-20 15:14 ` Waiman Long
2018-08-20 15:50   ` Matthew Wilcox
2018-08-20 15:55     ` Waiman Long
2018-08-20 15:56     ` Peter Zijlstra
2018-08-20 16:26       ` Matthew Wilcox
2018-08-20 16:26         ` Matthew Wilcox
2018-08-21  1:54         ` Will Deacon
2018-10-02 10:07         ` [tip:locking/core] locking/spinlocks: Remove an instruction " tip-bot for Matthew Wilcox

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.