* [PATCH 0/2] start using spin primitives in sched and locking
@ 2017-08-20  9:25 Nicholas Piggin
  2017-08-20  9:25 ` [PATCH 1/2] locking: Use spin primitives for busy loops Nicholas Piggin
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Nicholas Piggin @ 2017-08-20  9:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar; +Cc: Nicholas Piggin, linux-kernel

Nicholas Piggin (2):
  locking: Use spin primitives for busy loops
  sched/idle: Use spin loop primitives for polling idle

 include/linux/bit_spinlock.h        |  5 ++---
 include/linux/seqlock.h             |  9 ++++-----
 kernel/locking/mcs_spinlock.h       |  6 ++----
 kernel/locking/mutex.c              | 10 ++++++++--
 kernel/locking/osq_lock.c           | 17 +++++++++++++----
 kernel/locking/qrwlock.c            | 11 ++++++++---
 kernel/locking/qspinlock.c          | 14 ++++++++++----
 kernel/locking/qspinlock_paravirt.h | 16 ++++++++++++----
 kernel/locking/rwsem-xadd.c         |  9 +++++++--
 kernel/sched/idle.c                 |  7 ++++++-
 10 files changed, 72 insertions(+), 32 deletions(-)

-- 
2.13.3


* [PATCH 1/2] locking: Use spin primitives for busy loops
  2017-08-20  9:25 [PATCH 0/2] start using spin primitives in sched and locking Nicholas Piggin
@ 2017-08-20  9:25 ` Nicholas Piggin
  2017-09-01 12:23   ` Peter Zijlstra
  2017-08-20  9:25 ` [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle Nicholas Piggin
  2017-09-01  4:01 ` [PATCH 0/2] start using spin primitives in sched and locking Nicholas Piggin
  2 siblings, 1 reply; 8+ messages in thread
From: Nicholas Piggin @ 2017-08-20  9:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar; +Cc: Nicholas Piggin, linux-kernel

Commit fd851a3cdc ("spin loop primitives for busy waiting") introduced
a begin/relax/end sequence for busy loops, to improve behaviour on some
architectures (for example powerpc, which can lower SMT hardware-thread
priority for the duration of the spin).

Convert most of the generic locking primitives over to use these spin
primitives.
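
For illustration, these are the two shapes the conversions take
(spin_begin(), spin_cpu_relax(), spin_end() and spin_until_cond() come
from <linux/processor.h>; "cond" below is just a placeholder expression):

	/* open-coded form, for loops that do more work per iteration */
	spin_begin();
	while (!cond)
		spin_cpu_relax();
	spin_end();

	/* simple wait-for-condition loops */
	spin_until_cond(cond);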

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/linux/bit_spinlock.h        |  5 ++---
 include/linux/seqlock.h             |  9 ++++-----
 kernel/locking/mcs_spinlock.h       |  6 ++----
 kernel/locking/mutex.c              | 10 ++++++++--
 kernel/locking/osq_lock.c           | 17 +++++++++++++----
 kernel/locking/qrwlock.c            | 11 ++++++++---
 kernel/locking/qspinlock.c          | 14 ++++++++++----
 kernel/locking/qspinlock_paravirt.h | 16 ++++++++++++----
 kernel/locking/rwsem-xadd.c         |  9 +++++++--
 9 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 3b5bafce4337..4cec87d9cde8 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/preempt.h>
+#include <linux/processor.h>
 #include <linux/atomic.h>
 #include <linux/bug.h>
 
@@ -25,9 +26,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 	while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
 		preempt_enable();
-		do {
-			cpu_relax();
-		} while (test_bit(bitnum, addr));
+		spin_until_cond(!test_bit(bitnum, addr));
 		preempt_disable();
 	}
 #endif
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index ead97654c4e9..f4bd4a6c89d9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -32,6 +32,7 @@
  * by Keith Owens and Andrea Arcangeli
  */
 
+#include <linux/processor.h>
 #include <linux/spinlock.h>
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
@@ -108,12 +109,10 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 {
 	unsigned ret;
 
-repeat:
 	ret = READ_ONCE(s->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
+	if (unlikely(ret & 1))
+		spin_until_cond( !((ret = READ_ONCE(s->sequence)) & 1) );
+
 	return ret;
 }
 
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 6a385aabcce7..a91a0cc46a4c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -27,8 +27,7 @@ struct mcs_spinlock {
  */
 #define arch_mcs_spin_lock_contended(l)					\
 do {									\
-	while (!(smp_load_acquire(l)))					\
-		cpu_relax();						\
+	spin_until_cond(smp_load_acquire(l));				\
 } while (0)
 #endif
 
@@ -107,8 +106,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		if (likely(cmpxchg_release(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
+		spin_until_cond((next = READ_ONCE(node->next)) != 0);
 	}
 
 	/* Pass lock to next waiter. */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 858a07590e39..0ffa1cd7f12b 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -427,6 +427,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
 	bool ret = true;
 
 	rcu_read_lock();
+	spin_begin();
 	while (__mutex_owner(lock) == owner) {
 		/*
 		 * Ensure we emit the owner->on_cpu, dereference _after_
@@ -450,8 +451,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
 			break;
 		}
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 	rcu_read_unlock();
 
 	return ret;
@@ -532,6 +534,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 			goto fail;
 	}
 
+	spin_begin();
 	for (;;) {
 		struct task_struct *owner;
 
@@ -553,8 +556,9 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 		 * memory barriers as we'll eventually observe the right
 		 * values at the cost of a few extra spins.
 		 */
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 
 	if (!waiter)
 		osq_unlock(&lock->osq);
@@ -563,6 +567,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 
 
 fail_unlock:
+	spin_end();
+
 	if (!waiter)
 		osq_unlock(&lock->osq);
 
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index a3167941093b..9dd58bbe60b7 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -53,6 +53,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
 	 */
 	old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
 
+	spin_begin();
 	for (;;) {
 		if (atomic_read(&lock->tail) == curr &&
 		    atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
@@ -80,8 +81,9 @@ osq_wait_next(struct optimistic_spin_queue *lock,
 				break;
 		}
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 
 	return next;
 }
@@ -107,6 +109,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	if (old == OSQ_UNLOCKED_VAL)
 		return true;
 
+	spin_begin();
+
 	prev = decode_cpu(old);
 	node->prev = prev;
 	WRITE_ONCE(prev->next, node);
@@ -129,8 +133,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
 			goto unqueue;
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 	return true;
 
 unqueue:
@@ -152,10 +157,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 		 * in which case we should observe @node->locked becomming
 		 * true.
 		 */
-		if (smp_load_acquire(&node->locked))
+		if (smp_load_acquire(&node->locked)) {
+			spin_end();
 			return true;
+		}
 
-		cpu_relax();
+		spin_cpu_relax();
 
 		/*
 		 * Or we race against a concurrent unqueue()'s step-B, in which
@@ -164,6 +171,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 		prev = READ_ONCE(node->prev);
 	}
 
+	spin_end();
+
 	/*
 	 * Step - B -- stabilize @next
 	 *
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 2655f26ec882..186ff495097d 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -54,10 +54,12 @@ struct __qrwlock {
 static __always_inline void
 rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
+	spin_begin();
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
-		cpu_relax();
+		spin_cpu_relax();
 		cnts = atomic_read_acquire(&lock->cnts);
 	}
+	spin_end();
 }
 
 /**
@@ -124,6 +126,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	 * Set the waiting flag to notify readers that a writer is pending,
 	 * or wait for a previous writer to go away.
 	 */
+	spin_begin();
 	for (;;) {
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
@@ -131,7 +134,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
 
 	/* When no more readers, set the locked flag */
@@ -142,8 +145,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 					    _QW_LOCKED) == _QW_WAITING))
 			break;
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
+
 unlock:
 	arch_spin_unlock(&lock->wait_lock);
 }
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..52ebcebf6fa8 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -362,6 +362,7 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
 {
 	u32 val;
 
+	spin_begin();
 	for (;;) {
 		val = atomic_read(&lock->val);
 
@@ -372,14 +373,15 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
 			break;
 
 		/* not locked, but pending, wait until we observe the lock */
-		cpu_relax();
+		spin_cpu_relax();
 	}
 
 	/* any unlock is good */
 	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
+		spin_cpu_relax();
 
 done:
+	spin_end();
 	smp_acquire__after_ctrl_dep();
 }
 EXPORT_SYMBOL(queued_spin_unlock_wait);
@@ -428,8 +430,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * 0,1,0 -> 0,0,1
 	 */
 	if (val == _Q_PENDING_VAL) {
+		spin_begin();
 		while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
-			cpu_relax();
+			spin_cpu_relax();
+		spin_end();
 	}
 
 	/*
@@ -609,8 +613,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * contended path; wait for next if not observed yet, release.
 	 */
 	if (!next) {
+		spin_begin();
 		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
+			spin_cpu_relax();
+		spin_end();
 	}
 
 	arch_mcs_spin_unlock_contended(&next->locked);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 4ccfcaae5b89..88817e41fadf 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -293,15 +293,19 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 	bool wait_early;
 
 	for (;;) {
+		spin_begin();
 		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
-			if (READ_ONCE(node->locked))
+			if (READ_ONCE(node->locked)) {
+				spin_end();
 				return;
+			}
 			if (pv_wait_early(pp, loop)) {
 				wait_early = true;
 				break;
 			}
-			cpu_relax();
+			spin_cpu_relax();
 		}
+		spin_end();
 
 		/*
 		 * Order pn->state vs pn->locked thusly:
@@ -417,11 +421,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 		 * disable lock stealing before attempting to acquire the lock.
 		 */
 		set_pending(lock);
+		spin_begin();
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
-			if (trylock_clear_pending(lock))
+			if (trylock_clear_pending(lock)) {
+				spin_end();
 				goto gotlock;
-			cpu_relax();
+			}
+			spin_cpu_relax();
 		}
+		spin_end();
 		clear_pending(lock);
 
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 34e727f18e49..2d0e539f1a95 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -358,6 +358,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
 		goto out;
 
 	rcu_read_lock();
+	spin_begin();
 	while (sem->owner == owner) {
 		/*
 		 * Ensure we emit the owner->on_cpu, dereference _after_
@@ -373,12 +374,14 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
 		 */
 		if (!owner->on_cpu || need_resched() ||
 				vcpu_is_preempted(task_cpu(owner))) {
+			spin_end();
 			rcu_read_unlock();
 			return false;
 		}
 
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 	rcu_read_unlock();
 out:
 	/*
@@ -408,6 +411,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 	 *  2) readers own the lock as we can't determine if they are
 	 *     actively running or not.
 	 */
+	spin_begin();
 	while (rwsem_spin_on_owner(sem)) {
 		/*
 		 * Try to acquire the lock
@@ -432,8 +436,9 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 		 * memory barriers as we'll eventually observe the right
 		 * values at the cost of a few extra spins.
 		 */
-		cpu_relax();
+		spin_cpu_relax();
 	}
+	spin_end();
 	osq_unlock(&sem->osq);
 done:
 	preempt_enable();
-- 
2.13.3


* [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle
  2017-08-20  9:25 [PATCH 0/2] start using spin primitives in sched and locking Nicholas Piggin
  2017-08-20  9:25 ` [PATCH 1/2] locking: Use spin primitives for busy loops Nicholas Piggin
@ 2017-08-20  9:25 ` Nicholas Piggin
  2017-09-01 12:24   ` Peter Zijlstra
  2017-09-01  4:01 ` [PATCH 0/2] start using spin primitives in sched and locking Nicholas Piggin
  2 siblings, 1 reply; 8+ messages in thread
From: Nicholas Piggin @ 2017-08-20  9:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar; +Cc: Nicholas Piggin, linux-kernel

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 kernel/sched/idle.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 6c23e30c0e5c..b884980da8ef 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -11,6 +11,7 @@
 #include <linux/stackprotector.h>
 #include <linux/suspend.h>
 #include <linux/livepatch.h>
+#include <linux/processor.h>
 
 #include <asm/tlb.h>
 
@@ -64,9 +65,13 @@ static noinline int __cpuidle cpu_idle_poll(void)
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
 	stop_critical_timings();
+
+	spin_begin();
 	while (!tif_need_resched() &&
 		(cpu_idle_force_poll || tick_check_broadcast_expired()))
-		cpu_relax();
+		spin_cpu_relax();
+	spin_end();
+
 	start_critical_timings();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
-- 
2.13.3


* Re: [PATCH 0/2] start using spin primitives in sched and locking
  2017-08-20  9:25 [PATCH 0/2] start using spin primitives in sched and locking Nicholas Piggin
  2017-08-20  9:25 ` [PATCH 1/2] locking: Use spin primitives for busy loops Nicholas Piggin
  2017-08-20  9:25 ` [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle Nicholas Piggin
@ 2017-09-01  4:01 ` Nicholas Piggin
  2 siblings, 0 replies; 8+ messages in thread
From: Nicholas Piggin @ 2017-09-01  4:01 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar; +Cc: linux-kernel

Hi guys,

Any thoughts on these?

Thanks,
Nick

On Sun, 20 Aug 2017 19:25:00 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> Nicholas Piggin (2):
>   locking: Use spin primitives for busy loops
>   sched/idle: Use spin loop primitives for polling idle
> 
>  include/linux/bit_spinlock.h        |  5 ++---
>  include/linux/seqlock.h             |  9 ++++-----
>  kernel/locking/mcs_spinlock.h       |  6 ++----
>  kernel/locking/mutex.c              | 10 ++++++++--
>  kernel/locking/osq_lock.c           | 17 +++++++++++++----
>  kernel/locking/qrwlock.c            | 11 ++++++++---
>  kernel/locking/qspinlock.c          | 14 ++++++++++----
>  kernel/locking/qspinlock_paravirt.h | 16 ++++++++++++----
>  kernel/locking/rwsem-xadd.c         |  9 +++++++--
>  kernel/sched/idle.c                 |  7 ++++++-
>  10 files changed, 72 insertions(+), 32 deletions(-)
> 


* Re: [PATCH 1/2] locking: Use spin primitives for busy loops
  2017-08-20  9:25 ` [PATCH 1/2] locking: Use spin primitives for busy loops Nicholas Piggin
@ 2017-09-01 12:23   ` Peter Zijlstra
  2017-09-06  0:08     ` Nicholas Piggin
  0 siblings, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2017-09-01 12:23 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Ingo Molnar, linux-kernel


This really should have been many patches.

On Sun, Aug 20, 2017 at 07:25:01PM +1000, Nicholas Piggin wrote:

> @@ -108,12 +109,10 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
>  {
>  	unsigned ret;
>  
> -repeat:
>  	ret = READ_ONCE(s->sequence);
> -	if (unlikely(ret & 1)) {
> -		cpu_relax();
> -		goto repeat;
> -	}
> +	if (unlikely(ret & 1))
> +		spin_until_cond( !((ret = READ_ONCE(s->sequence)) & 1) );
> +

That seems to suggest something like: cond_load(), similar to
smp_cond_load_acquire(), except without the smp/acquire parts.
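
Roughly (hypothetical and untested, just sketching the shape, modeled on
smp_cond_load_acquire() minus the acquire):

	#define cond_load(ptr, cond_expr) ({			\
		typeof(ptr) __PTR = (ptr);			\
		typeof(*ptr) VAL;				\
		for (;;) {					\
			VAL = READ_ONCE(*__PTR);		\
			if (cond_expr)				\
				break;				\
			cpu_relax();				\
		}						\
		VAL;						\
	})

which this site could then use as:

	ret = cond_load(&s->sequence, !(VAL & 1));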

>  	return ret;
>  }
>  
> diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
> index 6a385aabcce7..a91a0cc46a4c 100644
> --- a/kernel/locking/mcs_spinlock.h
> +++ b/kernel/locking/mcs_spinlock.h
> @@ -27,8 +27,7 @@ struct mcs_spinlock {
>   */
>  #define arch_mcs_spin_lock_contended(l)					\
>  do {									\
> -	while (!(smp_load_acquire(l)))					\
> -		cpu_relax();						\
> +	spin_until_cond(smp_load_acquire(l));				\
>  } while (0)

Torn on whether it makes sense to use smp_cond_load_acquire() here
instead. That avoids issuing the barrier on each loop.

>  #endif
>  
> @@ -107,8 +106,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  		if (likely(cmpxchg_release(lock, node, NULL) == node))
>  			return;
>  		/* Wait until the next pointer is set */
> -		while (!(next = READ_ONCE(node->next)))
> -			cpu_relax();
> +		spin_until_cond((next = READ_ONCE(node->next)) != 0);
>  	}
>  
>  	/* Pass lock to next waiter. */

Hmm, you could've used that same pattern above too I suppose,

	spin_until_cond(!((ret = READ_ONCE(s->sequence)) & 1));

> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 858a07590e39..0ffa1cd7f12b 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -427,6 +427,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
>  	bool ret = true;
>  
>  	rcu_read_lock();
> +	spin_begin();
>  	while (__mutex_owner(lock) == owner) {
>  		/*
>  		 * Ensure we emit the owner->on_cpu, dereference _after_
> @@ -450,8 +451,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
>  			break;
>  		}
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  	rcu_read_unlock();

Since we just did a trylock we expect to wait, which is why we don't do
a condition test before spin_begin(), right?

>  
>  	return ret;
> @@ -532,6 +534,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
>  			goto fail;
>  	}
>  
> +	spin_begin();
>  	for (;;) {
>  		struct task_struct *owner;
>  
> @@ -553,8 +556,9 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
>  		 * memory barriers as we'll eventually observe the right
>  		 * values at the cost of a few extra spins.
>  		 */
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  
>  	if (!waiter)
>  		osq_unlock(&lock->osq);
> @@ -563,6 +567,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
>  
>  
>  fail_unlock:
> +	spin_end();
> +
>  	if (!waiter)
>  		osq_unlock(&lock->osq);
>  

The same, we just did a trylock before calling this.

However! I think you just created a nested spin_begin()/spin_end();
looking at how PPC implements that, this doesn't look right.

> diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
> index a3167941093b..9dd58bbe60b7 100644
> --- a/kernel/locking/osq_lock.c
> +++ b/kernel/locking/osq_lock.c
> @@ -53,6 +53,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
>  	 */
>  	old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
>  
> +	spin_begin();
>  	for (;;) {
>  		if (atomic_read(&lock->tail) == curr &&
>  		    atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
> @@ -80,8 +81,9 @@ osq_wait_next(struct optimistic_spin_queue *lock,
>  				break;
>  		}
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  
>  	return next;
>  }

This however doesn't have a fast-path, we unconditionally do
spin_begin(). That looks sub-optimal. From looking at PPC that drops the
HT priority even though we might not end up waiting at all.
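
IIRC spin_until_cond() itself already has the fast-path shape I'd expect
here, roughly:

	if (unlikely(!(cond))) {
		spin_begin();
		do {
			spin_cpu_relax();
		} while (!(cond));
		spin_end();
	}

so open-coded spin_begin() users probably want a cheap condition check
like that before dropping priority.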


> @@ -107,6 +109,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  	if (old == OSQ_UNLOCKED_VAL)
>  		return true;
>  
> +	spin_begin();
> +
>  	prev = decode_cpu(old);
>  	node->prev = prev;
>  	WRITE_ONCE(prev->next, node);

Why so early? You again do HMT_low() even though we might not hit the
cpu_relax().

> @@ -129,8 +133,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
>  			goto unqueue;
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  	return true;
>  
>  unqueue:



> @@ -152,10 +157,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  		 * in which case we should observe @node->locked becomming
>  		 * true.
>  		 */
> -		if (smp_load_acquire(&node->locked))
> +		if (smp_load_acquire(&node->locked)) {
> +			spin_end();
>  			return true;
> +		}
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  
>  		/*
>  		 * Or we race against a concurrent unqueue()'s step-B, in which
> @@ -164,6 +171,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  		prev = READ_ONCE(node->prev);
>  	}
>  
> +	spin_end();
> +

So here we did all of step-A in low-prio, that doesn't look right.

>  	/*
>  	 * Step - B -- stabilize @next
>  	 *
> diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
> index 2655f26ec882..186ff495097d 100644
> --- a/kernel/locking/qrwlock.c
> +++ b/kernel/locking/qrwlock.c
> @@ -54,10 +54,12 @@ struct __qrwlock {
>  static __always_inline void
>  rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
>  {
> +	spin_begin();
>  	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
> -		cpu_relax();
> +		spin_cpu_relax();
>  		cnts = atomic_read_acquire(&lock->cnts);
>  	}
> +	spin_end();
>  }

Again, unconditional :/

>  /**
> @@ -124,6 +126,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
>  	 * Set the waiting flag to notify readers that a writer is pending,
>  	 * or wait for a previous writer to go away.
>  	 */
> +	spin_begin();
>  	for (;;) {
>  		struct __qrwlock *l = (struct __qrwlock *)lock;
>  
> @@ -131,7 +134,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
>  		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
>  			break;
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
>  	/* When no more readers, set the locked flag */
> @@ -142,8 +145,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
>  					    _QW_LOCKED) == _QW_WAITING))
>  			break;
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
> +
>  unlock:
>  	arch_spin_unlock(&lock->wait_lock);
>  }

Would not something like:

	do {
		spin_until_cond(!READ_ONCE(l->wmode));
	} while (try_cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING));

Be a better pattern here? Then we have a fast-path without low-medium
flips.


> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 4ccfcaae5b89..88817e41fadf 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -293,15 +293,19 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>  	bool wait_early;
>  
>  	for (;;) {
> +		spin_begin();

again, unconditional..

>  		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> -			if (READ_ONCE(node->locked))
> +			if (READ_ONCE(node->locked)) {
> +				spin_end();
>  				return;
> +			}
>  			if (pv_wait_early(pp, loop)) {
>  				wait_early = true;
>  				break;
>  			}
> -			cpu_relax();
> +			spin_cpu_relax();
>  		}
> +		spin_end();
>  
>  		/*
>  		 * Order pn->state vs pn->locked thusly:
> @@ -417,11 +421,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
>  		 * disable lock stealing before attempting to acquire the lock.
>  		 */
>  		set_pending(lock);
> +		spin_begin();

and again...

>  		for (loop = SPIN_THRESHOLD; loop; loop--) {
> -			if (trylock_clear_pending(lock))
> +			if (trylock_clear_pending(lock)) {
> +				spin_end();
>  				goto gotlock;
> -			cpu_relax();
> +			}
> +			spin_cpu_relax();
>  		}
> +		spin_end();
>  		clear_pending(lock);
>  



> diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
> index 34e727f18e49..2d0e539f1a95 100644
> --- a/kernel/locking/rwsem-xadd.c
> +++ b/kernel/locking/rwsem-xadd.c
> @@ -358,6 +358,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
>  		goto out;
>  
>  	rcu_read_lock();
> +	spin_begin();
>  	while (sem->owner == owner) {
>  		/*
>  		 * Ensure we emit the owner->on_cpu, dereference _after_
> @@ -373,12 +374,14 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
>  		 */
>  		if (!owner->on_cpu || need_resched() ||
>  				vcpu_is_preempted(task_cpu(owner))) {
> +			spin_end();
>  			rcu_read_unlock();
>  			return false;
>  		}
>  
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  	rcu_read_unlock();
>  out:
>  	/*

rwsem_spin_on_owner() is different from the mutex one in that we don't
first do a trylock() which then makes this unconditional.

So I think we want the rwsem_optimistic_spin() main loop re-ordered to
first do the trylock and _then_ wait if it fails. Not first wait and
then try.
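
Something like (rough sketch only, other bail-out checks unchanged):

	for (;;) {
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}
		if (!rwsem_spin_on_owner(sem))
			break;
		spin_cpu_relax();
	}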

> @@ -408,6 +411,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
>  	 *  2) readers own the lock as we can't determine if they are
>  	 *     actively running or not.
>  	 */
> +	spin_begin();
>  	while (rwsem_spin_on_owner(sem)) {
>  		/*
>  		 * Try to acquire the lock
> @@ -432,8 +436,9 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
>  		 * memory barriers as we'll eventually observe the right
>  		 * values at the cost of a few extra spins.
>  		 */
> -		cpu_relax();
> +		spin_cpu_relax();
>  	}
> +	spin_end();
>  	osq_unlock(&sem->osq);
>  done:
>  	preempt_enable();

And I think you did the recursive spin_begin thing again here.


* Re: [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle
  2017-08-20  9:25 ` [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle Nicholas Piggin
@ 2017-09-01 12:24   ` Peter Zijlstra
  2017-09-06  0:15     ` Nicholas Piggin
  0 siblings, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2017-09-01 12:24 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Ingo Molnar, linux-kernel

On Sun, Aug 20, 2017 at 07:25:02PM +1000, Nicholas Piggin wrote:
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  kernel/sched/idle.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 6c23e30c0e5c..b884980da8ef 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -11,6 +11,7 @@
>  #include <linux/stackprotector.h>
>  #include <linux/suspend.h>
>  #include <linux/livepatch.h>
> +#include <linux/processor.h>
>  
>  #include <asm/tlb.h>
>  
> @@ -64,9 +65,13 @@ static noinline int __cpuidle cpu_idle_poll(void)
>  	trace_cpu_idle_rcuidle(0, smp_processor_id());
>  	local_irq_enable();
>  	stop_critical_timings();
> +
> +	spin_begin();
>  	while (!tif_need_resched() &&
>  		(cpu_idle_force_poll || tick_check_broadcast_expired()))
> -		cpu_relax();
> +		spin_cpu_relax();
> +	spin_end();

Do we want at least one tif_need_resched() check before we drop into low
prio mode?
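
I.e. something like (only sketching the shape):

	if (!tif_need_resched() &&
	    (cpu_idle_force_poll || tick_check_broadcast_expired())) {
		spin_begin();
		while (!tif_need_resched() &&
		       (cpu_idle_force_poll || tick_check_broadcast_expired()))
			spin_cpu_relax();
		spin_end();
	}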

> +
>  	start_critical_timings();
>  	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
>  	rcu_idle_exit();
> -- 
> 2.13.3
> 


* Re: [PATCH 1/2] locking: Use spin primitives for busy loops
  2017-09-01 12:23   ` Peter Zijlstra
@ 2017-09-06  0:08     ` Nicholas Piggin
  0 siblings, 0 replies; 8+ messages in thread
From: Nicholas Piggin @ 2017-09-06  0:08 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel

Hi Peter,

Thanks for the very good reviews. Sorry, I've been a bit busy, and I
figure this didn't get time to make 4.14, so I'll take your comments and
resubmit for the next merge window.

On Fri, 1 Sep 2017 14:23:20 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> This really should have been many patches.

Sure, will do.

> 
> On Sun, Aug 20, 2017 at 07:25:01PM +1000, Nicholas Piggin wrote:
> 
> > @@ -108,12 +109,10 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
> >  {
> >  	unsigned ret;
> >  
> > -repeat:
> >  	ret = READ_ONCE(s->sequence);
> > -	if (unlikely(ret & 1)) {
> > -		cpu_relax();
> > -		goto repeat;
> > -	}
> > +	if (unlikely(ret & 1))
> > +		spin_until_cond( !((ret = READ_ONCE(s->sequence)) & 1) );
> > +  
> 
> That seems to suggest something like: cond_load(), similar to
> smp_cond_load_acquire(), except without the smp/acquire parts.
> 
> >  	return ret;
> >  }
> >  
> > diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
> > index 6a385aabcce7..a91a0cc46a4c 100644
> > --- a/kernel/locking/mcs_spinlock.h
> > +++ b/kernel/locking/mcs_spinlock.h
> > @@ -27,8 +27,7 @@ struct mcs_spinlock {
> >   */
> >  #define arch_mcs_spin_lock_contended(l)					\
> >  do {									\
> > -	while (!(smp_load_acquire(l)))					\
> > -		cpu_relax();						\
> > +	spin_until_cond(smp_load_acquire(l));				\
> >  } while (0)  
> 
> Torn on whether it makes sense to use smp_cond_load_acquire() here
> instead. That avoids issuing the barrier on each loop.

Oh, possibly. I'll take a look at the asm and maybe you have more
opinions when I resubmit as an individual patch.
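
(I.e. presumably something like

	#define arch_mcs_spin_lock_contended(l)				\
	do {								\
		smp_cond_load_acquire(l, VAL);				\
	} while (0)

if I end up going that way.)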

> 
> >  #endif
> >  
> > @@ -107,8 +106,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  		if (likely(cmpxchg_release(lock, node, NULL) == node))
> >  			return;
> >  		/* Wait until the next pointer is set */
> > -		while (!(next = READ_ONCE(node->next)))
> > -			cpu_relax();
> > +		spin_until_cond((next = READ_ONCE(node->next)) != 0);
> >  	}
> >  
> >  	/* Pass lock to next waiter. */  
> 
> Hmm, you could've used that same pattern above too I suppose,
> 
> 	spin_until_cond(!((ret = READ_ONCE(s->sequence)) & 1));
> 
> > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > index 858a07590e39..0ffa1cd7f12b 100644
> > --- a/kernel/locking/mutex.c
> > +++ b/kernel/locking/mutex.c
> > @@ -427,6 +427,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
> >  	bool ret = true;
> >  
> >  	rcu_read_lock();
> > +	spin_begin();
> >  	while (__mutex_owner(lock) == owner) {
> >  		/*
> >  		 * Ensure we emit the owner->on_cpu, dereference _after_
> > @@ -450,8 +451,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
> >  			break;
> >  		}
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  	rcu_read_unlock();  
> 
> Since we just did a trylock we expect to wait, which is why we don't do
> a condition test before spin_begin(), right?

Yes. This is supposed to be the pattern although as you noticed I may have
got it wrong :P

> 
> >  
> >  	return ret;
> > @@ -532,6 +534,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
> >  			goto fail;
> >  	}
> >  
> > +	spin_begin();
> >  	for (;;) {
> >  		struct task_struct *owner;
> >  
> > @@ -553,8 +556,9 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
> >  		 * memory barriers as we'll eventually observe the right
> >  		 * values at the cost of a few extra spins.
> >  		 */
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  
> >  	if (!waiter)
> >  		osq_unlock(&lock->osq);
> > @@ -563,6 +567,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
> >  
> >  
> >  fail_unlock:
> > +	spin_end();
> > +
> >  	if (!waiter)
> >  		osq_unlock(&lock->osq);
> >    
> 
> The same, we just did a trylock before calling this.
> 
> However! I think you just created a nested spin_begin()/spin_end();
> looking at how PPC implements that, this doesn't look right.

You're right, spin_on_owner does not need the spin_begin/spin_end because
it should always be called within spinning context. Thanks!

> 
> > diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
> > index a3167941093b..9dd58bbe60b7 100644
> > --- a/kernel/locking/osq_lock.c
> > +++ b/kernel/locking/osq_lock.c
> > @@ -53,6 +53,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
> >  	 */
> >  	old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
> >  
> > +	spin_begin();
> >  	for (;;) {
> >  		if (atomic_read(&lock->tail) == curr &&
> >  		    atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
> > @@ -80,8 +81,9 @@ osq_wait_next(struct optimistic_spin_queue *lock,
> >  				break;
> >  		}
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  
> >  	return next;
> >  }  
> 
> This however doesn't have a fast-path, we unconditionally do
> spin_begin(). That looks sub-optimal. From looking at PPC that drops the
> HT priority even though we might not end up waiting at all.

Okay. Should we have a little spin loop at the end there which spins
on lock->tail == curr || node->next ?
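
I.e. something like (hypothetical)

	spin_until_cond(atomic_read(&lock->tail) == curr ||
			READ_ONCE(node->next));

as the waiting step, so we only stay at low priority while neither exit
condition holds?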

> 
> 
> > @@ -107,6 +109,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
> >  	if (old == OSQ_UNLOCKED_VAL)
> >  		return true;
> >  
> > +	spin_begin();
> > +
> >  	prev = decode_cpu(old);
> >  	node->prev = prev;
> >  	WRITE_ONCE(prev->next, node);  
> 
> Why so early? You again do HMT_low() even though we might not hit the
> cpu_relax().

I was thinking this is a general slowpath, but I guess we don't want to
go to low priority if we are to unqueue and reschedule. Thanks.

> 
> > @@ -129,8 +133,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
> >  		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
> >  			goto unqueue;
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  	return true;
> >  
> >  unqueue:  
> 
> 
> 
> > @@ -152,10 +157,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
> >  		 * in which case we should observe @node->locked becomming
> >  		 * true.
> >  		 */
> > -		if (smp_load_acquire(&node->locked))
> > +		if (smp_load_acquire(&node->locked)) {
> > +			spin_end();
> >  			return true;
> > +		}
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  
> >  		/*
> >  		 * Or we race against a concurrent unqueue()'s step-B, in which
> > @@ -164,6 +171,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
> >  		prev = READ_ONCE(node->prev);
> >  	}
> >  
> > +	spin_end();
> > +  
> 
> So here we did all of step-A in low-prio, that doesn't look right.

Yep, will fix.

> 
> >  	/*
> >  	 * Step - B -- stabilize @next
> >  	 *
> > diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
> > index 2655f26ec882..186ff495097d 100644
> > --- a/kernel/locking/qrwlock.c
> > +++ b/kernel/locking/qrwlock.c
> > @@ -54,10 +54,12 @@ struct __qrwlock {
> >  static __always_inline void
> >  rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
> >  {
> > +	spin_begin();
> >  	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  		cnts = atomic_read_acquire(&lock->cnts);
> >  	}
> > +	spin_end();
> >  }  
> 
> Again, unconditional :/

This is not always a slowpath?

> 
> >  /**
> > @@ -124,6 +126,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
> >  	 * Set the waiting flag to notify readers that a writer is pending,
> >  	 * or wait for a previous writer to go away.
> >  	 */
> > +	spin_begin();
> >  	for (;;) {
> >  		struct __qrwlock *l = (struct __qrwlock *)lock;
> >  
> > @@ -131,7 +134,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
> >  		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
> >  			break;
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> >  	/* When no more readers, set the locked flag */
> > @@ -142,8 +145,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
> >  					    _QW_LOCKED) == _QW_WAITING))
> >  			break;
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> > +
> >  unlock:
> >  	arch_spin_unlock(&lock->wait_lock);
> >  }  
> 
> Would not something like:
> 
> 	do {
> 		spin_until_cond(!READ_ONCE(l->wmode));
> 	} while (try_cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING));
> 
> Be a better pattern here? Then we have a fast-path without low-medium
> flips.

Could be so, I'll take a look.


> > diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> > index 4ccfcaae5b89..88817e41fadf 100644
> > --- a/kernel/locking/qspinlock_paravirt.h
> > +++ b/kernel/locking/qspinlock_paravirt.h
> > @@ -293,15 +293,19 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
> >  	bool wait_early;
> >  
> >  	for (;;) {
> > +		spin_begin();  
> 
> again, unconditional..

Hmm, I also thought we could be expected to spin here. I'll try to
understand the code better.

> > @@ -373,12 +374,14 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
> >  		 */
> >  		if (!owner->on_cpu || need_resched() ||
> >  				vcpu_is_preempted(task_cpu(owner))) {
> > +			spin_end();
> >  			rcu_read_unlock();
> >  			return false;
> >  		}
> >  
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  	rcu_read_unlock();
> >  out:
> >  	/*  
> 
> rwsem_spin_on_owner() is different from the mutex one in that we don't
> first do a trylock() which then makes this unconditional.
> 
> So I think we want the rwsem_optimistic_spin() main loop re-ordered to
> first do the trylock and _then_ wait if it fails. Not first wait and
> then try.

Okay thanks.

> 
> > @@ -408,6 +411,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
> >  	 *  2) readers own the lock as we can't determine if they are
> >  	 *     actively running or not.
> >  	 */
> > +	spin_begin();
> >  	while (rwsem_spin_on_owner(sem)) {
> >  		/*
> >  		 * Try to acquire the lock
> > @@ -432,8 +436,9 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
> >  		 * memory barriers as we'll eventually observe the right
> >  		 * values at the cost of a few extra spins.
> >  		 */
> > -		cpu_relax();
> > +		spin_cpu_relax();
> >  	}
> > +	spin_end();
> >  	osq_unlock(&sem->osq);
> >  done:
> >  	preempt_enable();  
> 
> And I think you did the recursive spin_begin thing again here.

Much appreciated! Thanks Peter I'll split up and resubmit after the
merge window closes.

Thanks,
Nick


* Re: [PATCH 2/2] sched/idle: Use spin loop primitives for polling idle
  2017-09-01 12:24   ` Peter Zijlstra
@ 2017-09-06  0:15     ` Nicholas Piggin
  0 siblings, 0 replies; 8+ messages in thread
From: Nicholas Piggin @ 2017-09-06  0:15 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel

On Fri, 1 Sep 2017 14:24:54 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Sun, Aug 20, 2017 at 07:25:02PM +1000, Nicholas Piggin wrote:
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  kernel/sched/idle.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> > index 6c23e30c0e5c..b884980da8ef 100644
> > --- a/kernel/sched/idle.c
> > +++ b/kernel/sched/idle.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/stackprotector.h>
> >  #include <linux/suspend.h>
> >  #include <linux/livepatch.h>
> > +#include <linux/processor.h>
> >  
> >  #include <asm/tlb.h>
> >  
> > @@ -64,9 +65,13 @@ static noinline int __cpuidle cpu_idle_poll(void)
> >  	trace_cpu_idle_rcuidle(0, smp_processor_id());
> >  	local_irq_enable();
> >  	stop_critical_timings();
> > +
> > +	spin_begin();
> >  	while (!tif_need_resched() &&
> >  		(cpu_idle_force_poll || tick_check_broadcast_expired()))
> > -		cpu_relax();
> > +		spin_cpu_relax();
> > +	spin_end();  
> 
> Do we want at least one tif_need_resched() check before we drop into low
> prio mode?

Well, we've already done one to get here. My thinking is that once we
decide to go idle, we should get there as fast as we can and avoid too
much icache and branch overhead on the way down. Admittedly the existing
cpu_relax code does a check first, but the difference there is that it
can make such a
check without adding extra code. It's more difficult for spin_begin/spin_end
to do the same, so I don't want to add extra junk that other CPUs don't need.
I don't think it will hurt powerpc.

Thanks,
Nick
