* [PATCH 0/3] locking/rwsem: __rwsem_mark_wake() improvements
@ 2016-08-05  8:04 Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2016-08-05  8:04 UTC (permalink / raw)
  To: peterz, mingo; +Cc: Waiman.Long, jason.low2, wanpeng.li, dave, linux-kernel

Hi,

Wanpeng recently reminded me of how horrible this function is, and we
are now in a position to improve it; see patch 3 for details.

Please consider for v4.9.

Thanks!

Davidlohr Bueso (3):
  locking/rwsem: Return void in __rwsem_mark_wake()
  locking/rwsem: Remove a few useless comments
  locking/rwsem: Scan the wait_list for readers only once

 kernel/locking/rwsem-xadd.c | 92 ++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 51 deletions(-)

-- 
2.6.6

* [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake()
  2016-08-05  8:04 [PATCH 0/3] locking/rwsem: __rwsem_mark_wake() improvements Davidlohr Bueso
@ 2016-08-05  8:04 ` Davidlohr Bueso
  2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
  2016-08-18 13:41   ` tip-bot for Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 2/3] locking/rwsem: Remove a few useless comments Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
  2 siblings, 2 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2016-08-05  8:04 UTC (permalink / raw)
  To: peterz, mingo
  Cc: Waiman.Long, jason.low2, wanpeng.li, dave, linux-kernel, Davidlohr Bueso

We currently return a rw_semaphore structure, which is the
same lock that was passed in as the function's argument in
the first place. While several functions do choose this
return value so that callers can use it for things like
ERR_PTR, that is not the case for __rwsem_mark_wake(). In
addition, this function is really about the lock waiters
(which we know exist at this point), so it's somewhat odd
to be returning the sem structure.
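
For contrast, here is a minimal sketch (illustrative only; the
helper name is made up) of the pattern alluded to above, where
returning the lock pointer is useful because the slowpath can
hand back an error via ERR_PTR(), much as the killable write
slowpath does:

	/* illustration: a lock-returning slowpath that can fail */
	static struct rw_semaphore *
	example_down_failed(struct rw_semaphore *sem, int state)
	{
		if (signal_pending_state(state, current))
			return ERR_PTR(-EINTR);	/* caller checks IS_ERR() */

		return sem;	/* success: the same lock that was passed in */
	}

__rwsem_mark_wake() has no such failure mode, so its return
value carries no information.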

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 kernel/locking/rwsem-xadd.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 447e08de1fab..b03623172277 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -121,16 +121,17 @@ enum rwsem_wake_type {
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
  */
-static struct rw_semaphore *
-__rwsem_mark_wake(struct rw_semaphore *sem,
-		  enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
+static void __rwsem_mark_wake(struct rw_semaphore *sem,
+			      enum rwsem_wake_type wake_type,
+			      struct wake_q_head *wake_q)
 {
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	long oldcount, woken, loop, adjustment;
+	long loop, oldcount, woken = 0, adjustment = 0;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
 			/*
@@ -142,19 +143,19 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			wake_q_add(wake_q, waiter->task);
 		}
-		goto out;
+
+		return;
 	}
 
-	/* Writers might steal the lock before we grant it to the next reader.
+	/*
+	 * Writers might steal the lock before we grant it to the next reader.
 	 * We prefer to do the first reader grant before counting readers
 	 * so we can bail out early if a writer stole the lock.
 	 */
-	adjustment = 0;
 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
 		adjustment = RWSEM_ACTIVE_READ_BIAS;
  try_reader_grant:
 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-
 		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
 			/*
 			 * If the count is still less than RWSEM_WAITING_BIAS
@@ -164,7 +165,8 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			if (atomic_long_add_return(-adjustment, &sem->count) <
 			    RWSEM_WAITING_BIAS)
-				goto out;
+				return;
+
 			/* Last active locker left. Retry waking readers. */
 			goto try_reader_grant;
 		}
@@ -176,11 +178,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 		rwsem_set_reader_owned(sem);
 	}
 
-	/* Grant an infinite number of read locks to the readers at the front
+	/*
+	 * Grant an infinite number of read locks to the readers at the front
 	 * of the queue.  Note we increment the 'active part' of the count by
 	 * the number of readers before waking any processes up.
 	 */
-	woken = 0;
 	do {
 		woken++;
 
@@ -219,9 +221,6 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	sem->wait_list.next = next;
 	next->prev = &sem->wait_list;
-
- out:
-	return sem;
 }
 
 /*
@@ -255,7 +254,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
 	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
@@ -505,7 +504,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		if (count > RWSEM_WAITING_BIAS) {
 			WAKE_Q(wake_q);
 
-			sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
 			 * is released, but given that we are proactively waking
@@ -616,7 +615,7 @@ locked:
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
@@ -640,7 +639,7 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
-- 
2.6.6

* [PATCH 2/3] locking/rwsem: Remove a few useless comments
  2016-08-05  8:04 [PATCH 0/3] locking/rwsem: __rwsem_mark_wake() improvements Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
@ 2016-08-05  8:04 ` Davidlohr Bueso
  2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
  2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
  2 siblings, 2 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2016-08-05  8:04 UTC (permalink / raw)
  To: peterz, mingo
  Cc: Waiman.Long, jason.low2, wanpeng.li, dave, linux-kernel, Davidlohr Bueso

Our rwsem code (xadd, at least) is rather well documented, but
there are a few really annoying comments in there that serve
no purpose, so we shouldn't bother with them.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 kernel/locking/rwsem-xadd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b03623172277..e02fe3289b5a 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -234,7 +234,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	struct task_struct *tsk = current;
 	WAKE_Q(wake_q);
 
-	/* set up my own style of waitqueue */
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
 
@@ -613,7 +612,6 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 locked:
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
@@ -637,7 +635,6 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
-- 
2.6.6

* [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once
  2016-08-05  8:04 [PATCH 0/3] locking/rwsem: __rwsem_mark_wake() improvements Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
  2016-08-05  8:04 ` [PATCH 2/3] locking/rwsem: Remove a few useless comments Davidlohr Bueso
@ 2016-08-05  8:04 ` Davidlohr Bueso
  2016-08-06  6:05   ` Ingo Molnar
                     ` (2 more replies)
  2 siblings, 3 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2016-08-05  8:04 UTC (permalink / raw)
  To: peterz, mingo
  Cc: Waiman.Long, jason.low2, wanpeng.li, dave, linux-kernel, Davidlohr Bueso

When waking up readers, __rwsem_mark_wake() currently
iterates the wait_list twice while looking to wake up the
first N queued reader-tasks. While this can be quite
inefficient, it was done such that an awoken reader would
be first and foremost acknowledged by the lock counter.

Keeping the same logic, we can further benefit from the use
of wake_qs and avoid the first wait_list iteration (the one
that sets the counter) entirely: since wake_up_process()
isn't going to occur right away, the count is still updated
before any reader actually runs, and we therefore maintain
the counter->list ordering.
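
As a rough sketch (not the patch itself), this is the wake_q
usage pattern that makes the single pass safe: the actual
wakeups are deferred until after the count is adjusted and
the wait_lock is dropped, so the counter update always
precedes any wakeup:

	WAKE_Q(wake_q);				/* on-stack wake queue */

	raw_spin_lock_irq(&sem->wait_lock);
	/* adjust sem->count, dequeue waiters, queue each task: */
	wake_q_add(&wake_q, waiter->task);
	raw_spin_unlock_irq(&sem->wait_lock);

	wake_up_q(&wake_q);	/* wake_up_process() only happens here */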

Other than saving cycles with O(n) "scanning", this change
also nicely cleans up a good chunk of __rwsem_mark_wake(),
making it both visually cleaner and less tedious to read.

For example, the following improvements were seen on some
will-it-scale microbenchmarks, on a 48-core Haswell:

                                     v4.7              v4.7-rwsem-v1
Hmean    signal1-processes-8    5792691.42 (  0.00%)  5771971.04 ( -0.36%)
Hmean    signal1-processes-12   6081199.96 (  0.00%)  6072174.38 ( -0.15%)
Hmean    signal1-processes-21   3071137.71 (  0.00%)  3041336.72 ( -0.97%)
Hmean    signal1-processes-48   3712039.98 (  0.00%)  3708113.59 ( -0.11%)
Hmean    signal1-processes-79   4464573.45 (  0.00%)  4682798.66 (  4.89%)
Hmean    signal1-processes-110  4486842.01 (  0.00%)  4633781.71 (  3.27%)
Hmean    signal1-processes-141  4611816.83 (  0.00%)  4692725.38 (  1.75%)
Hmean    signal1-processes-172  4638157.05 (  0.00%)  4714387.86 (  1.64%)
Hmean    signal1-processes-203  4465077.80 (  0.00%)  4690348.07 (  5.05%)
Hmean    signal1-processes-224  4410433.74 (  0.00%)  4687534.43 (  6.28%)

Stddev   signal1-processes-8       6360.47 (  0.00%)     8455.31 ( 32.94%)
Stddev   signal1-processes-12      4004.98 (  0.00%)     9156.13 (128.62%)
Stddev   signal1-processes-21      3273.14 (  0.00%)     5016.80 ( 53.27%)
Stddev   signal1-processes-48     28420.25 (  0.00%)    26576.22 ( -6.49%)
Stddev   signal1-processes-79     22038.34 (  0.00%)    18992.70 (-13.82%)
Stddev   signal1-processes-110    23226.93 (  0.00%)    17245.79 (-25.75%)
Stddev   signal1-processes-141     6358.98 (  0.00%)     7636.14 ( 20.08%)
Stddev   signal1-processes-172     9523.70 (  0.00%)     4824.75 (-49.34%)
Stddev   signal1-processes-203    13915.33 (  0.00%)     9326.33 (-32.98%)
Stddev   signal1-processes-224    15573.94 (  0.00%)    10613.82 (-31.85%)

Other runs that saw improvements include context_switch and
pipe; as expected, the gains are particularly highlighted at
larger thread counts, where walking the list twice becomes
more expensive.

No change in wakeup ordering or semantics.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 kernel/locking/rwsem-xadd.c | 58 ++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index e02fe3289b5a..2337b4bb2366 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 			      enum rwsem_wake_type wake_type,
 			      struct wake_q_head *wake_q)
 {
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	struct list_head *next;
-	long loop, oldcount, woken = 0, adjustment = 0;
+	struct rwsem_waiter *waiter, *tmp;
+	long oldcount, woken = 0, adjustment = 0;
 
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	/*
+	 * Take a peek at the queue head waiter such that we can determine
+	 * the wakeup(s) to perform.
+	 */
+	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
 
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
@@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	/*
 	 * Grant an infinite number of read locks to the readers at the front
-	 * of the queue.  Note we increment the 'active part' of the count by
-	 * the number of readers before waking any processes up.
+	 * of the queue. We know that woken will be at least 1 as we accounted
+	 * for above. Note we increment the 'active part' of the count by the
+	 * number of readers before waking any processes up.
 	 */
-	do {
-		woken++;
+	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+		struct task_struct *tsk;
 
-		if (waiter->list.next == &sem->wait_list)
+		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
 			break;
 
-		waiter = list_entry(waiter->list.next,
-					struct rwsem_waiter, list);
-
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
-		/* hit end of list above */
-		adjustment -= RWSEM_WAITING_BIAS;
-
-	if (adjustment)
-		atomic_long_add(adjustment, &sem->count);
-
-	next = sem->wait_list.next;
-	loop = woken;
-	do {
-		waiter = list_entry(next, struct rwsem_waiter, list);
-		next = waiter->list.next;
+		woken++;
 		tsk = waiter->task;
 
 		wake_q_add(wake_q, tsk);
+		list_del(&waiter->list);
 		/*
 		 * Ensure that the last operation is setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
@@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
-	} while (--loop);
+	}
 
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	if (list_empty(&sem->wait_list)) {
+		/* hit end of list above */
+		adjustment -= RWSEM_WAITING_BIAS;
+	}
+
+	if (adjustment)
+		atomic_long_add(adjustment, &sem->count);
 }
 
 /*
@@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
 
-	/* If there are no active locks, wake the front queued process(es).
+	/*
+	 * If there are no active locks, wake the front queued process(es).
 	 *
 	 * If there are no writers and we are first in the queue,
 	 * wake our own waiter to join the existing active readers !
-- 
2.6.6

* Re: [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once
  2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
@ 2016-08-06  6:05   ` Ingo Molnar
  2016-08-08 16:37     ` Davidlohr Bueso
  2016-08-18 11:01   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
  2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  2 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2016-08-06  6:05 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: peterz, Waiman.Long, jason.low2, wanpeng.li, linux-kernel,
	Davidlohr Bueso


* Davidlohr Bueso <dave@stgolabs.net> wrote:

> When waking up readers, __rwsem_mark_wake() currently
> iterates the wait_list twice while looking to wake up the
> first N queued reader-tasks. While this can be quite
> inefficient, it was done such that an awoken reader would
> be first and foremost acknowledged by the lock counter.
> 
> Keeping the same logic, we can further benefit from the use
> of wake_qs and avoid the first wait_list iteration (the one
> that sets the counter) entirely: since wake_up_process()
> isn't going to occur right away, the count is still updated
> before any reader actually runs, and we therefore maintain
> the counter->list ordering.
> 
> Other than saving cycles with O(n) "scanning", this change
> also nicely cleans up a good chunk of __rwsem_mark_wake(),
> making it both visually cleaner and less tedious to read.
> 
> For example, the following improvements were seen on some
> will-it-scale microbenchmarks, on a 48-core Haswell:
> 
>                                      v4.7              v4.7-rwsem-v1
> Hmean    signal1-processes-8    5792691.42 (  0.00%)  5771971.04 ( -0.36%)
> Hmean    signal1-processes-12   6081199.96 (  0.00%)  6072174.38 ( -0.15%)
> Hmean    signal1-processes-21   3071137.71 (  0.00%)  3041336.72 ( -0.97%)
> Hmean    signal1-processes-48   3712039.98 (  0.00%)  3708113.59 ( -0.11%)
> Hmean    signal1-processes-79   4464573.45 (  0.00%)  4682798.66 (  4.89%)
> Hmean    signal1-processes-110  4486842.01 (  0.00%)  4633781.71 (  3.27%)
> Hmean    signal1-processes-141  4611816.83 (  0.00%)  4692725.38 (  1.75%)
> Hmean    signal1-processes-172  4638157.05 (  0.00%)  4714387.86 (  1.64%)
> Hmean    signal1-processes-203  4465077.80 (  0.00%)  4690348.07 (  5.05%)
> Hmean    signal1-processes-224  4410433.74 (  0.00%)  4687534.43 (  6.28%)

Please always make it clear in changelogs what the numbers mean, that higher 
numbers are better, etc. - so that people don't have to re-read it 2-3 times to 
figure out what it means.

Also, what are 'will-it-scale' microbenchmarks?

Thanks,

	Ingo

* Re: [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once
  2016-08-06  6:05   ` Ingo Molnar
@ 2016-08-08 16:37     ` Davidlohr Bueso
  0 siblings, 0 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2016-08-08 16:37 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: peterz, Waiman.Long, jason.low2, wanpeng.li, linux-kernel,
	Davidlohr Bueso

On Sat, 06 Aug 2016, Ingo Molnar wrote:

>Also, what are 'will-it-scale' microbenchmarks?

https://github.com/antonblanchard/will-it-scale

Thanks,
Davidlohr

* [tip:locking/core] locking/rwsem: Return void in __rwsem_mark_wake()
  2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
@ 2016-08-18 11:00   ` tip-bot for Davidlohr Bueso
  2016-08-18 13:41   ` tip-bot for Davidlohr Bueso
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 11:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, akpm, dbueso, torvalds, mingo, linux-kernel, paulmck,
	hpa, dave, tglx

Commit-ID:  c12e5d30677063af69efc6f898beb714012d5af6
Gitweb:     http://git.kernel.org/tip/c12e5d30677063af69efc6f898beb714012d5af6
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:43 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 11:35:39 +0200

locking/rwsem: Return void in __rwsem_mark_wake()

We currently return a rw_semaphore structure, which is the
same lock that was passed in as the function's argument in
the first place. While several functions do choose this
return value so that callers can use it for things like
ERR_PTR, that is not the case for __rwsem_mark_wake(). In
addition, this function is really about the lock waiters
(which we know exist at this point), so it's somewhat odd
to be returning the sem structure.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 447e08d..b036231 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -121,16 +121,17 @@ enum rwsem_wake_type {
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
  */
-static struct rw_semaphore *
-__rwsem_mark_wake(struct rw_semaphore *sem,
-		  enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
+static void __rwsem_mark_wake(struct rw_semaphore *sem,
+			      enum rwsem_wake_type wake_type,
+			      struct wake_q_head *wake_q)
 {
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	long oldcount, woken, loop, adjustment;
+	long loop, oldcount, woken = 0, adjustment = 0;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
 			/*
@@ -142,19 +143,19 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			wake_q_add(wake_q, waiter->task);
 		}
-		goto out;
+
+		return;
 	}
 
-	/* Writers might steal the lock before we grant it to the next reader.
+	/*
+	 * Writers might steal the lock before we grant it to the next reader.
 	 * We prefer to do the first reader grant before counting readers
 	 * so we can bail out early if a writer stole the lock.
 	 */
-	adjustment = 0;
 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
 		adjustment = RWSEM_ACTIVE_READ_BIAS;
  try_reader_grant:
 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-
 		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
 			/*
 			 * If the count is still less than RWSEM_WAITING_BIAS
@@ -164,7 +165,8 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			if (atomic_long_add_return(-adjustment, &sem->count) <
 			    RWSEM_WAITING_BIAS)
-				goto out;
+				return;
+
 			/* Last active locker left. Retry waking readers. */
 			goto try_reader_grant;
 		}
@@ -176,11 +178,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 		rwsem_set_reader_owned(sem);
 	}
 
-	/* Grant an infinite number of read locks to the readers at the front
+	/*
+	 * Grant an infinite number of read locks to the readers at the front
 	 * of the queue.  Note we increment the 'active part' of the count by
 	 * the number of readers before waking any processes up.
 	 */
-	woken = 0;
 	do {
 		woken++;
 
@@ -219,9 +221,6 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	sem->wait_list.next = next;
 	next->prev = &sem->wait_list;
-
- out:
-	return sem;
 }
 
 /*
@@ -255,7 +254,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
 	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
@@ -505,7 +504,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		if (count > RWSEM_WAITING_BIAS) {
 			WAKE_Q(wake_q);
 
-			sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
 			 * is released, but given that we are proactively waking
@@ -616,7 +615,7 @@ locked:
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
@@ -640,7 +639,7 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);

* [tip:locking/core] locking/rwsem: Remove a few useless comments
  2016-08-05  8:04 ` [PATCH 2/3] locking/rwsem: Remove a few useless comments Davidlohr Bueso
@ 2016-08-18 11:00   ` tip-bot for Davidlohr Bueso
  2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 11:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, paulmck, torvalds, dbueso, dave, linux-kernel, peterz, hpa,
	akpm, mingo

Commit-ID:  d6094b9650a7cfc5cc0f799d55741365f5b8424f
Gitweb:     http://git.kernel.org/tip/d6094b9650a7cfc5cc0f799d55741365f5b8424f
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:44 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 11:35:39 +0200

locking/rwsem: Remove a few useless comments

Our rwsem code (xadd, at least) is rather well documented, but
there are a few really annoying comments in there that serve
no purpose, so we shouldn't bother with them.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b036231..e02fe32 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -234,7 +234,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	struct task_struct *tsk = current;
 	WAKE_Q(wake_q);
 
-	/* set up my own style of waitqueue */
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
 
@@ -613,7 +612,6 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 locked:
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
@@ -637,7 +635,6 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 

* [tip:locking/core] locking/rwsem: Scan the wait_list for readers only once
  2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
  2016-08-06  6:05   ` Ingo Molnar
@ 2016-08-18 11:01   ` tip-bot for Davidlohr Bueso
  2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  2 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 11:01 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, dave, peterz, tglx, paulmck, dbueso, linux-kernel,
	torvalds, hpa, akpm

Commit-ID:  1dc3e13144720bd1b0adbcfac044234c2d09b2a4
Gitweb:     http://git.kernel.org/tip/1dc3e13144720bd1b0adbcfac044234c2d09b2a4
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:45 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 11:35:39 +0200

locking/rwsem: Scan the wait_list for readers only once

When waking up readers, __rwsem_mark_wake() currently
iterates the wait_list twice while looking to wake up the
first N queued reader-tasks. While this can be quite
inefficient, it was done such that an awoken reader would
be first and foremost acknowledged by the lock counter.

Keeping the same logic, we can further benefit from the use
of wake_qs and avoid the first wait_list iteration (the one
that sets the counter) entirely: since wake_up_process()
isn't going to occur right away, the count is still updated
before any reader actually runs, and we therefore maintain
the counter->list ordering.

Other than saving cycles with O(n) "scanning", this change
also nicely cleans up a good chunk of __rwsem_mark_wake(),
making it both visually cleaner and less tedious to read.

For example, the following improvements were seen on some
will-it-scale microbenchmarks, on a 48-core Haswell:

                                       v4.7              v4.7-rwsem-v1
  Hmean    signal1-processes-8    5792691.42 (  0.00%)  5771971.04 ( -0.36%)
  Hmean    signal1-processes-12   6081199.96 (  0.00%)  6072174.38 ( -0.15%)
  Hmean    signal1-processes-21   3071137.71 (  0.00%)  3041336.72 ( -0.97%)
  Hmean    signal1-processes-48   3712039.98 (  0.00%)  3708113.59 ( -0.11%)
  Hmean    signal1-processes-79   4464573.45 (  0.00%)  4682798.66 (  4.89%)
  Hmean    signal1-processes-110  4486842.01 (  0.00%)  4633781.71 (  3.27%)
  Hmean    signal1-processes-141  4611816.83 (  0.00%)  4692725.38 (  1.75%)
  Hmean    signal1-processes-172  4638157.05 (  0.00%)  4714387.86 (  1.64%)
  Hmean    signal1-processes-203  4465077.80 (  0.00%)  4690348.07 (  5.05%)
  Hmean    signal1-processes-224  4410433.74 (  0.00%)  4687534.43 (  6.28%)

  Stddev   signal1-processes-8       6360.47 (  0.00%)     8455.31 ( 32.94%)
  Stddev   signal1-processes-12      4004.98 (  0.00%)     9156.13 (128.62%)
  Stddev   signal1-processes-21      3273.14 (  0.00%)     5016.80 ( 53.27%)
  Stddev   signal1-processes-48     28420.25 (  0.00%)    26576.22 ( -6.49%)
  Stddev   signal1-processes-79     22038.34 (  0.00%)    18992.70 (-13.82%)
  Stddev   signal1-processes-110    23226.93 (  0.00%)    17245.79 (-25.75%)
  Stddev   signal1-processes-141     6358.98 (  0.00%)     7636.14 ( 20.08%)
  Stddev   signal1-processes-172     9523.70 (  0.00%)     4824.75 (-49.34%)
  Stddev   signal1-processes-203    13915.33 (  0.00%)     9326.33 (-32.98%)
  Stddev   signal1-processes-224    15573.94 (  0.00%)    10613.82 (-31.85%)

Other runs that saw improvements include context_switch and
pipe; as expected, the gains are particularly highlighted at
larger thread counts, where walking the list twice becomes
more expensive.

No change in wakeup ordering or semantics.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 58 ++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index e02fe32..2337b4b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 			      enum rwsem_wake_type wake_type,
 			      struct wake_q_head *wake_q)
 {
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	struct list_head *next;
-	long loop, oldcount, woken = 0, adjustment = 0;
+	struct rwsem_waiter *waiter, *tmp;
+	long oldcount, woken = 0, adjustment = 0;
 
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	/*
+	 * Take a peek at the queue head waiter such that we can determine
+	 * the wakeup(s) to perform.
+	 */
+	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
 
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
@@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	/*
 	 * Grant an infinite number of read locks to the readers at the front
-	 * of the queue.  Note we increment the 'active part' of the count by
-	 * the number of readers before waking any processes up.
+	 * of the queue. We know that woken will be at least 1 as we accounted
+	 * for above. Note we increment the 'active part' of the count by the
+	 * number of readers before waking any processes up.
 	 */
-	do {
-		woken++;
+	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+		struct task_struct *tsk;
 
-		if (waiter->list.next == &sem->wait_list)
+		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
 			break;
 
-		waiter = list_entry(waiter->list.next,
-					struct rwsem_waiter, list);
-
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
-		/* hit end of list above */
-		adjustment -= RWSEM_WAITING_BIAS;
-
-	if (adjustment)
-		atomic_long_add(adjustment, &sem->count);
-
-	next = sem->wait_list.next;
-	loop = woken;
-	do {
-		waiter = list_entry(next, struct rwsem_waiter, list);
-		next = waiter->list.next;
+		woken++;
 		tsk = waiter->task;
 
 		wake_q_add(wake_q, tsk);
+		list_del(&waiter->list);
 		/*
 		 * Ensure that the last operation is setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
@@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
-	} while (--loop);
+	}
 
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	if (list_empty(&sem->wait_list)) {
+		/* hit end of list above */
+		adjustment -= RWSEM_WAITING_BIAS;
+	}
+
+	if (adjustment)
+		atomic_long_add(adjustment, &sem->count);
 }
 
 /*
@@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
 
-	/* If there are no active locks, wake the front queued process(es).
+	/*
+	 * If there are no active locks, wake the front queued process(es).
 	 *
 	 * If there are no writers and we are first in the queue,
 	 * wake our own waiter to join the existing active readers !

* [tip:locking/core] locking/rwsem: Return void in __rwsem_mark_wake()
  2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
  2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
@ 2016-08-18 13:41   ` tip-bot for Davidlohr Bueso
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 13:41 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: torvalds, tglx, paulmck, peterz, dbueso, akpm, linux-kernel,
	dave, hpa, mingo

Commit-ID:  84b23f9b58687a11ced66cc4be9b0219e8ecab84
Gitweb:     http://git.kernel.org/tip/84b23f9b58687a11ced66cc4be9b0219e8ecab84
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:43 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 15:37:03 +0200

locking/rwsem: Return void in __rwsem_mark_wake()

We currently return a rw_semaphore structure, which is the
same lock that was passed in as the function's argument in
the first place. While several functions do choose this
return value so that callers can use it for things like
ERR_PTR, that is not the case for __rwsem_mark_wake(). In
addition, this function is really about the lock waiters
(which we know exist at this point), so it's somewhat odd
to be returning the sem structure.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 447e08d..b036231 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -121,16 +121,17 @@ enum rwsem_wake_type {
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
  */
-static struct rw_semaphore *
-__rwsem_mark_wake(struct rw_semaphore *sem,
-		  enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
+static void __rwsem_mark_wake(struct rw_semaphore *sem,
+			      enum rwsem_wake_type wake_type,
+			      struct wake_q_head *wake_q)
 {
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	long oldcount, woken, loop, adjustment;
+	long loop, oldcount, woken = 0, adjustment = 0;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
 			/*
@@ -142,19 +143,19 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			wake_q_add(wake_q, waiter->task);
 		}
-		goto out;
+
+		return;
 	}
 
-	/* Writers might steal the lock before we grant it to the next reader.
+	/*
+	 * Writers might steal the lock before we grant it to the next reader.
 	 * We prefer to do the first reader grant before counting readers
 	 * so we can bail out early if a writer stole the lock.
 	 */
-	adjustment = 0;
 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
 		adjustment = RWSEM_ACTIVE_READ_BIAS;
  try_reader_grant:
 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-
 		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
 			/*
 			 * If the count is still less than RWSEM_WAITING_BIAS
@@ -164,7 +165,8 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 			 */
 			if (atomic_long_add_return(-adjustment, &sem->count) <
 			    RWSEM_WAITING_BIAS)
-				goto out;
+				return;
+
 			/* Last active locker left. Retry waking readers. */
 			goto try_reader_grant;
 		}
@@ -176,11 +178,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 		rwsem_set_reader_owned(sem);
 	}
 
-	/* Grant an infinite number of read locks to the readers at the front
+	/*
+	 * Grant an infinite number of read locks to the readers at the front
 	 * of the queue.  Note we increment the 'active part' of the count by
 	 * the number of readers before waking any processes up.
 	 */
-	woken = 0;
 	do {
 		woken++;
 
@@ -219,9 +221,6 @@ __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	sem->wait_list.next = next;
 	next->prev = &sem->wait_list;
-
- out:
-	return sem;
 }
 
 /*
@@ -255,7 +254,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
 	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
@@ -505,7 +504,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		if (count > RWSEM_WAITING_BIAS) {
 			WAKE_Q(wake_q);
 
-			sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
 			 * is released, but given that we are proactively waking
@@ -616,7 +615,7 @@ locked:
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
@@ -640,7 +639,7 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);

* [tip:locking/core] locking/rwsem: Remove a few useless comments
  2016-08-05  8:04 ` [PATCH 2/3] locking/rwsem: Remove a few useless comments Davidlohr Bueso
  2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
@ 2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 13:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, dave, paulmck, dbueso, peterz, akpm, torvalds,
	mingo, tglx, hpa

Commit-ID:  c2867bbaf5d8f1534cae15175a389c5cbf58fec1
Gitweb:     http://git.kernel.org/tip/c2867bbaf5d8f1534cae15175a389c5cbf58fec1
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:44 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 15:37:07 +0200

locking/rwsem: Remove a few useless comments

Our rwsem code (xadd, at least) is rather well documented, but
there are a few really annoying comments in there that serve
no purpose, so we shouldn't bother with them.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b036231..e02fe32 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -234,7 +234,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	struct task_struct *tsk = current;
 	WAKE_Q(wake_q);
 
-	/* set up my own style of waitqueue */
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
 
@@ -613,7 +612,6 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 locked:
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
@@ -637,7 +635,6 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 

* [tip:locking/core] locking/rwsem: Scan the wait_list for readers only once
  2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
  2016-08-06  6:05   ` Ingo Molnar
  2016-08-18 11:01   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
@ 2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
  2 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2016-08-18 13:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dbueso, linux-kernel, torvalds, peterz, hpa, paulmck, akpm, tglx,
	mingo, dave

Commit-ID:  70800c3c0cc525baa38fd0fe4660f2c27f1bfeeb
Gitweb:     http://git.kernel.org/tip/70800c3c0cc525baa38fd0fe4660f2c27f1bfeeb
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 5 Aug 2016 01:04:45 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 15:37:11 +0200

locking/rwsem: Scan the wait_list for readers only once

When waking up readers, __rwsem_mark_wake() currently
iterates the wait_list twice while looking to wake up the
first N queued reader-tasks. While this can be quite
inefficient, it was done such that an awoken reader would
be first and foremost acknowledged by the lock counter.

Keeping the same logic, we can further benefit from the use
of wake_qs and avoid the first wait_list iteration (the one
that sets the counter) entirely: since wake_up_process()
isn't going to occur right away, the count is still updated
before any reader actually runs, and we therefore maintain
the counter->list ordering.

Other than saving cycles with O(n) "scanning", this change
also nicely cleans up a good chunk of __rwsem_mark_wake(),
making it both visually cleaner and less tedious to read.

For example, the following improvements were seen on some
will-it-scale microbenchmarks, on a 48-core Haswell:

                                       v4.7              v4.7-rwsem-v1
  Hmean    signal1-processes-8    5792691.42 (  0.00%)  5771971.04 ( -0.36%)
  Hmean    signal1-processes-12   6081199.96 (  0.00%)  6072174.38 ( -0.15%)
  Hmean    signal1-processes-21   3071137.71 (  0.00%)  3041336.72 ( -0.97%)
  Hmean    signal1-processes-48   3712039.98 (  0.00%)  3708113.59 ( -0.11%)
  Hmean    signal1-processes-79   4464573.45 (  0.00%)  4682798.66 (  4.89%)
  Hmean    signal1-processes-110  4486842.01 (  0.00%)  4633781.71 (  3.27%)
  Hmean    signal1-processes-141  4611816.83 (  0.00%)  4692725.38 (  1.75%)
  Hmean    signal1-processes-172  4638157.05 (  0.00%)  4714387.86 (  1.64%)
  Hmean    signal1-processes-203  4465077.80 (  0.00%)  4690348.07 (  5.05%)
  Hmean    signal1-processes-224  4410433.74 (  0.00%)  4687534.43 (  6.28%)

  Stddev   signal1-processes-8       6360.47 (  0.00%)     8455.31 ( 32.94%)
  Stddev   signal1-processes-12      4004.98 (  0.00%)     9156.13 (128.62%)
  Stddev   signal1-processes-21      3273.14 (  0.00%)     5016.80 ( 53.27%)
  Stddev   signal1-processes-48     28420.25 (  0.00%)    26576.22 ( -6.49%)
  Stddev   signal1-processes-79     22038.34 (  0.00%)    18992.70 (-13.82%)
  Stddev   signal1-processes-110    23226.93 (  0.00%)    17245.79 (-25.75%)
  Stddev   signal1-processes-141     6358.98 (  0.00%)     7636.14 ( 20.08%)
  Stddev   signal1-processes-172     9523.70 (  0.00%)     4824.75 (-49.34%)
  Stddev   signal1-processes-203    13915.33 (  0.00%)     9326.33 (-32.98%)
  Stddev   signal1-processes-224    15573.94 (  0.00%)    10613.82 (-31.85%)

Other runs that saw improvements include context_switch and
pipe; as expected, the gains are particularly highlighted at
larger thread counts, where walking the list twice becomes
more expensive.

No change in wakeup ordering or semantics.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 58 ++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index e02fe32..2337b4b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 			      enum rwsem_wake_type wake_type,
 			      struct wake_q_head *wake_q)
 {
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	struct list_head *next;
-	long loop, oldcount, woken = 0, adjustment = 0;
+	struct rwsem_waiter *waiter, *tmp;
+	long oldcount, woken = 0, adjustment = 0;
 
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	/*
+	 * Take a peek at the queue head waiter such that we can determine
+	 * the wakeup(s) to perform.
+	 */
+	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
 
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
@@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 
 	/*
 	 * Grant an infinite number of read locks to the readers at the front
-	 * of the queue.  Note we increment the 'active part' of the count by
-	 * the number of readers before waking any processes up.
+	 * of the queue. We know that woken will be at least 1 as we accounted
+	 * for above. Note we increment the 'active part' of the count by the
+	 * number of readers before waking any processes up.
 	 */
-	do {
-		woken++;
+	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+		struct task_struct *tsk;
 
-		if (waiter->list.next == &sem->wait_list)
+		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
 			break;
 
-		waiter = list_entry(waiter->list.next,
-					struct rwsem_waiter, list);
-
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
-		/* hit end of list above */
-		adjustment -= RWSEM_WAITING_BIAS;
-
-	if (adjustment)
-		atomic_long_add(adjustment, &sem->count);
-
-	next = sem->wait_list.next;
-	loop = woken;
-	do {
-		waiter = list_entry(next, struct rwsem_waiter, list);
-		next = waiter->list.next;
+		woken++;
 		tsk = waiter->task;
 
 		wake_q_add(wake_q, tsk);
+		list_del(&waiter->list);
 		/*
 		 * Ensure that the last operation is setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
@@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
-	} while (--loop);
+	}
 
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	if (list_empty(&sem->wait_list)) {
+		/* hit end of list above */
+		adjustment -= RWSEM_WAITING_BIAS;
+	}
+
+	if (adjustment)
+		atomic_long_add(adjustment, &sem->count);
 }
 
 /*
@@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
 
-	/* If there are no active locks, wake the front queued process(es).
+	/*
+	 * If there are no active locks, wake the front queued process(es).
 	 *
 	 * If there are no writers and we are first in the queue,
 	 * wake our own waiter to join the existing active readers !

end of thread

Thread overview: 12+ messages
2016-08-05  8:04 [PATCH 0/3] locking/rwsem: __rwsem_mark_wake() improvements Davidlohr Bueso
2016-08-05  8:04 ` [PATCH 1/3] locking/rwsem: Return void in __rwsem_mark_wake() Davidlohr Bueso
2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
2016-08-18 13:41   ` tip-bot for Davidlohr Bueso
2016-08-05  8:04 ` [PATCH 2/3] locking/rwsem: Remove a few useless comments Davidlohr Bueso
2016-08-18 11:00   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
2016-08-05  8:04 ` [PATCH 3/3] locking/rwsem: Scan the wait_list for readers only once Davidlohr Bueso
2016-08-06  6:05   ` Ingo Molnar
2016-08-08 16:37     ` Davidlohr Bueso
2016-08-18 11:01   ` [tip:locking/core] " tip-bot for Davidlohr Bueso
2016-08-18 13:42   ` tip-bot for Davidlohr Bueso
