RCU Archive on lore.kernel.org
 help / color / Atom feed
From: paulmck@kernel.org
To: rcu@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, kernel-team@fb.com,
	mingo@kernel.org, jiangshanlai@gmail.com, dipankar@in.ibm.com,
	akpm@linux-foundation.org, mathieu.desnoyers@efficios.com,
	josh@joshtriplett.org, tglx@linutronix.de, peterz@infradead.org,
	rostedt@goodmis.org, dhowells@redhat.com, edumazet@google.com,
	fweisbec@gmail.com, oleg@redhat.com, joel@joelfernandes.org,
	"Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH tip/core/rcu 06/30] refperf: Hoist function-pointer calls out of the loop
Date: Mon, 22 Jun 2020 17:29:49 -0700
Message-ID: <20200623003013.26252-6-paulmck@kernel.org> (raw)
In-Reply-To: <20200623002941.GA26089@paulmck-ThinkPad-P72>

From: "Paul E. McKenney" <paulmck@kernel.org>

Current runs show PREEMPT=n rcu_read_lock()/rcu_read_unlock() pairs
consuming between 20 and 30 nanoseconds, when in fact the actual value is
zero, give or take the barrier() asm's effect on compiler optimizations.
The additional overhead is caused by function calls through pointers
(especially in these days of Spectre mitigations) and perhaps also by
needless argument passing, a non-const loop limit, and an upcounting loop.

This commit therefore combines the ->readlock() and ->readunlock()
function pointers into a single ->readsection() function pointer that
takes the loop count as a const parameter and keeps any data passed
from the read-lock to the read-unlock internal to this new function.

These changes reduce the measured overhead of the aforementioned
PREEMPT=n rcu_read_lock()/rcu_read_unlock() pairs from between 20 and
30 nanoseconds to somewhere south of 500 picoseconds.

Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/refperf.c | 92 ++++++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 54 deletions(-)

diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c
index 4d686fd..57c7b7a 100644
--- a/kernel/rcu/refperf.c
+++ b/kernel/rcu/refperf.c
@@ -108,23 +108,20 @@ static int exp_idx;
 struct ref_perf_ops {
 	void (*init)(void);
 	void (*cleanup)(void);
-	int (*readlock)(void);
-	void (*readunlock)(int idx);
+	void (*readsection)(const int nloops);
 	const char *name;
 };
 
 static struct ref_perf_ops *cur_ops;
 
-// Definitions for RCU ref perf testing.
-static int ref_rcu_read_lock(void) __acquires(RCU)
+static void ref_rcu_read_section(const int nloops)
 {
-	rcu_read_lock();
-	return 0;
-}
+	int i;
 
-static void ref_rcu_read_unlock(int idx) __releases(RCU)
-{
-	rcu_read_unlock();
+	for (i = nloops; i >= 0; i--) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
 }
 
 static void rcu_sync_perf_init(void)
@@ -133,8 +130,7 @@ static void rcu_sync_perf_init(void)
 
 static struct ref_perf_ops rcu_ops = {
 	.init		= rcu_sync_perf_init,
-	.readlock	= ref_rcu_read_lock,
-	.readunlock	= ref_rcu_read_unlock,
+	.readsection	= ref_rcu_read_section,
 	.name		= "rcu"
 };
 
@@ -143,42 +139,39 @@ static struct ref_perf_ops rcu_ops = {
 DEFINE_STATIC_SRCU(srcu_refctl_perf);
 static struct srcu_struct *srcu_ctlp = &srcu_refctl_perf;
 
-static int srcu_ref_perf_read_lock(void) __acquires(srcu_ctlp)
+static void srcu_ref_perf_read_section(int nloops)
 {
-	return srcu_read_lock(srcu_ctlp);
-}
+	int i;
+	int idx;
 
-static void srcu_ref_perf_read_unlock(int idx) __releases(srcu_ctlp)
-{
-	srcu_read_unlock(srcu_ctlp, idx);
+	for (i = nloops; i >= 0; i--) {
+		idx = srcu_read_lock(srcu_ctlp);
+		srcu_read_unlock(srcu_ctlp, idx);
+	}
 }
 
 static struct ref_perf_ops srcu_ops = {
 	.init		= rcu_sync_perf_init,
-	.readlock	= srcu_ref_perf_read_lock,
-	.readunlock	= srcu_ref_perf_read_unlock,
+	.readsection	= srcu_ref_perf_read_section,
 	.name		= "srcu"
 };
 
 // Definitions for reference count
 static atomic_t refcnt;
 
-static int srcu_ref_perf_refcnt_lock(void)
+static void ref_perf_refcnt_section(const int nloops)
 {
-	atomic_inc(&refcnt);
-	return 0;
-}
+	int i;
 
-static void srcu_ref_perf_refcnt_unlock(int idx) __releases(srcu_ctlp)
-{
-	atomic_dec(&refcnt);
-	srcu_read_unlock(srcu_ctlp, idx);
+	for (i = nloops; i >= 0; i--) {
+		atomic_inc(&refcnt);
+		atomic_dec(&refcnt);
+	}
 }
 
 static struct ref_perf_ops refcnt_ops = {
 	.init		= rcu_sync_perf_init,
-	.readlock	= srcu_ref_perf_refcnt_lock,
-	.readunlock	= srcu_ref_perf_refcnt_unlock,
+	.readsection	= ref_perf_refcnt_section,
 	.name		= "refcnt"
 };
 
@@ -190,21 +183,19 @@ static void ref_perf_rwlock_init(void)
 	rwlock_init(&test_rwlock);
 }
 
-static int ref_perf_rwlock_lock(void)
+static void ref_perf_rwlock_section(const int nloops)
 {
-	read_lock(&test_rwlock);
-	return 0;
-}
+	int i;
 
-static void ref_perf_rwlock_unlock(int idx)
-{
-	read_unlock(&test_rwlock);
+	for (i = nloops; i >= 0; i--) {
+		read_lock(&test_rwlock);
+		read_unlock(&test_rwlock);
+	}
 }
 
 static struct ref_perf_ops rwlock_ops = {
 	.init		= ref_perf_rwlock_init,
-	.readlock	= ref_perf_rwlock_lock,
-	.readunlock	= ref_perf_rwlock_unlock,
+	.readsection	= ref_perf_rwlock_section,
 	.name		= "rwlock"
 };
 
@@ -216,21 +207,19 @@ static void ref_perf_rwsem_init(void)
 	init_rwsem(&test_rwsem);
 }
 
-static int ref_perf_rwsem_lock(void)
+static void ref_perf_rwsem_section(const int nloops)
 {
-	down_read(&test_rwsem);
-	return 0;
-}
+	int i;
 
-static void ref_perf_rwsem_unlock(int idx)
-{
-	up_read(&test_rwsem);
+	for (i = nloops; i >= 0; i--) {
+		down_read(&test_rwsem);
+		up_read(&test_rwsem);
+	}
 }
 
 static struct ref_perf_ops rwsem_ops = {
 	.init		= ref_perf_rwsem_init,
-	.readlock	= ref_perf_rwsem_lock,
-	.readunlock	= ref_perf_rwsem_unlock,
+	.readsection	= ref_perf_rwsem_section,
 	.name		= "rwsem"
 };
 
@@ -242,8 +231,6 @@ ref_perf_reader(void *arg)
 	unsigned long flags;
 	long me = (long)arg;
 	struct reader_task *rt = &(reader_tasks[me]);
-	unsigned long spincnt;
-	int idx;
 	u64 start;
 	s64 duration;
 
@@ -275,10 +262,7 @@ ref_perf_reader(void *arg)
 
 	VERBOSE_PERFOUT("ref_perf_reader %ld: experiment %d started", me, exp_idx);
 
-	for (spincnt = 0; spincnt < loops; spincnt++) {
-		idx = cur_ops->readlock();
-		cur_ops->readunlock(idx);
-	}
+	cur_ops->readsection(loops);
 
 	duration = ktime_get_mono_fast_ns() - start;
 	local_irq_restore(flags);
-- 
2.9.5


  parent reply index

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-23  0:29 [PATCH tip/core/rcu 0/30] Add read-side scalability tests for v5.9 Paul E. McKenney
2020-06-23  0:29 ` [PATCH tip/core/rcu 01/30] rcuperf: Remove useless while loops around wait_event paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 02/30] refperf: Add a test to measure performance of read-side synchronization paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 03/30] rcuperf: Add comments explaining the high reader overhead paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 04/30] torture: Add refperf to the rcutorture scripting paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 05/30] refperf: Add holdoff parameter to allow CPUs to come online paulmck
2020-06-23  0:29 ` paulmck [this message]
2020-06-23  0:29 ` [PATCH tip/core/rcu 07/30] refperf: Allow decimal nanoseconds paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 08/30] refperf: Convert nreaders to a module parameter paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 09/30] refperf: Provide module parameter to specify number of experiments paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 10/30] refperf: Dynamically allocate experiment-summary output buffer paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 11/30] refperf: Dynamically allocate thread-summary " paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 12/30] refperf: Make functions static paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 13/30] refperf: Tune reader measurement interval paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 14/30] refperf: Convert reader_task structure's "start" field to int paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 15/30] refperf: More closely synchronize reader start times paulmck
2020-06-23  0:29 ` [PATCH tip/core/rcu 16/30] refperf: Add warmup and cooldown processing phases paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 17/30] refperf: Label experiment-number column "Runs" paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 18/30] refperf: Output per-experiment data points paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 19/30] refperf: Simplify initialization-time wakeup protocol paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 20/30] refperf: Add read-side delay module parameter paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 21/30] refperf: Adjust refperf.loop default value paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 22/30] doc: Document rcuperf's module parameters paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 23/30] refperf: Work around 64-bit division paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 24/30] refperf: Change readdelay module parameter to nanoseconds paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 25/30] refperf: Add test for RCU Tasks Trace readers paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 26/30] refperf: Add test for RCU Tasks readers paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 27/30] rcu-tasks: Fix synchronize_rcu_tasks_trace() header comment paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 28/30] refperf: Rename RCU_REF_PERF_TEST to RCU_REF_SCALE_TEST paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 29/30] refperf: Rename refperf.c to refscale.c and change internal names paulmck
2020-06-23  0:30 ` [PATCH tip/core/rcu 30/30] refscale: Change --torture type from refperf to refscale paulmck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200623003013.26252-6-paulmck@kernel.org \
    --to=paulmck@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=dhowells@redhat.com \
    --cc=dipankar@in.ibm.com \
    --cc=edumazet@google.com \
    --cc=fweisbec@gmail.com \
    --cc=jiangshanlai@gmail.com \
    --cc=joel@joelfernandes.org \
    --cc=josh@joshtriplett.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

RCU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/rcu/0 rcu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 rcu rcu/ https://lore.kernel.org/rcu \
		rcu@vger.kernel.org
	public-inbox-index rcu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.rcu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git