linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>, Will Deacon <will@kernel.org>,
	Waiman Long <longman@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Steven Rostedt <rostedt@goodmis.org>,
	Byungchul Park <byungchul.park@lge.com>,
	"Paul E. McKenney" <paul.mckenney@linaro.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Radoslaw Burny <rburny@google.com>
Subject: [PATCH 11/12] locking/mutex: Revive fast functions for CONFIG_LOCK_TRACEPOINTS
Date: Tue,  8 Feb 2022 10:42:07 -0800	[thread overview]
Message-ID: <20220208184208.79303-12-namhyung@kernel.org> (raw)
In-Reply-To: <20220208184208.79303-1-namhyung@kernel.org>

The CONFIG_LOCK_TRACEPOINTS used the same path as CONFIG_DEBUG_ALLOC
or CONFIG_LOCKDEP but it caused performance impact on mutex as it
removes _fast variants at the beginning.

I'm not entirely sure why it removed the fast versions when lockdep is
on.  It seems easy to add the required annotation when the fast
version is succeeded as far as the tracpoints are concerned.

It reduces around 2% of elapsed time when I ran a micro-benchmark.  It
was `perf bench sched messaging -p -l 1000000` and it had a pretty low
lock contention rate (under 1% of total lock acquisition).  So the
improvement should come from the fast path.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/linux/mutex.h  |  2 +-
 kernel/locking/mutex.c | 30 ++++++++++++++++++++----------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b2d018250a41..49bde305453e 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -172,7 +172,7 @@ do {							\
  * See kernel/locking/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/locking/mutex-design.rst.
  */
-#ifdef CONFIG_LOCK_INFO
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
 extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 8733b96ce20a..f8bc4ae312a0 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -149,7 +149,7 @@ static inline bool __mutex_trylock(struct mutex *lock)
 	return !__mutex_trylock_common(lock, false);
 }
 
-#ifndef CONFIG_LOCK_INFO
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Lockdep annotations are contained to the slow paths for simplicity.
  * There is nothing that would stop spreading the lockdep annotations outwards
@@ -245,7 +245,7 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 	}
 }
 
-#ifndef CONFIG_LOCK_INFO
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
  * slowpath functions, to reduce the register pressure on the fastpath.
@@ -281,6 +281,8 @@ void __sched mutex_lock(struct mutex *lock)
 
 	if (!__mutex_trylock_fast(lock))
 		__mutex_lock_slowpath(lock);
+	else
+		mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 }
 EXPORT_SYMBOL(mutex_lock);
 #endif
@@ -533,9 +535,11 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
  */
 void __sched mutex_unlock(struct mutex *lock)
 {
-#ifndef CONFIG_LOCK_INFO
-	if (__mutex_unlock_fast(lock))
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+	if (__mutex_unlock_fast(lock)) {
+		mutex_release(&lock->dep_map, _RET_IP_);
 		return;
+	}
 #endif
 	__mutex_unlock_slowpath(lock, _RET_IP_);
 }
@@ -591,7 +595,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		if (ww_ctx->acquired == 0)
 			ww_ctx->wounded = 0;
 
-#ifdef CONFIG_LOCK_INFO
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 		nest_lock = &ww_ctx->dep_map;
 #endif
 	}
@@ -778,7 +782,7 @@ int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
 }
 EXPORT_SYMBOL(ww_mutex_trylock);
 
-#ifdef CONFIG_LOCK_INFO
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
@@ -937,7 +941,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 	wake_up_q(&wake_q);
 }
 
-#ifndef CONFIG_LOCK_INFO
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
  * mutex_lock_interruptible() and mutex_trylock().
@@ -964,8 +968,10 @@ int __sched mutex_lock_interruptible(struct mutex *lock)
 {
 	might_sleep();
 
-	if (__mutex_trylock_fast(lock))
+	if (__mutex_trylock_fast(lock)) {
+		mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 		return 0;
+	}
 
 	return __mutex_lock_interruptible_slowpath(lock);
 }
@@ -988,8 +994,10 @@ int __sched mutex_lock_killable(struct mutex *lock)
 {
 	might_sleep();
 
-	if (__mutex_trylock_fast(lock))
+	if (__mutex_trylock_fast(lock)) {
+		mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 		return 0;
+	}
 
 	return __mutex_lock_killable_slowpath(lock);
 }
@@ -1078,7 +1086,7 @@ int __sched mutex_trylock(struct mutex *lock)
 }
 EXPORT_SYMBOL(mutex_trylock);
 
-#ifndef CONFIG_LOCK_INFO
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 int __sched
 ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
@@ -1087,6 +1095,7 @@ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	if (__mutex_trylock_fast(&lock->base)) {
 		if (ctx)
 			ww_mutex_set_context_fastpath(lock, ctx);
+		mutex_acquire(&lock->base.dep_map, 0, 0, _RET_IP_);
 		return 0;
 	}
 
@@ -1102,6 +1111,7 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	if (__mutex_trylock_fast(&lock->base)) {
 		if (ctx)
 			ww_mutex_set_context_fastpath(lock, ctx);
+		mutex_acquire(&lock->base.dep_map, 0, 0, _RET_IP_);
 		return 0;
 	}
 
-- 
2.35.0.263.gb82422642f-goog


  parent reply	other threads:[~2022-02-08 18:43 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-08 18:41 [RFC 00/12] locking: Separate lock tracepoints from lockdep/lock_stat (v1) Namhyung Kim
2022-02-08 18:41 ` [PATCH 01/12] locking: Pass correct outer wait type info Namhyung Kim
2022-02-08 18:41 ` [PATCH 02/12] cgroup: rstat: Make cgroup_rstat_cpu_lock name readable Namhyung Kim
2022-02-08 18:46   ` Tejun Heo
2022-02-08 19:16     ` Namhyung Kim
2022-02-08 23:51       ` Namhyung Kim
2022-02-08 18:41 ` [PATCH 03/12] timer: Protect lockdep functions with #ifdef Namhyung Kim
2022-02-08 19:36   ` Steven Rostedt
2022-02-08 20:29     ` Namhyung Kim
2022-02-08 21:19       ` Steven Rostedt
2022-02-08 18:42 ` [PATCH 04/12] workqueue: " Namhyung Kim
2022-02-08 18:48   ` Tejun Heo
2022-02-08 19:17     ` Namhyung Kim
2022-02-08 19:38   ` Steven Rostedt
2022-02-08 18:42 ` [PATCH 05/12] drm/i915: " Namhyung Kim
2022-02-08 18:51   ` Jani Nikula
2022-02-08 19:22     ` Namhyung Kim
2022-02-09 13:49       ` Jani Nikula
2022-02-09 16:27         ` Steven Rostedt
2022-02-09 19:28           ` Namhyung Kim
2022-02-08 18:42 ` [PATCH 06/12] btrfs: change lockdep class size check using ks->names Namhyung Kim
2022-02-08 19:03   ` David Sterba
2022-02-08 18:42 ` [PATCH 07/12] locking: Introduce CONFIG_LOCK_INFO Namhyung Kim
2022-02-08 18:42 ` [PATCH 08/12] locking/mutex: Init name properly w/ CONFIG_LOCK_INFO Namhyung Kim
2022-02-08 18:42 ` [PATCH 09/12] locking: Add more static lockdep init macros Namhyung Kim
2022-02-08 18:42 ` [PATCH 10/12] locking: Add CONFIG_LOCK_TRACEPOINTS option Namhyung Kim
2022-02-08 18:42 ` Namhyung Kim [this message]
2022-02-09  8:40   ` [PATCH 11/12] locking/mutex: Revive fast functions for CONFIG_LOCK_TRACEPOINTS Peter Zijlstra
2022-02-09 20:15     ` Namhyung Kim
2022-02-08 18:42 ` [PATCH 12/12] locking: Move lock_acquired() from the fast path Namhyung Kim
2022-02-08 19:14 ` [RFC 00/12] locking: Separate lock tracepoints from lockdep/lock_stat (v1) Namhyung Kim
2022-02-09  9:09 ` Peter Zijlstra
2022-02-09 18:19   ` Waiman Long
2022-02-09 18:29     ` Mathieu Desnoyers
2022-02-09 19:02       ` Waiman Long
2022-02-09 19:17         ` Mathieu Desnoyers
2022-02-09 19:37           ` Waiman Long
2022-02-09 19:22         ` Namhyung Kim
2022-02-09 19:28           ` Mathieu Desnoyers
2022-02-09 19:45             ` Namhyung Kim
2022-02-09 19:56               ` Mathieu Desnoyers
2022-02-09 20:17               ` Waiman Long
2022-02-10  0:27                 ` Namhyung Kim
2022-02-10  2:12                   ` Waiman Long
2022-02-10  9:33                 ` Peter Zijlstra
2022-02-10  0:32   ` Namhyung Kim
2022-02-10  9:13     ` Peter Zijlstra
2022-02-10 19:14       ` Paul E. McKenney
2022-02-10 19:27         ` Waiman Long
2022-02-10 20:10           ` Paul E. McKenney
2022-02-11  5:57             ` Namhyung Kim
2022-02-11  5:55       ` Namhyung Kim
2022-02-11 10:39         ` Peter Zijlstra
2022-02-08 19:43 [RFC RESEND 00/12] locking: Separate lock tracepoints from lockdep/lock_stat (v1.1) Namhyung Kim
2022-02-08 19:43 ` [PATCH 11/12] locking/mutex: Revive fast functions for CONFIG_LOCK_TRACEPOINTS Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220208184208.79303-12-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=boqun.feng@gmail.com \
    --cc=byungchul.park@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@kernel.org \
    --cc=paul.mckenney@linaro.org \
    --cc=peterz@infradead.org \
    --cc=rburny@google.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).