* [PATCH v2 6/7] mmap_lock: increment histogram whenever mmap_lock is acquired
From: Axel Rasmussen @ 2020-05-28 23:53 UTC
  To: Andrew Morton, David Rientjes, Davidlohr Bueso, Ingo Molnar,
	Ingo Molnar, Jerome Glisse, Laurent Dufour, Liam R . Howlett,
	Matthew Wilcox, Michel Lespinasse, Peter Zijlstra,
	Vlastimil Babka, Will Deacon
  Cc: linux-fsdevel, linux-kernel, linux-mm, AKASHI Takahiro,
	Aleksa Sarai, Alexander Potapenko, Alexey Dobriyan, Al Viro,
	Andrei Vagin, Ard Biesheuvel, Brendan Higgins, chenqiwu,
	Christian Brauner, Christian Kellner, Corentin Labbe,
	Daniel Jordan, Dan Williams, David Gow, David S. Miller,
	Dmitry V. Levin, Eric W. Biederman, Eugene Syromiatnikov,
	Jamie Liu, Jason Gunthorpe, John Garry, John Hubbard,
	Jonathan Adams, Junaid Shahid, Kees Cook, Kirill A. Shutemov,
	Konstantin Khlebnikov, Krzysztof Kozlowski, Mark Rutland,
	Masahiro Yamada, Masami Hiramatsu, Mathieu Desnoyers,
	Michal Hocko, Mikhail Zaslonko, Petr Mladek, Ralph Campbell,
	Randy Dunlap, Roman Gushchin, Shakeel Butt, Steven Rostedt,
	Tal Gilboa, Thomas Gleixner, Uwe Kleine-König,
	Vincenzo Frascino, Yang Shi, Yu Zhao, Axel Rasmussen

To minimize overhead, we call down_{read,write}_trylock first. On
success, we record the acquisition with "0" latency. On failure, we
fall back to e.g. down_{read,write} and record the actual measured
latency.
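
In condensed form, the pattern looks like this (an illustrative sketch
of what the wrappers in the diff below do, not code from this patch;
the timed_down_read name is made up):

	static inline void timed_down_read(struct mm_struct *mm)
	{
		u64 start;

		if (down_read_trylock(&mm->mmap_lock)) {
			/* Uncontended fast path: count it, 0 latency. */
			__mmap_lock_histogram_record_duration(mm, 0);
			return;
		}
		/* Contended slow path: measure how long we waited. */
		start = sched_clock();
		down_read(&mm->mmap_lock);
		mmap_lock_histogram_record(mm, start);
	}

This way the uncontended path never has to call sched_clock().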

The reason for recording uncontended acquisitions is so we have an
accurate count of total acquisition attempts. This allows us to compute
meaningful percentiles over the histogram data - e.g., "x% of lock
acquisitions succeeded in <= y ms".
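
For example, with a hypothetical bucket layout (counts[i] holding the
number of acquisitions that landed in bucket i, and upper_bound_ns[i]
that bucket's inclusive upper bound - this is *not* the actual
<linux/histogram.h> API), a percentile can be read off the recorded
data like so:

	static u64 histogram_percentile_ns(const u64 *counts,
					   const u64 *upper_bound_ns,
					   int nr_buckets,
					   unsigned int pct)
	{
		u64 total = 0, seen = 0;
		int i;

		for (i = 0; i < nr_buckets; i++)
			total += counts[i];
		/* Find the first bucket covering >= pct% of attempts. */
		for (i = 0; i < nr_buckets; i++) {
			seen += counts[i];
			if (seen * 100 >= total * pct)
				return upper_bound_ns[i];
		}
		return upper_bound_ns[nr_buckets - 1];
	}

Counting uncontended acquisitions as 0-latency samples is what makes
total the true number of acquisition attempts.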

Note that in e.g. the killable lock case, we record the latency even
if the acquisition doesn't actually succeed. This lets us see cases
where we waited a long time even though the acquisition eventually
failed.

Nested locks are a weird case, in that we can't do the "only compute
latency in the contended case" optimization without reaching into
rwsem.c's internal API. The approach this commit uses (removing "static
inline" from the couple of functions we need, and forward declaring them
in mmap_lock.c) was chosen instead of:

- Adding mmap_lock specific code to rwsem.c directly, as this would
  pollute a generic file with a specific use case's implementation.

- Moving the API internals we need up into rwsem.h, as this would expose
  them more widely than arguably we'd like.

- Just computing latency in all cases, as this would add nontrivial
  overhead to the uncontended lock case.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
---
 include/linux/mmap_lock.h | 87 +++++++++++++++++++++++++++++++++++----
 kernel/locking/rwsem.c    |  4 +-
 mm/Makefile               |  1 +
 mm/mmap_lock.c            | 34 +++++++++++++++
 4 files changed, 117 insertions(+), 9 deletions(-)
 create mode 100644 mm/mmap_lock.c

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 9dc632add390..1c212a403911 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,11 +1,80 @@
 #ifndef _LINUX_MMAP_LOCK_H
 #define _LINUX_MMAP_LOCK_H
 
+#include <linux/histogram.h>
+#include <linux/mm_types.h>
 #include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <linux/sched/clock.h>
 
 #define MMAP_LOCK_INITIALIZER(name) \
 	.mmap_lock = __RWSEM_INITIALIZER(name.mmap_lock),
 
+#ifdef CONFIG_MMAP_LOCK_HISTOGRAMS
+static inline void __mmap_lock_histogram_record_duration(struct mm_struct *mm,
+							 u64 duration)
+{
+	if (likely(mm->mmap_lock_contention))
+		histogram_record_rcu(mm->mmap_lock_contention, duration, 1);
+}
+
+static inline void mmap_lock_histogram_record(struct mm_struct *mm,
+					      u64 start_time_ns)
+{
+	__mmap_lock_histogram_record_duration(mm,
+					      sched_clock() - start_time_ns);
+}
+#endif
+
+static inline bool __mmap_trylock(struct mm_struct *mm,
+				  int (*trylock)(struct rw_semaphore *))
+{
+	bool ret = trylock(&mm->mmap_lock) != 0;
+
+#ifdef CONFIG_MMAP_LOCK_HISTOGRAMS
+	if (ret)
+		__mmap_lock_histogram_record_duration(mm, 0);
+#endif
+	return ret;
+}
+
+static inline void __mmap_lock(struct mm_struct *mm,
+			       int (*trylock)(struct rw_semaphore *),
+			       void (*lock)(struct rw_semaphore *))
+{
+#ifdef CONFIG_MMAP_LOCK_HISTOGRAMS
+	u64 start_time_ns;
+
+	if (!__mmap_trylock(mm, trylock)) {
+		start_time_ns = sched_clock();
+		lock(&mm->mmap_lock);
+		mmap_lock_histogram_record(mm, start_time_ns);
+	}
+#else
+	lock(&mm->mmap_lock);
+#endif
+}
+
+static inline int __mmap_lock_return(struct mm_struct *mm,
+				     int (*trylock)(struct rw_semaphore *),
+				     int (*lock)(struct rw_semaphore *))
+{
+#ifdef CONFIG_MMAP_LOCK_HISTOGRAMS
+	u64 start_time_ns;
+	int ret;
+
+	if (!__mmap_trylock(mm, trylock)) {
+		start_time_ns = sched_clock();
+		ret = lock(&mm->mmap_lock);
+		mmap_lock_histogram_record(mm, start_time_ns);
+		return ret;
+	}
+	return 0;
+#else
+	return lock(&mm->mmap_lock);
+#endif
+}
+
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
 	init_rwsem(&mm->mmap_lock);
@@ -13,22 +82,26 @@ static inline void mmap_init_lock(struct mm_struct *mm)
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
-	down_write(&mm->mmap_lock);
+	__mmap_lock(mm, down_write_trylock, down_write);
 }
 
+#ifdef CONFIG_MMAP_LOCK_HISTOGRAMS
+void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
+#else
 static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
 {
 	down_write_nested(&mm->mmap_lock, subclass);
 }
+#endif
 
 static inline int mmap_write_lock_killable(struct mm_struct *mm)
 {
-	return down_write_killable(&mm->mmap_lock);
+	return __mmap_lock_return(mm, down_write_trylock, down_write_killable);
 }
 
 static inline bool mmap_write_trylock(struct mm_struct *mm)
 {
-	return down_write_trylock(&mm->mmap_lock) != 0;
+	return __mmap_trylock(mm, down_write_trylock);
 }
 
 static inline void mmap_write_unlock(struct mm_struct *mm)
@@ -43,17 +116,17 @@ static inline void mmap_write_downgrade(struct mm_struct *mm)
 
 static inline void mmap_read_lock(struct mm_struct *mm)
 {
-	down_read(&mm->mmap_lock);
+	__mmap_lock(mm, down_read_trylock, down_read);
 }
 
 static inline int mmap_read_lock_killable(struct mm_struct *mm)
 {
-	return down_read_killable(&mm->mmap_lock);
+	return __mmap_lock_return(mm, down_read_trylock, down_read_killable);
 }
 
 static inline bool mmap_read_trylock(struct mm_struct *mm)
 {
-	return down_read_trylock(&mm->mmap_lock) != 0;
+	return __mmap_trylock(mm, down_read_trylock);
 }
 
 static inline void mmap_read_unlock(struct mm_struct *mm)
@@ -63,7 +136,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 
 static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
 {
-	if (down_read_trylock(&mm->mmap_lock)) {
+	if (__mmap_trylock(mm, down_read_trylock)) {
 		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
 		return true;
 	}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index f11b9bd3431d..d041c5ae8b4d 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1380,7 +1380,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void __down_write(struct rw_semaphore *sem)
+void __down_write(struct rw_semaphore *sem)
 {
 	long tmp = RWSEM_UNLOCKED_VALUE;
 
@@ -1405,7 +1405,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
 	return 0;
 }
 
-static inline int __down_write_trylock(struct rw_semaphore *sem)
+int __down_write_trylock(struct rw_semaphore *sem)
 {
 	long tmp;
 
diff --git a/mm/Makefile b/mm/Makefile
index fccd3756b25f..2a54af3fc715 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -112,3 +112,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
+obj-$(CONFIG_MMAP_LOCK_HISTOGRAMS) += mmap_lock.o
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
new file mode 100644
index 000000000000..f166cf40c60a
--- /dev/null
+++ b/mm/mmap_lock.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/mmap_lock.h>
+
+#define MMAP_LOCK_CONTENDED(_mm, try, lock)                                    \
+	do {                                                                   \
+		if (!try(&(_mm)->mmap_lock)) {                                 \
+			u64 start_time_ns;                                     \
+			lock_contended(&(_mm)->mmap_lock.dep_map, _RET_IP_);   \
+			start_time_ns = sched_clock();                         \
+			lock(&(_mm)->mmap_lock);                               \
+			mmap_lock_histogram_record(_mm, start_time_ns);        \
+		} else {                                                       \
+			__mmap_lock_histogram_record_duration(_mm, 0);         \
+		}                                                              \
+		lock_acquired(&(_mm)->mmap_lock.dep_map, _RET_IP_);            \
+	} while (0)
+
+/* Defined in kernel/locking/rwsem.c. */
+extern int __down_write_trylock(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+
+void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	might_sleep();
+	rwsem_acquire(&mm->mmap_lock.dep_map, subclass, 0, _RET_IP_);
+	MMAP_LOCK_CONTENDED(mm, __down_write_trylock, __down_write);
+#else
+	mmap_write_lock(mm);
+#endif
+}
+EXPORT_SYMBOL(mmap_write_lock_nested);
-- 
2.27.0.rc0.183.gde8f92d652-goog


