linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mmap_lock: add tracepoints around lock acquisition
@ 2020-09-17 18:13 Axel Rasmussen
  2020-09-17 19:42 ` Steven Rostedt
  2020-09-21  4:57 ` Yafang Shao
  0 siblings, 2 replies; 14+ messages in thread
From: Axel Rasmussen @ 2020-09-17 18:13 UTC (permalink / raw)
  To: Steven Rostedt, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso
  Cc: Yafang Shao, linux-kernel, linux-mm, Axel Rasmussen

The goal of these tracepoints is to be able to debug lock contention
issues. This lock is acquired on most (all?) mmap / munmap / page fault
operations, so a multi-threaded process which does a lot of these can
experience significant contention.

We trace just before we start acquisition, when the acquisition returns
(whether it succeeded or not), and when the lock is released (or
downgraded). The events are broken out by lock type (read / write).

The events are also broken out by memcg path. For container-based
workloads, users often think of several processes in a memcg as a single
logical "task", so collecting statistics at this level is useful.

These events *do not* include latency bucket information, which means
for a proper latency histogram users will need to use BPF instead of
event histograms. The benefit we get from this is simpler code.

This patch is a no-op if the Kconfig option is not enabled. If it is,
tracepoints are still disabled by default (configurable at runtime);
the only fixed cost here is un-inlining a few functions. As best as
I've been able to measure, the overhead this introduces is a small
fraction of 1%. Actually hooking up the tracepoints to BPF introduces
additional overhead, depending on exactly what the BPF program is
collecting.
---
 include/linux/mmap_lock.h        |  28 +++-
 include/trace/events/mmap_lock.h |  73 ++++++++++
 mm/Kconfig                       |  17 +++
 mm/Makefile                      |   1 +
 mm/mmap_lock.c                   | 224 +++++++++++++++++++++++++++++++
 5 files changed, 342 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/mmap_lock.h
 create mode 100644 mm/mmap_lock.c

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 0707671851a8..d12aa2ff6c05 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,11 +1,35 @@
 #ifndef _LINUX_MMAP_LOCK_H
 #define _LINUX_MMAP_LOCK_H
 
+#include <linux/lockdep.h>
+#include <linux/mm_types.h>
 #include <linux/mmdebug.h>
+#include <linux/rwsem.h>
+#include <linux/types.h>
 
 #define MMAP_LOCK_INITIALIZER(name) \
 	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
 
+#ifdef CONFIG_MMAP_LOCK_STATS
+
+void mmap_init_lock(struct mm_struct *mm);
+void mmap_write_lock(struct mm_struct *mm);
+void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
+int mmap_write_lock_killable(struct mm_struct *mm);
+bool mmap_write_trylock(struct mm_struct *mm);
+void mmap_write_unlock(struct mm_struct *mm);
+void mmap_write_downgrade(struct mm_struct *mm);
+void mmap_read_lock(struct mm_struct *mm);
+int mmap_read_lock_killable(struct mm_struct *mm);
+bool mmap_read_trylock(struct mm_struct *mm);
+void mmap_read_unlock(struct mm_struct *mm);
+bool mmap_read_trylock_non_owner(struct mm_struct *mm);
+void mmap_read_unlock_non_owner(struct mm_struct *mm);
+void mmap_assert_locked(struct mm_struct *mm);
+void mmap_assert_write_locked(struct mm_struct *mm);
+
+#else /* !CONFIG_MMAP_LOCK_STATS */
+
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
 	init_rwsem(&mm->mmap_lock);
@@ -63,7 +87,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 
 static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
 {
-	if (down_read_trylock(&mm->mmap_lock)) {
+	if (mmap_read_trylock(mm)) {
 		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
 		return true;
 	}
@@ -87,4 +111,6 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
 	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
 }
 
+#endif /* CONFIG_MMAP_LOCK_STATS */
+
 #endif /* _LINUX_MMAP_LOCK_H */
diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
new file mode 100644
index 000000000000..549c662e6ed8
--- /dev/null
+++ b/include/trace/events/mmap_lock.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmap_lock
+
+#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMAP_LOCK_H
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+struct mm_struct;
+
+DECLARE_EVENT_CLASS(
+	mmap_lock_template,
+
+	TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
+		bool write, bool success),
+
+	TP_ARGS(mm, memcg_path, duration, write, success),
+
+	TP_STRUCT__entry(
+		__field(struct mm_struct *, mm)
+		__string(memcg_path, memcg_path)
+		__field(u64, duration)
+		__field(bool, write)
+		__field(bool, success)
+	),
+
+	TP_fast_assign(
+		__entry->mm = mm;
+		__assign_str(memcg_path, memcg_path);
+		__entry->duration = duration;
+		__entry->write = write;
+		__entry->success = success;
+	),
+
+	TP_printk(
+		"mm=%p memcg_path=%s duration=%llu write=%s success=%s\n",
+		__entry->mm,
+		__get_str(memcg_path),
+		__entry->duration,
+		__entry->write ? "true" : "false",
+		__entry->success ? "true" : "false")
+	);
+
+DEFINE_EVENT(mmap_lock_template, mmap_lock_start_locking,
+
+	TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
+		bool write, bool success),
+
+	TP_ARGS(mm, memcg_path, duration, write, success)
+);
+
+DEFINE_EVENT(mmap_lock_template, mmap_lock_acquire_returned,
+
+	TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
+		bool write, bool success),
+
+	TP_ARGS(mm, memcg_path, duration, write, success)
+);
+
+DEFINE_EVENT(mmap_lock_template, mmap_lock_released,
+
+	TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
+		bool write, bool success),
+
+	TP_ARGS(mm, memcg_path, duration, write, success)
+);
+
+#endif /* _TRACE_MMAP_LOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/Kconfig b/mm/Kconfig
index 6c974888f86f..b602df8bcee0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -818,6 +818,23 @@ config DEVICE_PRIVATE
 config FRAME_VECTOR
 	bool
 
+config MMAP_LOCK_STATS
+	bool "mmap_lock stats / instrumentation"
+	select HISTOGRAM
+	default n
+
+	help
+	  Enables tracepoints around mmap_lock (start aquiring, acquire
+	  returned, and released), which are off by default + controlled at
+	  runtime. These can be used for deeper debugging of contention
+	  issues, via e.g. BPF.
+
+	  This option has a small (small fraction of 1%) fixed overhead
+	  even if tracepoints aren't actually in use at runtime, since it
+	  requires un-inlining some functions.
+
+	  If unsure, say "n".
+
 config ARCH_USES_HIGH_VMA_FLAGS
 	bool
 config ARCH_HAS_PKEYS
diff --git a/mm/Makefile b/mm/Makefile
index d5649f1c12c0..eb6ed855a002 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -121,3 +121,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
+obj-$(CONFIG_MMAP_LOCK_STATS) += mmap_lock.o
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
new file mode 100644
index 000000000000..1624f90164c0
--- /dev/null
+++ b/mm/mmap_lock.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmap_lock.h>
+
+#include <linux/cgroup.h>
+#include <linux/memcontrol.h>
+#include <linux/mmap_lock.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/trace_events.h>
+#include <linux/sched/clock.h>
+
+#ifdef CONFIG_MEMCG
+
+DEFINE_PER_CPU(char[MAX_FILTER_STR_VAL], trace_memcg_path);
+
+/*
+ * Write the given mm_struct's memcg path to a percpu buffer, and return a
+ * pointer to it. If the path cannot be determined, the buffer will contain the
+ * empty string.
+ *
+ * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
+ * disabled by the caller before calling us, and re-enabled only after the
+ * caller is done with the pointer.
+ */
+static const char *get_mm_memcg_path(struct mm_struct *mm)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
+
+	if (memcg != NULL && likely(memcg->css.cgroup != NULL)) {
+		char *buf = this_cpu_ptr(trace_memcg_path);
+
+		cgroup_path(memcg->css.cgroup, buf, MAX_FILTER_STR_VAL);
+		return buf;
+	}
+	return "";
+}
+
+#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
+	do {                                                                   \
+		if (trace_mmap_lock_##type##_enabled()) {                      \
+			get_cpu();                                             \
+			trace_mmap_lock_##type(mm, get_mm_memcg_path(mm),      \
+					       ##__VA_ARGS__);                 \
+			put_cpu();                                             \
+		}                                                              \
+	} while (0)
+
+#else /* !CONFIG_MEMCG */
+
+#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
+	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
+
+#endif /* CONFIG_MEMCG */
+
+/*
+ * Trace calls must be in a separate file, as otherwise there's a circuclar
+ * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
+ */
+
+static void trace_start_locking(struct mm_struct *mm, bool write)
+{
+	TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
+}
+
+static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
+				   bool write, bool success)
+{
+	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
+			      sched_clock() - start_time_ns, write, success);
+}
+
+static void trace_released(struct mm_struct *mm, bool write)
+{
+	TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
+}
+
+static bool trylock_impl(struct mm_struct *mm,
+			 int (*trylock)(struct rw_semaphore *), bool write)
+{
+	bool ret;
+
+	trace_start_locking(mm, write);
+	ret = trylock(&mm->mmap_lock) != 0;
+	/* Avoid calling sched_clock() for trylocks; assume duration = 0. */
+	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, write, ret);
+	return ret;
+}
+
+static inline void lock_impl(struct mm_struct *mm,
+			     void (*lock)(struct rw_semaphore *), bool write)
+{
+	u64 start_time_ns;
+
+	trace_start_locking(mm, write);
+	start_time_ns = sched_clock();
+	lock(&mm->mmap_lock);
+	trace_acquire_returned(mm, start_time_ns, write, true);
+}
+
+static inline int lock_return_impl(struct mm_struct *mm,
+				   int (*lock)(struct rw_semaphore *),
+				   bool write)
+{
+	u64 start_time_ns;
+	int ret;
+
+	trace_start_locking(mm, write);
+	start_time_ns = sched_clock();
+	ret = lock(&mm->mmap_lock);
+	trace_acquire_returned(mm, start_time_ns, write, ret == 0);
+	return ret;
+}
+
+static inline void unlock_impl(struct mm_struct *mm,
+			       void (*unlock)(struct rw_semaphore *),
+			       bool write)
+{
+	unlock(&mm->mmap_lock);
+	trace_released(mm, write);
+}
+
+void mmap_init_lock(struct mm_struct *mm)
+{
+	init_rwsem(&mm->mmap_lock);
+}
+
+void mmap_write_lock(struct mm_struct *mm)
+{
+	lock_impl(mm, down_write, true);
+}
+EXPORT_SYMBOL(mmap_write_lock);
+
+void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
+{
+	u64 start_time_ns;
+
+	trace_start_locking(mm, true);
+	start_time_ns = sched_clock();
+	down_write_nested(&mm->mmap_lock, subclass);
+	trace_acquire_returned(mm, start_time_ns, true, true);
+}
+EXPORT_SYMBOL(mmap_write_lock_nested);
+
+int mmap_write_lock_killable(struct mm_struct *mm)
+{
+	return lock_return_impl(mm, down_write_killable, true);
+}
+EXPORT_SYMBOL(mmap_write_lock_killable);
+
+bool mmap_write_trylock(struct mm_struct *mm)
+{
+	return trylock_impl(mm, down_write_trylock, true);
+}
+EXPORT_SYMBOL(mmap_write_trylock);
+
+void mmap_write_unlock(struct mm_struct *mm)
+{
+	unlock_impl(mm, up_write, true);
+}
+EXPORT_SYMBOL(mmap_write_unlock);
+
+void mmap_write_downgrade(struct mm_struct *mm)
+{
+	downgrade_write(&mm->mmap_lock);
+	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, false, true);
+}
+EXPORT_SYMBOL(mmap_write_downgrade);
+
+void mmap_read_lock(struct mm_struct *mm)
+{
+	lock_impl(mm, down_read, false);
+}
+EXPORT_SYMBOL(mmap_read_lock);
+
+int mmap_read_lock_killable(struct mm_struct *mm)
+{
+	return lock_return_impl(mm, down_read_killable, false);
+}
+EXPORT_SYMBOL(mmap_read_lock_killable);
+
+bool mmap_read_trylock(struct mm_struct *mm)
+{
+	return trylock_impl(mm, down_read_trylock, false);
+}
+EXPORT_SYMBOL(mmap_read_trylock);
+
+void mmap_read_unlock(struct mm_struct *mm)
+{
+	unlock_impl(mm, up_read, false);
+}
+EXPORT_SYMBOL(mmap_read_unlock);
+
+bool mmap_read_trylock_non_owner(struct mm_struct *mm)
+{
+	if (mmap_read_trylock(mm)) {
+		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
+		trace_released(mm, false);
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(mmap_read_trylock_non_owner);
+
+void mmap_read_unlock_non_owner(struct mm_struct *mm)
+{
+	up_read_non_owner(&mm->mmap_lock);
+	trace_released(mm, false);
+}
+EXPORT_SYMBOL(mmap_read_unlock_non_owner);
+
+void mmap_assert_locked(struct mm_struct *mm)
+{
+	lockdep_assert_held(&mm->mmap_lock);
+	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+}
+EXPORT_SYMBOL(mmap_assert_locked);
+
+void mmap_assert_write_locked(struct mm_struct *mm)
+{
+	lockdep_assert_held_write(&mm->mmap_lock);
+	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+}
+EXPORT_SYMBOL(mmap_assert_write_locked);
-- 
2.28.0.618.gf4bc123cb7-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-17 18:13 [PATCH] mmap_lock: add tracepoints around lock acquisition Axel Rasmussen
@ 2020-09-17 19:42 ` Steven Rostedt
  2020-09-18 20:26   ` Axel Rasmussen
  2020-09-21  4:57 ` Yafang Shao
  1 sibling, 1 reply; 14+ messages in thread
From: Steven Rostedt @ 2020-09-17 19:42 UTC (permalink / raw)
  To: Axel Rasmussen
  Cc: Ingo Molnar, Andrew Morton, Vlastimil Babka, Michel Lespinasse,
	Daniel Jordan, Davidlohr Bueso, Yafang Shao, linux-kernel,
	linux-mm

On Thu, 17 Sep 2020 11:13:47 -0700
Axel Rasmussen <axelrasmussen@google.com> wrote:

> +/*
> + * Trace calls must be in a separate file, as otherwise there's a circuclar
> + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> + */
> +
> +static void trace_start_locking(struct mm_struct *mm, bool write)

Please don't use "trace_" for functions, as that should be reserved for the
actual tracepoint functions. Please use "do_trace_" or whatever so there's
no confusion about this being a tracepoint, even if it's just a function
that calls the tracepoint.

> +{
> +	TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> +}
> +
> +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> +				   bool write, bool success)
> +{
> +	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> +			      sched_clock() - start_time_ns, write, success);
> +}
> +
> +static void trace_released(struct mm_struct *mm, bool write)
> +{
> +	TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> +}
> +

> +static inline void lock_impl(struct mm_struct *mm,
> +			     void (*lock)(struct rw_semaphore *), bool write)
> +{
> +	u64 start_time_ns;
> +
> +	trace_start_locking(mm, write);
> +	start_time_ns = sched_clock();
> +	lock(&mm->mmap_lock);
> +	trace_acquire_returned(mm, start_time_ns, write, true);
> +}
> +

Why record the start time and pass it in for return, when this can be done
by simply recording the start and return and then using the timestamps of
the trace events to calculate the duration, offline or as synthetic events:


 # cd /sys/kernel/tracing/
 # echo 'duration u64 time' > synthetic_events
 # echo 'hist:keys=common_pid:ts0=common_timestamp.usecs" > events/mmap_lock/mmap_lock_start_locking/trigger
 # echo 'hist:keys=common_pid:dur=common_timestamp.usecs-$ts0:onmatch(mmap_lock.mmap_lock_start_locking).trace(duration,$dur)" > events/mmap_lock/mmap_lock_acquire_returned/trigger
 # echo 1 > events/synthetic/duration/enable
 # cat trace
# tracer: nop
#
# entries-in-buffer/entries-written: 148/148   #P:8
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
            bash-1613  [007] ...3  3186.431687: duration: time=3
            bash-1613  [007] ...3  3186.431722: duration: time=2
            bash-1613  [007] ...3  3186.431772: duration: time=2
            bash-1613  [001] ...3  3188.372001: duration: time=6
            bash-1613  [001] ...3  3188.372324: duration: time=6
            bash-1613  [001] ...3  3188.372332: duration: time=4
            bash-1613  [001] ...3  3188.373557: duration: time=5
            bash-1613  [001] ...3  3188.373595: duration: time=3
             cat-1868  [002] ...3  3188.373608: duration: time=8
            bash-1613  [001] ...3  3188.373613: duration: time=4
            bash-1613  [001] ...3  3188.373635: duration: time=3
             cat-1868  [002] ...3  3188.373646: duration: time=4
            bash-1613  [001] ...3  3188.373652: duration: time=3
            bash-1613  [001] ...3  3188.373669: duration: time=3

 # echo 'hist:keys=time' > events/synthetic/duration/trigger
 # cat events/synthetic/duration/hist
# event histogram
#
# trigger info: hist:keys=time:vals=hitcount:sort=hitcount:size=2048 [active]
#

{ time:        114 } hitcount:          1
{ time:         15 } hitcount:          1
{ time:         11 } hitcount:          1
{ time:         21 } hitcount:          1
{ time:         10 } hitcount:          1
{ time:         46 } hitcount:          1
{ time:         29 } hitcount:          1
{ time:         13 } hitcount:          2
{ time:         16 } hitcount:          3
{ time:          9 } hitcount:          3
{ time:          8 } hitcount:          3
{ time:          7 } hitcount:          8
{ time:          6 } hitcount:         10
{ time:          5 } hitcount:         28
{ time:          4 } hitcount:        121
{ time:          1 } hitcount:        523
{ time:          3 } hitcount:        581
{ time:          2 } hitcount:        882

Totals:
    Hits: 2171
    Entries: 18
    Dropped: 0

And with this I could do a bunch of things like stack trace on max hits and
other features that the tracing histograms give us.

-- Steve

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-17 19:42 ` Steven Rostedt
@ 2020-09-18 20:26   ` Axel Rasmussen
  2020-09-18 20:41     ` Steven Rostedt
  0 siblings, 1 reply; 14+ messages in thread
From: Axel Rasmussen @ 2020-09-18 20:26 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Vlastimil Babka, Michel Lespinasse,
	Daniel Jordan, Davidlohr Bueso, Yafang Shao, LKML, Linux MM

On Thu, Sep 17, 2020 at 12:43 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Thu, 17 Sep 2020 11:13:47 -0700
> Axel Rasmussen <axelrasmussen@google.com> wrote:
>
> > +/*
> > + * Trace calls must be in a separate file, as otherwise there's a circuclar
> > + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> > + */
> > +
> > +static void trace_start_locking(struct mm_struct *mm, bool write)
>
> Please don't use "trace_" for functions, as that should be reserved for the
> actual tracepoint functions. Please use "do_trace_" or whatever so there's
> no confusion about this being a tracepoint, even if it's just a function
> that calls the tracepoint.

Done; I'll send a v2 with this change.

>
> > +{
> > +     TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > +}
> > +
> > +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> > +                                bool write, bool success)
> > +{
> > +     TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > +                           sched_clock() - start_time_ns, write, success);
> > +}
> > +
> > +static void trace_released(struct mm_struct *mm, bool write)
> > +{
> > +     TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > +}
> > +
>
> > +static inline void lock_impl(struct mm_struct *mm,
> > +                          void (*lock)(struct rw_semaphore *), bool write)
> > +{
> > +     u64 start_time_ns;
> > +
> > +     trace_start_locking(mm, write);
> > +     start_time_ns = sched_clock();
> > +     lock(&mm->mmap_lock);
> > +     trace_acquire_returned(mm, start_time_ns, write, true);
> > +}
> > +
>
> Why record the start time and pass it in for return, when this can be done
> by simply recording the start and return and then using the timestamps of
> the trace events to calculate the duration, offline or as synthetic events:

First, thanks for the detailed feedback! As a newbie this is very helpful. :)

I agree in principle, and I almost have a working version as you
suggest, but I can't see a way to get string fields working.

I believe in trace event headers the typical way to define a string
field  is as a "const char *", with the __string, __assign_str, and
__get_str helpers. But, from reading trace_events_synth.c, this isn't
really supported, in that it only supports "char []". But, the hist
trigger code just does a strcmp() of the type string, it doesn't do
any type conversion, so it considers these types incompatible:

After this:
# echo 'mmap_lock_latency u64 time; char memcg_path[256]' >
/sys/kernel/tracing/synthetic_events

Trying to setup the hist trigger gives (the ^ points to the beginning
of keys=>m<emcg_path ... not sure the formatting will be preserved
properly in e-mail):
# cat /sys/kernel/tracing/error_log
[   15.823725] hist:mmap_lock:mmap_lock_acquire_returned: error: Param
type doesn't match synthetic event field type
  Command: hist:keys=memcg_path:latency=common_timestamp.usecs-$ts0:onmatch(mmap_lock.mmap_lock_start_locking).mmap_lock_latency($latency,memcg_path)
                     ^

I tried grepping "char [^\[]+\[" in include/trace/events/, and it
seems nobody is defining fixed-length string fields like that, so I
think that's the wrong solution. I checked the docs about defining
variables (https://www.kernel.org/doc/html/v5.2/trace/histogram.html)
and it doesn't support anything complex like a cast, just - and +.

Any advice?



>
>
>  # cd /sys/kernel/tracing/
>  # echo 'duration u64 time' > synthetic_events
>  # echo 'hist:keys=common_pid:ts0=common_timestamp.usecs" > events/mmap_lock/mmap_lock_start_locking/trigger
>  # echo 'hist:keys=common_pid:dur=common_timestamp.usecs-$ts0:onmatch(mmap_lock.mmap_lock_start_locking).trace(duration,$dur)" > events/mmap_lock/mmap_lock_acquire_returned/trigger
>  # echo 1 > events/synthetic/duration/enable
>  # cat trace
> # tracer: nop
> #
> # entries-in-buffer/entries-written: 148/148   #P:8
> #
> #                              _-----=> irqs-off
> #                             / _----=> need-resched
> #                            | / _---=> hardirq/softirq
> #                            || / _--=> preempt-depth
> #                            ||| /     delay
> #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
> #              | |       |   ||||       |         |
>             bash-1613  [007] ...3  3186.431687: duration: time=3
>             bash-1613  [007] ...3  3186.431722: duration: time=2
>             bash-1613  [007] ...3  3186.431772: duration: time=2
>             bash-1613  [001] ...3  3188.372001: duration: time=6
>             bash-1613  [001] ...3  3188.372324: duration: time=6
>             bash-1613  [001] ...3  3188.372332: duration: time=4
>             bash-1613  [001] ...3  3188.373557: duration: time=5
>             bash-1613  [001] ...3  3188.373595: duration: time=3
>              cat-1868  [002] ...3  3188.373608: duration: time=8
>             bash-1613  [001] ...3  3188.373613: duration: time=4
>             bash-1613  [001] ...3  3188.373635: duration: time=3
>              cat-1868  [002] ...3  3188.373646: duration: time=4
>             bash-1613  [001] ...3  3188.373652: duration: time=3
>             bash-1613  [001] ...3  3188.373669: duration: time=3
>
>  # echo 'hist:keys=time' > events/synthetic/duration/trigger
>  # cat events/synthetic/duration/hist
> # event histogram
> #
> # trigger info: hist:keys=time:vals=hitcount:sort=hitcount:size=2048 [active]
> #
>
> { time:        114 } hitcount:          1
> { time:         15 } hitcount:          1
> { time:         11 } hitcount:          1
> { time:         21 } hitcount:          1
> { time:         10 } hitcount:          1
> { time:         46 } hitcount:          1
> { time:         29 } hitcount:          1
> { time:         13 } hitcount:          2
> { time:         16 } hitcount:          3
> { time:          9 } hitcount:          3
> { time:          8 } hitcount:          3
> { time:          7 } hitcount:          8
> { time:          6 } hitcount:         10
> { time:          5 } hitcount:         28
> { time:          4 } hitcount:        121
> { time:          1 } hitcount:        523
> { time:          3 } hitcount:        581
> { time:          2 } hitcount:        882
>
> Totals:
>     Hits: 2171
>     Entries: 18
>     Dropped: 0
>
> And with this I could do a bunch of things like stack trace on max hits and
> other features that the tracing histograms give us.
>
> -- Steve

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-18 20:26   ` Axel Rasmussen
@ 2020-09-18 20:41     ` Steven Rostedt
  2020-09-23 22:07       ` Tom Zanussi
  0 siblings, 1 reply; 14+ messages in thread
From: Steven Rostedt @ 2020-09-18 20:41 UTC (permalink / raw)
  To: Axel Rasmussen, Tom Zanussi
  Cc: Ingo Molnar, Andrew Morton, Vlastimil Babka, Michel Lespinasse,
	Daniel Jordan, Davidlohr Bueso, Yafang Shao, LKML, Linux MM

On Fri, 18 Sep 2020 13:26:37 -0700
Axel Rasmussen <axelrasmussen@google.com> wrote:

> On Thu, Sep 17, 2020 at 12:43 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > On Thu, 17 Sep 2020 11:13:47 -0700
> > Axel Rasmussen <axelrasmussen@google.com> wrote:
> >  
> > > +/*
> > > + * Trace calls must be in a separate file, as otherwise there's a circuclar
> > > + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> > > + */
> > > +
> > > +static void trace_start_locking(struct mm_struct *mm, bool write)  
> >
> > Please don't use "trace_" for functions, as that should be reserved for the
> > actual tracepoint functions. Please use "do_trace_" or whatever so there's
> > no confusion about this being a tracepoint, even if it's just a function
> > that calls the tracepoint.  
> 
> Done; I'll send a v2 with this change.
> 
> >  
> > > +{
> > > +     TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > > +}
> > > +
> > > +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> > > +                                bool write, bool success)
> > > +{
> > > +     TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > > +                           sched_clock() - start_time_ns, write, success);
> > > +}
> > > +
> > > +static void trace_released(struct mm_struct *mm, bool write)
> > > +{
> > > +     TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > > +}
> > > +  
> >  
> > > +static inline void lock_impl(struct mm_struct *mm,
> > > +                          void (*lock)(struct rw_semaphore *), bool write)
> > > +{
> > > +     u64 start_time_ns;
> > > +
> > > +     trace_start_locking(mm, write);
> > > +     start_time_ns = sched_clock();
> > > +     lock(&mm->mmap_lock);
> > > +     trace_acquire_returned(mm, start_time_ns, write, true);
> > > +}
> > > +  
> >
> > Why record the start time and pass it in for return, when this can be done
> > by simply recording the start and return and then using the timestamps of
> > the trace events to calculate the duration, offline or as synthetic events:  
> 
> First, thanks for the detailed feedback! As a newbie this is very helpful. :)
> 
> I agree in principle, and I almost have a working version as you
> suggest, but I can't see a way to get string fields working.
> 
> I believe in trace event headers the typical way to define a string
> field  is as a "const char *", with the __string, __assign_str, and
> __get_str helpers. But, from reading trace_events_synth.c, this isn't
> really supported, in that it only supports "char []". But, the hist
> trigger code just does a strcmp() of the type string, it doesn't do
> any type conversion, so it considers these types incompatible:
> 
> After this:
> # echo 'mmap_lock_latency u64 time; char memcg_path[256]' >
> /sys/kernel/tracing/synthetic_events
> 
> Trying to setup the hist trigger gives (the ^ points to the beginning
> of keys=>m<emcg_path ... not sure the formatting will be preserved
> properly in e-mail):
> # cat /sys/kernel/tracing/error_log
> [   15.823725] hist:mmap_lock:mmap_lock_acquire_returned: error: Param
> type doesn't match synthetic event field type
>   Command: hist:keys=memcg_path:latency=common_timestamp.usecs-$ts0:onmatch(mmap_lock.mmap_lock_start_locking).mmap_lock_latency($latency,memcg_path)
>                      ^
> 
> I tried grepping "char [^\[]+\[" in include/trace/events/, and it
> seems nobody is defining fixed-length string fields like that, so I
> think that's the wrong solution. I checked the docs about defining
> variables (https://www.kernel.org/doc/html/v5.2/trace/histogram.html)
> and it doesn't support anything complex like a cast, just - and +.
> 
> Any advice?

Tom,

Do you think we could make histograms support the above somehow?

-- Steve

> 
> 
> 
> >
> >
> >  # cd /sys/kernel/tracing/
> >  # echo 'duration u64 time' > synthetic_events
> >  # echo 'hist:keys=common_pid:ts0=common_timestamp.usecs" > events/mmap_lock/mmap_lock_start_locking/trigger
> >  # echo 'hist:keys=common_pid:dur=common_timestamp.usecs-$ts0:onmatch(mmap_lock.mmap_lock_start_locking).trace(duration,$dur)" > events/mmap_lock/mmap_lock_acquire_returned/trigger
> >  # echo 1 > events/synthetic/duration/enable
> >  # cat trace
> > # tracer: nop
> > #
> > # entries-in-buffer/entries-written: 148/148   #P:8
> > #
> > #                              _-----=> irqs-off
> > #                             / _----=> need-resched
> > #                            | / _---=> hardirq/softirq
> > #                            || / _--=> preempt-depth
> > #                            ||| /     delay
> > #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
> > #              | |       |   ||||       |         |
> >             bash-1613  [007] ...3  3186.431687: duration: time=3
> >             bash-1613  [007] ...3  3186.431722: duration: time=2
> >             bash-1613  [007] ...3  3186.431772: duration: time=2
> >             bash-1613  [001] ...3  3188.372001: duration: time=6
> >             bash-1613  [001] ...3  3188.372324: duration: time=6
> >             bash-1613  [001] ...3  3188.372332: duration: time=4
> >             bash-1613  [001] ...3  3188.373557: duration: time=5
> >             bash-1613  [001] ...3  3188.373595: duration: time=3
> >              cat-1868  [002] ...3  3188.373608: duration: time=8
> >             bash-1613  [001] ...3  3188.373613: duration: time=4
> >             bash-1613  [001] ...3  3188.373635: duration: time=3
> >              cat-1868  [002] ...3  3188.373646: duration: time=4
> >             bash-1613  [001] ...3  3188.373652: duration: time=3
> >             bash-1613  [001] ...3  3188.373669: duration: time=3
> >
> >  # echo 'hist:keys=time' > events/synthetic/duration/trigger
> >  # cat events/synthetic/duration/hist
> > # event histogram
> > #
> > # trigger info: hist:keys=time:vals=hitcount:sort=hitcount:size=2048 [active]
> > #
> >
> > { time:        114 } hitcount:          1
> > { time:         15 } hitcount:          1
> > { time:         11 } hitcount:          1
> > { time:         21 } hitcount:          1
> > { time:         10 } hitcount:          1
> > { time:         46 } hitcount:          1
> > { time:         29 } hitcount:          1
> > { time:         13 } hitcount:          2
> > { time:         16 } hitcount:          3
> > { time:          9 } hitcount:          3
> > { time:          8 } hitcount:          3
> > { time:          7 } hitcount:          8
> > { time:          6 } hitcount:         10
> > { time:          5 } hitcount:         28
> > { time:          4 } hitcount:        121
> > { time:          1 } hitcount:        523
> > { time:          3 } hitcount:        581
> > { time:          2 } hitcount:        882
> >
> > Totals:
> >     Hits: 2171
> >     Entries: 18
> >     Dropped: 0
> >
> > And with this I could do a bunch of things like stack trace on max hits and
> > other features that the tracing histograms give us.
> >
> > -- Steve  


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-17 18:13 [PATCH] mmap_lock: add tracepoints around lock acquisition Axel Rasmussen
  2020-09-17 19:42 ` Steven Rostedt
@ 2020-09-21  4:57 ` Yafang Shao
  2020-09-21 16:53   ` Axel Rasmussen
  1 sibling, 1 reply; 14+ messages in thread
From: Yafang Shao @ 2020-09-21  4:57 UTC (permalink / raw)
  To: Axel Rasmussen
  Cc: Steven Rostedt, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Fri, Sep 18, 2020 at 2:13 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
>
> The goal of these tracepoints is to be able to debug lock contention
> issues. This lock is acquired on most (all?) mmap / munmap / page fault
> operations, so a multi-threaded process which does a lot of these can
> experience significant contention.
>
> We trace just before we start acquisition, when the acquisition returns
> (whether it succeeded or not), and when the lock is released (or
> downgraded). The events are broken out by lock type (read / write).
>
> The events are also broken out by memcg path. For container-based
> workloads, users often think of several processes in a memcg as a single
> logical "task", so collecting statistics at this level is useful.
>
> These events *do not* include latency bucket information, which means
> for a proper latency histogram users will need to use BPF instead of
> event histograms. The benefit we get from this is simpler code.
>
> This patch is a no-op if the Kconfig option is not enabled. If it is,
> tracepoints are still disabled by default (configurable at runtime);
> the only fixed cost here is un-inlining a few functions. As best as
> I've been able to measure, the overhead this introduces is a small
> fraction of 1%. Actually hooking up the tracepoints to BPF introduces
> additional overhead, depending on exactly what the BPF program is
> collecting.

Are there any methods to avoid un-inlining these wrappers ?

For example,
// include/linux/mmap_lock.h

void mmap_lock_start_trace_wrapper();
void mmap_lock_acquire_trace_wrapper();

static inline void mmap_write_lock(struct mm_struct *mm)
{
    mmap_lock_start_trace_wrapper();
    down_write(&mm->mmap_lock);
    mmap_lock_acquire_trace_wrapper();
}

// mm/mmap_lock.c
void mmap_lock_start_trace_wrapper()
{
    trace_mmap_lock_start();
}

void mmap_lock_start_trace_wrapper()
{
    trace_mmap_lock_acquired();
}



> ---
>  include/linux/mmap_lock.h        |  28 +++-
>  include/trace/events/mmap_lock.h |  73 ++++++++++
>  mm/Kconfig                       |  17 +++
>  mm/Makefile                      |   1 +
>  mm/mmap_lock.c                   | 224 +++++++++++++++++++++++++++++++
>  5 files changed, 342 insertions(+), 1 deletion(-)
>  create mode 100644 include/trace/events/mmap_lock.h
>  create mode 100644 mm/mmap_lock.c
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 0707671851a8..d12aa2ff6c05 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -1,11 +1,35 @@
>  #ifndef _LINUX_MMAP_LOCK_H
>  #define _LINUX_MMAP_LOCK_H
>
> +#include <linux/lockdep.h>
> +#include <linux/mm_types.h>
>  #include <linux/mmdebug.h>
> +#include <linux/rwsem.h>
> +#include <linux/types.h>
>
>  #define MMAP_LOCK_INITIALIZER(name) \
>         .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
>
> +#ifdef CONFIG_MMAP_LOCK_STATS
> +
> +void mmap_init_lock(struct mm_struct *mm);
> +void mmap_write_lock(struct mm_struct *mm);
> +void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
> +int mmap_write_lock_killable(struct mm_struct *mm);
> +bool mmap_write_trylock(struct mm_struct *mm);
> +void mmap_write_unlock(struct mm_struct *mm);
> +void mmap_write_downgrade(struct mm_struct *mm);
> +void mmap_read_lock(struct mm_struct *mm);
> +int mmap_read_lock_killable(struct mm_struct *mm);
> +bool mmap_read_trylock(struct mm_struct *mm);
> +void mmap_read_unlock(struct mm_struct *mm);
> +bool mmap_read_trylock_non_owner(struct mm_struct *mm);
> +void mmap_read_unlock_non_owner(struct mm_struct *mm);
> +void mmap_assert_locked(struct mm_struct *mm);
> +void mmap_assert_write_locked(struct mm_struct *mm);
> +
> +#else /* !CONFIG_MMAP_LOCK_STATS */
> +
>  static inline void mmap_init_lock(struct mm_struct *mm)
>  {
>         init_rwsem(&mm->mmap_lock);
> @@ -63,7 +87,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
>
>  static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
>  {
> -       if (down_read_trylock(&mm->mmap_lock)) {
> +       if (mmap_read_trylock(mm)) {
>                 rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
>                 return true;
>         }
> @@ -87,4 +111,6 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
>         VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
>  }
>
> +#endif /* CONFIG_MMAP_LOCK_STATS */
> +
>  #endif /* _LINUX_MMAP_LOCK_H */
> diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
> new file mode 100644
> index 000000000000..549c662e6ed8
> --- /dev/null
> +++ b/include/trace/events/mmap_lock.h
> @@ -0,0 +1,73 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM mmap_lock
> +
> +#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_MMAP_LOCK_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/types.h>
> +
> +struct mm_struct;
> +
> +DECLARE_EVENT_CLASS(
> +       mmap_lock_template,
> +
> +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> +               bool write, bool success),
> +
> +       TP_ARGS(mm, memcg_path, duration, write, success),
> +
> +       TP_STRUCT__entry(
> +               __field(struct mm_struct *, mm)
> +               __string(memcg_path, memcg_path)
> +               __field(u64, duration)
> +               __field(bool, write)
> +               __field(bool, success)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->mm = mm;
> +               __assign_str(memcg_path, memcg_path);
> +               __entry->duration = duration;
> +               __entry->write = write;
> +               __entry->success = success;
> +       ),
> +
> +       TP_printk(
> +               "mm=%p memcg_path=%s duration=%llu write=%s success=%s\n",
> +               __entry->mm,
> +               __get_str(memcg_path),
> +               __entry->duration,
> +               __entry->write ? "true" : "false",
> +               __entry->success ? "true" : "false")
> +       );
> +
> +DEFINE_EVENT(mmap_lock_template, mmap_lock_start_locking,
> +
> +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> +               bool write, bool success),
> +
> +       TP_ARGS(mm, memcg_path, duration, write, success)
> +);
> +
> +DEFINE_EVENT(mmap_lock_template, mmap_lock_acquire_returned,
> +
> +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> +               bool write, bool success),
> +
> +       TP_ARGS(mm, memcg_path, duration, write, success)
> +);
> +
> +DEFINE_EVENT(mmap_lock_template, mmap_lock_released,
> +
> +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> +               bool write, bool success),
> +
> +       TP_ARGS(mm, memcg_path, duration, write, success)
> +);
> +
> +#endif /* _TRACE_MMAP_LOCK_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 6c974888f86f..b602df8bcee0 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -818,6 +818,23 @@ config DEVICE_PRIVATE
>  config FRAME_VECTOR
>         bool
>
> +config MMAP_LOCK_STATS
> +       bool "mmap_lock stats / instrumentation"
> +       select HISTOGRAM
> +       default n
> +
> +       help
> +         Enables tracepoints around mmap_lock (start aquiring, acquire
> +         returned, and released), which are off by default + controlled at
> +         runtime. These can be used for deeper debugging of contention
> +         issues, via e.g. BPF.
> +
> +         This option has a small (small fraction of 1%) fixed overhead
> +         even if tracepoints aren't actually in use at runtime, since it
> +         requires un-inlining some functions.
> +
> +         If unsure, say "n".
> +
>  config ARCH_USES_HIGH_VMA_FLAGS
>         bool
>  config ARCH_HAS_PKEYS
> diff --git a/mm/Makefile b/mm/Makefile
> index d5649f1c12c0..eb6ed855a002 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -121,3 +121,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
>  obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
>  obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
>  obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
> +obj-$(CONFIG_MMAP_LOCK_STATS) += mmap_lock.o
> diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> new file mode 100644
> index 000000000000..1624f90164c0
> --- /dev/null
> +++ b/mm/mmap_lock.c
> @@ -0,0 +1,224 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/mmap_lock.h>
> +
> +#include <linux/cgroup.h>
> +#include <linux/memcontrol.h>
> +#include <linux/mmap_lock.h>
> +#include <linux/percpu.h>
> +#include <linux/smp.h>
> +#include <linux/trace_events.h>
> +#include <linux/sched/clock.h>
> +
> +#ifdef CONFIG_MEMCG
> +
> +DEFINE_PER_CPU(char[MAX_FILTER_STR_VAL], trace_memcg_path);
> +
> +/*
> + * Write the given mm_struct's memcg path to a percpu buffer, and return a
> + * pointer to it. If the path cannot be determined, the buffer will contain the
> + * empty string.
> + *
> + * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
> + * disabled by the caller before calling us, and re-enabled only after the
> + * caller is done with the pointer.
> + */
> +static const char *get_mm_memcg_path(struct mm_struct *mm)
> +{
> +       struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
> +
> +       if (memcg != NULL && likely(memcg->css.cgroup != NULL)) {
> +               char *buf = this_cpu_ptr(trace_memcg_path);
> +
> +               cgroup_path(memcg->css.cgroup, buf, MAX_FILTER_STR_VAL);
> +               return buf;
> +       }
> +       return "";
> +}
> +
> +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> +       do {                                                                   \
> +               if (trace_mmap_lock_##type##_enabled()) {                      \
> +                       get_cpu();                                             \
> +                       trace_mmap_lock_##type(mm, get_mm_memcg_path(mm),      \
> +                                              ##__VA_ARGS__);                 \
> +                       put_cpu();                                             \
> +               }                                                              \
> +       } while (0)
> +
> +#else /* !CONFIG_MEMCG */
> +
> +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> +       trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
> +
> +#endif /* CONFIG_MEMCG */
> +
> +/*
> + * Trace calls must be in a separate file, as otherwise there's a circuclar
> + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> + */
> +
> +static void trace_start_locking(struct mm_struct *mm, bool write)
> +{
> +       TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> +}
> +
> +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> +                                  bool write, bool success)
> +{
> +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> +                             sched_clock() - start_time_ns, write, success);
> +}
> +
> +static void trace_released(struct mm_struct *mm, bool write)
> +{
> +       TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> +}
> +
> +static bool trylock_impl(struct mm_struct *mm,
> +                        int (*trylock)(struct rw_semaphore *), bool write)
> +{
> +       bool ret;
> +
> +       trace_start_locking(mm, write);
> +       ret = trylock(&mm->mmap_lock) != 0;
> +       /* Avoid calling sched_clock() for trylocks; assume duration = 0. */
> +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, write, ret);
> +       return ret;
> +}
> +
> +static inline void lock_impl(struct mm_struct *mm,
> +                            void (*lock)(struct rw_semaphore *), bool write)
> +{
> +       u64 start_time_ns;
> +
> +       trace_start_locking(mm, write);
> +       start_time_ns = sched_clock();
> +       lock(&mm->mmap_lock);
> +       trace_acquire_returned(mm, start_time_ns, write, true);
> +}
> +
> +static inline int lock_return_impl(struct mm_struct *mm,
> +                                  int (*lock)(struct rw_semaphore *),
> +                                  bool write)
> +{
> +       u64 start_time_ns;
> +       int ret;
> +
> +       trace_start_locking(mm, write);
> +       start_time_ns = sched_clock();
> +       ret = lock(&mm->mmap_lock);
> +       trace_acquire_returned(mm, start_time_ns, write, ret == 0);
> +       return ret;
> +}
> +
> +static inline void unlock_impl(struct mm_struct *mm,
> +                              void (*unlock)(struct rw_semaphore *),
> +                              bool write)
> +{
> +       unlock(&mm->mmap_lock);
> +       trace_released(mm, write);
> +}
> +
> +void mmap_init_lock(struct mm_struct *mm)
> +{
> +       init_rwsem(&mm->mmap_lock);
> +}
> +
> +void mmap_write_lock(struct mm_struct *mm)
> +{
> +       lock_impl(mm, down_write, true);
> +}
> +EXPORT_SYMBOL(mmap_write_lock);
> +
> +void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
> +{
> +       u64 start_time_ns;
> +
> +       trace_start_locking(mm, true);
> +       start_time_ns = sched_clock();
> +       down_write_nested(&mm->mmap_lock, subclass);
> +       trace_acquire_returned(mm, start_time_ns, true, true);
> +}
> +EXPORT_SYMBOL(mmap_write_lock_nested);
> +
> +int mmap_write_lock_killable(struct mm_struct *mm)
> +{
> +       return lock_return_impl(mm, down_write_killable, true);
> +}
> +EXPORT_SYMBOL(mmap_write_lock_killable);
> +
> +bool mmap_write_trylock(struct mm_struct *mm)
> +{
> +       return trylock_impl(mm, down_write_trylock, true);
> +}
> +EXPORT_SYMBOL(mmap_write_trylock);
> +
> +void mmap_write_unlock(struct mm_struct *mm)
> +{
> +       unlock_impl(mm, up_write, true);
> +}
> +EXPORT_SYMBOL(mmap_write_unlock);
> +
> +void mmap_write_downgrade(struct mm_struct *mm)
> +{
> +       downgrade_write(&mm->mmap_lock);
> +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, false, true);
> +}
> +EXPORT_SYMBOL(mmap_write_downgrade);
> +
> +void mmap_read_lock(struct mm_struct *mm)
> +{
> +       lock_impl(mm, down_read, false);
> +}
> +EXPORT_SYMBOL(mmap_read_lock);
> +
> +int mmap_read_lock_killable(struct mm_struct *mm)
> +{
> +       return lock_return_impl(mm, down_read_killable, false);
> +}
> +EXPORT_SYMBOL(mmap_read_lock_killable);
> +
> +bool mmap_read_trylock(struct mm_struct *mm)
> +{
> +       return trylock_impl(mm, down_read_trylock, false);
> +}
> +EXPORT_SYMBOL(mmap_read_trylock);
> +
> +void mmap_read_unlock(struct mm_struct *mm)
> +{
> +       unlock_impl(mm, up_read, false);
> +}
> +EXPORT_SYMBOL(mmap_read_unlock);
> +
> +bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> +{
> +       if (mmap_read_trylock(mm)) {
> +               rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> +               trace_released(mm, false);
> +               return true;
> +       }
> +       return false;
> +}
> +EXPORT_SYMBOL(mmap_read_trylock_non_owner);
> +
> +void mmap_read_unlock_non_owner(struct mm_struct *mm)
> +{
> +       up_read_non_owner(&mm->mmap_lock);
> +       trace_released(mm, false);
> +}
> +EXPORT_SYMBOL(mmap_read_unlock_non_owner);
> +
> +void mmap_assert_locked(struct mm_struct *mm)
> +{
> +       lockdep_assert_held(&mm->mmap_lock);
> +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> +}
> +EXPORT_SYMBOL(mmap_assert_locked);
> +
> +void mmap_assert_write_locked(struct mm_struct *mm)
> +{
> +       lockdep_assert_held_write(&mm->mmap_lock);
> +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> +}
> +EXPORT_SYMBOL(mmap_assert_write_locked);
> --
> 2.28.0.618.gf4bc123cb7-goog
>


-- 
Thanks
Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-21  4:57 ` Yafang Shao
@ 2020-09-21 16:53   ` Axel Rasmussen
  2020-09-22  4:09     ` Yafang Shao
  0 siblings, 1 reply; 14+ messages in thread
From: Axel Rasmussen @ 2020-09-21 16:53 UTC (permalink / raw)
  To: Yafang Shao
  Cc: Steven Rostedt, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Sun, Sep 20, 2020 at 9:58 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Fri, Sep 18, 2020 at 2:13 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
> >
> > The goal of these tracepoints is to be able to debug lock contention
> > issues. This lock is acquired on most (all?) mmap / munmap / page fault
> > operations, so a multi-threaded process which does a lot of these can
> > experience significant contention.
> >
> > We trace just before we start acquisition, when the acquisition returns
> > (whether it succeeded or not), and when the lock is released (or
> > downgraded). The events are broken out by lock type (read / write).
> >
> > The events are also broken out by memcg path. For container-based
> > workloads, users often think of several processes in a memcg as a single
> > logical "task", so collecting statistics at this level is useful.
> >
> > These events *do not* include latency bucket information, which means
> > for a proper latency histogram users will need to use BPF instead of
> > event histograms. The benefit we get from this is simpler code.
> >
> > This patch is a no-op if the Kconfig option is not enabled. If it is,
> > tracepoints are still disabled by default (configurable at runtime);
> > the only fixed cost here is un-inlining a few functions. As best as
> > I've been able to measure, the overhead this introduces is a small
> > fraction of 1%. Actually hooking up the tracepoints to BPF introduces
> > additional overhead, depending on exactly what the BPF program is
> > collecting.
>
> Are there any methods to avoid un-inlining these wrappers ?
>
> For example,
> // include/linux/mmap_lock.h
>
> void mmap_lock_start_trace_wrapper();
> void mmap_lock_acquire_trace_wrapper();
>
> static inline void mmap_write_lock(struct mm_struct *mm)
> {
>     mmap_lock_start_trace_wrapper();
>     down_write(&mm->mmap_lock);
>     mmap_lock_acquire_trace_wrapper();
> }
>
> // mm/mmap_lock.c
> void mmap_lock_start_trace_wrapper()
> {
>     trace_mmap_lock_start();
> }
>
> void mmap_lock_start_trace_wrapper()
> {
>     trace_mmap_lock_acquired();
> }

We can do something like that, but I don't think it would end up being better.

At the end of the day, because the trace stuff cannot be in the
header, we have to add an extra function call one way or the other.
This would just move the call one step further down the call stack.
So, I don't think it would affect performance in the
CONFIG_MMAP_LOCK_STATS + tracepoints not enabled at runtime case.

Also the wrappers aren't quite so simple as this, they need some
parameters to work. (the struct mm_struct, whether it was a read or a
write lock, and whether or not the lock operation succeeded), so it
would mean adding more inlined code, which I think adds up to be a
nontrivial amount since these wrappers are called so often in the
kernel.

If you feel strongly, let me know and I can send a version as you
describe and we can compare the two.

>
>
>
> > ---
> >  include/linux/mmap_lock.h        |  28 +++-
> >  include/trace/events/mmap_lock.h |  73 ++++++++++
> >  mm/Kconfig                       |  17 +++
> >  mm/Makefile                      |   1 +
> >  mm/mmap_lock.c                   | 224 +++++++++++++++++++++++++++++++
> >  5 files changed, 342 insertions(+), 1 deletion(-)
> >  create mode 100644 include/trace/events/mmap_lock.h
> >  create mode 100644 mm/mmap_lock.c
> >
> > diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> > index 0707671851a8..d12aa2ff6c05 100644
> > --- a/include/linux/mmap_lock.h
> > +++ b/include/linux/mmap_lock.h
> > @@ -1,11 +1,35 @@
> >  #ifndef _LINUX_MMAP_LOCK_H
> >  #define _LINUX_MMAP_LOCK_H
> >
> > +#include <linux/lockdep.h>
> > +#include <linux/mm_types.h>
> >  #include <linux/mmdebug.h>
> > +#include <linux/rwsem.h>
> > +#include <linux/types.h>
> >
> >  #define MMAP_LOCK_INITIALIZER(name) \
> >         .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
> >
> > +#ifdef CONFIG_MMAP_LOCK_STATS
> > +
> > +void mmap_init_lock(struct mm_struct *mm);
> > +void mmap_write_lock(struct mm_struct *mm);
> > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
> > +int mmap_write_lock_killable(struct mm_struct *mm);
> > +bool mmap_write_trylock(struct mm_struct *mm);
> > +void mmap_write_unlock(struct mm_struct *mm);
> > +void mmap_write_downgrade(struct mm_struct *mm);
> > +void mmap_read_lock(struct mm_struct *mm);
> > +int mmap_read_lock_killable(struct mm_struct *mm);
> > +bool mmap_read_trylock(struct mm_struct *mm);
> > +void mmap_read_unlock(struct mm_struct *mm);
> > +bool mmap_read_trylock_non_owner(struct mm_struct *mm);
> > +void mmap_read_unlock_non_owner(struct mm_struct *mm);
> > +void mmap_assert_locked(struct mm_struct *mm);
> > +void mmap_assert_write_locked(struct mm_struct *mm);
> > +
> > +#else /* !CONFIG_MMAP_LOCK_STATS */
> > +
> >  static inline void mmap_init_lock(struct mm_struct *mm)
> >  {
> >         init_rwsem(&mm->mmap_lock);
> > @@ -63,7 +87,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
> >
> >  static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> >  {
> > -       if (down_read_trylock(&mm->mmap_lock)) {
> > +       if (mmap_read_trylock(mm)) {
> >                 rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> >                 return true;
> >         }
> > @@ -87,4 +111,6 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
> >         VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> >  }
> >
> > +#endif /* CONFIG_MMAP_LOCK_STATS */
> > +
> >  #endif /* _LINUX_MMAP_LOCK_H */
> > diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
> > new file mode 100644
> > index 000000000000..549c662e6ed8
> > --- /dev/null
> > +++ b/include/trace/events/mmap_lock.h
> > @@ -0,0 +1,73 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM mmap_lock
> > +
> > +#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_MMAP_LOCK_H
> > +
> > +#include <linux/tracepoint.h>
> > +#include <linux/types.h>
> > +
> > +struct mm_struct;
> > +
> > +DECLARE_EVENT_CLASS(
> > +       mmap_lock_template,
> > +
> > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > +               bool write, bool success),
> > +
> > +       TP_ARGS(mm, memcg_path, duration, write, success),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(struct mm_struct *, mm)
> > +               __string(memcg_path, memcg_path)
> > +               __field(u64, duration)
> > +               __field(bool, write)
> > +               __field(bool, success)
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->mm = mm;
> > +               __assign_str(memcg_path, memcg_path);
> > +               __entry->duration = duration;
> > +               __entry->write = write;
> > +               __entry->success = success;
> > +       ),
> > +
> > +       TP_printk(
> > +               "mm=%p memcg_path=%s duration=%llu write=%s success=%s\n",
> > +               __entry->mm,
> > +               __get_str(memcg_path),
> > +               __entry->duration,
> > +               __entry->write ? "true" : "false",
> > +               __entry->success ? "true" : "false")
> > +       );
> > +
> > +DEFINE_EVENT(mmap_lock_template, mmap_lock_start_locking,
> > +
> > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > +               bool write, bool success),
> > +
> > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > +);
> > +
> > +DEFINE_EVENT(mmap_lock_template, mmap_lock_acquire_returned,
> > +
> > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > +               bool write, bool success),
> > +
> > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > +);
> > +
> > +DEFINE_EVENT(mmap_lock_template, mmap_lock_released,
> > +
> > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > +               bool write, bool success),
> > +
> > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > +);
> > +
> > +#endif /* _TRACE_MMAP_LOCK_H */
> > +
> > +/* This part must be outside protection */
> > +#include <trace/define_trace.h>
> > diff --git a/mm/Kconfig b/mm/Kconfig
> > index 6c974888f86f..b602df8bcee0 100644
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -818,6 +818,23 @@ config DEVICE_PRIVATE
> >  config FRAME_VECTOR
> >         bool
> >
> > +config MMAP_LOCK_STATS
> > +       bool "mmap_lock stats / instrumentation"
> > +       select HISTOGRAM
> > +       default n
> > +
> > +       help
> > +         Enables tracepoints around mmap_lock (start aquiring, acquire
> > +         returned, and released), which are off by default + controlled at
> > +         runtime. These can be used for deeper debugging of contention
> > +         issues, via e.g. BPF.
> > +
> > +         This option has a small (small fraction of 1%) fixed overhead
> > +         even if tracepoints aren't actually in use at runtime, since it
> > +         requires un-inlining some functions.
> > +
> > +         If unsure, say "n".
> > +
> >  config ARCH_USES_HIGH_VMA_FLAGS
> >         bool
> >  config ARCH_HAS_PKEYS
> > diff --git a/mm/Makefile b/mm/Makefile
> > index d5649f1c12c0..eb6ed855a002 100644
> > --- a/mm/Makefile
> > +++ b/mm/Makefile
> > @@ -121,3 +121,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
> >  obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
> >  obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
> >  obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
> > +obj-$(CONFIG_MMAP_LOCK_STATS) += mmap_lock.o
> > diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> > new file mode 100644
> > index 000000000000..1624f90164c0
> > --- /dev/null
> > +++ b/mm/mmap_lock.c
> > @@ -0,0 +1,224 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/mmap_lock.h>
> > +
> > +#include <linux/cgroup.h>
> > +#include <linux/memcontrol.h>
> > +#include <linux/mmap_lock.h>
> > +#include <linux/percpu.h>
> > +#include <linux/smp.h>
> > +#include <linux/trace_events.h>
> > +#include <linux/sched/clock.h>
> > +
> > +#ifdef CONFIG_MEMCG
> > +
> > +DEFINE_PER_CPU(char[MAX_FILTER_STR_VAL], trace_memcg_path);
> > +
> > +/*
> > + * Write the given mm_struct's memcg path to a percpu buffer, and return a
> > + * pointer to it. If the path cannot be determined, the buffer will contain the
> > + * empty string.
> > + *
> > + * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
> > + * disabled by the caller before calling us, and re-enabled only after the
> > + * caller is done with the pointer.
> > + */
> > +static const char *get_mm_memcg_path(struct mm_struct *mm)
> > +{
> > +       struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
> > +
> > +       if (memcg != NULL && likely(memcg->css.cgroup != NULL)) {
> > +               char *buf = this_cpu_ptr(trace_memcg_path);
> > +
> > +               cgroup_path(memcg->css.cgroup, buf, MAX_FILTER_STR_VAL);
> > +               return buf;
> > +       }
> > +       return "";
> > +}
> > +
> > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > +       do {                                                                   \
> > +               if (trace_mmap_lock_##type##_enabled()) {                      \
> > +                       get_cpu();                                             \
> > +                       trace_mmap_lock_##type(mm, get_mm_memcg_path(mm),      \
> > +                                              ##__VA_ARGS__);                 \
> > +                       put_cpu();                                             \
> > +               }                                                              \
> > +       } while (0)
> > +
> > +#else /* !CONFIG_MEMCG */
> > +
> > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > +       trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
> > +
> > +#endif /* CONFIG_MEMCG */
> > +
> > +/*
> > + * Trace calls must be in a separate file, as otherwise there's a circuclar
> > + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> > + */
> > +
> > +static void trace_start_locking(struct mm_struct *mm, bool write)
> > +{
> > +       TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > +}
> > +
> > +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> > +                                  bool write, bool success)
> > +{
> > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > +                             sched_clock() - start_time_ns, write, success);
> > +}
> > +
> > +static void trace_released(struct mm_struct *mm, bool write)
> > +{
> > +       TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > +}
> > +
> > +static bool trylock_impl(struct mm_struct *mm,
> > +                        int (*trylock)(struct rw_semaphore *), bool write)
> > +{
> > +       bool ret;
> > +
> > +       trace_start_locking(mm, write);
> > +       ret = trylock(&mm->mmap_lock) != 0;
> > +       /* Avoid calling sched_clock() for trylocks; assume duration = 0. */
> > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, write, ret);
> > +       return ret;
> > +}
> > +
> > +static inline void lock_impl(struct mm_struct *mm,
> > +                            void (*lock)(struct rw_semaphore *), bool write)
> > +{
> > +       u64 start_time_ns;
> > +
> > +       trace_start_locking(mm, write);
> > +       start_time_ns = sched_clock();
> > +       lock(&mm->mmap_lock);
> > +       trace_acquire_returned(mm, start_time_ns, write, true);
> > +}
> > +
> > +static inline int lock_return_impl(struct mm_struct *mm,
> > +                                  int (*lock)(struct rw_semaphore *),
> > +                                  bool write)
> > +{
> > +       u64 start_time_ns;
> > +       int ret;
> > +
> > +       trace_start_locking(mm, write);
> > +       start_time_ns = sched_clock();
> > +       ret = lock(&mm->mmap_lock);
> > +       trace_acquire_returned(mm, start_time_ns, write, ret == 0);
> > +       return ret;
> > +}
> > +
> > +static inline void unlock_impl(struct mm_struct *mm,
> > +                              void (*unlock)(struct rw_semaphore *),
> > +                              bool write)
> > +{
> > +       unlock(&mm->mmap_lock);
> > +       trace_released(mm, write);
> > +}
> > +
> > +void mmap_init_lock(struct mm_struct *mm)
> > +{
> > +       init_rwsem(&mm->mmap_lock);
> > +}
> > +
> > +void mmap_write_lock(struct mm_struct *mm)
> > +{
> > +       lock_impl(mm, down_write, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_lock);
> > +
> > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
> > +{
> > +       u64 start_time_ns;
> > +
> > +       trace_start_locking(mm, true);
> > +       start_time_ns = sched_clock();
> > +       down_write_nested(&mm->mmap_lock, subclass);
> > +       trace_acquire_returned(mm, start_time_ns, true, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_lock_nested);
> > +
> > +int mmap_write_lock_killable(struct mm_struct *mm)
> > +{
> > +       return lock_return_impl(mm, down_write_killable, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_lock_killable);
> > +
> > +bool mmap_write_trylock(struct mm_struct *mm)
> > +{
> > +       return trylock_impl(mm, down_write_trylock, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_trylock);
> > +
> > +void mmap_write_unlock(struct mm_struct *mm)
> > +{
> > +       unlock_impl(mm, up_write, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_unlock);
> > +
> > +void mmap_write_downgrade(struct mm_struct *mm)
> > +{
> > +       downgrade_write(&mm->mmap_lock);
> > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, false, true);
> > +}
> > +EXPORT_SYMBOL(mmap_write_downgrade);
> > +
> > +void mmap_read_lock(struct mm_struct *mm)
> > +{
> > +       lock_impl(mm, down_read, false);
> > +}
> > +EXPORT_SYMBOL(mmap_read_lock);
> > +
> > +int mmap_read_lock_killable(struct mm_struct *mm)
> > +{
> > +       return lock_return_impl(mm, down_read_killable, false);
> > +}
> > +EXPORT_SYMBOL(mmap_read_lock_killable);
> > +
> > +bool mmap_read_trylock(struct mm_struct *mm)
> > +{
> > +       return trylock_impl(mm, down_read_trylock, false);
> > +}
> > +EXPORT_SYMBOL(mmap_read_trylock);
> > +
> > +void mmap_read_unlock(struct mm_struct *mm)
> > +{
> > +       unlock_impl(mm, up_read, false);
> > +}
> > +EXPORT_SYMBOL(mmap_read_unlock);
> > +
> > +bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> > +{
> > +       if (mmap_read_trylock(mm)) {
> > +               rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> > +               trace_released(mm, false);
> > +               return true;
> > +       }
> > +       return false;
> > +}
> > +EXPORT_SYMBOL(mmap_read_trylock_non_owner);
> > +
> > +void mmap_read_unlock_non_owner(struct mm_struct *mm)
> > +{
> > +       up_read_non_owner(&mm->mmap_lock);
> > +       trace_released(mm, false);
> > +}
> > +EXPORT_SYMBOL(mmap_read_unlock_non_owner);
> > +
> > +void mmap_assert_locked(struct mm_struct *mm)
> > +{
> > +       lockdep_assert_held(&mm->mmap_lock);
> > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > +}
> > +EXPORT_SYMBOL(mmap_assert_locked);
> > +
> > +void mmap_assert_write_locked(struct mm_struct *mm)
> > +{
> > +       lockdep_assert_held_write(&mm->mmap_lock);
> > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > +}
> > +EXPORT_SYMBOL(mmap_assert_write_locked);
> > --
> > 2.28.0.618.gf4bc123cb7-goog
> >
>
>
> --
> Thanks
> Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-21 16:53   ` Axel Rasmussen
@ 2020-09-22  4:09     ` Yafang Shao
  2020-09-22 16:39       ` Axel Rasmussen
  2020-09-22 16:51       ` Steven Rostedt
  0 siblings, 2 replies; 14+ messages in thread
From: Yafang Shao @ 2020-09-22  4:09 UTC (permalink / raw)
  To: Axel Rasmussen
  Cc: Steven Rostedt, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Tue, Sep 22, 2020 at 12:53 AM Axel Rasmussen
<axelrasmussen@google.com> wrote:
>
> On Sun, Sep 20, 2020 at 9:58 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > On Fri, Sep 18, 2020 at 2:13 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
> > >
> > > The goal of these tracepoints is to be able to debug lock contention
> > > issues. This lock is acquired on most (all?) mmap / munmap / page fault
> > > operations, so a multi-threaded process which does a lot of these can
> > > experience significant contention.
> > >
> > > We trace just before we start acquisition, when the acquisition returns
> > > (whether it succeeded or not), and when the lock is released (or
> > > downgraded). The events are broken out by lock type (read / write).
> > >
> > > The events are also broken out by memcg path. For container-based
> > > workloads, users often think of several processes in a memcg as a single
> > > logical "task", so collecting statistics at this level is useful.
> > >
> > > These events *do not* include latency bucket information, which means
> > > for a proper latency histogram users will need to use BPF instead of
> > > event histograms. The benefit we get from this is simpler code.
> > >
> > > This patch is a no-op if the Kconfig option is not enabled. If it is,
> > > tracepoints are still disabled by default (configurable at runtime);
> > > the only fixed cost here is un-inlining a few functions. As best as
> > > I've been able to measure, the overhead this introduces is a small
> > > fraction of 1%. Actually hooking up the tracepoints to BPF introduces
> > > additional overhead, depending on exactly what the BPF program is
> > > collecting.
> >
> > Are there any methods to avoid un-inlining these wrappers ?
> >
> > For example,
> > // include/linux/mmap_lock.h
> >
> > void mmap_lock_start_trace_wrapper();
> > void mmap_lock_acquire_trace_wrapper();
> >
> > static inline void mmap_write_lock(struct mm_struct *mm)
> > {
> >     mmap_lock_start_trace_wrapper();
> >     down_write(&mm->mmap_lock);
> >     mmap_lock_acquire_trace_wrapper();
> > }
> >
> > // mm/mmap_lock.c
> > void mmap_lock_start_trace_wrapper()
> > {
> >     trace_mmap_lock_start();
> > }
> >
> > void mmap_lock_start_trace_wrapper()
> > {
> >     trace_mmap_lock_acquired();
> > }
>
> We can do something like that, but I don't think it would end up being better.
>
> At the end of the day, because the trace stuff cannot be in the
> header, we have to add an extra function call one way or the other.
> This would just move the call one step further down the call stack.
> So, I don't think it would affect performance in the
> CONFIG_MMAP_LOCK_STATS + tracepoints not enabled at runtime case.
>

Right, it seems we have to add an extra function call.

> Also the wrappers aren't quite so simple as this, they need some
> parameters to work. (the struct mm_struct, whether it was a read or a
> write lock, and whether or not the lock operation succeeded), so it
> would mean adding more inlined code, which I think adds up to be a
> nontrivial amount since these wrappers are called so often in the
> kernel.
>
> If you feel strongly, let me know and I can send a version as you
> describe and we can compare the two.
>

These tracepoints will be less useful if we have to turn on the config
to enable it.
I don't mind implementing it that way if we can't optimize it.

Maybe Steven can give some suggestions, Steven ?

> >
> >
> >
> > > ---
> > >  include/linux/mmap_lock.h        |  28 +++-
> > >  include/trace/events/mmap_lock.h |  73 ++++++++++
> > >  mm/Kconfig                       |  17 +++
> > >  mm/Makefile                      |   1 +
> > >  mm/mmap_lock.c                   | 224 +++++++++++++++++++++++++++++++
> > >  5 files changed, 342 insertions(+), 1 deletion(-)
> > >  create mode 100644 include/trace/events/mmap_lock.h
> > >  create mode 100644 mm/mmap_lock.c
> > >
> > > diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> > > index 0707671851a8..d12aa2ff6c05 100644
> > > --- a/include/linux/mmap_lock.h
> > > +++ b/include/linux/mmap_lock.h
> > > @@ -1,11 +1,35 @@
> > >  #ifndef _LINUX_MMAP_LOCK_H
> > >  #define _LINUX_MMAP_LOCK_H
> > >
> > > +#include <linux/lockdep.h>
> > > +#include <linux/mm_types.h>
> > >  #include <linux/mmdebug.h>
> > > +#include <linux/rwsem.h>
> > > +#include <linux/types.h>
> > >
> > >  #define MMAP_LOCK_INITIALIZER(name) \
> > >         .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
> > >
> > > +#ifdef CONFIG_MMAP_LOCK_STATS
> > > +
> > > +void mmap_init_lock(struct mm_struct *mm);
> > > +void mmap_write_lock(struct mm_struct *mm);
> > > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
> > > +int mmap_write_lock_killable(struct mm_struct *mm);
> > > +bool mmap_write_trylock(struct mm_struct *mm);
> > > +void mmap_write_unlock(struct mm_struct *mm);
> > > +void mmap_write_downgrade(struct mm_struct *mm);
> > > +void mmap_read_lock(struct mm_struct *mm);
> > > +int mmap_read_lock_killable(struct mm_struct *mm);
> > > +bool mmap_read_trylock(struct mm_struct *mm);
> > > +void mmap_read_unlock(struct mm_struct *mm);
> > > +bool mmap_read_trylock_non_owner(struct mm_struct *mm);
> > > +void mmap_read_unlock_non_owner(struct mm_struct *mm);
> > > +void mmap_assert_locked(struct mm_struct *mm);
> > > +void mmap_assert_write_locked(struct mm_struct *mm);
> > > +
> > > +#else /* !CONFIG_MMAP_LOCK_STATS */
> > > +
> > >  static inline void mmap_init_lock(struct mm_struct *mm)
> > >  {
> > >         init_rwsem(&mm->mmap_lock);
> > > @@ -63,7 +87,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
> > >
> > >  static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> > >  {
> > > -       if (down_read_trylock(&mm->mmap_lock)) {
> > > +       if (mmap_read_trylock(mm)) {
> > >                 rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> > >                 return true;
> > >         }
> > > @@ -87,4 +111,6 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
> > >         VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > >  }
> > >
> > > +#endif /* CONFIG_MMAP_LOCK_STATS */
> > > +
> > >  #endif /* _LINUX_MMAP_LOCK_H */
> > > diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
> > > new file mode 100644
> > > index 000000000000..549c662e6ed8
> > > --- /dev/null
> > > +++ b/include/trace/events/mmap_lock.h
> > > @@ -0,0 +1,73 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#undef TRACE_SYSTEM
> > > +#define TRACE_SYSTEM mmap_lock
> > > +
> > > +#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
> > > +#define _TRACE_MMAP_LOCK_H
> > > +
> > > +#include <linux/tracepoint.h>
> > > +#include <linux/types.h>
> > > +
> > > +struct mm_struct;
> > > +
> > > +DECLARE_EVENT_CLASS(
> > > +       mmap_lock_template,
> > > +
> > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > +               bool write, bool success),
> > > +
> > > +       TP_ARGS(mm, memcg_path, duration, write, success),
> > > +
> > > +       TP_STRUCT__entry(
> > > +               __field(struct mm_struct *, mm)
> > > +               __string(memcg_path, memcg_path)
> > > +               __field(u64, duration)
> > > +               __field(bool, write)
> > > +               __field(bool, success)
> > > +       ),
> > > +
> > > +       TP_fast_assign(
> > > +               __entry->mm = mm;
> > > +               __assign_str(memcg_path, memcg_path);
> > > +               __entry->duration = duration;
> > > +               __entry->write = write;
> > > +               __entry->success = success;
> > > +       ),
> > > +
> > > +       TP_printk(
> > > +               "mm=%p memcg_path=%s duration=%llu write=%s success=%s\n",
> > > +               __entry->mm,
> > > +               __get_str(memcg_path),
> > > +               __entry->duration,
> > > +               __entry->write ? "true" : "false",
> > > +               __entry->success ? "true" : "false")
> > > +       );
> > > +
> > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_start_locking,
> > > +
> > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > +               bool write, bool success),
> > > +
> > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > +);
> > > +
> > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_acquire_returned,
> > > +
> > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > +               bool write, bool success),
> > > +
> > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > +);
> > > +
> > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_released,
> > > +
> > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > +               bool write, bool success),
> > > +
> > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > +);
> > > +
> > > +#endif /* _TRACE_MMAP_LOCK_H */
> > > +
> > > +/* This part must be outside protection */
> > > +#include <trace/define_trace.h>
> > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > index 6c974888f86f..b602df8bcee0 100644
> > > --- a/mm/Kconfig
> > > +++ b/mm/Kconfig
> > > @@ -818,6 +818,23 @@ config DEVICE_PRIVATE
> > >  config FRAME_VECTOR
> > >         bool
> > >
> > > +config MMAP_LOCK_STATS
> > > +       bool "mmap_lock stats / instrumentation"
> > > +       select HISTOGRAM
> > > +       default n
> > > +
> > > +       help
> > > +         Enables tracepoints around mmap_lock (start aquiring, acquire
> > > +         returned, and released), which are off by default + controlled at
> > > +         runtime. These can be used for deeper debugging of contention
> > > +         issues, via e.g. BPF.
> > > +
> > > +         This option has a small (small fraction of 1%) fixed overhead
> > > +         even if tracepoints aren't actually in use at runtime, since it
> > > +         requires un-inlining some functions.
> > > +
> > > +         If unsure, say "n".
> > > +
> > >  config ARCH_USES_HIGH_VMA_FLAGS
> > >         bool
> > >  config ARCH_HAS_PKEYS
> > > diff --git a/mm/Makefile b/mm/Makefile
> > > index d5649f1c12c0..eb6ed855a002 100644
> > > --- a/mm/Makefile
> > > +++ b/mm/Makefile
> > > @@ -121,3 +121,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
> > >  obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
> > >  obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
> > >  obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
> > > +obj-$(CONFIG_MMAP_LOCK_STATS) += mmap_lock.o
> > > diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> > > new file mode 100644
> > > index 000000000000..1624f90164c0
> > > --- /dev/null
> > > +++ b/mm/mmap_lock.c
> > > @@ -0,0 +1,224 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#define CREATE_TRACE_POINTS
> > > +#include <trace/events/mmap_lock.h>
> > > +
> > > +#include <linux/cgroup.h>
> > > +#include <linux/memcontrol.h>
> > > +#include <linux/mmap_lock.h>
> > > +#include <linux/percpu.h>
> > > +#include <linux/smp.h>
> > > +#include <linux/trace_events.h>
> > > +#include <linux/sched/clock.h>
> > > +
> > > +#ifdef CONFIG_MEMCG
> > > +
> > > +DEFINE_PER_CPU(char[MAX_FILTER_STR_VAL], trace_memcg_path);
> > > +
> > > +/*
> > > + * Write the given mm_struct's memcg path to a percpu buffer, and return a
> > > + * pointer to it. If the path cannot be determined, the buffer will contain the
> > > + * empty string.
> > > + *
> > > + * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
> > > + * disabled by the caller before calling us, and re-enabled only after the
> > > + * caller is done with the pointer.
> > > + */
> > > +static const char *get_mm_memcg_path(struct mm_struct *mm)
> > > +{
> > > +       struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
> > > +
> > > +       if (memcg != NULL && likely(memcg->css.cgroup != NULL)) {
> > > +               char *buf = this_cpu_ptr(trace_memcg_path);
> > > +
> > > +               cgroup_path(memcg->css.cgroup, buf, MAX_FILTER_STR_VAL);
> > > +               return buf;
> > > +       }
> > > +       return "";
> > > +}
> > > +
> > > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > > +       do {                                                                   \
> > > +               if (trace_mmap_lock_##type##_enabled()) {                      \
> > > +                       get_cpu();                                             \
> > > +                       trace_mmap_lock_##type(mm, get_mm_memcg_path(mm),      \
> > > +                                              ##__VA_ARGS__);                 \
> > > +                       put_cpu();                                             \
> > > +               }                                                              \
> > > +       } while (0)
> > > +
> > > +#else /* !CONFIG_MEMCG */
> > > +
> > > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > > +       trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
> > > +
> > > +#endif /* CONFIG_MEMCG */
> > > +
> > > +/*
> > > + * Trace calls must be in a separate file, as otherwise there's a circuclar
> > > + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> > > + */
> > > +
> > > +static void trace_start_locking(struct mm_struct *mm, bool write)
> > > +{
> > > +       TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > > +}
> > > +
> > > +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> > > +                                  bool write, bool success)
> > > +{
> > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > > +                             sched_clock() - start_time_ns, write, success);
> > > +}
> > > +
> > > +static void trace_released(struct mm_struct *mm, bool write)
> > > +{
> > > +       TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > > +}
> > > +
> > > +static bool trylock_impl(struct mm_struct *mm,
> > > +                        int (*trylock)(struct rw_semaphore *), bool write)
> > > +{
> > > +       bool ret;
> > > +
> > > +       trace_start_locking(mm, write);
> > > +       ret = trylock(&mm->mmap_lock) != 0;
> > > +       /* Avoid calling sched_clock() for trylocks; assume duration = 0. */
> > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, write, ret);
> > > +       return ret;
> > > +}
> > > +
> > > +static inline void lock_impl(struct mm_struct *mm,
> > > +                            void (*lock)(struct rw_semaphore *), bool write)
> > > +{
> > > +       u64 start_time_ns;
> > > +
> > > +       trace_start_locking(mm, write);
> > > +       start_time_ns = sched_clock();
> > > +       lock(&mm->mmap_lock);
> > > +       trace_acquire_returned(mm, start_time_ns, write, true);
> > > +}
> > > +
> > > +static inline int lock_return_impl(struct mm_struct *mm,
> > > +                                  int (*lock)(struct rw_semaphore *),
> > > +                                  bool write)
> > > +{
> > > +       u64 start_time_ns;
> > > +       int ret;
> > > +
> > > +       trace_start_locking(mm, write);
> > > +       start_time_ns = sched_clock();
> > > +       ret = lock(&mm->mmap_lock);
> > > +       trace_acquire_returned(mm, start_time_ns, write, ret == 0);
> > > +       return ret;
> > > +}
> > > +
> > > +static inline void unlock_impl(struct mm_struct *mm,
> > > +                              void (*unlock)(struct rw_semaphore *),
> > > +                              bool write)
> > > +{
> > > +       unlock(&mm->mmap_lock);
> > > +       trace_released(mm, write);
> > > +}
> > > +
> > > +void mmap_init_lock(struct mm_struct *mm)
> > > +{
> > > +       init_rwsem(&mm->mmap_lock);
> > > +}
> > > +
> > > +void mmap_write_lock(struct mm_struct *mm)
> > > +{
> > > +       lock_impl(mm, down_write, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_lock);
> > > +
> > > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
> > > +{
> > > +       u64 start_time_ns;
> > > +
> > > +       trace_start_locking(mm, true);
> > > +       start_time_ns = sched_clock();
> > > +       down_write_nested(&mm->mmap_lock, subclass);
> > > +       trace_acquire_returned(mm, start_time_ns, true, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_lock_nested);
> > > +
> > > +int mmap_write_lock_killable(struct mm_struct *mm)
> > > +{
> > > +       return lock_return_impl(mm, down_write_killable, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_lock_killable);
> > > +
> > > +bool mmap_write_trylock(struct mm_struct *mm)
> > > +{
> > > +       return trylock_impl(mm, down_write_trylock, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_trylock);
> > > +
> > > +void mmap_write_unlock(struct mm_struct *mm)
> > > +{
> > > +       unlock_impl(mm, up_write, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_unlock);
> > > +
> > > +void mmap_write_downgrade(struct mm_struct *mm)
> > > +{
> > > +       downgrade_write(&mm->mmap_lock);
> > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, false, true);
> > > +}
> > > +EXPORT_SYMBOL(mmap_write_downgrade);
> > > +
> > > +void mmap_read_lock(struct mm_struct *mm)
> > > +{
> > > +       lock_impl(mm, down_read, false);
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_lock);
> > > +
> > > +int mmap_read_lock_killable(struct mm_struct *mm)
> > > +{
> > > +       return lock_return_impl(mm, down_read_killable, false);
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_lock_killable);
> > > +
> > > +bool mmap_read_trylock(struct mm_struct *mm)
> > > +{
> > > +       return trylock_impl(mm, down_read_trylock, false);
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_trylock);
> > > +
> > > +void mmap_read_unlock(struct mm_struct *mm)
> > > +{
> > > +       unlock_impl(mm, up_read, false);
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_unlock);
> > > +
> > > +bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> > > +{
> > > +       if (mmap_read_trylock(mm)) {
> > > +               rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> > > +               trace_released(mm, false);
> > > +               return true;
> > > +       }
> > > +       return false;
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_trylock_non_owner);
> > > +
> > > +void mmap_read_unlock_non_owner(struct mm_struct *mm)
> > > +{
> > > +       up_read_non_owner(&mm->mmap_lock);
> > > +       trace_released(mm, false);
> > > +}
> > > +EXPORT_SYMBOL(mmap_read_unlock_non_owner);
> > > +
> > > +void mmap_assert_locked(struct mm_struct *mm)
> > > +{
> > > +       lockdep_assert_held(&mm->mmap_lock);
> > > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > > +}
> > > +EXPORT_SYMBOL(mmap_assert_locked);
> > > +
> > > +void mmap_assert_write_locked(struct mm_struct *mm)
> > > +{
> > > +       lockdep_assert_held_write(&mm->mmap_lock);
> > > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > > +}
> > > +EXPORT_SYMBOL(mmap_assert_write_locked);
> > > --
> > > 2.28.0.618.gf4bc123cb7-goog
> > >
> >
> >
> > --
> > Thanks
> > Yafang



-- 
Thanks
Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-22  4:09     ` Yafang Shao
@ 2020-09-22 16:39       ` Axel Rasmussen
  2020-09-22 16:51       ` Steven Rostedt
  1 sibling, 0 replies; 14+ messages in thread
From: Axel Rasmussen @ 2020-09-22 16:39 UTC (permalink / raw)
  To: Yafang Shao
  Cc: Steven Rostedt, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Mon, Sep 21, 2020 at 9:09 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Tue, Sep 22, 2020 at 12:53 AM Axel Rasmussen
> <axelrasmussen@google.com> wrote:
> >
> > On Sun, Sep 20, 2020 at 9:58 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> > >
> > > On Fri, Sep 18, 2020 at 2:13 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
> > > >
> > > > The goal of these tracepoints is to be able to debug lock contention
> > > > issues. This lock is acquired on most (all?) mmap / munmap / page fault
> > > > operations, so a multi-threaded process which does a lot of these can
> > > > experience significant contention.
> > > >
> > > > We trace just before we start acquisition, when the acquisition returns
> > > > (whether it succeeded or not), and when the lock is released (or
> > > > downgraded). The events are broken out by lock type (read / write).
> > > >
> > > > The events are also broken out by memcg path. For container-based
> > > > workloads, users often think of several processes in a memcg as a single
> > > > logical "task", so collecting statistics at this level is useful.
> > > >
> > > > These events *do not* include latency bucket information, which means
> > > > for a proper latency histogram users will need to use BPF instead of
> > > > event histograms. The benefit we get from this is simpler code.
> > > >
> > > > This patch is a no-op if the Kconfig option is not enabled. If it is,
> > > > tracepoints are still disabled by default (configurable at runtime);
> > > > the only fixed cost here is un-inlining a few functions. As best as
> > > > I've been able to measure, the overhead this introduces is a small
> > > > fraction of 1%. Actually hooking up the tracepoints to BPF introduces
> > > > additional overhead, depending on exactly what the BPF program is
> > > > collecting.
> > >
> > > Are there any methods to avoid un-inlining these wrappers ?
> > >
> > > For example,
> > > // include/linux/mmap_lock.h
> > >
> > > void mmap_lock_start_trace_wrapper();
> > > void mmap_lock_acquire_trace_wrapper();
> > >
> > > static inline void mmap_write_lock(struct mm_struct *mm)
> > > {
> > >     mmap_lock_start_trace_wrapper();
> > >     down_write(&mm->mmap_lock);
> > >     mmap_lock_acquire_trace_wrapper();
> > > }
> > >
> > > // mm/mmap_lock.c
> > > void mmap_lock_start_trace_wrapper()
> > > {
> > >     trace_mmap_lock_start();
> > > }
> > >
> > > void mmap_lock_start_trace_wrapper()
> > > {
> > >     trace_mmap_lock_acquired();
> > > }
> >
> > We can do something like that, but I don't think it would end up being better.
> >
> > At the end of the day, because the trace stuff cannot be in the
> > header, we have to add an extra function call one way or the other.
> > This would just move the call one step further down the call stack.
> > So, I don't think it would affect performance in the
> > CONFIG_MMAP_LOCK_STATS + tracepoints not enabled at runtime case.
> >
>
> Right, it seems we have to add an extra function call.
>
> > Also the wrappers aren't quite so simple as this, they need some
> > parameters to work. (the struct mm_struct, whether it was a read or a
> > write lock, and whether or not the lock operation succeeded), so it
> > would mean adding more inlined code, which I think adds up to be a
> > nontrivial amount since these wrappers are called so often in the
> > kernel.
> >
> > If you feel strongly, let me know and I can send a version as you
> > describe and we can compare the two.
> >
>
> These tracepoints will be less useful if we have to turn on the config
> to enable it.
> I don't mind implementing it that way if we can't optimize it.
>
> Maybe Steven can give some suggestions, Steven ?

Also, FWIW, the overhead this introduces is really quite small. I
mean, as far as I can tell it's immeasurably small - it's lost in the
normal variance I see when running a test a series of times. Perhaps
I'm being overly paranoid introducing a new CONFIG_ option, and should
just enable this always?

>
> > >
> > >
> > >
> > > > ---
> > > >  include/linux/mmap_lock.h        |  28 +++-
> > > >  include/trace/events/mmap_lock.h |  73 ++++++++++
> > > >  mm/Kconfig                       |  17 +++
> > > >  mm/Makefile                      |   1 +
> > > >  mm/mmap_lock.c                   | 224 +++++++++++++++++++++++++++++++
> > > >  5 files changed, 342 insertions(+), 1 deletion(-)
> > > >  create mode 100644 include/trace/events/mmap_lock.h
> > > >  create mode 100644 mm/mmap_lock.c
> > > >
> > > > diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> > > > index 0707671851a8..d12aa2ff6c05 100644
> > > > --- a/include/linux/mmap_lock.h
> > > > +++ b/include/linux/mmap_lock.h
> > > > @@ -1,11 +1,35 @@
> > > >  #ifndef _LINUX_MMAP_LOCK_H
> > > >  #define _LINUX_MMAP_LOCK_H
> > > >
> > > > +#include <linux/lockdep.h>
> > > > +#include <linux/mm_types.h>
> > > >  #include <linux/mmdebug.h>
> > > > +#include <linux/rwsem.h>
> > > > +#include <linux/types.h>
> > > >
> > > >  #define MMAP_LOCK_INITIALIZER(name) \
> > > >         .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
> > > >
> > > > +#ifdef CONFIG_MMAP_LOCK_STATS
> > > > +
> > > > +void mmap_init_lock(struct mm_struct *mm);
> > > > +void mmap_write_lock(struct mm_struct *mm);
> > > > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass);
> > > > +int mmap_write_lock_killable(struct mm_struct *mm);
> > > > +bool mmap_write_trylock(struct mm_struct *mm);
> > > > +void mmap_write_unlock(struct mm_struct *mm);
> > > > +void mmap_write_downgrade(struct mm_struct *mm);
> > > > +void mmap_read_lock(struct mm_struct *mm);
> > > > +int mmap_read_lock_killable(struct mm_struct *mm);
> > > > +bool mmap_read_trylock(struct mm_struct *mm);
> > > > +void mmap_read_unlock(struct mm_struct *mm);
> > > > +bool mmap_read_trylock_non_owner(struct mm_struct *mm);
> > > > +void mmap_read_unlock_non_owner(struct mm_struct *mm);
> > > > +void mmap_assert_locked(struct mm_struct *mm);
> > > > +void mmap_assert_write_locked(struct mm_struct *mm);
> > > > +
> > > > +#else /* !CONFIG_MMAP_LOCK_STATS */
> > > > +
> > > >  static inline void mmap_init_lock(struct mm_struct *mm)
> > > >  {
> > > >         init_rwsem(&mm->mmap_lock);
> > > > @@ -63,7 +87,7 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
> > > >
> > > >  static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> > > >  {
> > > > -       if (down_read_trylock(&mm->mmap_lock)) {
> > > > +       if (mmap_read_trylock(mm)) {
> > > >                 rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> > > >                 return true;
> > > >         }
> > > > @@ -87,4 +111,6 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
> > > >         VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > > >  }
> > > >
> > > > +#endif /* CONFIG_MMAP_LOCK_STATS */
> > > > +
> > > >  #endif /* _LINUX_MMAP_LOCK_H */
> > > > diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
> > > > new file mode 100644
> > > > index 000000000000..549c662e6ed8
> > > > --- /dev/null
> > > > +++ b/include/trace/events/mmap_lock.h
> > > > @@ -0,0 +1,73 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > +#undef TRACE_SYSTEM
> > > > +#define TRACE_SYSTEM mmap_lock
> > > > +
> > > > +#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
> > > > +#define _TRACE_MMAP_LOCK_H
> > > > +
> > > > +#include <linux/tracepoint.h>
> > > > +#include <linux/types.h>
> > > > +
> > > > +struct mm_struct;
> > > > +
> > > > +DECLARE_EVENT_CLASS(
> > > > +       mmap_lock_template,
> > > > +
> > > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > > +               bool write, bool success),
> > > > +
> > > > +       TP_ARGS(mm, memcg_path, duration, write, success),
> > > > +
> > > > +       TP_STRUCT__entry(
> > > > +               __field(struct mm_struct *, mm)
> > > > +               __string(memcg_path, memcg_path)
> > > > +               __field(u64, duration)
> > > > +               __field(bool, write)
> > > > +               __field(bool, success)
> > > > +       ),
> > > > +
> > > > +       TP_fast_assign(
> > > > +               __entry->mm = mm;
> > > > +               __assign_str(memcg_path, memcg_path);
> > > > +               __entry->duration = duration;
> > > > +               __entry->write = write;
> > > > +               __entry->success = success;
> > > > +       ),
> > > > +
> > > > +       TP_printk(
> > > > +               "mm=%p memcg_path=%s duration=%llu write=%s success=%s\n",
> > > > +               __entry->mm,
> > > > +               __get_str(memcg_path),
> > > > +               __entry->duration,
> > > > +               __entry->write ? "true" : "false",
> > > > +               __entry->success ? "true" : "false")
> > > > +       );
> > > > +
> > > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_start_locking,
> > > > +
> > > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > > +               bool write, bool success),
> > > > +
> > > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > > +);
> > > > +
> > > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_acquire_returned,
> > > > +
> > > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > > +               bool write, bool success),
> > > > +
> > > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > > +);
> > > > +
> > > > +DEFINE_EVENT(mmap_lock_template, mmap_lock_released,
> > > > +
> > > > +       TP_PROTO(struct mm_struct *mm, const char *memcg_path, u64 duration,
> > > > +               bool write, bool success),
> > > > +
> > > > +       TP_ARGS(mm, memcg_path, duration, write, success)
> > > > +);
> > > > +
> > > > +#endif /* _TRACE_MMAP_LOCK_H */
> > > > +
> > > > +/* This part must be outside protection */
> > > > +#include <trace/define_trace.h>
> > > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > > index 6c974888f86f..b602df8bcee0 100644
> > > > --- a/mm/Kconfig
> > > > +++ b/mm/Kconfig
> > > > @@ -818,6 +818,23 @@ config DEVICE_PRIVATE
> > > >  config FRAME_VECTOR
> > > >         bool
> > > >
> > > > +config MMAP_LOCK_STATS
> > > > +       bool "mmap_lock stats / instrumentation"
> > > > +       select HISTOGRAM
> > > > +       default n
> > > > +
> > > > +       help
> > > > +         Enables tracepoints around mmap_lock (start aquiring, acquire
> > > > +         returned, and released), which are off by default + controlled at
> > > > +         runtime. These can be used for deeper debugging of contention
> > > > +         issues, via e.g. BPF.
> > > > +
> > > > +         This option has a small (small fraction of 1%) fixed overhead
> > > > +         even if tracepoints aren't actually in use at runtime, since it
> > > > +         requires un-inlining some functions.
> > > > +
> > > > +         If unsure, say "n".
> > > > +
> > > >  config ARCH_USES_HIGH_VMA_FLAGS
> > > >         bool
> > > >  config ARCH_HAS_PKEYS
> > > > diff --git a/mm/Makefile b/mm/Makefile
> > > > index d5649f1c12c0..eb6ed855a002 100644
> > > > --- a/mm/Makefile
> > > > +++ b/mm/Makefile
> > > > @@ -121,3 +121,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
> > > >  obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
> > > >  obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
> > > >  obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
> > > > +obj-$(CONFIG_MMAP_LOCK_STATS) += mmap_lock.o
> > > > diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> > > > new file mode 100644
> > > > index 000000000000..1624f90164c0
> > > > --- /dev/null
> > > > +++ b/mm/mmap_lock.c
> > > > @@ -0,0 +1,224 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +#define CREATE_TRACE_POINTS
> > > > +#include <trace/events/mmap_lock.h>
> > > > +
> > > > +#include <linux/cgroup.h>
> > > > +#include <linux/memcontrol.h>
> > > > +#include <linux/mmap_lock.h>
> > > > +#include <linux/percpu.h>
> > > > +#include <linux/smp.h>
> > > > +#include <linux/trace_events.h>
> > > > +#include <linux/sched/clock.h>
> > > > +
> > > > +#ifdef CONFIG_MEMCG
> > > > +
> > > > +DEFINE_PER_CPU(char[MAX_FILTER_STR_VAL], trace_memcg_path);
> > > > +
> > > > +/*
> > > > + * Write the given mm_struct's memcg path to a percpu buffer, and return a
> > > > + * pointer to it. If the path cannot be determined, the buffer will contain the
> > > > + * empty string.
> > > > + *
> > > > + * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
> > > > + * disabled by the caller before calling us, and re-enabled only after the
> > > > + * caller is done with the pointer.
> > > > + */
> > > > +static const char *get_mm_memcg_path(struct mm_struct *mm)
> > > > +{
> > > > +       struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
> > > > +
> > > > +       if (memcg != NULL && likely(memcg->css.cgroup != NULL)) {
> > > > +               char *buf = this_cpu_ptr(trace_memcg_path);
> > > > +
> > > > +               cgroup_path(memcg->css.cgroup, buf, MAX_FILTER_STR_VAL);
> > > > +               return buf;
> > > > +       }
> > > > +       return "";
> > > > +}
> > > > +
> > > > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > > > +       do {                                                                   \
> > > > +               if (trace_mmap_lock_##type##_enabled()) {                      \
> > > > +                       get_cpu();                                             \
> > > > +                       trace_mmap_lock_##type(mm, get_mm_memcg_path(mm),      \
> > > > +                                              ##__VA_ARGS__);                 \
> > > > +                       put_cpu();                                             \
> > > > +               }                                                              \
> > > > +       } while (0)
> > > > +
> > > > +#else /* !CONFIG_MEMCG */
> > > > +
> > > > +#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
> > > > +       trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
> > > > +
> > > > +#endif /* CONFIG_MEMCG */
> > > > +
> > > > +/*
> > > > + * Trace calls must be in a separate file, as otherwise there's a circuclar
> > > > + * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
> > > > + */
> > > > +
> > > > +static void trace_start_locking(struct mm_struct *mm, bool write)
> > > > +{
> > > > +       TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > > > +}
> > > > +
> > > > +static void trace_acquire_returned(struct mm_struct *mm, u64 start_time_ns,
> > > > +                                  bool write, bool success)
> > > > +{
> > > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > > > +                             sched_clock() - start_time_ns, write, success);
> > > > +}
> > > > +
> > > > +static void trace_released(struct mm_struct *mm, bool write)
> > > > +{
> > > > +       TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > > > +}
> > > > +
> > > > +static bool trylock_impl(struct mm_struct *mm,
> > > > +                        int (*trylock)(struct rw_semaphore *), bool write)
> > > > +{
> > > > +       bool ret;
> > > > +
> > > > +       trace_start_locking(mm, write);
> > > > +       ret = trylock(&mm->mmap_lock) != 0;
> > > > +       /* Avoid calling sched_clock() for trylocks; assume duration = 0. */
> > > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, write, ret);
> > > > +       return ret;
> > > > +}
> > > > +
> > > > +static inline void lock_impl(struct mm_struct *mm,
> > > > +                            void (*lock)(struct rw_semaphore *), bool write)
> > > > +{
> > > > +       u64 start_time_ns;
> > > > +
> > > > +       trace_start_locking(mm, write);
> > > > +       start_time_ns = sched_clock();
> > > > +       lock(&mm->mmap_lock);
> > > > +       trace_acquire_returned(mm, start_time_ns, write, true);
> > > > +}
> > > > +
> > > > +static inline int lock_return_impl(struct mm_struct *mm,
> > > > +                                  int (*lock)(struct rw_semaphore *),
> > > > +                                  bool write)
> > > > +{
> > > > +       u64 start_time_ns;
> > > > +       int ret;
> > > > +
> > > > +       trace_start_locking(mm, write);
> > > > +       start_time_ns = sched_clock();
> > > > +       ret = lock(&mm->mmap_lock);
> > > > +       trace_acquire_returned(mm, start_time_ns, write, ret == 0);
> > > > +       return ret;
> > > > +}
> > > > +
> > > > +static inline void unlock_impl(struct mm_struct *mm,
> > > > +                              void (*unlock)(struct rw_semaphore *),
> > > > +                              bool write)
> > > > +{
> > > > +       unlock(&mm->mmap_lock);
> > > > +       trace_released(mm, write);
> > > > +}
> > > > +
> > > > +void mmap_init_lock(struct mm_struct *mm)
> > > > +{
> > > > +       init_rwsem(&mm->mmap_lock);
> > > > +}
> > > > +
> > > > +void mmap_write_lock(struct mm_struct *mm)
> > > > +{
> > > > +       lock_impl(mm, down_write, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_lock);
> > > > +
> > > > +void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
> > > > +{
> > > > +       u64 start_time_ns;
> > > > +
> > > > +       trace_start_locking(mm, true);
> > > > +       start_time_ns = sched_clock();
> > > > +       down_write_nested(&mm->mmap_lock, subclass);
> > > > +       trace_acquire_returned(mm, start_time_ns, true, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_lock_nested);
> > > > +
> > > > +int mmap_write_lock_killable(struct mm_struct *mm)
> > > > +{
> > > > +       return lock_return_impl(mm, down_write_killable, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_lock_killable);
> > > > +
> > > > +bool mmap_write_trylock(struct mm_struct *mm)
> > > > +{
> > > > +       return trylock_impl(mm, down_write_trylock, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_trylock);
> > > > +
> > > > +void mmap_write_unlock(struct mm_struct *mm)
> > > > +{
> > > > +       unlock_impl(mm, up_write, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_unlock);
> > > > +
> > > > +void mmap_write_downgrade(struct mm_struct *mm)
> > > > +{
> > > > +       downgrade_write(&mm->mmap_lock);
> > > > +       TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, 0, false, true);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_write_downgrade);
> > > > +
> > > > +void mmap_read_lock(struct mm_struct *mm)
> > > > +{
> > > > +       lock_impl(mm, down_read, false);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_lock);
> > > > +
> > > > +int mmap_read_lock_killable(struct mm_struct *mm)
> > > > +{
> > > > +       return lock_return_impl(mm, down_read_killable, false);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_lock_killable);
> > > > +
> > > > +bool mmap_read_trylock(struct mm_struct *mm)
> > > > +{
> > > > +       return trylock_impl(mm, down_read_trylock, false);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_trylock);
> > > > +
> > > > +void mmap_read_unlock(struct mm_struct *mm)
> > > > +{
> > > > +       unlock_impl(mm, up_read, false);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_unlock);
> > > > +
> > > > +bool mmap_read_trylock_non_owner(struct mm_struct *mm)
> > > > +{
> > > > +       if (mmap_read_trylock(mm)) {
> > > > +               rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
> > > > +               trace_released(mm, false);
> > > > +               return true;
> > > > +       }
> > > > +       return false;
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_trylock_non_owner);
> > > > +
> > > > +void mmap_read_unlock_non_owner(struct mm_struct *mm)
> > > > +{
> > > > +       up_read_non_owner(&mm->mmap_lock);
> > > > +       trace_released(mm, false);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_read_unlock_non_owner);
> > > > +
> > > > +void mmap_assert_locked(struct mm_struct *mm)
> > > > +{
> > > > +       lockdep_assert_held(&mm->mmap_lock);
> > > > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_assert_locked);
> > > > +
> > > > +void mmap_assert_write_locked(struct mm_struct *mm)
> > > > +{
> > > > +       lockdep_assert_held_write(&mm->mmap_lock);
> > > > +       VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
> > > > +}
> > > > +EXPORT_SYMBOL(mmap_assert_write_locked);
> > > > --
> > > > 2.28.0.618.gf4bc123cb7-goog
> > > >
> > >
> > >
> > > --
> > > Thanks
> > > Yafang
>
>
>
> --
> Thanks
> Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-22  4:09     ` Yafang Shao
  2020-09-22 16:39       ` Axel Rasmussen
@ 2020-09-22 16:51       ` Steven Rostedt
  2020-09-23 10:04         ` Yafang Shao
  1 sibling, 1 reply; 14+ messages in thread
From: Steven Rostedt @ 2020-09-22 16:51 UTC (permalink / raw)
  To: Yafang Shao
  Cc: Axel Rasmussen, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Tue, 22 Sep 2020 12:09:19 +0800
Yafang Shao <laoar.shao@gmail.com> wrote:

> > > Are there any methods to avoid un-inlining these wrappers ?
> > >
> > > For example,
> > > // include/linux/mmap_lock.h
> > >
> > > void mmap_lock_start_trace_wrapper();
> > > void mmap_lock_acquire_trace_wrapper();
> > >
> > > static inline void mmap_write_lock(struct mm_struct *mm)
> > > {
> > >     mmap_lock_start_trace_wrapper();
> > >     down_write(&mm->mmap_lock);
> > >     mmap_lock_acquire_trace_wrapper();
> > > }
> > >
> > > // mm/mmap_lock.c
> > > void mmap_lock_start_trace_wrapper()
> > > {
> > >     trace_mmap_lock_start();
> > > }
> > >
> > > void mmap_lock_start_trace_wrapper()
> > > {
> > >     trace_mmap_lock_acquired();
> > > }  
> >
> > We can do something like that, but I don't think it would end up being better.
> >
> > At the end of the day, because the trace stuff cannot be in the
> > header, we have to add an extra function call one way or the other.
> > This would just move the call one step further down the call stack.
> > So, I don't think it would affect performance in the
> > CONFIG_MMAP_LOCK_STATS + tracepoints not enabled at runtime case.
> >  
> 
> Right, it seems we have to add an extra function call.
> 
> > Also the wrappers aren't quite so simple as this, they need some
> > parameters to work. (the struct mm_struct, whether it was a read or a
> > write lock, and whether or not the lock operation succeeded), so it
> > would mean adding more inlined code, which I think adds up to be a
> > nontrivial amount since these wrappers are called so often in the
> > kernel.
> >
> > If you feel strongly, let me know and I can send a version as you
> > describe and we can compare the two.
> >  
> 
> These tracepoints will be less useful if we have to turn on the config
> to enable it.
> I don't mind implementing it that way if we can't optimize it.
> 
> Maybe Steven can give some suggestions, Steven ?
> 


What you can do, and what we have done is the following:

(see include/linux/page_ref.h)


#ifdef CONFIG_TRACING
extern struct tracepoint __tracepoint_mmap_lock_start_locking;
extern struct tracepoint __tracepoint_mmap_lock_acquire_returned;

#define mmap_lock_tracepoint_active(t) static_key_false(&(__tracepoint_mmap_lock_##t).key)

#else
#define mmap_lock_tracepoint_active(t) false
#endif

static inline void mmap_write_lock(struct mm_struct *mm)
{
	if (mmap_lock_tracepoint_active(start_locking))
		mmap_lock_start_trace_wrapper();
	down_write(&mm->mmap_lock);
	if (mmap_lock_tracepoint_active(acquire_returned))
		mmap_lock_acquire_trace_wrapper();
}


-- Steve

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-22 16:51       ` Steven Rostedt
@ 2020-09-23 10:04         ` Yafang Shao
  2020-09-23 16:09           ` Steven Rostedt
  0 siblings, 1 reply; 14+ messages in thread
From: Yafang Shao @ 2020-09-23 10:04 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Axel Rasmussen, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Wed, Sep 23, 2020 at 12:51 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 22 Sep 2020 12:09:19 +0800
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
> > > > Are there any methods to avoid un-inlining these wrappers ?
> > > >
> > > > For example,
> > > > // include/linux/mmap_lock.h
> > > >
> > > > void mmap_lock_start_trace_wrapper();
> > > > void mmap_lock_acquire_trace_wrapper();
> > > >
> > > > static inline void mmap_write_lock(struct mm_struct *mm)
> > > > {
> > > >     mmap_lock_start_trace_wrapper();
> > > >     down_write(&mm->mmap_lock);
> > > >     mmap_lock_acquire_trace_wrapper();
> > > > }
> > > >
> > > > // mm/mmap_lock.c
> > > > void mmap_lock_start_trace_wrapper()
> > > > {
> > > >     trace_mmap_lock_start();
> > > > }
> > > >
> > > > void mmap_lock_start_trace_wrapper()
> > > > {
> > > >     trace_mmap_lock_acquired();
> > > > }
> > >
> > > We can do something like that, but I don't think it would end up being better.
> > >
> > > At the end of the day, because the trace stuff cannot be in the
> > > header, we have to add an extra function call one way or the other.
> > > This would just move the call one step further down the call stack.
> > > So, I don't think it would affect performance in the
> > > CONFIG_MMAP_LOCK_STATS + tracepoints not enabled at runtime case.
> > >
> >
> > Right, it seems we have to add an extra function call.
> >
> > > Also the wrappers aren't quite so simple as this, they need some
> > > parameters to work. (the struct mm_struct, whether it was a read or a
> > > write lock, and whether or not the lock operation succeeded), so it
> > > would mean adding more inlined code, which I think adds up to be a
> > > nontrivial amount since these wrappers are called so often in the
> > > kernel.
> > >
> > > If you feel strongly, let me know and I can send a version as you
> > > describe and we can compare the two.
> > >
> >
> > These tracepoints will be less useful if we have to turn on the config
> > to enable it.
> > I don't mind implementing it that way if we can't optimize it.
> >
> > Maybe Steven can give some suggestions, Steven ?
> >
>
>
> What you can do, and what we have done is the following:
>
> (see include/linux/page_ref.h)
>
>
> #ifdef CONFIG_TRACING
> extern struct tracepoint __tracepoint_mmap_lock_start_locking;
> extern struct tracepoint __tracepoint_mmap_lock_acquire_returned;
>
> #define mmap_lock_tracepoint_active(t) static_key_false(&(__tracepoint_mmap_lock_##t).key)
>
> #else
> #define mmap_lock_tracepoint_active(t) false
> #endif
>
> static inline void mmap_write_lock(struct mm_struct *mm)
> {
>         if (mmap_lock_tracepoint_active(start_locking))
>                 mmap_lock_start_trace_wrapper();
>         down_write(&mm->mmap_lock);
>         if (mmap_lock_tracepoint_active(acquire_returned))
>                 mmap_lock_acquire_trace_wrapper();
> }
>
>
> -- Steve


Great!

Thanks Steve.

-- 
Thanks
Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-23 10:04         ` Yafang Shao
@ 2020-09-23 16:09           ` Steven Rostedt
  2020-09-23 16:40             ` Axel Rasmussen
  2020-09-24  4:28             ` Yafang Shao
  0 siblings, 2 replies; 14+ messages in thread
From: Steven Rostedt @ 2020-09-23 16:09 UTC (permalink / raw)
  To: Yafang Shao
  Cc: Axel Rasmussen, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Wed, 23 Sep 2020 18:04:17 +0800
Yafang Shao <laoar.shao@gmail.com> wrote:

> > What you can do, and what we have done is the following:
> >
> > (see include/linux/page_ref.h)
> >
> >
> > #ifdef CONFIG_TRACING
> > extern struct tracepoint __tracepoint_mmap_lock_start_locking;
> > extern struct tracepoint __tracepoint_mmap_lock_acquire_returned;
> >
> > #define mmap_lock_tracepoint_active(t) static_key_false(&(__tracepoint_mmap_lock_##t).key)
> >
> > #else
> > #define mmap_lock_tracepoint_active(t) false
> > #endif
> >
> > static inline void mmap_write_lock(struct mm_struct *mm)
> > {
> >         if (mmap_lock_tracepoint_active(start_locking))
> >                 mmap_lock_start_trace_wrapper();
> >         down_write(&mm->mmap_lock);
> >         if (mmap_lock_tracepoint_active(acquire_returned))
> >                 mmap_lock_acquire_trace_wrapper();
> > }
> >
> >
> > -- Steve  
> 
> 
> Great!
> 
> Thanks Steve.

If the above becomes useful, I may just add helper functions into a
header file that you can include. Perhaps call it: tracepoint_active()
and you need to pass in as an argument the name of the tracepoint.

-- Steve

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-23 16:09           ` Steven Rostedt
@ 2020-09-23 16:40             ` Axel Rasmussen
  2020-09-24  4:28             ` Yafang Shao
  1 sibling, 0 replies; 14+ messages in thread
From: Axel Rasmussen @ 2020-09-23 16:40 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Yafang Shao, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Wed, Sep 23, 2020 at 9:09 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 23 Sep 2020 18:04:17 +0800
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
> > > What you can do, and what we have done is the following:
> > >
> > > (see include/linux/page_ref.h)
> > >
> > >
> > > #ifdef CONFIG_TRACING
> > > extern struct tracepoint __tracepoint_mmap_lock_start_locking;
> > > extern struct tracepoint __tracepoint_mmap_lock_acquire_returned;
> > >
> > > #define mmap_lock_tracepoint_active(t) static_key_false(&(__tracepoint_mmap_lock_##t).key)
> > >
> > > #else
> > > #define mmap_lock_tracepoint_active(t) false
> > > #endif
> > >
> > > static inline void mmap_write_lock(struct mm_struct *mm)
> > > {
> > >         if (mmap_lock_tracepoint_active(start_locking))
> > >                 mmap_lock_start_trace_wrapper();
> > >         down_write(&mm->mmap_lock);
> > >         if (mmap_lock_tracepoint_active(acquire_returned))
> > >                 mmap_lock_acquire_trace_wrapper();
> > > }
> > >
> > >
> > > -- Steve
> >
> >
> > Great!
> >
> > Thanks Steve.
>
> If the above becomes useful, I may just add helper functions into a
> header file that you can include. Perhaps call it: tracepoint_active()
> and you need to pass in as an argument the name of the tracepoint.

Thanks for this suggestion Steven, it's working quite well.

I also have a very short patch to the tracing which allows plumbing
the string values through to "just work".

I plan to send out a v2 before the end of the week.

>
> -- Steve

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-18 20:41     ` Steven Rostedt
@ 2020-09-23 22:07       ` Tom Zanussi
  0 siblings, 0 replies; 14+ messages in thread
From: Tom Zanussi @ 2020-09-23 22:07 UTC (permalink / raw)
  To: Steven Rostedt, Axel Rasmussen
  Cc: Ingo Molnar, Andrew Morton, Vlastimil Babka, Michel Lespinasse,
	Daniel Jordan, Davidlohr Bueso, Yafang Shao, LKML, Linux MM

Hi Steve, Axel,

On Fri, 2020-09-18 at 16:41 -0400, Steven Rostedt wrote:
> On Fri, 18 Sep 2020 13:26:37 -0700
> Axel Rasmussen <axelrasmussen@google.com> wrote:
> 
> > On Thu, Sep 17, 2020 at 12:43 PM Steven Rostedt <
> > rostedt@goodmis.org> wrote:
> > > 
> > > On Thu, 17 Sep 2020 11:13:47 -0700
> > > Axel Rasmussen <axelrasmussen@google.com> wrote:
> > >  
> > > > +/*
> > > > + * Trace calls must be in a separate file, as otherwise
> > > > there's a circuclar
> > > > + * dependency between linux/mmap_lock.h and
> > > > trace/events/mmap_lock.h.
> > > > + */
> > > > +
> > > > +static void trace_start_locking(struct mm_struct *mm, bool
> > > > write)  
> > > 
> > > Please don't use "trace_" for functions, as that should be
> > > reserved for the
> > > actual tracepoint functions. Please use "do_trace_" or whatever
> > > so there's
> > > no confusion about this being a tracepoint, even if it's just a
> > > function
> > > that calls the tracepoint.  
> > 
> > Done; I'll send a v2 with this change.
> > 
> > >  
> > > > +{
> > > > +     TRACE_MMAP_LOCK_EVENT(start_locking, mm, 0, write, true);
> > > > +}
> > > > +
> > > > +static void trace_acquire_returned(struct mm_struct *mm, u64
> > > > start_time_ns,
> > > > +                                bool write, bool success)
> > > > +{
> > > > +     TRACE_MMAP_LOCK_EVENT(acquire_returned, mm,
> > > > +                           sched_clock() - start_time_ns,
> > > > write, success);
> > > > +}
> > > > +
> > > > +static void trace_released(struct mm_struct *mm, bool write)
> > > > +{
> > > > +     TRACE_MMAP_LOCK_EVENT(released, mm, 0, write, true);
> > > > +}
> > > > +  
> > > 
> > >  
> > > > +static inline void lock_impl(struct mm_struct *mm,
> > > > +                          void (*lock)(struct rw_semaphore *),
> > > > bool write)
> > > > +{
> > > > +     u64 start_time_ns;
> > > > +
> > > > +     trace_start_locking(mm, write);
> > > > +     start_time_ns = sched_clock();
> > > > +     lock(&mm->mmap_lock);
> > > > +     trace_acquire_returned(mm, start_time_ns, write, true);
> > > > +}
> > > > +  
> > > 
> > > Why record the start time and pass it in for return, when this
> > > can be done
> > > by simply recording the start and return and then using the
> > > timestamps of
> > > the trace events to calculate the duration, offline or as
> > > synthetic events:  
> > 
> > First, thanks for the detailed feedback! As a newbie this is very
> > helpful. :)
> > 
> > I agree in principle, and I almost have a working version as you
> > suggest, but I can't see a way to get string fields working.
> > 
> > I believe in trace event headers the typical way to define a string
> > field  is as a "const char *", with the __string, __assign_str, and
> > __get_str helpers. But, from reading trace_events_synth.c, this
> > isn't
> > really supported, in that it only supports "char []". But, the hist
> > trigger code just does a strcmp() of the type string, it doesn't do
> > any type conversion, so it considers these types incompatible:
> > 
> > After this:
> > # echo 'mmap_lock_latency u64 time; char memcg_path[256]' >
> > /sys/kernel/tracing/synthetic_events
> > 
> > Trying to setup the hist trigger gives (the ^ points to the
> > beginning
> > of keys=>m<emcg_path ... not sure the formatting will be preserved
> > properly in e-mail):
> > # cat /sys/kernel/tracing/error_log
> > [   15.823725] hist:mmap_lock:mmap_lock_acquire_returned: error:
> > Param
> > type doesn't match synthetic event field type
> >   Command: hist:keys=memcg_path:latency=common_timestamp.usecs-
> > $ts0:onmatch(mmap_lock.mmap_lock_start_locking).mmap_lock_latency($
> > latency,memcg_path)
> >                      ^
> > 
> > I tried grepping "char [^\[]+\[" in include/trace/events/, and it
> > seems nobody is defining fixed-length string fields like that, so I
> > think that's the wrong solution. I checked the docs about defining
> > variables (
> > https://www.kernel.org/doc/html/v5.2/trace/histogram.html)
> > and it doesn't support anything complex like a cast, just - and +.
> > 
> > Any advice?
> 
> Tom,
> 
> Do you think we could make histograms support the above somehow?
> 

Sorry for the delayed reply - was out on vacation.

Yeah, currently the synthetic events only support constant-length
strings, which should match with __array()s in the tracepoints, but I
think they should also be made to support variable-length arrays that
would match __string() etc.

I'm thinking an array field without length specifier could be used to
in the synthetic event specification for that e.g.:

  # echo 'mmap_lock_latency u64 time; char memcg_path[]' > /sys/kernel/tracing/synthetic_events

I'll work on adding that over the next couple days or so...

Tom

> -- Steve
> 
> > 
> > 
> > 
> > > 
> > > 
> > >  # cd /sys/kernel/tracing/
> > >  # echo 'duration u64 time' > synthetic_events
> > >  # echo 'hist:keys=common_pid:ts0=common_timestamp.usecs" >
> > > events/mmap_lock/mmap_lock_start_locking/trigger
> > >  # echo 'hist:keys=common_pid:dur=common_timestamp.usecs-
> > > $ts0:onmatch(mmap_lock.mmap_lock_start_locking).trace(duration,$d
> > > ur)" > events/mmap_lock/mmap_lock_acquire_returned/trigger
> > >  # echo 1 > events/synthetic/duration/enable
> > >  # cat trace
> > > # tracer: nop
> > > #
> > > # entries-in-buffer/entries-written: 148/148   #P:8
> > > #
> > > #                              _-----=> irqs-off
> > > #                             / _----=> need-resched
> > > #                            | / _---=> hardirq/softirq
> > > #                            || / _--=> preempt-depth
> > > #                            ||| /     delay
> > > #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
> > > #              | |       |   ||||       |         |
> > >             bash-1613  [007] ...3  3186.431687: duration: time=3
> > >             bash-1613  [007] ...3  3186.431722: duration: time=2
> > >             bash-1613  [007] ...3  3186.431772: duration: time=2
> > >             bash-1613  [001] ...3  3188.372001: duration: time=6
> > >             bash-1613  [001] ...3  3188.372324: duration: time=6
> > >             bash-1613  [001] ...3  3188.372332: duration: time=4
> > >             bash-1613  [001] ...3  3188.373557: duration: time=5
> > >             bash-1613  [001] ...3  3188.373595: duration: time=3
> > >              cat-1868  [002] ...3  3188.373608: duration: time=8
> > >             bash-1613  [001] ...3  3188.373613: duration: time=4
> > >             bash-1613  [001] ...3  3188.373635: duration: time=3
> > >              cat-1868  [002] ...3  3188.373646: duration: time=4
> > >             bash-1613  [001] ...3  3188.373652: duration: time=3
> > >             bash-1613  [001] ...3  3188.373669: duration: time=3
> > > 
> > >  # echo 'hist:keys=time' > events/synthetic/duration/trigger
> > >  # cat events/synthetic/duration/hist
> > > # event histogram
> > > #
> > > # trigger info:
> > > hist:keys=time:vals=hitcount:sort=hitcount:size=2048 [active]
> > > #
> > > 
> > > { time:        114 } hitcount:          1
> > > { time:         15 } hitcount:          1
> > > { time:         11 } hitcount:          1
> > > { time:         21 } hitcount:          1
> > > { time:         10 } hitcount:          1
> > > { time:         46 } hitcount:          1
> > > { time:         29 } hitcount:          1
> > > { time:         13 } hitcount:          2
> > > { time:         16 } hitcount:          3
> > > { time:          9 } hitcount:          3
> > > { time:          8 } hitcount:          3
> > > { time:          7 } hitcount:          8
> > > { time:          6 } hitcount:         10
> > > { time:          5 } hitcount:         28
> > > { time:          4 } hitcount:        121
> > > { time:          1 } hitcount:        523
> > > { time:          3 } hitcount:        581
> > > { time:          2 } hitcount:        882
> > > 
> > > Totals:
> > >     Hits: 2171
> > >     Entries: 18
> > >     Dropped: 0
> > > 
> > > And with this I could do a bunch of things like stack trace on
> > > max hits and
> > > other features that the tracing histograms give us.
> > > 
> > > -- Steve  
> 
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mmap_lock: add tracepoints around lock acquisition
  2020-09-23 16:09           ` Steven Rostedt
  2020-09-23 16:40             ` Axel Rasmussen
@ 2020-09-24  4:28             ` Yafang Shao
  1 sibling, 0 replies; 14+ messages in thread
From: Yafang Shao @ 2020-09-24  4:28 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Axel Rasmussen, Ingo Molnar, Andrew Morton, Vlastimil Babka,
	Michel Lespinasse, Daniel Jordan, Davidlohr Bueso, LKML,
	Linux MM

On Thu, Sep 24, 2020 at 12:09 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 23 Sep 2020 18:04:17 +0800
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
> > > What you can do, and what we have done is the following:
> > >
> > > (see include/linux/page_ref.h)
> > >
> > >
> > > #ifdef CONFIG_TRACING
> > > extern struct tracepoint __tracepoint_mmap_lock_start_locking;
> > > extern struct tracepoint __tracepoint_mmap_lock_acquire_returned;
> > >
> > > #define mmap_lock_tracepoint_active(t) static_key_false(&(__tracepoint_mmap_lock_##t).key)
> > >
> > > #else
> > > #define mmap_lock_tracepoint_active(t) false
> > > #endif
> > >
> > > static inline void mmap_write_lock(struct mm_struct *mm)
> > > {
> > >         if (mmap_lock_tracepoint_active(start_locking))
> > >                 mmap_lock_start_trace_wrapper();
> > >         down_write(&mm->mmap_lock);
> > >         if (mmap_lock_tracepoint_active(acquire_returned))
> > >                 mmap_lock_acquire_trace_wrapper();
> > > }
> > >
> > >
> > > -- Steve
> >
> >
> > Great!
> >
> > Thanks Steve.
>
> If the above becomes useful, I may just add helper functions into a
> header file that you can include. Perhaps call it: tracepoint_active()
> and you need to pass in as an argument the name of the tracepoint.
>

Yes, please. The new helper would be useful.

-- 
Thanks
Yafang

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-09-24  4:29 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-17 18:13 [PATCH] mmap_lock: add tracepoints around lock acquisition Axel Rasmussen
2020-09-17 19:42 ` Steven Rostedt
2020-09-18 20:26   ` Axel Rasmussen
2020-09-18 20:41     ` Steven Rostedt
2020-09-23 22:07       ` Tom Zanussi
2020-09-21  4:57 ` Yafang Shao
2020-09-21 16:53   ` Axel Rasmussen
2020-09-22  4:09     ` Yafang Shao
2020-09-22 16:39       ` Axel Rasmussen
2020-09-22 16:51       ` Steven Rostedt
2020-09-23 10:04         ` Yafang Shao
2020-09-23 16:09           ` Steven Rostedt
2020-09-23 16:40             ` Axel Rasmussen
2020-09-24  4:28             ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).