* [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
@ 2020-02-10 22:06 Steven Rostedt
  2020-02-11  0:30 ` Mathieu Desnoyers
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-10 22:06 UTC (permalink / raw)
  To: LKML
  Cc: Peter Zijlstra, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan


From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Commit e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use
SRCU") removed the calls to rcu_irq_enter/exit_irqson() and replaced it with
srcu callbacks as that much faster for the rcuidle cases. But this caused an
issue with perf, because perf only uses rcu to synchronize trace points.

The issue was that if perf traced one of the "rcuidle" paths, that path no
longer enabled RCU if it was not watching, and this caused lockdep to
complain when the perf code used rcu_read_lock() and RCU was not "watching".

Commit 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for
rcuidle tracepoints") added back the rcu_irq_enter/exit_irqson() code, but
this negated the benefit of the SRCU changes.

As perf is the only callback that needs the heavier-weight
"rcu_irq_enter/exit_irqson()" calls, move them into the perf-specific code
so as not to bog down the callbacks that do not require them.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h |  8 ++------
 include/trace/perf.h       | 17 +++++++++++++++--
 kernel/rcu/tree.c          |  2 ++
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 1fb11daa5c53..a83fd076a312 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -179,10 +179,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		 * For rcuidle callers, use srcu since sched-rcu	\
 		 * doesn't work from the idle path.			\
 		 */							\
-		if (rcuidle) {						\
+		if (rcuidle)						\
 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
-			rcu_irq_enter_irqson();				\
-		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
 									\
@@ -194,10 +192,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			} while ((++it_func_ptr)->func);		\
 		}							\
 									\
-		if (rcuidle) {						\
-			rcu_irq_exit_irqson();				\
+		if (rcuidle)						\
 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
-		}							\
 									\
 		preempt_enable_notrace();				\
 	} while (0)
diff --git a/include/trace/perf.h b/include/trace/perf.h
index dbc6c74defc3..1c94ce0cd4e2 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -39,17 +39,27 @@ perf_trace_##call(void *__data, proto)					\
 	u64 __count = 1;						\
 	struct task_struct *__task = NULL;				\
 	struct hlist_head *head;					\
+	bool rcu_watching;						\
 	int __entry_size;						\
 	int __data_size;						\
 	int rctx;							\
 									\
+	rcu_watching = rcu_is_watching();				\
+									\
 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
 									\
+	if (!rcu_watching) {						\
+		/* Can not use RCU if rcu is not watching and in NMI */	\
+		if (in_nmi())						\
+			return;						\
+		rcu_irq_enter_irqson();					\
+	}								\
+									\
 	head = this_cpu_ptr(event_call->perf_events);			\
 	if (!bpf_prog_array_valid(event_call) &&			\
 	    __builtin_constant_p(!__task) && !__task &&			\
 	    hlist_empty(head))						\
-		return;							\
+		goto out;						\
 									\
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
 			     sizeof(u64));				\
@@ -57,7 +67,7 @@ perf_trace_##call(void *__data, proto)					\
 									\
 	entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx);	\
 	if (!entry)							\
-		return;							\
+		goto out;						\
 									\
 	perf_fetch_caller_regs(__regs);					\
 									\
@@ -68,6 +78,9 @@ perf_trace_##call(void *__data, proto)					\
 	perf_trace_run_bpf_submit(entry, __entry_size, rctx,		\
 				  event_call, __count, __regs,		\
 				  head, __task);			\
+out:									\
+	if (!rcu_watching)						\
+		rcu_irq_exit_irqson();					\
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1694a6b57ad8..3e6f07b62515 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -719,6 +719,7 @@ void rcu_irq_exit_irqson(void)
 	rcu_irq_exit();
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_irq_exit_irqson);
 
 /*
  * Exit an RCU extended quiescent state, which can be either the
@@ -890,6 +891,7 @@ void rcu_irq_enter_irqson(void)
 	rcu_irq_enter();
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_irq_enter_irqson);
 
 /*
  * If any sort of urgency was applied to the current CPU (for example,
-- 
2.20.1



* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-10 22:06 [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook Steven Rostedt
@ 2020-02-11  0:30 ` Mathieu Desnoyers
  2020-02-11  2:22   ` Steven Rostedt
  2020-02-11 12:00   ` Peter Zijlstra
  2020-02-11 11:49 ` Peter Zijlstra
  2020-02-11 12:21 ` Peter Zijlstra
  2 siblings, 2 replies; 23+ messages in thread
From: Mathieu Desnoyers @ 2020-02-11  0:30 UTC (permalink / raw)
  To: rostedt
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

----- On Feb 10, 2020, at 5:06 PM, rostedt <rostedt@goodmis.org> wrote:

> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Hi Steven,

I agree with the general direction taken by this patch, but I would
like to bring a clarification to the changelog, and I'm worried about
its handling of NMI handlers nesting over rcuidle context.

> 
> Commit e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use
> SRCU") removed the calls to rcu_irq_enter/exit_irqson() and replaced it with
> srcu callbacks as that much faster for the rcuidle cases. But this caused an
> issue with perf,

so far, so good.

> because perf only uses rcu to synchronize trace points.

That last part seems inaccurate. The tracepoint synchronization is two-fold:
one part is internal to tracepoint.c (see rcu_free_old_probes()), and the other
is only needed if the probes are within modules which can be unloaded (see
tracepoint_synchronize_unregister()). AFAIK, perf never implements probe callbacks
within modules, so the latter is not needed by perf.

The culprit of the problem here is that perf issues "rcu_read_lock()" and
"rcu_read_unlock()" within the probe callbacks it registers to the tracepoints,
including the rcuidle ones. Those require that RCU is "watching", which is
triggering the regression when we remove the calls to rcu_irq_enter/exit_irqson()
from the rcuidle tracepoint instrumentation sites.
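
For illustration, a minimal sketch of the pattern in question (this is a
hypothetical probe callback, not the actual perf code):

#include <linux/rcupdate.h>

/* Registered as a tracepoint probe; may fire from an rcuidle site. */
static void example_probe(void *data)
{
	/* lockdep rightly complains here unless rcu_is_watching() */
	rcu_read_lock();
	/* ... dereference RCU-managed state, emit a sample ... */
	rcu_read_unlock();
}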

> 
> The issue was that if perf traced one of the "rcuidle" paths, that path no
> longer enabled RCU if it was not watching, and this caused lockdep to
> complain when the perf code used rcu_read_lock() and RCU was not "watching".

Yes.

> 
> Commit 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for
> rcuidle tracepoints") added back the rcu_irq_enter/exit_irqson() code, but
> this negated the benefit of the SRCU changes.
> 
> As perf is the only callback that needs the heavier-weight
> "rcu_irq_enter/exit_irqson()" calls, move them into the perf-specific code
> so as not to bog down the callbacks that do not require them.

Yes.

Which brings a question about handling of NMIs: in the proposed patch, if
an NMI nests over rcuidle context, AFAIU it will be in a state
!rcu_is_watching() && in_nmi(), which is handled by this patch with a simple
"return", meaning important NMIs doing hardware event sampling can be
completely lost.

Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI context,
is it at all valid to use rcu_read_lock/unlock() as perf does from NMI handlers,
considering that those can be nested on top of rcuidle context?

Thanks,

Mathieu


> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
> include/linux/tracepoint.h |  8 ++------
> include/trace/perf.h       | 17 +++++++++++++++--
> kernel/rcu/tree.c          |  2 ++
> 3 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 1fb11daa5c53..a83fd076a312 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -179,10 +179,8 @@ static inline struct tracepoint
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
> 		 * For rcuidle callers, use srcu since sched-rcu	\
> 		 * doesn't work from the idle path.			\
> 		 */							\
> -		if (rcuidle) {						\
> +		if (rcuidle)						\
> 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
> -			rcu_irq_enter_irqson();				\
> -		}							\
> 									\
> 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
> 									\
> @@ -194,10 +192,8 @@ static inline struct tracepoint
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
> 			} while ((++it_func_ptr)->func);		\
> 		}							\
> 									\
> -		if (rcuidle) {						\
> -			rcu_irq_exit_irqson();				\
> +		if (rcuidle)						\
> 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
> -		}							\
> 									\
> 		preempt_enable_notrace();				\
> 	} while (0)
> diff --git a/include/trace/perf.h b/include/trace/perf.h
> index dbc6c74defc3..1c94ce0cd4e2 100644
> --- a/include/trace/perf.h
> +++ b/include/trace/perf.h
> @@ -39,17 +39,27 @@ perf_trace_##call(void *__data, proto)					\
> 	u64 __count = 1;						\
> 	struct task_struct *__task = NULL;				\
> 	struct hlist_head *head;					\
> +	bool rcu_watching;						\
> 	int __entry_size;						\
> 	int __data_size;						\
> 	int rctx;							\
> 									\
> +	rcu_watching = rcu_is_watching();				\
> +									\
> 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
> 									\
> +	if (!rcu_watching) {						\
> +		/* Can not use RCU if rcu is not watching and in NMI */	\
> +		if (in_nmi())						\
> +			return;						\
> +		rcu_irq_enter_irqson();					\
> +	}								\
> +									\
> 	head = this_cpu_ptr(event_call->perf_events);			\
> 	if (!bpf_prog_array_valid(event_call) &&			\
> 	    __builtin_constant_p(!__task) && !__task &&			\
> 	    hlist_empty(head))						\
> -		return;							\
> +		goto out;						\
> 									\
> 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
> 			     sizeof(u64));				\
> @@ -57,7 +67,7 @@ perf_trace_##call(void *__data, proto)					\
> 									\
> 	entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx);	\
> 	if (!entry)							\
> -		return;							\
> +		goto out;						\
> 									\
> 	perf_fetch_caller_regs(__regs);					\
> 									\
> @@ -68,6 +78,9 @@ perf_trace_##call(void *__data, proto)					\
> 	perf_trace_run_bpf_submit(entry, __entry_size, rctx,		\
> 				  event_call, __count, __regs,		\
> 				  head, __task);			\
> +out:									\
> +	if (!rcu_watching)						\
> +		rcu_irq_exit_irqson();					\
> }
> 
> /*
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 1694a6b57ad8..3e6f07b62515 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -719,6 +719,7 @@ void rcu_irq_exit_irqson(void)
> 	rcu_irq_exit();
> 	local_irq_restore(flags);
> }
> +EXPORT_SYMBOL_GPL(rcu_irq_exit_irqson);
> 
> /*
>  * Exit an RCU extended quiescent state, which can be either the
> @@ -890,6 +891,7 @@ void rcu_irq_enter_irqson(void)
> 	rcu_irq_enter();
> 	local_irq_restore(flags);
> }
> +EXPORT_SYMBOL_GPL(rcu_irq_enter_irqson);
> 
> /*
>  * If any sort of urgency was applied to the current CPU (for example,
> --
> 2.20.1

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11  0:30 ` Mathieu Desnoyers
@ 2020-02-11  2:22   ` Steven Rostedt
  2020-02-11  2:32     ` joel
  2020-02-11 15:19     ` Mathieu Desnoyers
  2020-02-11 12:00   ` Peter Zijlstra
  1 sibling, 2 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-11  2:22 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

On Mon, 10 Feb 2020 19:30:32 -0500 (EST)
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> ----- On Feb 10, 2020, at 5:06 PM, rostedt <rostedt@goodmis.org> wrote:
> 
> > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>  
> 
> Hi Steven,

Hi Mathieu!

> 
> > because perf only uses rcu to synchronize trace points.  
> 
> That last part seems inaccurate. The tracepoint synchronization is two-fold:
> one part is internal to tracepoint.c (see rcu_free_old_probes()), and the other
> is only needed if the probes are within modules which can be unloaded (see
> tracepoint_synchronize_unregister()). AFAIK, perf never implements probe callbacks
> within modules, so the latter is not needed by perf.
> 
> The culprit of the problem here is that perf issues "rcu_read_lock()" and
> "rcu_read_unlock()" within the probe callbacks it registers to the tracepoints,
> including the rcuidle ones. Those require that RCU is "watching", which is
> triggering the regression when we remove the calls to rcu_irq_enter/exit_irqson()
> from the rcuidle tracepoint instrumentation sites.

100% agree. I guess I need to clarify what I meant by "rcu to
synchronize trace points". I meant perf's trace point *callbacks*, not the
trace point code itself.

> 
> Which brings a question about handling of NMIs: in the proposed patch, if
> an NMI nests over rcuidle context, AFAIU it will be in a state
> !rcu_is_watching() && in_nmi(), which is handled by this patch with a simple
> "return", meaning important NMIs doing hardware event sampling can be
> completely lost.
> 
> Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI context,
> is it at all valid to use rcu_read_lock/unlock() as perf does from NMI handlers,
> considering that those can be nested on top of rcuidle context?
> 

Note, in the __DO_TRACE macro, we've had this for a long time:

		/* srcu can't be used from NMI */			\
		WARN_ON_ONCE(rcuidle && in_nmi());			\

With nothing triggering.

-- Steve



* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11  2:22   ` Steven Rostedt
@ 2020-02-11  2:32     ` joel
  2020-02-11 15:19     ` Mathieu Desnoyers
  1 sibling, 0 replies; 23+ messages in thread
From: joel @ 2020-02-11  2:32 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Greg Kroah-Hartman,
	Gustavo A. R. Silva, Thomas Gleixner, paulmck, Josh Triplett,
	Lai Jiangshan



On February 10, 2020 9:22:22 PM EST, Steven Rostedt <rostedt@goodmis.org> wrote:
>
>> Which brings a question about handling of NMIs: in the proposed patch,
>> if an NMI nests over rcuidle context, AFAIU it will be in a state
>> !rcu_is_watching() && in_nmi(), which is handled by this patch with a
>> simple "return", meaning important NMIs doing hardware event sampling
>> can be completely lost.
>> 
>> Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI
>> context, is it at all valid to use rcu_read_lock/unlock() as perf does
>> from NMI handlers, considering that those can be nested on top of
>> rcuidle context?
>> 
>
>Note, in the __DO_TRACE macro, we've had this for a long time:
>
>		/* srcu can't be used from NMI */			\
>		WARN_ON_ONCE(rcuidle && in_nmi());			\
>
>With nothing triggering.

I did not understand Mathieu's question; afaik the perf event sampling
code in the NMI handler does not invoke trace_..._rcuidle functions
anywhere. That, afair, is dealt with independently within perf and does
not involve the tracepoint code. And if the NMI has interrupted code
currently running in __DO_TRACE, that's ok, because the NMI is higher
priority and will run to completion before the interrupted code resumes.
Did I miss something? I am not surprised the warning doesn't ever
trigger.

Thanks,
Joel.


>
>-- Steve

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-10 22:06 [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook Steven Rostedt
  2020-02-11  0:30 ` Mathieu Desnoyers
@ 2020-02-11 11:49 ` Peter Zijlstra
  2020-02-11 12:59   ` Paul E. McKenney
  2020-02-11 14:05   ` Steven Rostedt
  2020-02-11 12:21 ` Peter Zijlstra
  2 siblings, 2 replies; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 11:49 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> +	if (!rcu_watching) {						\
> +		/* Can not use RCU if rcu is not watching and in NMI */	\
> +		if (in_nmi())						\
> +			return;						\
> +		rcu_irq_enter_irqson();					\
> +	}								\

I saw the same weirdness in __trace_stack(), and I'm confused by it.

How can we ever get to: in_nmi() && !rcu_watching()? That should be a
BUG.  In particular, nmi_enter() has rcu_nmi_enter().

Paul, can that really happen?


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11  0:30 ` Mathieu Desnoyers
  2020-02-11  2:22   ` Steven Rostedt
@ 2020-02-11 12:00   ` Peter Zijlstra
  2020-02-11 13:03     ` Paul E. McKenney
  2020-02-11 14:10     ` Steven Rostedt
  1 sibling, 2 replies; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 12:00 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: rostedt, linux-kernel, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

On Mon, Feb 10, 2020 at 07:30:32PM -0500, Mathieu Desnoyers wrote:

> > because perf only uses rcu to synchronize trace points.
> 
> That last part seems inaccurate. The tracepoint synchronization is two-fold:
> one part is internal to tracepoint.c (see rcu_free_old_probes()), and the other
> is only needed if the probes are within modules which can be unloaded (see
> tracepoint_synchronize_unregister()). AFAIK, perf never implements probe callbacks
> within modules, so the latter is not needed by perf.
> 
> The culprit of the problem here is that perf issues "rcu_read_lock()" and
> "rcu_read_unlock()" within the probe callbacks it registers to the tracepoints,
> including the rcuidle ones. Those require that RCU is "watching", which is
> triggering the regression when we remove the calls to rcu_irq_enter/exit_irqson()
> from the rcuidle tracepoint instrumentation sites.

It is not the fact that perf issues rcu_read_lock() that is the problem.
As we established yesterday, I can probably remove most rcu_read_lock()
calls from perf today (yay RCU flavour unification).

The problem is that the core perf code uses RCU-managed data, and we
need an existence guarantee for it. It would be BAD (TM) if the
ring-buffer we're writing data to were to suddenly disappear under our
feet, etc.
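
Concretely, a minimal sketch of that existence guarantee (all names here
are placeholders; the real perf ring-buffer handling is more involved):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct buf;				/* placeholder type */
static struct buf __rcu *event_buf;

static void emit_sample(struct buf *b) { /* ... write into b ... */ }

static void irq_or_nmi_path(void)
{
	struct buf *b;

	rcu_read_lock();
	b = rcu_dereference(event_buf);
	if (b)
		emit_sample(b);		/* b cannot be freed while here */
	rcu_read_unlock();
}

static void teardown_path(void)
{
	struct buf *old = rcu_replace_pointer(event_buf, NULL, 1);

	synchronize_rcu();		/* every reader of 'old' is done */
	kfree(old);
}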

> Which brings a question about handling of NMIs: in the proposed patch, if
> > an NMI nests over rcuidle context, AFAIU it will be in a state
> !rcu_is_watching() && in_nmi(), which is handled by this patch with a simple
> "return", meaning important NMIs doing hardware event sampling can be
> completely lost.
> 
> Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI context,
> is it at all valid to use rcu_read_lock/unlock() as perf does from NMI handlers,

Again, rcu_read_lock() itself really isn't the problem. But we need
NMIs, just like regular interrupts, to imply rcu_read_lock(). That is,
any observable (RCU managed) pointer must stay valid during the NMI/IRQ
execution.

> > considering that those can be nested on top of rcuidle context?

As per nmi_enter() calling rcu_nmi_enter() I've always assumed that NMIs
are fully covered by RCU.

If this isn't so, RCU is terminally broken :-)


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-10 22:06 [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook Steven Rostedt
  2020-02-11  0:30 ` Mathieu Desnoyers
  2020-02-11 11:49 ` Peter Zijlstra
@ 2020-02-11 12:21 ` Peter Zijlstra
  2020-02-11 14:10   ` Steven Rostedt
  2 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 12:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> diff --git a/include/trace/perf.h b/include/trace/perf.h
> index dbc6c74defc3..1c94ce0cd4e2 100644
> --- a/include/trace/perf.h
> +++ b/include/trace/perf.h
> @@ -39,17 +39,27 @@ perf_trace_##call(void *__data, proto)					\
>  	u64 __count = 1;						\
>  	struct task_struct *__task = NULL;				\
>  	struct hlist_head *head;					\
> +	bool rcu_watching;						\
>  	int __entry_size;						\
>  	int __data_size;						\
>  	int rctx;							\
>  									\
> +	rcu_watching = rcu_is_watching();				\
> +									\
>  	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
>  									\
> +	if (!rcu_watching) {						\
> +		/* Can not use RCU if rcu is not watching and in NMI */	\
> +		if (in_nmi())						\
> +			return;						\
> +		rcu_irq_enter_irqson();					\
> +	}								\
> +									\
>  	head = this_cpu_ptr(event_call->perf_events);			\
>  	if (!bpf_prog_array_valid(event_call) &&			\
>  	    __builtin_constant_p(!__task) && !__task &&			\
>  	    hlist_empty(head))						\
> -		return;							\
> +		goto out;						\
>  									\
>  	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
>  			     sizeof(u64));				\
> @@ -57,7 +67,7 @@ perf_trace_##call(void *__data, proto)					\
>  									\
>  	entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx);	\
>  	if (!entry)							\
> -		return;							\
> +		goto out;						\
>  									\
>  	perf_fetch_caller_regs(__regs);					\
>  									\
> @@ -68,6 +78,9 @@ perf_trace_##call(void *__data, proto)					\
>  	perf_trace_run_bpf_submit(entry, __entry_size, rctx,		\
>  				  event_call, __count, __regs,		\
>  				  head, __task);			\
> +out:									\
> +	if (!rcu_watching)						\
> +		rcu_irq_exit_irqson();					\
>  }

It is probably okay to move that into perf_tp_event(), then this:

>  /*
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 1694a6b57ad8..3e6f07b62515 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -719,6 +719,7 @@ void rcu_irq_exit_irqson(void)
>  	rcu_irq_exit();
>  	local_irq_restore(flags);
>  }
> +EXPORT_SYMBOL_GPL(rcu_irq_exit_irqson);
>  
>  /*
>   * Exit an RCU extended quiescent state, which can be either the
> @@ -890,6 +891,7 @@ void rcu_irq_enter_irqson(void)
>  	rcu_irq_enter();
>  	local_irq_restore(flags);
>  }
> +EXPORT_SYMBOL_GPL(rcu_irq_enter_irqson);
>  
>  /*
>   * If any sort of urgency was applied to the current CPU (for example,

can go too. Those things really should not be exported.


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 11:49 ` Peter Zijlstra
@ 2020-02-11 12:59   ` Paul E. McKenney
  2020-02-11 13:10     ` Peter Zijlstra
  2020-02-11 14:05   ` Steven Rostedt
  1 sibling, 1 reply; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 12:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 12:49:54PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> > +	if (!rcu_watching) {						\
> > +		/* Can not use RCU if rcu is not watching and in NMI */	\
> > +		if (in_nmi())						\
> > +			return;						\
> > +		rcu_irq_enter_irqson();					\
> > +	}								\
> 
> I saw the same weirdness in __trace_stack(), and I'm confused by it.
> 
> How can we ever get to: in_nmi() && !rcu_watching()? That should be a
> BUG.  In particular, nmi_enter() has rcu_nmi_enter().
> 
> Paul, can that really happen?

Not sure what the current situation is, but if I remember correctly it
used to be possible to get to an NMI handler without RCU being informed.
If NMI handlers now unconditionally inform RCU, then like you, I don't
see that the "if (in_nmi()) return" is needed.

However, a quick grep for NMI_MASK didn't show me the NMI_MASK bit
getting set.  Help?

							Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 12:00   ` Peter Zijlstra
@ 2020-02-11 13:03     ` Paul E. McKenney
  2020-02-11 13:16       ` Peter Zijlstra
  2020-02-11 14:10     ` Steven Rostedt
  1 sibling, 1 reply; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 13:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mathieu Desnoyers, rostedt, linux-kernel, Ingo Molnar,
	Joel Fernandes (Google), Greg Kroah-Hartman, Gustavo A. R. Silva,
	Thomas Gleixner, Josh Triplett, Lai Jiangshan

On Tue, Feb 11, 2020 at 01:00:15PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 10, 2020 at 07:30:32PM -0500, Mathieu Desnoyers wrote:
> 
> > > because perf only uses rcu to synchronize trace points.
> > 
> > That last part seems inaccurate. The tracepoint synchronization is two-fold:
> > one part is internal to tracepoint.c (see rcu_free_old_probes()), and the other
> > is only needed if the probes are within modules which can be unloaded (see
> > tracepoint_synchronize_unregister()). AFAIK, perf never implements probe callbacks
> > within modules, so the latter is not needed by perf.
> > 
> > The culprit of the problem here is that perf issues "rcu_read_lock()" and
> > "rcu_read_unlock()" within the probe callbacks it registers to the tracepoints,
> > including the rcuidle ones. Those require that RCU is "watching", which is
> > triggering the regression when we remove the calls to rcu_irq_enter/exit_irqson()
> > from the rcuidle tracepoint instrumentation sites.
> 
> It is not the fact that perf issues rcu_read_lock() that is the problem.
> As we established yesterday, I can probably remove most rcu_read_lock()
> calls from perf today (yay RCU flavour unification).

Glad some aspect of this unification is actually helping you.  ;-)

> The problem is that the core perf code uses RCU-managed data, and we
> need an existence guarantee for it. It would be BAD (TM) if the
> ring-buffer we're writing data to were to suddenly disappear under our
> feet, etc.
> 
> > Which brings a question about handling of NMIs: in the proposed patch, if
> > an NMI nests over rcuidle context, AFAIU it will be in a state
> > !rcu_is_watching() && in_nmi(), which is handled by this patch with a simple
> > "return", meaning important NMIs doing hardware event sampling can be
> > completely lost.
> > 
> > Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI context,
> > is it at all valid to use rcu_read_lock/unlock() as perf does from NMI handlers,
> 
> Again, rcu_read_lock() itself really isn't the problem. But we need
> NMIs, just like regular interrupts, to imply rcu_read_lock(). That is,
> any observable (RCU managed) pointer must stay valid during the NMI/IRQ
> execution.
> 
> > considering that those can be nested on top of rcuidle context?
> 
> As per nmi_enter() calling rcu_nmi_enter() I've always assumed that NMIs
> are fully covered by RCU.
> 
> If this isn't so, RCU is terminally broken :-)

All RCU can do is respond to calls to rcu_nmi_enter() and rcu_nmi_exit().
It has not yet figured out how to force people to add these calls where
they are needed.  ;-)

But yes, it would be very nice if architectures arranged things so
that all NMI handlers were visible to RCU.  And we no longer have
half-interrupts, so maybe there is hope...

							Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 12:59   ` Paul E. McKenney
@ 2020-02-11 13:10     ` Peter Zijlstra
  2020-02-11 13:20       ` Paul E. McKenney
  0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 13:10 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Steven Rostedt, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 04:59:29AM -0800, Paul E. McKenney wrote:

> However, a quick grep for NMI_MASK didn't show me the NMI_MASK bit
> getting set.  Help?

| #define nmi_enter()						\
| 	do {							\
| 		arch_nmi_enter();				\
| 		printk_nmi_enter();				\
| 		lockdep_off();					\
| 		ftrace_nmi_enter();				\
| 		BUG_ON(in_nmi());				\
| 		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\

		^^^^ right there

| 		rcu_nmi_enter();				\
| 		trace_hardirq_enter();				\
| 	} while (0)


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 13:03     ` Paul E. McKenney
@ 2020-02-11 13:16       ` Peter Zijlstra
  2020-02-11 13:23         ` Paul E. McKenney
  0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 13:16 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Mathieu Desnoyers, rostedt, linux-kernel, Ingo Molnar,
	Joel Fernandes (Google), Greg Kroah-Hartman, Gustavo A. R. Silva,
	Thomas Gleixner, Josh Triplett, Lai Jiangshan

On Tue, Feb 11, 2020 at 05:03:01AM -0800, Paul E. McKenney wrote:

> > It is not the fact that perf issues rcu_read_lock() that is the problem.
> > As we established yesterday, I can probably remove most rcu_read_lock()
> > calls from perf today (yay RCU flavour unification).
> 
> Glad some aspect of this unification is actually helping you.  ;-)

rcu_read_lock() is exceedingly cheap though, so I never really worried
about it. But now that RCU includes RCU-sched (again) we can go and
remove a bunch of them.
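
For illustration, the kind of removal that permits (an illustrative
fragment; 'gp' and 'use()' are placeholders, not actual perf code). Since
the flavours were unified, synchronize_rcu() also waits for regions that
run with preemption or interrupts disabled, so the explicit marking
inside such a region is redundant:

	/* before: explicit read-side critical section */
	rcu_read_lock();
	use(rcu_dereference(gp));
	rcu_read_unlock();

	/* after: a context that already has preemption disabled
	 * (e.g. a hard-IRQ handler) is itself the read-side
	 * critical section */
	use(rcu_dereference_sched(gp));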

> > As per nmi_enter() calling rcu_nmi_enter() I've always assumed that NMIs
> > are fully covered by RCU.
> > 
> > If this isn't so, RCU is terminally broken :-)
> 
> All RCU can do is respond to calls to rcu_nmi_enter() and rcu_nmi_exit().
> It has not yet figured out how to force people to add these calls where
> they are needed.  ;-)
> 
> But yes, it would be very nice if architectures arranged things so
> that all NMI handlers were visible to RCU.  And we no longer have
> half-interrupts, so maybe there is hope...

Well,.. you could go back to simply _always_ watching :-)


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 13:10     ` Peter Zijlstra
@ 2020-02-11 13:20       ` Paul E. McKenney
  0 siblings, 0 replies; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 13:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 02:10:46PM +0100, Peter Zijlstra wrote:
> On Tue, Feb 11, 2020 at 04:59:29AM -0800, Paul E. McKenney wrote:
> 
> > However, a quick grep for NMI_MASK didn't show me the NMI_MASK bit
> > getting set.  Help?
> 
> | #define nmi_enter()						\
> | 	do {							\
> | 		arch_nmi_enter();				\
> | 		printk_nmi_enter();				\
> | 		lockdep_off();					\
> | 		ftrace_nmi_enter();				\
> | 		BUG_ON(in_nmi());				\
> | 		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
> 
> 		^^^^ right there
> 
> | 		rcu_nmi_enter();				\
> | 		trace_hardirq_enter();				\
> | 	} while (0)

Color me blind, and thank you!

							Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 13:16       ` Peter Zijlstra
@ 2020-02-11 13:23         ` Paul E. McKenney
  0 siblings, 0 replies; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 13:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mathieu Desnoyers, rostedt, linux-kernel, Ingo Molnar,
	Joel Fernandes (Google), Greg Kroah-Hartman, Gustavo A. R. Silva,
	Thomas Gleixner, Josh Triplett, Lai Jiangshan

On Tue, Feb 11, 2020 at 02:16:37PM +0100, Peter Zijlstra wrote:
> On Tue, Feb 11, 2020 at 05:03:01AM -0800, Paul E. McKenney wrote:
> 
> > > It is not the fact that perf issues rcu_read_lock() that is the problem.
> > > As we established yesterday, I can probably remove most rcu_read_lock()
> > > calls from perf today (yay RCU flavour unification).
> > 
> > Glad some aspect of this unification is actually helping you.  ;-)
> 
> rcu_read_lock() is exceedingly cheap though, so I never really worried
> about it. But now that RCU includes RCU-sched (again) we can go and
> remove a bunch of them.
> 
> > > As per nmi_enter() calling rcu_nmi_enter() I've always assumed that NMIs
> > > are fully covered by RCU.
> > > 
> > > If this isn't so, RCU is terminally broken :-)
> > 
> > All RCU can do is respond to calls to rcu_nmi_enter() and rcu_nmi_exit().
> > It has not yet figured out how to force people to add these calls where
> > they are needed.  ;-)
> > 
> > But yes, it would be very nice if architectures arranged things so
> > that all NMI handlers were visible to RCU.  And we no longer have
> > half-interrupts, so maybe there is hope...
> 
> Well,.. you could go back to simply _always_ watching :-)

The idle loop always was unwatched, even back in DYNIX/ptx.  And watching
the idle loop requires waking up idle CPUs, which makes lots of people
quite unhappy.  But it could be done with something sort of like
synchronize_rcu_tasks(), as long as this didn't need to be used in
production on battery-powered systems.

							Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 11:49 ` Peter Zijlstra
  2020-02-11 12:59   ` Paul E. McKenney
@ 2020-02-11 14:05   ` Steven Rostedt
  2020-02-11 15:05     ` Peter Zijlstra
  2020-02-11 15:06     ` Paul E. McKenney
  1 sibling, 2 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-11 14:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Tue, 11 Feb 2020 12:49:54 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> > +	if (!rcu_watching) {						\
> > +		/* Can not use RCU if rcu is not watching and in NMI */	\
> > +		if (in_nmi())						\
> > +			return;						\
> > +		rcu_irq_enter_irqson();					\
> > +	}								\  
> 
> I saw the same weirdness in __trace_stack(), and I'm confused by it.
> 
> How can we ever get to: in_nmi() && !rcu_watching()? That should be a
> BUG.  In particular, nmi_enter() has rcu_nmi_enter().
> 
> Paul, can that really happen?

The stack tracer connects to the function tracer and is called at all
the places that function tracing can be called from. As I like being
able to trace RCU internal functions (especially as they are complex),
I don't want to set them all to notrace. But for callbacks that
require RCU to be watching, we need this check, because there are
states where we can be in an NMI while RCU is not watching (as there
are some places in nmi_enter that can be traced!).

And if we are tracing preempt_enable and preempt_disable (as Joel added
trace events there), it may be the case for trace events too.

-- Steve



* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 12:00   ` Peter Zijlstra
  2020-02-11 13:03     ` Paul E. McKenney
@ 2020-02-11 14:10     ` Steven Rostedt
  1 sibling, 0 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-11 14:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mathieu Desnoyers, linux-kernel, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

On Tue, 11 Feb 2020 13:00:15 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> As per nmi_enter() calling rcu_nmi_enter() I've always assumed that NMIs
> are fully covered by RCU.
> 
> If this isn't so, RCU is terminally broken :-)

Most of the time it is. But for tracing that injects callbacks at
arbitrary points of code, it can break. I've always said that tracing
is a more sensitive context than NMI itself. The reason NMIs are
sensitive is that they can happen pretty much anywhere. But tracing
can also happen in the transition code that enters the NMI.

This is why function tracing does the "rude" RCU flavor (yes Paul, I'd
love you to add that flavor!), which performs a schedule_on_each_cpu()
before freeing anything, because it traces when RCU is not watching.
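
Roughly, that "rude" synchronization amounts to this (a simplified
sketch; the actual ftrace code differs and these names are made up):

#include <linux/workqueue.h>
#include <linux/slab.h>

static void rude_nop(struct work_struct *work)
{
	/* empty: forcing every CPU through the scheduler *is* the
	 * synchronization */
}

static void rude_synchronize_then_free(void *old_ops)
{
	/* after this, no CPU can still be running a tracing callback
	 * that started in a !rcu_is_watching() region */
	schedule_on_each_cpu(rude_nop);
	kfree(old_ops);
}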

But RCU really shouldn't have to bend over backward for tracing, as
tracing is the exception and not the norm.

-- Steve


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 12:21 ` Peter Zijlstra
@ 2020-02-11 14:10   ` Steven Rostedt
  0 siblings, 0 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-11 14:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Tue, 11 Feb 2020 13:21:20 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> > @@ -68,6 +78,9 @@ perf_trace_##call(void *__data, proto)					\
> >  	perf_trace_run_bpf_submit(entry, __entry_size, rctx,		\
> >  				  event_call, __count, __regs,		\
> >  				  head, __task);			\
> > +out:									\
> > +	if (!rcu_watching)						\
> > +		rcu_irq_exit_irqson();					\
> >  }  
> 
> It is probably okay to move that into perf_tp_event(), then this:
> 
> >  /*
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 1694a6b57ad8..3e6f07b62515 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -719,6 +719,7 @@ void rcu_irq_exit_irqson(void)
> >  	rcu_irq_exit();
> >  	local_irq_restore(flags);
> >  }
> > +EXPORT_SYMBOL_GPL(rcu_irq_exit_irqson);
> >  
> >  /*
> >   * Exit an RCU extended quiescent state, which can be either the
> > @@ -890,6 +891,7 @@ void rcu_irq_enter_irqson(void)
> >  	rcu_irq_enter();
> >  	local_irq_restore(flags);
> >  }
> > +EXPORT_SYMBOL_GPL(rcu_irq_enter_irqson);
> >  
> >  /*
> >   * If any sort of urgency was applied to the current CPU (for example,  
> 
> can go too. Those things really should not be exported.

Thanks, I'll send an updated patch.

-- Steve


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 14:05   ` Steven Rostedt
@ 2020-02-11 15:05     ` Peter Zijlstra
  2020-02-11 15:29       ` Peter Zijlstra
  2020-02-11 15:06     ` Paul E. McKenney
  1 sibling, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 15:05 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Tue, Feb 11, 2020 at 09:05:03AM -0500, Steven Rostedt wrote:
> On Tue, 11 Feb 2020 12:49:54 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> > > +	if (!rcu_watching) {						\
> > > +		/* Can not use RCU if rcu is not watching and in NMI */	\
> > > +		if (in_nmi())						\
> > > +			return;						\
> > > +		rcu_irq_enter_irqson();					\
> > > +	}								\  
> > 
> > I saw the same weirdness in __trace_stack(), and I'm confused by it.
> > 
> > How can we ever get to: in_nmi() && !rcu_watching()? That should be a
> > BUG.  In particular, nmi_enter() has rcu_nmi_enter().
> > 
> > Paul, can that really happen?
> 
> The stack tracer connects to the function tracer and is called at all
> the places that function tracing can be called from. As I like being
> able to trace RCU internal functions (especially as they are complex),
> I don't want to set them all to notrace. But for callbacks that
> require RCU to be watching, we need this check, because there are
> states where we can be in an NMI while RCU is not watching (as there
> are some places in nmi_enter that can be traced!).
> 
> And if we are tracing preempt_enable and preempt_disable (as Joel added
> trace events there), it may be the case for trace events too.

Bloody hell; what a trainwreck. Luckily there's comments around that
explain this!

So we haz:

| #define nmi_enter()						\
| 	do {							\
| 		arch_nmi_enter();				\

arm64 only, let's ignore for now

| 		printk_nmi_enter();				\

notrace

| 		lockdep_off();					\

notrace

| 		ftrace_nmi_enter();				\

!notrace !!!

| 		BUG_ON(in_nmi());				\
| 		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);\

let's make this __preempt_count_add() ASAP!

| 		rcu_nmi_enter();				\

are you _really_ sure you want to go trace that?!?

| 		trace_hardirq_enter();				\
| 	} while (0)




* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 14:05   ` Steven Rostedt
  2020-02-11 15:05     ` Peter Zijlstra
@ 2020-02-11 15:06     ` Paul E. McKenney
  2020-02-11 15:31       ` Peter Zijlstra
  1 sibling, 1 reply; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 15:06 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 09:05:03AM -0500, Steven Rostedt wrote:
> On Tue, 11 Feb 2020 12:49:54 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Mon, Feb 10, 2020 at 05:06:43PM -0500, Steven Rostedt wrote:
> > > +	if (!rcu_watching) {						\
> > > +		/* Can not use RCU if rcu is not watching and in NMI */	\
> > > +		if (in_nmi())						\
> > > +			return;						\
> > > +		rcu_irq_enter_irqson();					\
> > > +	}								\  
> > 
> > I saw the same weirdness in __trace_stack(), and I'm confused by it.
> > 
> > How can we ever get to: in_nmi() && !rcu_watching()? That should be a
> > BUG.  In particular, nmi_enter() has rcu_nmi_enter().
> > 
> > Paul, can that really happen?
> 
> The stack tracer connects to the function tracer and is called at all
> the places that function tracing can be called from. As I like being
> able to trace RCU internal functions (especially as they are complex),
> I don't want to set them all to notrace. But for callbacks that
> require RCU to be watching, we need this check, because there are
> states where we can be in an NMI while RCU is not watching (as there
> are some places in nmi_enter that can be traced!).
> 
> And if we are tracing preempt_enable and preempt_disable (as Joel added
> trace events there), it may be the case for trace events too.

Ah, thank you for the reminder!

Should Documentation/RCU/Design/Requirements/Requirements.rst be
updated to include this?

And I have to ask...  What happens if we are very early in from-idle
NMI entry (or very late in NMI exit), such that both in_nmi() and
rcu_is_watching() are returning false?  Or did I miss a turn somewhere?

							Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11  2:22   ` Steven Rostedt
  2020-02-11  2:32     ` joel
@ 2020-02-11 15:19     ` Mathieu Desnoyers
  1 sibling, 0 replies; 23+ messages in thread
From: Mathieu Desnoyers @ 2020-02-11 15:19 UTC (permalink / raw)
  To: rostedt
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

----- On Feb 10, 2020, at 9:22 PM, rostedt <rostedt@goodmis.org> wrote:

> On Mon, 10 Feb 2020 19:30:32 -0500 (EST)
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> 
>> ----- On Feb 10, 2020, at 5:06 PM, rostedt <rostedt@goodmis.org> wrote:
>> 
>> > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
>> 
>> Hi Steven,
> 
> Hi Mathieu!
[...]
>> 
>> Which brings a question about handling of NMIs: in the proposed patch, if
>> an NMI nests over rcuidle context, AFAIU it will be in a state
>> !rcu_is_watching() && in_nmi(), which is handled by this patch with a simple
>> "return", meaning important NMIs doing hardware event sampling can be
>> completely lost.
>> 
>> Considering that we cannot use rcu_irq_enter/exit_irqson() from NMI context,
>> is it at all valid to use rcu_read_lock/unlock() as perf does from NMI handlers,
>> considering that those can be nested on top of rcuidle context?
>> 
> 
> Note, in the __DO_TRACE macro, we've had this for a long time:
> 
>		/* srcu can't be used from NMI */			\
>		WARN_ON_ONCE(rcuidle && in_nmi());			\
> 
> With nothing triggering.

The "rcuidle" argument is only true for tracepoints which are declared to be used
within the rcuidle code. AFAIK, it does not cover tracepoints which can be placed
in NMI handlers. The state I am concerned about is really:

WARN_ON_ONCE(!rcu_is_watching() && in_nmi())

As pointed out by Peter further down in this thread.

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 15:05     ` Peter Zijlstra
@ 2020-02-11 15:29       ` Peter Zijlstra
  2020-02-11 16:16         ` Steven Rostedt
  0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 15:29 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Tue, Feb 11, 2020 at 04:05:32PM +0100, Peter Zijlstra wrote:

> So we haz:
> 
> | #define nmi_enter()						\
> | 	do {							\
> | 		arch_nmi_enter();				\
> 
> arm64 only, let's ignore for now
> 
> | 		printk_nmi_enter();				\
> 
> notrace
> 
> | 		lockdep_off();					\
> 
> notrace
> 
> | 		ftrace_nmi_enter();				\
> 
> !notrace !!!
> 
> | 		BUG_ON(in_nmi());				\
> | 		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);\
> 
> let's make this __preempt_count_add() ASAP!

preempt_count_add() first frobs the actual preempt_count and then does
the trace, so that might just work. But it does need a notrace
annotation, I'm thinking, because calling into the function tracer
_before_ we do the preempt_count increment is irrecoverable crap.
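
For reference, the shape being described (a simplified sketch of
preempt_count_add() from kernel/sched/core.c, details elided, with the
notrace annotation proposed above):

void notrace preempt_count_add(int val)
{
	__preempt_count_add(val);	/* the count is frobbed first... */
	preempt_latency_start(val);	/* ...tracing only happens after, */
					/* so in_nmi() is already visible */
}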

> | 		rcu_nmi_enter();				\
> 
> are you _really_ sure you want to go trace that?!?
> 
> | 		trace_hardirq_enter();				\
> | 	} while (0)
> 
> 


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 15:06     ` Paul E. McKenney
@ 2020-02-11 15:31       ` Peter Zijlstra
  2020-02-11 15:40         ` Paul E. McKenney
  0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2020-02-11 15:31 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Steven Rostedt, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 07:06:15AM -0800, Paul E. McKenney wrote:
> And I have to ask...  What happens if we are very early in from-idle
> NMI entry (or very late in NMI exit), such that both in_nmi() and
> rcu_is_watching() are returning false?  Or did I miss a turn somewhere?

We must, by very careful inspection, ensure that doesn't happen.

No tracing must happen before preempt_count increment / after
preempt_count decrement. Otherwise we can no longer recover.


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 15:31       ` Peter Zijlstra
@ 2020-02-11 15:40         ` Paul E. McKenney
  0 siblings, 0 replies; 23+ messages in thread
From: Paul E. McKenney @ 2020-02-11 15:40 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Josh Triplett, Mathieu Desnoyers, Lai Jiangshan

On Tue, Feb 11, 2020 at 04:31:24PM +0100, Peter Zijlstra wrote:
> On Tue, Feb 11, 2020 at 07:06:15AM -0800, Paul E. McKenney wrote:
> > And I have to ask...  What happens if we are very early in from-idle
> > NMI entry (or very late in NMI exit), such that both in_nmi() and
> > rcu_is_watching() are returning false?  Or did I miss a turn somewhere?
> 
> We must, by very careful inspection, ensure that doesn't happen.
> 
> No tracing must happen before preempt_count increment / after
> preempt_count decrement. Otherwise we can no longer recover.

I was afraid of that, but agreed.  ;-)

						Thanx, Paul


* Re: [PATCH] tracing/perf: Move rcu_irq_enter/exit_irqson() to perf trace point hook
  2020-02-11 15:29       ` Peter Zijlstra
@ 2020-02-11 16:16         ` Steven Rostedt
  0 siblings, 0 replies; 23+ messages in thread
From: Steven Rostedt @ 2020-02-11 16:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: LKML, Ingo Molnar, Joel Fernandes (Google),
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	Paul E. McKenney, Josh Triplett, Mathieu Desnoyers,
	Lai Jiangshan

On Tue, 11 Feb 2020 16:29:45 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> > | 		ftrace_nmi_enter();				\
> > 
> > !notrace !!!

Note, all inline functions are "notrace" by default, and ftrace_nmi_enter()
is inline.

in include/linux/compiler_types.h:

#if !defined(CONFIG_OPTIMIZE_INLINING)
#define inline inline __attribute__((__always_inline__)) __gnu_inline \
	__inline_maybe_unused notrace
#else
#define inline inline                                    __gnu_inline \
	__inline_maybe_unused notrace
#endif

-- Steve

