linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/8] tracing vs rcu vs nmi
@ 2020-02-12  9:32 Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 1/8] rcu: Rename rcu_irq_{enter,exit}_irqson() Peter Zijlstra
                   ` (11 more replies)
  0 siblings, 12 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Hi all,

These here patches are the result of Mathieu and Steve trying to get commit
865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
tracepoints") reverted again.

One of the things discovered is that tracing MUST NOT happen before nmi_enter()
or after nmi_exit(). I've only fixed x86, but quickly gone through other
architectures and there is definitely more stuff to be fixed (simply grep for
nmi_enter in your arch).




^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/8] rcu: Rename rcu_irq_{enter,exit}_irqson()
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 2/8] rcu: Mark rcu_dynticks_curr_cpu_in_eqs() inline Peter Zijlstra
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

The functions do in fact use local_irq_{save,restore}() and can
therefore be used when IRQs are in fact disabled. Worse, they are
already used in places where IRQs are disabled, leading to great
confusion when reading the code.

Rename them to fix this confusion.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/rcupdate.h   |    4 ++--
 include/linux/rcutiny.h    |    4 ++--
 include/linux/rcutree.h    |    4 ++--
 include/linux/tracepoint.h |    4 ++--
 kernel/cpu_pm.c            |    4 ++--
 kernel/rcu/tree.c          |    8 ++++----
 kernel/trace/trace.c       |    4 ++--
 7 files changed, 16 insertions(+), 16 deletions(-)

--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -120,9 +120,9 @@ static inline void rcu_init_nohz(void) {
  */
 #define RCU_NONIDLE(a) \
 	do { \
-		rcu_irq_enter_irqson(); \
+		rcu_irq_enter_irqsave(); \
 		do { a; } while (0); \
-		rcu_irq_exit_irqson(); \
+		rcu_irq_exit_irqsave(); \
 	} while (0)
 
 /*
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -68,8 +68,8 @@ static inline int rcu_jiffies_till_stall
 static inline void rcu_idle_enter(void) { }
 static inline void rcu_idle_exit(void) { }
 static inline void rcu_irq_enter(void) { }
-static inline void rcu_irq_exit_irqson(void) { }
-static inline void rcu_irq_enter_irqson(void) { }
+static inline void rcu_irq_exit_irqsave(void) { }
+static inline void rcu_irq_enter_irqsave(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void exit_rcu(void) { }
 static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -46,8 +46,8 @@ void rcu_idle_enter(void);
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
-void rcu_irq_enter_irqson(void);
-void rcu_irq_exit_irqson(void);
+void rcu_irq_enter_irqsave(void);
+void rcu_irq_exit_irqsave(void);
 
 void exit_rcu(void);
 
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -181,7 +181,7 @@ static inline struct tracepoint *tracepo
 		 */							\
 		if (rcuidle) {						\
 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
-			rcu_irq_enter_irqson();				\
+			rcu_irq_enter_irqsave();			\
 		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
@@ -195,7 +195,7 @@ static inline struct tracepoint *tracepo
 		}							\
 									\
 		if (rcuidle) {						\
-			rcu_irq_exit_irqson();				\
+			rcu_irq_exit_irqsave();				\
 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
 		}							\
 									\
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -24,10 +24,10 @@ static int cpu_pm_notify(enum cpu_pm_eve
 	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
 	 * RCU know this.
 	 */
-	rcu_irq_enter_irqson();
+	rcu_irq_enter_irqsave();
 	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
 		nr_to_call, nr_calls);
-	rcu_irq_exit_irqson();
+	rcu_irq_exit_irqsave();
 
 	return notifier_to_errno(ret);
 }
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -699,10 +699,10 @@ void rcu_irq_exit(void)
 /*
  * Wrapper for rcu_irq_exit() where interrupts are enabled.
  *
- * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test
+ * If you add or remove a call to rcu_irq_exit_irqsave(), be sure to test
  * with CONFIG_RCU_EQS_DEBUG=y.
  */
-void rcu_irq_exit_irqson(void)
+void rcu_irq_exit_irqsave(void)
 {
 	unsigned long flags;
 
@@ -875,10 +875,10 @@ void rcu_irq_enter(void)
 /*
  * Wrapper for rcu_irq_enter() where interrupts are enabled.
  *
- * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
+ * If you add or remove a call to rcu_irq_enter_irqsave(), be sure to test
  * with CONFIG_RCU_EQS_DEBUG=y.
  */
-void rcu_irq_enter_irqson(void)
+void rcu_irq_enter_irqsave(void)
 {
 	unsigned long flags;
 
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3004,9 +3004,9 @@ void __trace_stack(struct trace_array *t
 	if (unlikely(in_nmi()))
 		return;
 
-	rcu_irq_enter_irqson();
+	rcu_irq_enter_irqsave();
 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
-	rcu_irq_exit_irqson();
+	rcu_irq_exit_irqsave();
 }
 
 /**



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 2/8] rcu: Mark rcu_dynticks_curr_cpu_in_eqs() inline
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 1/8] rcu: Rename rcu_irq_{enter,exit}_irqson() Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 3/8] rcu,tracing: Create trace_rcu_{enter,exit}() Peter Zijlstra
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Since rcu_is_watching() is notrace (and needs to be, as it can be
called from the tracers), make sure everything it in turn calls is
notrace too.

To that effect, mark rcu_dynticks_curr_cpu_in_eqs() inline, which
implies notrace, as the function is tiny.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/rcu/tree.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -294,7 +294,7 @@ static void rcu_dynticks_eqs_online(void
  *
  * No ordering, as we are sampling CPU-local information.
  */
-static bool rcu_dynticks_curr_cpu_in_eqs(void)
+static inline bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 3/8] rcu,tracing: Create trace_rcu_{enter,exit}()
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 1/8] rcu: Rename rcu_irq_{enter,exit}_irqson() Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 2/8] rcu: Mark rcu_dynticks_curr_cpu_in_eqs() inline Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace Peter Zijlstra
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

To facilitate tracers that need RCU, add some helpers to wrap the
magic required.

The problem is that we can call into tracers (trace events and
function tracing) while RCU isn't watching and this can happen from
any context, including NMI.

It is this latter that is causing most of the trouble; we must make
sure in_nmi() returns true before we land in anything tracing,
otherwise we cannot recover.

These helpers are macros because of header-hell; they're placed here
because of the proximity to nmi_{enter,exit{().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/hardirq.h |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -89,4 +89,52 @@ extern void irq_exit(void);
 		arch_nmi_exit();				\
 	} while (0)
 
+/*
+ * Tracing vs RCU
+ * --------------
+ *
+ * tracepoints and function-tracing can happen when RCU isn't watching (idle,
+ * or early IRQ/NMI entry).
+ *
+ * When it happens during idle or early during IRQ entry, tracing will have
+ * to inform RCU that it ought to pay attention, this is done by calling
+ * rcu_irq_enter_irqsave().
+ *
+ * On NMI entry, we must be very careful that tracing only happens after we've
+ * incremented preempt_count(), otherwise we cannot tell we're in NMI and take
+ * the special path.
+ */
+
+#define __TR_IRQ	1
+#define __TR_NMI	2
+
+#define trace_rcu_enter()					\
+({								\
+	unsigned long state = 0;				\
+	if (!rcu_is_watching())	{				\
+		if (in_nmi()) {					\
+			state = __TR_NMI;			\
+			rcu_nmi_enter();			\
+		} else {					\
+			state = __TR_IRQ;			\
+			rcu_irq_enter_irqsave();		\
+		}						\
+	}							\
+	state;							\
+})
+
+#define trace_rcu_exit(state)					\
+do {								\
+	switch (state) {					\
+	case __TR_IRQ:						\
+		rcu_irq_exit_irqsave();				\
+		break;						\
+	case __TR_NMI:						\
+		rcu_nmi_exit();					\
+		break;						\
+	default:						\
+		break;						\
+	}							\
+} while (0)
+
 #endif /* LINUX_HARDIRQ_H */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (2 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 3/8] rcu,tracing: Create trace_rcu_{enter,exit}() Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12 14:24   ` Steven Rostedt
  2020-02-12  9:32 ` [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace Peter Zijlstra
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Because of the requirement that no tracing happens until after we've
incremented preempt_count, see nmi_enter() / trace_rcu_enter(), mark
these functions as notrace.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3781,7 +3781,7 @@ static inline void preempt_latency_start
 	}
 }
 
-void preempt_count_add(int val)
+void notrace preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -3813,7 +3813,7 @@ static inline void preempt_latency_stop(
 		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 }
 
-void preempt_count_sub(int val)
+void notrace preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (3 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12 14:25   ` Steven Rostedt
  2020-02-12  9:32 ` [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes Peter Zijlstra
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Because do_nmi() must not call into tracing outside of
nmi_enter()/nmi_exit(), these functions must be notrace.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/cpu/common.c |    4 ++--
 arch/x86/kernel/nmi.c        |    6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1690,14 +1690,14 @@ void syscall_init(void)
 DEFINE_PER_CPU(int, debug_stack_usage);
 DEFINE_PER_CPU(u32, debug_idt_ctr);
 
-void debug_stack_set_zero(void)
+void notrace debug_stack_set_zero(void)
 {
 	this_cpu_inc(debug_idt_ctr);
 	load_current_idt();
 }
 NOKPROBE_SYMBOL(debug_stack_set_zero);
 
-void debug_stack_reset(void)
+void notrace debug_stack_reset(void)
 {
 	if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
 		return;
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -534,6 +534,9 @@ do_nmi(struct pt_regs *regs, long error_
 	}
 #endif
 
+	/*
+	 * It is important that no tracing happens before nmi_enter()!
+	 */
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -542,6 +545,9 @@ do_nmi(struct pt_regs *regs, long error_
 		default_do_nmi(regs);
 
 	nmi_exit();
+	/*
+	 * No tracing after nmi_exit()!
+	 */
 
 #ifdef CONFIG_X86_64
 	if (unlikely(this_cpu_read(update_debug_stack))) {



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (4 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12 14:26   ` Steven Rostedt
  2020-02-12  9:32 ` [PATCH 7/8] tracing: Employ trace_rcu_{enter,exit}() Peter Zijlstra
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

The tracepoint interface will stop providing regular RCU context; make
sure we do it ourselves, since perf makes use of regular RCU protected
data.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |    5 +++++
 1 file changed, 5 insertions(+)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8950,6 +8950,7 @@ void perf_tp_event(u16 event_type, u64 c
 {
 	struct perf_sample_data data;
 	struct perf_event *event;
+	unsigned long rcu_flags;
 
 	struct perf_raw_record raw = {
 		.frag = {
@@ -8961,6 +8962,8 @@ void perf_tp_event(u16 event_type, u64 c
 	perf_sample_data_init(&data, 0, 0);
 	data.raw = &raw;
 
+	rcu_flags = trace_rcu_enter();
+
 	perf_trace_buf_update(record, event_type);
 
 	hlist_for_each_entry_rcu(event, head, hlist_entry) {
@@ -8996,6 +8999,8 @@ void perf_tp_event(u16 event_type, u64 c
 	}
 
 	perf_swevent_put_recursion_context(rctx);
+
+	trace_rcu_exit(rcu_flags);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 7/8] tracing: Employ trace_rcu_{enter,exit}()
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (5 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12  9:32 ` [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again) Peter Zijlstra
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Replace the opencoded (and incomplete) RCU manipulations with the new
helpers to ensure a regular RCU context when calling into
__ftrace_trace_stack().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/trace/trace.c |   19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2989,24 +2989,11 @@ void __trace_stack(struct trace_array *t
 		   int pc)
 {
 	struct trace_buffer *buffer = tr->array_buffer.buffer;
+	unsigned long rcu_flags;
 
-	if (rcu_is_watching()) {
-		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
-		return;
-	}
-
-	/*
-	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
-	 * but if the above rcu_is_watching() failed, then the NMI
-	 * triggered someplace critical, and rcu_irq_enter() should
-	 * not be called from NMI.
-	 */
-	if (unlikely(in_nmi()))
-		return;
-
-	rcu_irq_enter_irqsave();
+	rcu_flags = trace_rcu_enter();
 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
-	rcu_irq_exit_irqsave();
+	trace_rcu_exit(rcu_flags);
 }
 
 /**



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again)
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (6 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 7/8] tracing: Employ trace_rcu_{enter,exit}() Peter Zijlstra
@ 2020-02-12  9:32 ` Peter Zijlstra
  2020-02-12 14:29   ` Steven Rostedt
  2020-02-12 10:01 ` [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12  9:32 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt
  Cc: peterz, mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

Effectively revert commit 865e63b04e9b2 ("tracing: Add back in
rcu_irq_enter/exit_irqson() for rcuidle tracepoints") now that we've
taught perf how to deal with not having an RCU context provided.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/tracepoint.h |    8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -179,10 +179,8 @@ static inline struct tracepoint *tracepo
 		 * For rcuidle callers, use srcu since sched-rcu	\
 		 * doesn't work from the idle path.			\
 		 */							\
-		if (rcuidle) {						\
+		if (rcuidle)						\
 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
-			rcu_irq_enter_irqsave();			\
-		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
 									\
@@ -194,10 +192,8 @@ static inline struct tracepoint *tracepo
 			} while ((++it_func_ptr)->func);		\
 		}							\
 									\
-		if (rcuidle) {						\
-			rcu_irq_exit_irqsave();				\
+		if (rcuidle)						\
 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
-		}							\
 									\
 		preempt_enable_notrace();				\
 	} while (0)



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (7 preceding siblings ...)
  2020-02-12  9:32 ` [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again) Peter Zijlstra
@ 2020-02-12 10:01 ` Peter Zijlstra
  2020-02-12 10:56   ` Will Deacon
  2020-02-12 10:03 ` Peter Zijlstra
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 10:01 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt, james.morse, will, catalin.marinas
  Cc: mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> Hi all,
> 
> These here patches are the result of Mathieu and Steve trying to get commit
> 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> tracepoints") reverted again.
> 
> One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> or after nmi_exit(). I've only fixed x86, but quickly gone through other
> architectures and there is definitely more stuff to be fixed (simply grep for
> nmi_enter in your arch).

For ARM64:

 - apei_claim_sea()
 - __sdei_handler()
 - do_serror()
 - debug_exception_enter() / do_debug_exception()

all look dodgy.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (8 preceding siblings ...)
  2020-02-12 10:01 ` [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
@ 2020-02-12 10:03 ` Peter Zijlstra
  2020-02-12 10:07 ` Peter Zijlstra
  2020-02-12 10:09 ` Peter Zijlstra
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 10:03 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt, mpe
  Cc: mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> Hi all,
> 
> These here patches are the result of Mathieu and Steve trying to get commit
> 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> tracepoints") reverted again.
> 
> One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> or after nmi_exit(). I've only fixed x86, but quickly gone through other
> architectures and there is definitely more stuff to be fixed (simply grep for
> nmi_enter in your arch).

For Power:

 - system_reset_exception()
 - machine_check_exception()
 - soft_nmi_interrupt()
 - __perf_event_interrupt() (book3s)
 - perf_event_interrupt() (fsl)

will want looking at.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (9 preceding siblings ...)
  2020-02-12 10:03 ` Peter Zijlstra
@ 2020-02-12 10:07 ` Peter Zijlstra
  2020-02-12 10:09 ` Peter Zijlstra
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 10:07 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt, dalias, jcmvbkbc
  Cc: mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> Hi all,
> 
> These here patches are the result of Mathieu and Steve trying to get commit
> 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> tracepoints") reverted again.
> 
> One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> or after nmi_exit(). I've only fixed x86, but quickly gone through other
> architectures and there is definitely more stuff to be fixed (simply grep for
> nmi_enter in your arch).

Xtensa and SuperH need to mark their NMI C handler notrace.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
                   ` (10 preceding siblings ...)
  2020-02-12 10:07 ` Peter Zijlstra
@ 2020-02-12 10:09 ` Peter Zijlstra
  11 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 10:09 UTC (permalink / raw)
  To: linux-kernel, linux-arch, rostedt, linux
  Cc: mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> Hi all,
> 
> These here patches are the result of Mathieu and Steve trying to get commit
> 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> tracepoints") reverted again.
> 
> One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> or after nmi_exit(). I've only fixed x86, but quickly gone through other
> architectures and there is definitely more stuff to be fixed (simply grep for
> nmi_enter in your arch).

Someone should probably look at the whole ARM FiQ stuff, I got a little
lost, but probably handle_fiq_as_nmi() wants notrace and everything
using set_fiq_handler() wants looking at.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12 10:01 ` [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
@ 2020-02-12 10:56   ` Will Deacon
  2020-02-12 11:58     ` Peter Zijlstra
  2020-02-12 14:32     ` Steven Rostedt
  0 siblings, 2 replies; 29+ messages in thread
From: Will Deacon @ 2020-02-12 10:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, rostedt, james.morse, catalin.marinas,
	mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 11:01:06AM +0100, Peter Zijlstra wrote:
> On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> > Hi all,
> > 
> > These here patches are the result of Mathieu and Steve trying to get commit
> > 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> > tracepoints") reverted again.
> > 
> > One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> > or after nmi_exit(). I've only fixed x86, but quickly gone through other
> > architectures and there is definitely more stuff to be fixed (simply grep for
> > nmi_enter in your arch).
> 
> For ARM64:
> 
>  - apei_claim_sea()
>  - __sdei_handler()
>  - do_serror()
>  - debug_exception_enter() / do_debug_exception()
> 
> all look dodgy.

Hmm, so looks like we need to spinkle some 'notrace' annotations around
these. Are there are scenarios where you would want NOKPROBE_SYMBOL() but
*not* 'notrace'? We've already got the former for the debug exception
handlers and we probably (?) want it for the SDEI stuff too...

Will

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12 10:56   ` Will Deacon
@ 2020-02-12 11:58     ` Peter Zijlstra
  2020-02-12 14:32     ` Steven Rostedt
  1 sibling, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 11:58 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-kernel, linux-arch, rostedt, james.morse, catalin.marinas,
	mingo, joel, gregkh, gustavo, tglx, paulmck, josh,
	mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:56:46AM +0000, Will Deacon wrote:
> On Wed, Feb 12, 2020 at 11:01:06AM +0100, Peter Zijlstra wrote:
> > On Wed, Feb 12, 2020 at 10:32:10AM +0100, Peter Zijlstra wrote:
> > > Hi all,
> > > 
> > > These here patches are the result of Mathieu and Steve trying to get commit
> > > 865e63b04e9b2 ("tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
> > > tracepoints") reverted again.
> > > 
> > > One of the things discovered is that tracing MUST NOT happen before nmi_enter()
> > > or after nmi_exit(). I've only fixed x86, but quickly gone through other
> > > architectures and there is definitely more stuff to be fixed (simply grep for
> > > nmi_enter in your arch).
> > 
> > For ARM64:
> > 
> >  - apei_claim_sea()
> >  - __sdei_handler()
> >  - do_serror()
> >  - debug_exception_enter() / do_debug_exception()
> > 
> > all look dodgy.
> 
> Hmm, so looks like we need to spinkle some 'notrace' annotations around
> these. Are there are scenarios where you would want NOKPROBE_SYMBOL() but
> *not* 'notrace'? We've already got the former for the debug exception
> handlers and we probably (?) want it for the SDEI stuff too...

I'm not sure. In fact, I asked Steve on IRC if we'd want to teach
objtool to report such inconsitencies.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace
  2020-02-12  9:32 ` [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace Peter Zijlstra
@ 2020-02-12 14:24   ` Steven Rostedt
  2020-02-12 15:02     ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 14:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 10:32:14 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> Because of the requirement that no tracing happens until after we've
> incremented preempt_count, see nmi_enter() / trace_rcu_enter(), mark
> these functions as notrace.

I actually depend on these function being traced. We do have
"preempt_enable_notrace()" and "preempt_disable_notrace()" for places
that shouldn't be traced. Can't we use those? (or simply
__preempt_count_add()) in the nmi_enter() code instead? (perhaps create
a preempt_count_add_notrace()).

-- Steve



> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/sched/core.c |    4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3781,7 +3781,7 @@ static inline void preempt_latency_start
>  	}
>  }
>  
> -void preempt_count_add(int val)
> +void notrace preempt_count_add(int val)
>  {
>  #ifdef CONFIG_DEBUG_PREEMPT
>  	/*
> @@ -3813,7 +3813,7 @@ static inline void preempt_latency_stop(
>  		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
>  }
>  
> -void preempt_count_sub(int val)
> +void notrace preempt_count_sub(int val)
>  {
>  #ifdef CONFIG_DEBUG_PREEMPT
>  	/*
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace
  2020-02-12  9:32 ` [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace Peter Zijlstra
@ 2020-02-12 14:25   ` Steven Rostedt
  2020-02-12 15:04     ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 14:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 10:32:15 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> Because do_nmi() must not call into tracing outside of
> nmi_enter()/nmi_exit(), these functions must be notrace.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/kernel/cpu/common.c |    4 ++--
>  arch/x86/kernel/nmi.c        |    6 ++++++
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c

This entire file is notrace:

 arch/x86/kernel/cpu/Makefile:

   CFLAGS_REMOVE_common.o = -pg

-- Steve

> @@ -1690,14 +1690,14 @@ void syscall_init(void)
>  DEFINE_PER_CPU(int, debug_stack_usage);
>  DEFINE_PER_CPU(u32, debug_idt_ctr);
>  
> -void debug_stack_set_zero(void)
> +void notrace debug_stack_set_zero(void)
>  {
>  	this_cpu_inc(debug_idt_ctr);
>  	load_current_idt();
>  }
>  NOKPROBE_SYMBOL(debug_stack_set_zero);
>  
> -void debug_stack_reset(void)
> +void notrace debug_stack_reset(void)
>  {
>  	if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
>  		return;
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -534,6 +534,9 @@ do_nmi(struct pt_regs *regs, long error_
>  	}
>  #endif
>  
> +	/*
> +	 * It is important that no tracing happens before nmi_enter()!
> +	 */
>  	nmi_enter();
>  
>  	inc_irq_stat(__nmi_count);
> @@ -542,6 +545,9 @@ do_nmi(struct pt_regs *regs, long error_
>  		default_do_nmi(regs);
>  
>  	nmi_exit();
> +	/*
> +	 * No tracing after nmi_exit()!
> +	 */
>  
>  #ifdef CONFIG_X86_64
>  	if (unlikely(this_cpu_read(update_debug_stack))) {
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes
  2020-02-12  9:32 ` [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes Peter Zijlstra
@ 2020-02-12 14:26   ` Steven Rostedt
  2020-02-12 15:07     ` Mathieu Desnoyers
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 14:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 10:32:16 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> The tracepoint interface will stop providing regular RCU context; make
> sure we do it ourselves, since perf makes use of regular RCU protected
> data.
> 

Suggested-by: Steven Rostedt (VMware) <rosted@goodmis.org>

  ;-)

-- Steve

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/events/core.c |    5 +++++
>  1 file changed, 5 insertions(+)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8950,6 +8950,7 @@ void perf_tp_event(u16 event_type, u64 c
>  {
>  	struct perf_sample_data data;
>  	struct perf_event *event;
> +	unsigned long rcu_flags;
>  
>  	struct perf_raw_record raw = {
>  		.frag = {
> @@ -8961,6 +8962,8 @@ void perf_tp_event(u16 event_type, u64 c
>  	perf_sample_data_init(&data, 0, 0);
>  	data.raw = &raw;
>  
> +	rcu_flags = trace_rcu_enter();
> +
>  	perf_trace_buf_update(record, event_type);
>  
>  	hlist_for_each_entry_rcu(event, head, hlist_entry) {
> @@ -8996,6 +8999,8 @@ void perf_tp_event(u16 event_type, u64 c
>  	}
>  
>  	perf_swevent_put_recursion_context(rctx);
> +
> +	trace_rcu_exit(rcu_flags);
>  }
>  EXPORT_SYMBOL_GPL(perf_tp_event);
>  
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again)
  2020-02-12  9:32 ` [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again) Peter Zijlstra
@ 2020-02-12 14:29   ` Steven Rostedt
  2020-02-12 15:12     ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 14:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 10:32:18 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> Effectively revert commit 865e63b04e9b2 ("tracing: Add back in
> rcu_irq_enter/exit_irqson() for rcuidle tracepoints") now that we've
> taught perf how to deal with not having an RCU context provided.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  include/linux/tracepoint.h |    8 ++------
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -179,10 +179,8 @@ static inline struct tracepoint *tracepo
>  		 * For rcuidle callers, use srcu since sched-rcu	\
>  		 * doesn't work from the idle path.			\
>  		 */							\
> -		if (rcuidle) {						\
> +		if (rcuidle)						\
>  			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
> -			rcu_irq_enter_irqsave();			\
> -		}							\
>  									\
>  		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
>  									\
> @@ -194,10 +192,8 @@ static inline struct tracepoint *tracepo
>  			} while ((++it_func_ptr)->func);		\
>  		}							\
>  									\
> -		if (rcuidle) {						\
> -			rcu_irq_exit_irqsave();				\
> +		if (rcuidle)						\
>  			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
> -		}							\
>  									\
>  		preempt_enable_notrace();				\
>  	} while (0)
> 

Which looks basically the same as this patch...


  https://lore.kernel.org/r/20200211095047.58ddf750@gandalf.local.home

-- Steve


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12 10:56   ` Will Deacon
  2020-02-12 11:58     ` Peter Zijlstra
@ 2020-02-12 14:32     ` Steven Rostedt
  2020-02-12 15:21       ` Peter Zijlstra
  1 sibling, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 14:32 UTC (permalink / raw)
  To: Will Deacon
  Cc: Peter Zijlstra, linux-kernel, linux-arch, james.morse,
	catalin.marinas, mingo, joel, gregkh, gustavo, tglx, paulmck,
	josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 10:56:46 +0000
Will Deacon <will@kernel.org> wrote:

> Hmm, so looks like we need to spinkle some 'notrace' annotations around
> these. Are there are scenarios where you would want NOKPROBE_SYMBOL() but
> *not* 'notrace'? We've already got the former for the debug exception
> handlers and we probably (?) want it for the SDEI stuff too...

Note, function tracing has a lot of recursion detection, and when
something registers with the function tracer it needs to state if it
can be called when rcu is not watching, or it wont be called then.

And yes, there's plenty of places that we have "nokprobe" but allow
tracing.

I've been trying to get rid of notrace around the kernel, not add more.

-- Steve

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace
  2020-02-12 14:24   ` Steven Rostedt
@ 2020-02-12 15:02     ` Peter Zijlstra
  2020-02-12 15:14       ` Steven Rostedt
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:02 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 09:24:17AM -0500, Steven Rostedt wrote:
> On Wed, 12 Feb 2020 10:32:14 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > Because of the requirement that no tracing happens until after we've
> > incremented preempt_count, see nmi_enter() / trace_rcu_enter(), mark
> > these functions as notrace.
> 
> I actually depend on these function being traced.

Why? They already have a tracepoint inside.

> We do have
> "preempt_enable_notrace()" and "preempt_disable_notrace()" for places
> that shouldn't be traced. Can't we use those? (or simply
> __preempt_count_add()) in the nmi_enter() code instead? (perhaps create
> a preempt_count_add_notrace()).

My initial patch has __preempt_count_add/sub() in, but then I figured
someone would go complain the tracepoint would go missing.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace
  2020-02-12 14:25   ` Steven Rostedt
@ 2020-02-12 15:04     ` Peter Zijlstra
  2020-02-12 15:18       ` Steven Rostedt
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:04 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 09:25:39AM -0500, Steven Rostedt wrote:
> On Wed, 12 Feb 2020 10:32:15 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > Because do_nmi() must not call into tracing outside of
> > nmi_enter()/nmi_exit(), these functions must be notrace.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/x86/kernel/cpu/common.c |    4 ++--
> >  arch/x86/kernel/nmi.c        |    6 ++++++
> >  2 files changed, 8 insertions(+), 2 deletions(-)
> > 
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> 
> This entire file is notrace:
> 
>  arch/x86/kernel/cpu/Makefile:
> 
>    CFLAGS_REMOVE_common.o = -pg

Urgh, I hate it that that annotation is so hard to find :/ Also, there
seem to be various flavours of that Makefile magic around.

  CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)

is another variant I encountered.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes
  2020-02-12 14:26   ` Steven Rostedt
@ 2020-02-12 15:07     ` Mathieu Desnoyers
  0 siblings, 0 replies; 29+ messages in thread
From: Mathieu Desnoyers @ 2020-02-12 15:07 UTC (permalink / raw)
  To: rostedt, Peter Zijlstra
  Cc: linux-kernel, linux-arch, Ingo Molnar, Joel Fernandes, Google,
	Greg Kroah-Hartman, Gustavo A. R. Silva, Thomas Gleixner,
	paulmck, Josh Triplett, Lai Jiangshan

----- On Feb 12, 2020, at 9:26 AM, rostedt rostedt@goodmis.org wrote:

> On Wed, 12 Feb 2020 10:32:16 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
>> The tracepoint interface will stop providing regular RCU context; make
>> sure we do it ourselves, since perf makes use of regular RCU protected
>> data.
>> 
> 
> Suggested-by: Steven Rostedt (VMware) <rosted@goodmis.org>

and

Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

Thanks :)

Mathieu

> 
>  ;-)
> 
> -- Steve
> 
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> ---
>>  kernel/events/core.c |    5 +++++
>>  1 file changed, 5 insertions(+)
>> 
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -8950,6 +8950,7 @@ void perf_tp_event(u16 event_type, u64 c
>>  {
>>  	struct perf_sample_data data;
>>  	struct perf_event *event;
>> +	unsigned long rcu_flags;
>>  
>>  	struct perf_raw_record raw = {
>>  		.frag = {
>> @@ -8961,6 +8962,8 @@ void perf_tp_event(u16 event_type, u64 c
>>  	perf_sample_data_init(&data, 0, 0);
>>  	data.raw = &raw;
>>  
>> +	rcu_flags = trace_rcu_enter();
>> +
>>  	perf_trace_buf_update(record, event_type);
>>  
>>  	hlist_for_each_entry_rcu(event, head, hlist_entry) {
>> @@ -8996,6 +8999,8 @@ void perf_tp_event(u16 event_type, u64 c
>>  	}
>>  
>>  	perf_swevent_put_recursion_context(rctx);
>> +
>> +	trace_rcu_exit(rcu_flags);
>>  }
>>  EXPORT_SYMBOL_GPL(perf_tp_event);
>>  

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again)
  2020-02-12 14:29   ` Steven Rostedt
@ 2020-02-12 15:12     ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:12 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 09:29:52AM -0500, Steven Rostedt wrote:
> On Wed, 12 Feb 2020 10:32:18 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > Effectively revert commit 865e63b04e9b2 ("tracing: Add back in
> > rcu_irq_enter/exit_irqson() for rcuidle tracepoints") now that we've
> > taught perf how to deal with not having an RCU context provided.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  include/linux/tracepoint.h |    8 ++------
> >  1 file changed, 2 insertions(+), 6 deletions(-)
> > 
> > --- a/include/linux/tracepoint.h
> > +++ b/include/linux/tracepoint.h
> > @@ -179,10 +179,8 @@ static inline struct tracepoint *tracepo
> >  		 * For rcuidle callers, use srcu since sched-rcu	\
> >  		 * doesn't work from the idle path.			\
> >  		 */							\
> > -		if (rcuidle) {						\
> > +		if (rcuidle)						\
> >  			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
> > -			rcu_irq_enter_irqsave();			\
> > -		}							\
> >  									\
> >  		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
> >  									\
> > @@ -194,10 +192,8 @@ static inline struct tracepoint *tracepo
> >  			} while ((++it_func_ptr)->func);		\
> >  		}							\
> >  									\
> > -		if (rcuidle) {						\
> > -			rcu_irq_exit_irqsave();				\
> > +		if (rcuidle)						\
> >  			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
> > -		}							\
> >  									\
> >  		preempt_enable_notrace();				\
> >  	} while (0)
> > 
> 
> Which looks basically the same as this patch...
> 
>   https://lore.kernel.org/r/20200211095047.58ddf750@gandalf.local.home

It is a straight revert of 865e63b04e9b2, how different do you want it
to look?

Also it very much isn't that patch, as this one only does the revert and
doesn't mix it in with other stuff.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace
  2020-02-12 15:02     ` Peter Zijlstra
@ 2020-02-12 15:14       ` Steven Rostedt
  2020-02-12 15:38         ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 15:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 16:02:11 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Feb 12, 2020 at 09:24:17AM -0500, Steven Rostedt wrote:
> > On Wed, 12 Feb 2020 10:32:14 +0100
> > Peter Zijlstra <peterz@infradead.org> wrote:
> >   
> > > Because of the requirement that no tracing happens until after we've
> > > incremented preempt_count, see nmi_enter() / trace_rcu_enter(), mark
> > > these functions as notrace.  
> > 
> > I actually depend on these function being traced.  
> 
> Why? They already have a tracepoint inside.

Only when enabled.

> 
> > We do have
> > "preempt_enable_notrace()" and "preempt_disable_notrace()" for places
> > that shouldn't be traced. Can't we use those? (or simply
> > __preempt_count_add()) in the nmi_enter() code instead? (perhaps create
> > a preempt_count_add_notrace()).  
> 
> My initial patch has __preempt_count_add/sub() in, but then I figured
> someone would go complain the tracepoint would go missing.

Fine, but what bug are you trying to fix? I haven't seen one mentioned
yet. Function tracing has recursion protection, and tracing
preempt_count in nmi_enter() causes no problems. What's the problem you
are trying to solve?

-- Steve

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace
  2020-02-12 15:04     ` Peter Zijlstra
@ 2020-02-12 15:18       ` Steven Rostedt
  2020-02-12 15:39         ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2020-02-12 15:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, 12 Feb 2020 16:04:40 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> > This entire file is notrace:
> > 
> >  arch/x86/kernel/cpu/Makefile:
> > 
> >    CFLAGS_REMOVE_common.o = -pg  
> 
> Urgh, I hate it that that annotation is so hard to find :/ Also, there
> seem to be various flavours of that Makefile magic around.
> 
>   CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
> 
> is another variant I encountered.

Actually, that should be the only variant. That was added for various
archs, and should be used consistently throughout.

There's a clean up series for you ;-) (or whoever)

-- Steve

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] tracing vs rcu vs nmi
  2020-02-12 14:32     ` Steven Rostedt
@ 2020-02-12 15:21       ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Will Deacon, linux-kernel, linux-arch, james.morse,
	catalin.marinas, mingo, joel, gregkh, gustavo, tglx, paulmck,
	josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 09:32:34AM -0500, Steven Rostedt wrote:
> I've been trying to get rid of notrace around the kernel, not add more.

As is abundantly clear, I hope, we really need more this time.

You want to trace RCU itself, the concequence is we _MUST_NOT_ trace the
early NMI code until we can distinguish we're *in* NMI code.

There really is no 2 way about it; and I'm not going to wait until some
poor sucker trips over it.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace
  2020-02-12 15:14       ` Steven Rostedt
@ 2020-02-12 15:38         ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:38 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:14:15AM -0500, Steven Rostedt wrote:
> > My initial patch has __preempt_count_add/sub() in, but then I figured
> > someone would go complain the tracepoint would go missing.
> 
> Fine, but what bug are you trying to fix? I haven't seen one mentioned
> yet. Function tracing has recursion protection, and tracing
> preempt_count in nmi_enter() causes no problems. What's the problem you
> are trying to solve?

The same one as yesterday; if we hit a tracer in NMI context, when
!rcu_is_watching() and in_nmi() is still 0, we're fucked.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace
  2020-02-12 15:18       ` Steven Rostedt
@ 2020-02-12 15:39         ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2020-02-12 15:39 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-arch, mingo, joel, gregkh, gustavo, tglx,
	paulmck, josh, mathieu.desnoyers, jiangshanlai

On Wed, Feb 12, 2020 at 10:18:26AM -0500, Steven Rostedt wrote:
> On Wed, 12 Feb 2020 16:04:40 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > > This entire file is notrace:
> > > 
> > >  arch/x86/kernel/cpu/Makefile:
> > > 
> > >    CFLAGS_REMOVE_common.o = -pg  
> > 
> > Urgh, I hate it that that annotation is so hard to find :/ Also, there
> > seem to be various flavours of that Makefile magic around.
> > 
> >   CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
> > 
> > is another variant I encountered.
> 
> Actually, that should be the only variant. That was added for various
> archs, and should be used consistently throughout.
> 
> There's a clean up series for you ;-) (or whoever)

If I'm going to clean this up I'd remove the Makefile rules entirely.
I hate these Makefile rules, they make it entirely non-obvious what is
and is not being traced.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2020-02-12 15:40 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-12  9:32 [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
2020-02-12  9:32 ` [PATCH 1/8] rcu: Rename rcu_irq_{enter,exit}_irqson() Peter Zijlstra
2020-02-12  9:32 ` [PATCH 2/8] rcu: Mark rcu_dynticks_curr_cpu_in_eqs() inline Peter Zijlstra
2020-02-12  9:32 ` [PATCH 3/8] rcu,tracing: Create trace_rcu_{enter,exit}() Peter Zijlstra
2020-02-12  9:32 ` [PATCH 4/8] sched,rcu,tracing: Mark preempt_count_{add,sub}() notrace Peter Zijlstra
2020-02-12 14:24   ` Steven Rostedt
2020-02-12 15:02     ` Peter Zijlstra
2020-02-12 15:14       ` Steven Rostedt
2020-02-12 15:38         ` Peter Zijlstra
2020-02-12  9:32 ` [PATCH 5/8] x86,tracing: Mark debug_stack_{set_zero,reset)() notrace Peter Zijlstra
2020-02-12 14:25   ` Steven Rostedt
2020-02-12 15:04     ` Peter Zijlstra
2020-02-12 15:18       ` Steven Rostedt
2020-02-12 15:39         ` Peter Zijlstra
2020-02-12  9:32 ` [PATCH 6/8] perf,tracing: Prepare the perf-trace interface for RCU changes Peter Zijlstra
2020-02-12 14:26   ` Steven Rostedt
2020-02-12 15:07     ` Mathieu Desnoyers
2020-02-12  9:32 ` [PATCH 7/8] tracing: Employ trace_rcu_{enter,exit}() Peter Zijlstra
2020-02-12  9:32 ` [PATCH 8/8] tracing: Remove regular RCU context for _rcuidle tracepoints (again) Peter Zijlstra
2020-02-12 14:29   ` Steven Rostedt
2020-02-12 15:12     ` Peter Zijlstra
2020-02-12 10:01 ` [PATCH 0/8] tracing vs rcu vs nmi Peter Zijlstra
2020-02-12 10:56   ` Will Deacon
2020-02-12 11:58     ` Peter Zijlstra
2020-02-12 14:32     ` Steven Rostedt
2020-02-12 15:21       ` Peter Zijlstra
2020-02-12 10:03 ` Peter Zijlstra
2020-02-12 10:07 ` Peter Zijlstra
2020-02-12 10:09 ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).