* [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
@ 2021-09-23  3:42 王贇
  2021-09-23  8:57 ` 王贇
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: 王贇 @ 2021-09-23  3:42 UTC (permalink / raw)
  To: Steven Rostedt, Ingo Molnar, open list

With CONFIG_DEBUG_PREEMPT we observed reports like:

  BUG: using smp_processor_id() in preemptible
  caller is perf_ftrace_function_call+0x6f/0x2e0
  CPU: 1 PID: 680 Comm: a.out Not tainted
  Call Trace:
   <TASK>
   dump_stack_lvl+0x8d/0xcf
   check_preemption_disabled+0x104/0x110
   ? optimize_nops.isra.7+0x230/0x230
   ? text_poke_bp_batch+0x9f/0x310
   perf_ftrace_function_call+0x6f/0x2e0
   ...
   __text_poke+0x5/0x620
   text_poke_bp_batch+0x9f/0x310

This tells us that the CPU can change after the task is preempted, so
checking the CPU before preemption is useless.
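
Roughly, the problematic window in the function below looks like this
(a simplified sketch, not the actual diff; the comment marks where
preemption bites):

	/* ops->private is compared against the current CPU */
	if ((unsigned long)ops->private != smp_processor_id())
		return;
	/*
	 * Preemption is still enabled at this point: the task can
	 * migrate to another CPU right after the check, so everything
	 * below may run on a CPU the check never validated.
	 */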

This patch simply disables preemption in perf_ftrace_function_call()
to prevent the CPU from changing.

Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
---
 kernel/trace/trace_event_perf.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..5486b18 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
 	int rctx;
 	int bit;

+	preempt_disable_notrace();
+
 	if (!rcu_is_watching())
-		return;
+		goto out;

 	if ((unsigned long)ops->private != smp_processor_id())
-		return;
+		goto out;

 	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
-		return;
+		goto out;

 	event = container_of(ops, struct perf_event, ftrace_ops);

@@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)

 	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
 	if (!entry)
-		goto out;
+		goto out_unlock;

 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
 			      1, &regs, &head, NULL);

-out:
+out_unlock:
 	ftrace_test_recursion_unlock(bit);
 #undef ENTRY_SIZE
+out:
+	preempt_enable_notrace();
 }

 static int perf_ftrace_function_register(struct perf_event *event)
-- 
1.8.3.1



* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-23  3:42 [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call() 王贇
@ 2021-09-23  8:57 ` 王贇
  2021-09-23 13:33 ` Steven Rostedt
  2021-09-24  4:38 ` [PATCH v2] " 王贇
  2 siblings, 0 replies; 8+ messages in thread
From: 王贇 @ 2021-09-23  8:57 UTC (permalink / raw)
  To: Steven Rostedt, Ingo Molnar, open list

Reported-by: Abaci <abaci@linux.alibaba.com>

On 2021/9/23 11:42 AM, 王贇 wrote:
> With CONFIG_DEBUG_PREEMPT we observed reports like:
> 
>   BUG: using smp_processor_id() in preemptible
>   caller is perf_ftrace_function_call+0x6f/0x2e0
>   CPU: 1 PID: 680 Comm: a.out Not tainted
>   Call Trace:
>    <TASK>
>    dump_stack_lvl+0x8d/0xcf
>    check_preemption_disabled+0x104/0x110
>    ? optimize_nops.isra.7+0x230/0x230
>    ? text_poke_bp_batch+0x9f/0x310
>    perf_ftrace_function_call+0x6f/0x2e0
>    ...
>    __text_poke+0x5/0x620
>    text_poke_bp_batch+0x9f/0x310
> 
> This tells us that the CPU can change after the task is preempted, so
> checking the CPU before preemption is useless.
> 
> This patch simply disables preemption in perf_ftrace_function_call()
> to prevent the CPU from changing.
> 
> Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
> ---
>  kernel/trace/trace_event_perf.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 6aed10e..5486b18 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
>  	int rctx;
>  	int bit;
> 
> +	preempt_disable_notrace();
> +
>  	if (!rcu_is_watching())
> -		return;
> +		goto out;
> 
>  	if ((unsigned long)ops->private != smp_processor_id())
> -		return;
> +		goto out;
> 
>  	bit = ftrace_test_recursion_trylock(ip, parent_ip);
>  	if (bit < 0)
> -		return;
> +		goto out;
> 
>  	event = container_of(ops, struct perf_event, ftrace_ops);
> 
> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
> 
>  	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
>  	if (!entry)
> -		goto out;
> +		goto out_unlock;
> 
>  	entry->ip = ip;
>  	entry->parent_ip = parent_ip;
>  	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
>  			      1, &regs, &head, NULL);
> 
> -out:
> +out_unlock:
>  	ftrace_test_recursion_unlock(bit);
>  #undef ENTRY_SIZE
> +out:
> +	preempt_enable_notrace();
>  }
> 
>  static int perf_ftrace_function_register(struct perf_event *event)
> 


* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-23  3:42 [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call() 王贇
  2021-09-23  8:57 ` 王贇
@ 2021-09-23 13:33 ` Steven Rostedt
  2021-09-24  2:08   ` 王贇
  2021-09-24  4:38 ` [PATCH v2] " 王贇
  2 siblings, 1 reply; 8+ messages in thread
From: Steven Rostedt @ 2021-09-23 13:33 UTC (permalink / raw)
  To: 王贇; +Cc: Ingo Molnar, open list

On Thu, 23 Sep 2021 11:42:56 +0800
王贇 <yun.wang@linux.alibaba.com> wrote:

> With CONFIG_DEBUG_PREEMPT we observed reports like:
> 
>   BUG: using smp_processor_id() in preemptible
>   caller is perf_ftrace_function_call+0x6f/0x2e0
>   CPU: 1 PID: 680 Comm: a.out Not tainted
>   Call Trace:
>    <TASK>
>    dump_stack_lvl+0x8d/0xcf
>    check_preemption_disabled+0x104/0x110
>    ? optimize_nops.isra.7+0x230/0x230
>    ? text_poke_bp_batch+0x9f/0x310
>    perf_ftrace_function_call+0x6f/0x2e0
>    ...
>    __text_poke+0x5/0x620
>    text_poke_bp_batch+0x9f/0x310
> 
> This tells us that the CPU can change after the task is preempted, so
> checking the CPU before preemption is useless.
> 
> This patch simply disables preemption in perf_ftrace_function_call()
> to prevent the CPU from changing.
> 
> Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
> ---
>  kernel/trace/trace_event_perf.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 6aed10e..5486b18 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
>  	int rctx;
>  	int bit;
> 
> +	preempt_disable_notrace();
> +
>  	if (!rcu_is_watching())
> -		return;
> +		goto out;

You don't need preemption disabled for the above check. It's better to
disable it here and leave the above return untouched.
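
That is, roughly (this ordering is what the v2 patch below ends up
using):

	if (!rcu_is_watching())
		return;

	/* needed only once we start relying on the CPU identity */
	preempt_disable_notrace();

	if ((unsigned long)ops->private != smp_processor_id())
		goto out;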

-- Steve

> 
>  	if ((unsigned long)ops->private != smp_processor_id())
> -		return;
> +		goto out;
> 
>  	bit = ftrace_test_recursion_trylock(ip, parent_ip);
>  	if (bit < 0)
> -		return;
> +		goto out;
> 
>  	event = container_of(ops, struct perf_event, ftrace_ops);
> 
> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
> 
>  	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
>  	if (!entry)
> -		goto out;
> +		goto out_unlock;
> 
>  	entry->ip = ip;
>  	entry->parent_ip = parent_ip;
>  	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
>  			      1, &regs, &head, NULL);
> 
> -out:
> +out_unlock:
>  	ftrace_test_recursion_unlock(bit);
>  #undef ENTRY_SIZE
> +out:
> +	preempt_enable_notrace();
>  }
> 
>  static int perf_ftrace_function_register(struct perf_event *event)



* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-23 13:33 ` Steven Rostedt
@ 2021-09-24  2:08   ` 王贇
  2021-09-24  3:26     ` Steven Rostedt
  0 siblings, 1 reply; 8+ messages in thread
From: 王贇 @ 2021-09-24  2:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, open list

On 2021/9/23 9:33 PM, Steven Rostedt wrote:
[snip]
>> ---
>>  kernel/trace/trace_event_perf.c | 14 +++++++++-----
>>  1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
>> index 6aed10e..5486b18 100644
>> --- a/kernel/trace/trace_event_perf.c
>> +++ b/kernel/trace/trace_event_perf.c
>> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
>>  	int rctx;
>>  	int bit;
>>
>> +	preempt_disable_notrace();
>> +
>>  	if (!rcu_is_watching())
>> -		return;
>> +		goto out;
> 
> You don't need preemption disabled for the above check. It's better to
> disable it here and leave the above return untouched.

I found that the rcu tree implementation of rcu_is_watching() checks
this_cpu_ptr(&rcu_data.dynticks) and then re-enables preemption.

If preemption happens after that and before we disable it here, isn't
there still a possibility that the CPU changes and the dynticks check
becomes invalid?
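
For reference, the tree implementation looks roughly like this
(paraphrased from kernel/rcu/tree.c; exact helper names may differ
between versions):

	bool rcu_is_watching(void)
	{
		bool ret;

		preempt_disable_notrace();
		/* reads this CPU's rcu_data.dynticks */
		ret = !rcu_dynticks_curr_cpu_in_eqs();
		preempt_enable_notrace();
		return ret;
	}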

Regards,
Michael Wang

> 
> -- Steve
> 
>>
>>  	if ((unsigned long)ops->private != smp_processor_id())
>> -		return;
>> +		goto out;
>>
>>  	bit = ftrace_test_recursion_trylock(ip, parent_ip);
>>  	if (bit < 0)
>> -		return;
>> +		goto out;
>>
>>  	event = container_of(ops, struct perf_event, ftrace_ops);
>>
>> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
>>
>>  	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
>>  	if (!entry)
>> -		goto out;
>> +		goto out_unlock;
>>
>>  	entry->ip = ip;
>>  	entry->parent_ip = parent_ip;
>>  	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
>>  			      1, &regs, &head, NULL);
>>
>> -out:
>> +out_unlock:
>>  	ftrace_test_recursion_unlock(bit);
>>  #undef ENTRY_SIZE
>> +out:
>> +	preempt_enable_notrace();
>>  }
>>
>>  static int perf_ftrace_function_register(struct perf_event *event)


* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-24  2:08   ` 王贇
@ 2021-09-24  3:26     ` Steven Rostedt
  2021-09-24  3:27       ` Steven Rostedt
  2021-09-24  3:36       ` 王贇
  0 siblings, 2 replies; 8+ messages in thread
From: Steven Rostedt @ 2021-09-24  3:26 UTC (permalink / raw)
  To: 王贇; +Cc: Ingo Molnar, open list

On Fri, 24 Sep 2021 10:08:10 +0800
王贇 <yun.wang@linux.alibaba.com> wrote:

> I found the rcu tree implementation of rcu_is_watching() will check
> this_cpu_ptr(&rcu_data.dynticks), and after that enable the preemption.
> 
> If preemption happened after that and before we disable here, there are
> still possibility that the CPU changed and make the dynticks checking
> invalid, isn't it?

If it can be scheduled, then RCU is definitely watching ;-)

The rcu_is_watching() is a safe guard for places that are in between
context switches. Not task context switches, but transitioning between
kernel and user space, or going into or out of idle, or transitioning
in and out of an interrupt.  There are small critical sections that RCU
is not watching, and we are actually working on making those locations
disable instrumentation (like tracing), where rcu_is_watching() will no
longer be needed.
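
One such window, as a sketch (assumed helper names from the idle path;
simplified and not meant to be exact):

	static void idle_sketch(void)
	{
		rcu_idle_enter();	/* RCU stops watching this CPU */
		/* ... low-level idle happens here; nothing can be
		 * scheduled in from this point ... */
		rcu_idle_exit();	/* RCU is watching again */
	}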

-- Steve


* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-24  3:26     ` Steven Rostedt
@ 2021-09-24  3:27       ` Steven Rostedt
  2021-09-24  3:36       ` 王贇
  1 sibling, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2021-09-24  3:27 UTC (permalink / raw)
  To: 王贇; +Cc: Ingo Molnar, open list

On Thu, 23 Sep 2021 23:26:19 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> If it can be scheduled, then RCU is definitely watching ;-)

In other words, nothing ever gets scheduled into a location where RCU
is not watching.

-- Steve


* Re: [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-24  3:26     ` Steven Rostedt
  2021-09-24  3:27       ` Steven Rostedt
@ 2021-09-24  3:36       ` 王贇
  1 sibling, 0 replies; 8+ messages in thread
From: 王贇 @ 2021-09-24  3:36 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, open list



On 2021/9/24 11:26 AM, Steven Rostedt wrote:
> On Fri, 24 Sep 2021 10:08:10 +0800
> 王贇 <yun.wang@linux.alibaba.com> wrote:
> 
>> I found the rcu tree implementation of rcu_is_watching() will check
>> this_cpu_ptr(&rcu_data.dynticks), and after that enable the preemption.
>>
>> If preemption happened after that and before we disable here, there are
>> still possibility that the CPU changed and make the dynticks checking
>> invalid, isn't it?
> 
> If it can be scheduled, then RCU is definitely watching ;-)
> 
> The rcu_is_watching() is a safe guard for places that are in between
> context switches. Not task context switches, but transitioning between
> kernel and user space, or going into or out of idle, or transitioning
> in and out of an interrupt.  There are small critical sections that RCU
> is not watching, and we are actually working on making those locations
> disable instrumentation (like tracing), where rcu_is_watching() will no
> longer be needed.

Thanks for the explanation :-)

A context that can be scheduled should never be in one of these
situations; I will move the 'disable' down in v2.

Regards,
Michael Wang

> 
> -- Steve
> 


* [PATCH v2] trace: prevent preemption in perf_ftrace_function_call()
  2021-09-23  3:42 [RFC PATCH] trace: prevent preemption in perf_ftrace_function_call() 王贇
  2021-09-23  8:57 ` 王贇
  2021-09-23 13:33 ` Steven Rostedt
@ 2021-09-24  4:38 ` 王贇
  2 siblings, 0 replies; 8+ messages in thread
From: 王贇 @ 2021-09-24  4:38 UTC (permalink / raw)
  To: Steven Rostedt, Ingo Molnar, open list

With CONFIG_DEBUG_PREEMPT we observed reports like:

  BUG: using smp_processor_id() in preemptible
  caller is perf_ftrace_function_call+0x6f/0x2e0
  CPU: 1 PID: 680 Comm: a.out Not tainted
  Call Trace:
   <TASK>
   dump_stack_lvl+0x8d/0xcf
   check_preemption_disabled+0x104/0x110
   ? optimize_nops.isra.7+0x230/0x230
   ? text_poke_bp_batch+0x9f/0x310
   perf_ftrace_function_call+0x6f/0x2e0
   ...
   __text_poke+0x5/0x620
   text_poke_bp_batch+0x9f/0x310

This tells us that the CPU can change after the task is preempted,
making the CPU check done before preemption invalid.

This patch simply disables preemption in perf_ftrace_function_call()
to prevent the CPU from changing.

Reported-by: Abaci <abaci@linux.alibaba.com>
Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
---
 kernel/trace/trace_event_perf.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..dcbefdf 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type)
 	if (!rcu_is_watching())
 		return;

+	/*
+	 * Prevent the CPU from changing from now on. RCU
+	 * must be watching if the task was migrated and
+	 * then scheduled.
+	 */
+	preempt_disable_notrace();
+
 	if ((unsigned long)ops->private != smp_processor_id())
-		return;
+		goto out;

 	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
-		return;
+		goto out;

 	event = container_of(ops, struct perf_event, ftrace_ops);

@@ -468,16 +475,18 @@ void perf_trace_buf_update(void *record, u16 type)

 	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
 	if (!entry)
-		goto out;
+		goto unlock;

 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
 			      1, &regs, &head, NULL);

-out:
+unlock:
 	ftrace_test_recursion_unlock(bit);
 #undef ENTRY_SIZE
+out:
+	preempt_enable_notrace();
 }

 static int perf_ftrace_function_register(struct perf_event *event)
-- 
1.8.3.1



