All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
@ 2022-09-23  7:51 Kassey Li
  2022-09-23 15:00 ` Steven Rostedt
  2022-09-26 14:09 ` Steven Rostedt
  0 siblings, 2 replies; 8+ messages in thread
From: Kassey Li @ 2022-09-23  7:51 UTC (permalink / raw)
  To: rostedt, mingo, tj, william.kucharski; +Cc: Kassey Li, linux-kernel

__string could get a dst string with length less than
TASK_COMM_LEN.

A task->comm may change that can cause out of bounds access
for the dst string buffer, e.g in the call trace of below:

Call trace:

    dump_backtrace.cfi_jt+0x0/0x4
    show_stack+0x14/0x1c
    dump_stack+0xa0/0xd8
    die_callback+0x248/0x24c
    notify_die+0x7c/0xf8
    die+0xac/0x290
    die_kernel_fault+0x88/0x98
    die_kernel_fault+0x0/0x98
    do_page_fault+0xa0/0x544
    do_mem_abort+0x60/0x10c
    el1_da+0x1c/0xc4
    trace_event_raw_event_cgroup_migrate+0x124/0x170
    cgroup_attach_task+0x2e8/0x41c
    __cgroup1_procs_write+0x114/0x1ec
    cgroup1_tasks_write+0x10/0x18
    cgroup_file_write+0xa4/0x208
    kernfs_fop_write+0x1f0/0x2f4
    __vfs_write+0x5c/0x200
    vfs_write+0xe0/0x1a0
    ksys_write+0x74/0xdc
    __arm64_sys_write+0x18/0x20
    el0_svc_common+0xc0/0x1a4
    el0_svc_compat_handler+0x18/0x20
    el0_svc_compat+0x8/0x2c

Change it as arrary with same length TASK_COMM_LEN,
This idea is from commit d1eb650ff413 ("tracepoint: Move signal sending
tracepoint to events/signal.h").

Signed-off-by: Kassey Li <quic_yingangl@quicinc.com>
---
 include/trace/events/cgroup.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index dd7d7c9efecd..b4ef0ffa38a4 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 		__field(	u64,		dst_id			)
 		__field(	int,		pid			)
 		__string(	dst_path,	path			)
-		__string(	comm,		task->comm		)
+		__array(char, comm, TASK_COMM_LEN)
 	),
 
 	TP_fast_assign(
@@ -139,12 +139,12 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 		__entry->dst_level = dst_cgrp->level;
 		__assign_str(dst_path, path);
 		__entry->pid = task->pid;
-		__assign_str(comm, task->comm);
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
 	),
 
 	TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
 		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
-		  __get_str(dst_path), __entry->pid, __get_str(comm))
+		  __get_str(dst_path), __entry->pid, __entry->comm)
 );
 
 DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-23  7:51 [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN Kassey Li
@ 2022-09-23 15:00 ` Steven Rostedt
  2022-09-26  2:18   ` Kassey Li
  2022-09-26 14:09 ` Steven Rostedt
  1 sibling, 1 reply; 8+ messages in thread
From: Steven Rostedt @ 2022-09-23 15:00 UTC (permalink / raw)
  To: Kassey Li; +Cc: mingo, tj, william.kucharski, linux-kernel

On Fri, 23 Sep 2022 15:51:05 +0800
Kassey Li <quic_yingangl@quicinc.com> wrote:

> __string could get a dst string with length less than
> TASK_COMM_LEN.
> 
> A task->comm may change that can cause out of bounds access
> for the dst string buffer, e.g in the call trace of below:
> 
> Call trace:
> 
>     dump_backtrace.cfi_jt+0x0/0x4
>     show_stack+0x14/0x1c
>     dump_stack+0xa0/0xd8
>     die_callback+0x248/0x24c
>     notify_die+0x7c/0xf8
>     die+0xac/0x290
>     die_kernel_fault+0x88/0x98
>     die_kernel_fault+0x0/0x98
>     do_page_fault+0xa0/0x544
>     do_mem_abort+0x60/0x10c
>     el1_da+0x1c/0xc4
>     trace_event_raw_event_cgroup_migrate+0x124/0x170
>     cgroup_attach_task+0x2e8/0x41c
>     __cgroup1_procs_write+0x114/0x1ec
>     cgroup1_tasks_write+0x10/0x18
>     cgroup_file_write+0xa4/0x208
>     kernfs_fop_write+0x1f0/0x2f4
>     __vfs_write+0x5c/0x200
>     vfs_write+0xe0/0x1a0
>     ksys_write+0x74/0xdc
>     __arm64_sys_write+0x18/0x20
>     el0_svc_common+0xc0/0x1a4
>     el0_svc_compat_handler+0x18/0x20
>     el0_svc_compat+0x8/0x2c
> 
> Change it as arrary with same length TASK_COMM_LEN,
> This idea is from commit d1eb650ff413 ("tracepoint: Move signal sending
> tracepoint to events/signal.h").

This does not make sense. What exactly is the bug here?

__string() will do a strlen(task->comm) + 1 to allocate on the ring buffer.
It should not be less that task->comm. The above stack dump does not show
what happened.

This looks like another bug and I do not see how this patch addresses
the issue.

-- Steve

> 
> Signed-off-by: Kassey Li <quic_yingangl@quicinc.com>
> ---
>  include/trace/events/cgroup.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
> index dd7d7c9efecd..b4ef0ffa38a4 100644
> --- a/include/trace/events/cgroup.h
> +++ b/include/trace/events/cgroup.h
> @@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
>  		__field(	u64,		dst_id			)
>  		__field(	int,		pid			)
>  		__string(	dst_path,	path			)
> -		__string(	comm,		task->comm		)
> +		__array(char, comm, TASK_COMM_LEN)
>  	),
>  
>  	TP_fast_assign(
> @@ -139,12 +139,12 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
>  		__entry->dst_level = dst_cgrp->level;
>  		__assign_str(dst_path, path);
>  		__entry->pid = task->pid;
> -		__assign_str(comm, task->comm);
> +		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
>  	),
>  
>  	TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
>  		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
> -		  __get_str(dst_path), __entry->pid, __get_str(comm))
> +		  __get_str(dst_path), __entry->pid, __entry->comm)
>  );
>  
>  DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-23 15:00 ` Steven Rostedt
@ 2022-09-26  2:18   ` Kassey Li
  2022-09-26  2:42     ` Steven Rostedt
  0 siblings, 1 reply; 8+ messages in thread
From: Kassey Li @ 2022-09-26  2:18 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: mingo, tj, william.kucharski, linux-kernel



On 9/23/2022 11:00 PM, Steven Rostedt wrote:
> On Fri, 23 Sep 2022 15:51:05 +0800
> Kassey Li <quic_yingangl@quicinc.com> wrote:
> 
>> __string could get a dst string with length less than
>> TASK_COMM_LEN.
>>
>> A task->comm may change that can cause out of bounds access
>> for the dst string buffer, e.g in the call trace of below:
>>
>> Call trace:
>>
>>      dump_backtrace.cfi_jt+0x0/0x4
>>      show_stack+0x14/0x1c
>>      dump_stack+0xa0/0xd8
>>      die_callback+0x248/0x24c
>>      notify_die+0x7c/0xf8
>>      die+0xac/0x290
>>      die_kernel_fault+0x88/0x98
>>      die_kernel_fault+0x0/0x98
>>      do_page_fault+0xa0/0x544
>>      do_mem_abort+0x60/0x10c
>>      el1_da+0x1c/0xc4
>>      trace_event_raw_event_cgroup_migrate+0x124/0x170
>>      cgroup_attach_task+0x2e8/0x41c
>>      __cgroup1_procs_write+0x114/0x1ec
>>      cgroup1_tasks_write+0x10/0x18
>>      cgroup_file_write+0xa4/0x208
>>      kernfs_fop_write+0x1f0/0x2f4
>>      __vfs_write+0x5c/0x200
>>      vfs_write+0xe0/0x1a0
>>      ksys_write+0x74/0xdc
>>      __arm64_sys_write+0x18/0x20
>>      el0_svc_common+0xc0/0x1a4
>>      el0_svc_compat_handler+0x18/0x20
>>      el0_svc_compat+0x8/0x2c
>>
>> Change it as arrary with same length TASK_COMM_LEN,
>> This idea is from commit d1eb650ff413 ("tracepoint: Move signal sending
>> tracepoint to events/signal.h").
> 
> This does not make sense. What exactly is the bug here?
hi, Steven:
     hope below info can give you idea on this , let me know if you need 
more info.

kernel log:
	Unable to handle kernel write to read-only memory at virtual address 
ffffffbcf7450000

"SharedPreferenc" is task name/comm.

memory/ddr dump:

   FFFFFFBCF744FFE0| 00090020 000B0029 706F742F 7070612D 61685300 
50646572 65666572 636E6572  ...).../top-app.SharedPreferenc
   FFFFFFBCF7450000|>52800101 97FD3A05 140000B3 AA1303E0 9400193C 
B0000F88 90000D89 9137FD08 ...R.:..........<.............7.

trace stack:

-000|strcpy(inline)
-000|trace_event_raw_event_cgroup_migrate
-001|trace_cgroup_attach_task(inline)
-001|cgroup_attach_task()
-002|__read_once_size(inline)
-002|atomic_read(inline)
-002|static_key_count(inline)
-002|static_key_false(inline)
-002|trace_android_vh_cgroup_set_task(inline)
-002|__cgroup1_procs_write()
-003|cgroup1_tasks_write
-004|cgroup_file_write
-005|kernfs_fop_write$
-006|__vfs_write()
-007|vfs_write()
-008|ksys_write()
-009|__se_sys_write(inline)
-009|__arm64_sys_write()
-010|__invoke_syscall(inline)
-010|invoke_syscall(inline)
-010|el0_svc_common()
-011|el0_svc_compat_handler()
-012|el0_svc_compat(asm)




> 
> __string() will do a strlen(task->comm) + 1 to allocate on the ring buffer.
> It should not be less that task->comm. The above stack dump does not show
> what happened.
> 
> This looks like another bug and I do not see how this patch addresses
> the issue.
> 
> -- Steve
> 
>>
>> Signed-off-by: Kassey Li <quic_yingangl@quicinc.com>
>> ---
>>   include/trace/events/cgroup.h | 6 +++---
>>   1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
>> index dd7d7c9efecd..b4ef0ffa38a4 100644
>> --- a/include/trace/events/cgroup.h
>> +++ b/include/trace/events/cgroup.h
>> @@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
>>   		__field(	u64,		dst_id			)
>>   		__field(	int,		pid			)
>>   		__string(	dst_path,	path			)
>> -		__string(	comm,		task->comm		)
>> +		__array(char, comm, TASK_COMM_LEN)
>>   	),
>>   
>>   	TP_fast_assign(
>> @@ -139,12 +139,12 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
>>   		__entry->dst_level = dst_cgrp->level;
>>   		__assign_str(dst_path, path);
>>   		__entry->pid = task->pid;
>> -		__assign_str(comm, task->comm);
>> +		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
	I think the problem is here, __assign_str using strcpy
	the task->comm here tail is not '\0'
	that's why it out of bounds access.

	do you want to this version or just modify the memcpy or strncpy to do 
with a known length ?  please give suggest so I can modify .

>>   	),
>>   
>>   	TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
>>   		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
>> -		  __get_str(dst_path), __entry->pid, __get_str(comm))
>> +		  __get_str(dst_path), __entry->pid, __entry->comm)
>>   );
>>   
>>   DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-26  2:18   ` Kassey Li
@ 2022-09-26  2:42     ` Steven Rostedt
  2022-09-26 11:08       ` Kassey Li
  0 siblings, 1 reply; 8+ messages in thread
From: Steven Rostedt @ 2022-09-26  2:42 UTC (permalink / raw)
  To: Kassey Li; +Cc: mingo, tj, william.kucharski, linux-kernel

On Mon, 26 Sep 2022 10:18:55 +0800
Kassey Li <quic_yingangl@quicinc.com> wrote:

> >> @@ -139,12 +139,12 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
> >>   		__entry->dst_level = dst_cgrp->level;
> >>   		__assign_str(dst_path, path);
> >>   		__entry->pid = task->pid;
> >> -		__assign_str(comm, task->comm);
> >> +		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);  
> 	I think the problem is here, __assign_str using strcpy
> 	the task->comm here tail is not '\0'
> 	that's why it out of bounds access.
> 

If this is the case, then there's a lot more than just tracing that will
break. There are other places in the kernel has used strcpy() on task->comm,
and many more that do "%s" on task->comm, which would also crash on this.

> 	do you want to this version or just modify the memcpy or strncpy to do 
> with a known length ?  please give suggest so I can modify .

I'm guessing a problem exists elsewhere that makes it look like this is the
issue. I suggest finding where the '\0' is dropped (if that is indeed the
case).

-- Steve

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-26  2:42     ` Steven Rostedt
@ 2022-09-26 11:08       ` Kassey Li
  0 siblings, 0 replies; 8+ messages in thread
From: Kassey Li @ 2022-09-26 11:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: mingo, tj, william.kucharski, linux-kernel



On 9/26/2022 10:42 AM, Steven Rostedt wrote:
> On Mon, 26 Sep 2022 10:18:55 +0800
> Kassey Li <quic_yingangl@quicinc.com> wrote:
> 
>>>> @@ -139,12 +139,12 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
>>>>    		__entry->dst_level = dst_cgrp->level;
>>>>    		__assign_str(dst_path, path);
>>>>    		__entry->pid = task->pid;
>>>> -		__assign_str(comm, task->comm);
>>>> +		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
>> 	I think the problem is here, __assign_str using strcpy
>> 	the task->comm here tail is not '\0'
>> 	that's why it out of bounds access.
>>
> 
> If this is the case, then there's a lot more than just tracing that will
> break. There are other places in the kernel has used strcpy() on task->comm,
> and many more that do "%s" on task->comm, which would also crash on this.

You are right.

by re-check my local logs(arm64), we can see the src has '\0' as end of 
string.
but looks strcpy did not catch this and crossed.
I can not figure out how this could happen. if there is debug suggest, 
please help.


src: task->comm   SharedPreferenc   pid  28395
_____________________address|________0________4________8________C_0123456789ABCDEF
   NSD:0000::FFFFFFBD1B6C59D0|>72616853 72506465 72656665 00636E65 
SharedPreferenc.


dst: trace event buffer:
_____________________address|________0________4________8________C_0123456789ABCDEF
   NSD:0000::FFFFFFBCF744FFE0| 00090020 000B0029 706F742F 7070612D 
...).../top-app
   NSD:0000::FFFFFFBCF744FFF0| 61685300 50646572 65666572 636E6572 
.SharedPreferenc
   NSD:0000::FFFFFFBCF7450000|>52800101 97FD3A05 140000B3 AA1303E0 
...R.:..........

layout of the struct:

   [ND:0x0::0xFFFFFFBCF744FFC8] (struct 
trace_event_raw_cgroup_migrate)0xFFFFFFBCF744FFc8 = (
     [ND:0x0::0xFFFFFFBCF744FFC8] ent = (
       [ND:0x0::0xFFFFFFBCF744FFC8] type = 0x98,
       [ND:0x0::0xFFFFFFBCF744FFCA] flags = 0x1,
       [ND:0x0::0xFFFFFFBCF744FFCB] preempt_count = 0x1,
       [ND:0x0::0xFFFFFFBCF744FFCC] pid = 0x0773),
     [ND:0x0::0xFFFFFFBCF744FFD0] dst_root = 0x1,
     [ND:0x0::0xFFFFFFBCF744FFD4] dst_id = 0x6,
     [ND:0x0::0xFFFFFFBCF744FFD8] dst_level = 0x1,
     [ND:0x0::0xFFFFFFBCF744FFDC] pid = 28395 = 0x6EEB,
     [ND:0x0::0xFFFFFFBCF744FFE0] __data_loc_dst_path = 0x00090020 = '... ',
     [ND:0x0::0xFFFFFFBCF744FFE4] __data_loc_comm = 0x000B0029 = '...)',
     [ND:0x0::0xFFFFFFBCF744FFE8] __data_=_"/top-app")

name: cgroup_attach_task
ID: 152
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:int dst_root;	offset:8;	size:4;	signed:1;
	field:int dst_id;	offset:12;	size:4;	signed:1;
	field:int dst_level;	offset:16;	size:4;	signed:1;
	field:int pid;	offset:20;	size:4;	signed:1;
	field:__data_loc char[] dst_path;	offset:24;	size:4;	signed:0;
	field:__data_loc char[] comm;	offset:28;	size:4;	signed:0;


_____________________address|________0________4________8________C_0123456789ABCDEF
   NSD:0000::FFFFFFBCF744FFC0| 00656C64 0066D18D>01010098 00000773 
dle...f.....s...
   NSD:0000::FFFFFFBCF744FFD0| 00000001 00000006 00000001 00006EEB 
.............n..
   NSD:0000::FFFFFFBCF744FFE0| 00090020 000B0029 706F742F 7070612D 
...).../top-app
   NSD:0000::FFFFFFBCF744FFF0| 61685300 50646572 65666572 636E6572 
.SharedPreferenc
   NSD:0000::FFFFFFBCF7450000| 52800101 97FD3A05 140000B3 AA1303E0 
...R.:..........

> 
>> 	do you want to this version or just modify the memcpy or strncpy to do
>> with a known length ?  please give suggest so I can modify .
> 
> I'm guessing a problem exists elsewhere that makes it look like this is the
> issue. I suggest finding where the '\0' is dropped (if that is indeed the
> case).
> 
> -- Steve

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-23  7:51 [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN Kassey Li
  2022-09-23 15:00 ` Steven Rostedt
@ 2022-09-26 14:09 ` Steven Rostedt
  2022-09-27  1:32   ` Kassey Li
  2022-09-28  7:35   ` Kassey Li
  1 sibling, 2 replies; 8+ messages in thread
From: Steven Rostedt @ 2022-09-26 14:09 UTC (permalink / raw)
  To: Kassey Li; +Cc: mingo, tj, william.kucharski, linux-kernel

On Fri, 23 Sep 2022 15:51:05 +0800
Kassey Li <quic_yingangl@quicinc.com> wrote:

> __string could get a dst string with length less than
> TASK_COMM_LEN.
> 
> A task->comm may change that can cause out of bounds access
> for the dst string buffer, e.g in the call trace of below:
> 
> Call trace:
> 
>     dump_backtrace.cfi_jt+0x0/0x4
>     show_stack+0x14/0x1c
>     dump_stack+0xa0/0xd8
>     die_callback+0x248/0x24c
>     notify_die+0x7c/0xf8
>     die+0xac/0x290
>     die_kernel_fault+0x88/0x98
>     die_kernel_fault+0x0/0x98
>     do_page_fault+0xa0/0x544
>     do_mem_abort+0x60/0x10c
>     el1_da+0x1c/0xc4

>     trace_event_raw_event_cgroup_migrate+0x124/0x170

You're sure the above is on the strcpy()?

Note, this code has __string() which does a strlen() which appears to be
working fine.

>     cgroup_attach_task+0x2e8/0x41c
>     __cgroup1_procs_write+0x114/0x1ec
>     cgroup1_tasks_write+0x10/0x18
>     cgroup_file_write+0xa4/0x208
>     kernfs_fop_write+0x1f0/0x2f4
>     __vfs_write+0x5c/0x200
>     vfs_write+0xe0/0x1a0
>     ksys_write+0x74/0xdc
>     __arm64_sys_write+0x18/0x20
>     el0_svc_common+0xc0/0x1a4
>     el0_svc_compat_handler+0x18/0x20
>     el0_svc_compat+0x8/0x2c

Can you give the full debug report, that includes the register content and
everything else.

-- Steve

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-26 14:09 ` Steven Rostedt
@ 2022-09-27  1:32   ` Kassey Li
  2022-09-28  7:35   ` Kassey Li
  1 sibling, 0 replies; 8+ messages in thread
From: Kassey Li @ 2022-09-27  1:32 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: mingo, tj, william.kucharski, linux-kernel



On 9/26/2022 10:09 PM, Steven Rostedt wrote:
> On Fri, 23 Sep 2022 15:51:05 +0800
> Kassey Li <quic_yingangl@quicinc.com> wrote:
> 
>> __string could get a dst string with length less than
>> TASK_COMM_LEN.
>>
>> A task->comm may change that can cause out of bounds access
>> for the dst string buffer, e.g in the call trace of below:
>>
>> Call trace:
>>
>>      dump_backtrace.cfi_jt+0x0/0x4
>>      show_stack+0x14/0x1c
>>      dump_stack+0xa0/0xd8
>>      die_callback+0x248/0x24c
>>      notify_die+0x7c/0xf8
>>      die+0xac/0x290
>>      die_kernel_fault+0x88/0x98
>>      die_kernel_fault+0x0/0x98
>>      do_page_fault+0xa0/0x544
>>      do_mem_abort+0x60/0x10c
>>      el1_da+0x1c/0xc4
> 
>>      trace_event_raw_event_cgroup_migrate+0x124/0x170
> 
> You're sure the above is on the strcpy()?
set PC to 0xffffffda401c47d4
and this reflect to the strcpy asm code of aarch64. (lib/string.c)

                          120|
   NSX:0000::FFFFFFDA401C47B8|B946C268 
                       ldr     w8,[x19,#0x6C0]   ; w8,[task,#1728]
   NSX:0000::FFFFFFDA401C47BC|79403809 
                       ldrh    w9,[x0,#0x1C]    ; w9,[entry,#28]
   NSX:0000::FFFFFFDA401C47C0|F10002FF 
                       cmp     x23,#0x0         ; x23,#0
   NSX:0000::FFFFFFDA401C47C4|B9001408 
                       str     w8,[x0,#0x14]    ; w8,[entry,#20]
   NSX:0000::FFFFFFDA401C47C8|8B090008 
                       add     x8,x0,x9         ; x8,entry,x9
   NSX:0000::FFFFFFDA401C47CC|9A970349 
                       csel    x9,x26,x23,eq
                             |
                             |
                           93|
   NSX:0000::FFFFFFDA401C47D0|3840152A 
                       ldrb    w10,[x9],#0x1    ; w10,[src],#1
 
NSX:0000::FFFFFFDA401C47D4|3800150A_________________________________________________________strb____w10,[x8],#0x1____;_w10,[dest],#1
__NSX:0000::FFFFFFDA401C47D8|35FFFFCA_________________________________________________________cbnz____w10,0xFFFFFFDA401C47D0
                             |
                             |
                             |
                             |
                             |
                             |
                          120|
   NSX:0000::FFFFFFDA401C47DC|910023E0 
                       add     x0,sp,#0x8       ; entry,sp,#8
   NSX:0000::FFFFFFDA401C47E0|AA1503E1 
                       mov     x1,x21
   NSX:0000::FFFFFFDA401C47E4|9400E4ED 
                       bl      0xFFFFFFDA401FDB98   ; 
trace_event_buffer_commit
   NSX:0000::FFFFFFDA401C47E8|F00134C9 
                       adrp    x9,0xFFFFFFDA4285F000
   NSX:0000::FFFFFFDA401C47EC|F85F83A8 
                       ldur    x8,[x29,#-0x8]   ; x8,[x29,#-8]
   NSX:0000::FFFFFFDA401C47F0|F9420929 
                       ldr     x9,[x9,#0x410]   ; x9,[x9,#1040]
   NSX:0000::FFFFFFDA401C47F4|EB08013F 
                       cmp     x9,x8
   NSX:0000::FFFFFFDA401C47F8|54000121 
                       b.ne    0xFFFFFFDA401C481C
   NSX:0000::FFFFFFDA401C47FC|A9494FF4 
                       ldp     x20,x19,[sp,#0x90]   ; 
xdst_cgrp,xtask,[sp,#144]
   NSX:0000::FFFFFFDA401C4800|A94857F6 
                       ldp     x22,x21,[sp,#0x80]   ; x__data,x21,[sp,#128]
   NSX:0000::FFFFFFDA401C4804|A9475FF8 
                       ldp     x24,x23,[sp,#0x70]   ; x24,x23,[sp,#112]
   NSX:0000::FFFFFFDA401C4808|A94667FA 
                       ldp     x26,x25,[sp,#0x60]   ; x26,x25,[sp,#96]
   NSX:0000::FFFFFFDA401C480C|A9456FFC 
                       ldp     x28,x27,[sp,#0x50]   ; x28,x27,[sp,#80]
   NSX:0000::FFFFFFDA401C4810|A9447BFD 
                       ldp     x29,x30,[sp,#0x40]   ; x29,x30,[sp,#64]
   NSX:0000::FFFFFFDA401C4814|910283FF 
                       add     sp,sp,#0xA0      ; sp,sp,#160
   NSX:0000::FFFFFFDA401C4818|D65F03C0 
                       ret
   NSX:0000::FFFFFFDA401C481C|97FC50DC 
                       bl      0xFFFFFFDA400D8B8C   ; __stack_chk_fail

> 
> Note, this code has __string() which does a strlen() which appears to be
> working fine.

  I did see this as well.

  500 #define __string(item, src) __dynamic_array(char, item,         \
  501             strlen((src) ? (const char *)(src) : "(null)") + 1)



My tester reported they could change the task->comm.
for example from "123456789abcde" to "test"
I wonder if we prepare the buffer as "test" 5bytes with __string
then  __assign_str  with "123456789abcde" new task->comm.

but this is very hard race condition.

this is not easy to reproduced, we got 2 instances of this problem.

I read others using the task->comm as trace event with memcpy as this 
patch I initialized.



> 
>>      cgroup_attach_task+0x2e8/0x41c
>>      __cgroup1_procs_write+0x114/0x1ec
>>      cgroup1_tasks_write+0x10/0x18
>>      cgroup_file_write+0xa4/0x208
>>      kernfs_fop_write+0x1f0/0x2f4
>>      __vfs_write+0x5c/0x200
>>      vfs_write+0xe0/0x1a0
>>      ksys_write+0x74/0xdc
>>      __arm64_sys_write+0x18/0x20
>>      el0_svc_common+0xc0/0x1a4
>>      el0_svc_compat_handler+0x18/0x20
>>      el0_svc_compat+0x8/0x2c
> 
> Can you give the full debug report, that includes the register content and
> everything else.

here is the crash stack of cpu regs.
you can ref the asm code above.


pc : [0xffffffda401c47d4] trace_event_raw_event_cgroup_migrate+0x124/0x170
lr : [0xffffffda401c4774] trace_event_raw_event_cgroup_migrate+0xc4/0x170
sp : ffffffc01f8e9aa0
x29: ffffffc01f8e9ae0 x28: 000000000000000b
x27: 0000000000000009 x26: ffffffda41f1e515
x25: 0000000000000008 x24: ffffffda42c72a7d
x23: ffffffbd1b6c59d0 x22: ffffffbbf383bec8
x21: 0000000000000034 x20: ffffffbd621cd000
x19: ffffffbd1b6c5140 x18: 0000000000000008
x17: ffffffbd66b96010 x16: ffffffbd621cd000
x15: ffffffbc11583860 x14: ffffffbd1b6c5ba8
x13: 0000000000000000 x12: 0000000000200000
x11: 0000000000000001 x10: 0000000000000000
x9 : ffffffbd1b6c59e0 x8 : ffffffbcf7450000
x7 : 6a716e76225c435a x6 : 0000800000008080
x5 : 0000000000000001 x4 : 0000000000000080
x3 : 0000000000000034 x2 : 0000000000000098
x1 : 0000000000000034 x0 : ffffffbcf744ffc8



> 
> -- Steve

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN
  2022-09-26 14:09 ` Steven Rostedt
  2022-09-27  1:32   ` Kassey Li
@ 2022-09-28  7:35   ` Kassey Li
  1 sibling, 0 replies; 8+ messages in thread
From: Kassey Li @ 2022-09-28  7:35 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: mingo, tj, william.kucharski, linux-kernel



On 9/26/2022 10:09 PM, Steven Rostedt wrote:
> On Fri, 23 Sep 2022 15:51:05 +0800
> Kassey Li <quic_yingangl@quicinc.com> wrote:
> 
>> __string could get a dst string with length less than
>> TASK_COMM_LEN.
>>
>> A task->comm may change that can cause out of bounds access
>> for the dst string buffer, e.g in the call trace of below:
>>
>> Call trace:
>>
>>      dump_backtrace.cfi_jt+0x0/0x4
>>      show_stack+0x14/0x1c
>>      dump_stack+0xa0/0xd8
>>      die_callback+0x248/0x24c
>>      notify_die+0x7c/0xf8
>>      die+0xac/0x290
>>      die_kernel_fault+0x88/0x98
>>      die_kernel_fault+0x0/0x98
>>      do_page_fault+0xa0/0x544
>>      do_mem_abort+0x60/0x10c
>>      el1_da+0x1c/0xc4
> 
>>      trace_event_raw_event_cgroup_migrate+0x124/0x170
> 
> You're sure the above is on the strcpy()?
> 
> Note, this code has __string() which does a strlen() which appears to be
> working fine.
> 
>>      cgroup_attach_task+0x2e8/0x41c
>>      __cgroup1_procs_write+0x114/0x1ec
>>      cgroup1_tasks_write+0x10/0x18
>>      cgroup_file_write+0xa4/0x208
>>      kernfs_fop_write+0x1f0/0x2f4
>>      __vfs_write+0x5c/0x200
>>      vfs_write+0xe0/0x1a0
>>      ksys_write+0x74/0xdc
>>      __arm64_sys_write+0x18/0x20
>>      el0_svc_common+0xc0/0x1a4
>>      el0_svc_compat_handler+0x18/0x20
>>      el0_svc_compat+0x8/0x2c
> 
> Can you give the full debug report, that includes the register content and
> everything else.
> 
> -- Steve

hi,Steve:
	I simulate a what I supposed problem,  but it did not panic.
	I will use kasan to debug further. thanks for your review and suggest 
on this problem. abandon this patch now currently, if there is new 
proved problem, I will come again.


diff --git a/include/linux/sched.h b/include/linux/sched.h
index 637d25c31374..d219d9f45529 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,7 @@ struct task_struct {
         ANDROID_KABI_RESERVE(7);
         ANDROID_KABI_RESERVE(8);

+       char comm1[32];
         /*
          * New fields for task_struct should be added above here, so that
          * they are included in the randomized portion of task_struct.
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index dd7d7c9efecd..da2426c7f99b 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -139,7 +139,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
                 __entry->dst_level = dst_cgrp->level;
                 __assign_str(dst_path, path);
                 __entry->pid = task->pid;
-               __assign_str(comm, task->comm);
+               __assign_str(comm, task->comm1);
         ),

         TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s 
pid=%d comm=%s",
diff --git a/kernel/fork.c b/kernel/fork.c
index 58409b7178c2..4e0c564852c6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1443,6 +1443,7 @@ static int copy_mm(unsigned long clone_flags, 
struct task_struct *tsk)
         tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
         tsk->last_switch_time = 0;
  #endif
+        strlcpy(tsk->comm1, "012345678901234567890123456789", 30);

         tsk->mm = NULL;
         tsk->active_mm = NULL;




# cat /sys/kernel/tracing/trace_pipe
               sh-874     [007] d..2   142.097349: 
cgroup_notify_populated: root=1 id=49 level=1 path=/top-app val=1
               sh-874     [007] d..1   142.097405: cgroup_attach_task: 
dst_root=1 dst_id=49 dst_level=1 dst_path=/top-app pid=1 
comm=01234567890123456789012345678

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-09-28  7:38 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-23  7:51 [PATCH v2] cgroup: align the comm length with TASK_COMM_LEN Kassey Li
2022-09-23 15:00 ` Steven Rostedt
2022-09-26  2:18   ` Kassey Li
2022-09-26  2:42     ` Steven Rostedt
2022-09-26 11:08       ` Kassey Li
2022-09-26 14:09 ` Steven Rostedt
2022-09-27  1:32   ` Kassey Li
2022-09-28  7:35   ` Kassey Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.