linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
@ 2011-12-01 11:01 takuo.koguchi.sw
  2012-02-01  1:47 ` Indan Zupancic
  2012-02-01  9:46 ` Russell King - ARM Linux
  0 siblings, 2 replies; 11+ messages in thread
From: takuo.koguchi.sw @ 2011-12-01 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: masami.hiramatsu.pt, linux, rostedt, fweisbec, mingo, jbaron,
	yrl.pp-manager.tt

    This patch is necessary to make ftrace syscall trace working for ARM.
The following events are added to /sys/kernel/debug/tracing/available_events
- raw_syscalls:sys_enter
- raw_syscalls:sys_exit
- syscalls:sys_enter_*
- syscalls:sys_exit_*

Sample output;
# tracer: nop
#
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
#              | |       |          |         |
       trace-cmd-640   [000]  1174.164948: sys_write -> 0x1
       trace-cmd-640   [000]  1174.164948: sys_exit: NR 4 = 1
       trace-cmd-640   [000]  1174.165009: sys_write(fd: 1, buf: 400c2000, count: d0)
       trace-cmd-640   [000]  1174.165009: sys_enter: NR 4 (1, 400c2000, d0, 0, d0, f54f8)
       trace-cmd-640   [000]  1174.165070: sys_write -> 0xffffffe0
       trace-cmd-640   [000]  1174.165070: sys_exit: NR 4 = -32
              sh-637   [000]  1174.165649: sys_wait4 -> 0x280
              sh-637   [000]  1174.165649: sys_exit: NR 114 = 640
              sh-637   [000]  1174.165680: sys_ioctl(fd: 3ff, cmd: 5410, arg: be82fad4)
              sh-637   [000]  1174.165710: sys_enter: NR 54 (3ff, 5410, be82fad4, be82facc, 1d578, 1b6fc)
              sh-637   [000]  1174.165710: sys_ioctl -> 0x0
              sh-637   [000]  1174.165741: sys_exit: NR 54 = 0
              sh-637   [000]  1174.165771: sys_wait4(upid: ffffffff, stat_addr: be82fae4, options: 1, ru: 0)
              sh-637   [000]  1174.165771: sys_enter: NR 114 (ffffffff, be82fae4, 1, 0, 1, be82fae4)
              sh-637   [000]  1174.165771: sys_wait4 -> 0xfffffff6
              sh-637   [000]  1174.165802: sys_exit: NR 114 = -10
              sh-637   [000]  1174.165802: sys_wait4(upid: ffffffff, stat_addr: be82fae4, options: 1, ru: 0)

Signed-off-by: Takuo Koguchi <takuo.koguchi.sw@hitachi.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 arch/arm/Kconfig                   |    1 +
 arch/arm/include/asm/syscall.h     |   45 ++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/thread_info.h |    3 ++
 arch/arm/include/asm/unistd.h      |    3 ++
 arch/arm/kernel/entry-common.S     |    4 +-
 arch/arm/kernel/ptrace.c           |   10 ++++++++
 6 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/include/asm/syscall.h

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 44789ef..84181b3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -13,6 +13,7 @@ config ARM
 	select HAVE_KPROBES if !XIP_KERNEL
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
new file mode 100644
index 0000000..cabeb67
--- /dev/null
+++ b/arch/arm/include/asm/syscall.h
@@ -0,0 +1,45 @@
+#ifndef _ASM_ARM_SYSCALL_H
+#define _ASM_ARM_SYSCALL_H
+
+extern const unsigned long sys_call_table[];
+
+#include <linux/sched.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return regs->ARM_r7;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ARM_r0;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i);
+	switch (n) {
+	case 6:
+		args[5] = regs->ARM_r5; /* fall through */
+	case 5:
+		args[4] = regs->ARM_r4;
+	case 4:
+		args[3] = regs->ARM_r3;
+	case 3:
+		args[2] = regs->ARM_r2;
+	case 2:
+		args[1] = regs->ARM_r1;
+	case 1:
+		args[0] = regs->ARM_r0;
+	case 0:
+		break;
+	default:
+		BUG();
+	}
+}
+#endif	/* _ASM_ARM_SYSCALL_H */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7b5cc8d..2509028 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -139,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_SYSCALL_TRACE	8
+#define TIF_SYSCALL_TRACEPOINT	15
 #define TIF_POLLING_NRFLAG	16
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@@ -150,11 +151,13 @@ extern void vfp_flush_hwstate(struct thread_info *);
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
 
 /*
  * Change these and you break ASM code in entry-common.S
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 4a11237..f4eac2d 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -405,6 +405,9 @@
 #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 
+#ifndef __ASSEMBLY__
+#define NR_syscalls		378
+#endif
 /*
  * The following SWIs are ARM private.
  */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index b2a27b6..a1577c2 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -87,7 +87,7 @@ ENTRY(ret_from_fork)
 	get_thread_info tsk
 	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
 	mov	why, #1
-	tst	r1, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
+	tst	r1, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
 	beq	ret_slow_syscall
 	mov	r1, sp
 	mov	r0, #1				@ trace exit [IP = 1]
@@ -443,7 +443,7 @@ ENTRY(vector_swi)
 1:
 #endif
 
-	tst	r10, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
+	tst	r10, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
 	bne	__sys_trace
 
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 483727a..a690c9f 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -28,6 +28,9 @@
 #include <asm/system.h>
 #include <asm/traps.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
 #define REG_PC	15
 #define REG_PSR	16
 /*
@@ -906,6 +909,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
 {
 	unsigned long ip;
 
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) {
+		if (why)
+			trace_sys_exit(regs, regs->ARM_r0);
+		else
+			trace_sys_enter(regs, scno);
+	}
+
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		return scno;
 	if (!(current->ptrace & PT_PTRACED))
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2011-12-01 11:01 [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS takuo.koguchi.sw
@ 2012-02-01  1:47 ` Indan Zupancic
  2012-02-01  2:09   ` Steven Rostedt
  2012-02-02  9:21   ` Takuo Koguchi
  2012-02-01  9:46 ` Russell King - ARM Linux
  1 sibling, 2 replies; 11+ messages in thread
From: Indan Zupancic @ 2012-02-01  1:47 UTC (permalink / raw)
  To: takuo.koguchi.sw
  Cc: linux-kernel, masami.hiramatsu.pt, linux, rostedt, fweisbec,
	mingo, jbaron, yrl.pp-manager.tt, mcgrathr

Hello,

CC'ing Roland.

On Thu, December 1, 2011 12:01, takuo.koguchi.sw@hitachi.com wrote:
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 44789ef..84181b3 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -13,6 +13,7 @@ config ARM
>  	select HAVE_KPROBES if !XIP_KERNEL
>  	select HAVE_KRETPROBES if (HAVE_KPROBES)
>  	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
> +	select HAVE_SYSCALL_TRACEPOINTS

Your patch totally ignores OABI, it should at least depend on CONFIG_AEABI
and on !CONFIG_OABI_COMPAT.

>  	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
>  	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
>  	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
> diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
> new file mode 100644
> index 0000000..cabeb67
> --- /dev/null
> +++ b/arch/arm/include/asm/syscall.h
> @@ -0,0 +1,45 @@
> +#ifndef _ASM_ARM_SYSCALL_H
> +#define _ASM_ARM_SYSCALL_H
> +
> +extern const unsigned long sys_call_table[];
> +
> +#include <linux/sched.h>
> +
> +static inline long syscall_get_nr(struct task_struct *task,
> +				  struct pt_regs *regs)
> +{
> +	return regs->ARM_r7;
> +}
> +
> +static inline long syscall_get_return_value(struct task_struct *task,
> +					    struct pt_regs *regs)
> +{
> +	return regs->ARM_r0;
> +}
> +
> +static inline void syscall_get_arguments(struct task_struct *task,
> +					 struct pt_regs *regs,
> +					 unsigned int i, unsigned int n,
> +					 unsigned long *args)
> +{
> +	BUG_ON(i);

This is unacceptable, that would make this function useless.

See Rolands patch:

https://lkml.org/lkml/2009/4/24/399 for a better implementation.

> +	switch (n) {
> +	case 6:
> +		args[5] = regs->ARM_r5; /* fall through */
> +	case 5:
> +		args[4] = regs->ARM_r4;
> +	case 4:
> +		args[3] = regs->ARM_r3;
> +	case 3:
> +		args[2] = regs->ARM_r2;
> +	case 2:
> +		args[1] = regs->ARM_r1;
> +	case 1:
> +		args[0] = regs->ARM_r0;
> +	case 0:
> +		break;
> +	default:
> +		BUG();
> +	}
> +}
> +#endif	/* _ASM_ARM_SYSCALL_H */
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index 7b5cc8d..2509028 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -139,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
>  #define TIF_NEED_RESCHED	1
>  #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
>  #define TIF_SYSCALL_TRACE	8
> +#define TIF_SYSCALL_TRACEPOINT	15
>  #define TIF_POLLING_NRFLAG	16
>  #define TIF_USING_IWMMXT	17
>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
> @@ -150,11 +151,13 @@ extern void vfp_flush_hwstate(struct thread_info *);
>  #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>  #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
> +#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
>  #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
>  #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
>  #define _TIF_FREEZE		(1 << TIF_FREEZE)
>  #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
>  #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
> +#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)

What does 'A' stand for?

>
>  /*
>   * Change these and you break ASM code in entry-common.S
> diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
> index 4a11237..f4eac2d 100644
> --- a/arch/arm/include/asm/unistd.h
> +++ b/arch/arm/include/asm/unistd.h
> @@ -405,6 +405,9 @@
>  #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
>  #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
>
> +#ifndef __ASSEMBLY__
> +#define NR_syscalls		378
> +#endif
>  /*
>   * The following SWIs are ARM private.
>   */
> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
> index b2a27b6..a1577c2 100644
> --- a/arch/arm/kernel/entry-common.S
> +++ b/arch/arm/kernel/entry-common.S
> @@ -87,7 +87,7 @@ ENTRY(ret_from_fork)
>  	get_thread_info tsk
>  	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
>  	mov	why, #1
> -	tst	r1, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
> +	tst	r1, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>  	beq	ret_slow_syscall
>  	mov	r1, sp
>  	mov	r0, #1				@ trace exit [IP = 1]
> @@ -443,7 +443,7 @@ ENTRY(vector_swi)
>  1:
>  #endif
>
> -	tst	r10, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
> +	tst	r10, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>  	bne	__sys_trace
>
>  	cmp	scno, #NR_syscalls		@ check upper syscall limit
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 483727a..a690c9f 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -28,6 +28,9 @@
>  #include <asm/system.h>
>  #include <asm/traps.h>
>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/syscalls.h>
> +
>  #define REG_PC	15
>  #define REG_PSR	16
>  /*
> @@ -906,6 +909,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int
> scno)
>  {
>  	unsigned long ip;

Not used, you get the same info from 'why'.

> +	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) {
> +		if (why)
> +			trace_sys_exit(regs, regs->ARM_r0);
> +		else
> +			trace_sys_enter(regs, scno);
> +	}
> +
>  	if (!test_thread_flag(TIF_SYSCALL_TRACE))
>  		return scno;
>  	if (!(current->ptrace & PT_PTRACED))

Greetings,

Indan



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-01  1:47 ` Indan Zupancic
@ 2012-02-01  2:09   ` Steven Rostedt
  2012-02-02  9:21   ` Takuo Koguchi
  1 sibling, 0 replies; 11+ messages in thread
From: Steven Rostedt @ 2012-02-01  2:09 UTC (permalink / raw)
  To: Indan Zupancic
  Cc: takuo.koguchi.sw, linux-kernel, masami.hiramatsu.pt, linux,
	fweisbec, mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Wed, 2012-02-01 at 02:47 +0100, Indan Zupancic wrote:
> > +#define _TIF_SYSCALL_T_OR_A  (_TIF_SYSCALL_TRACE |
> _TIF_SYSCALL_TRACEPOINT)
> 
> What does 'A' stand for?
> 
Maybe he meant it to be TIF_SYSCALL_T_AND_A?

;-)

-- Steve



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2011-12-01 11:01 [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS takuo.koguchi.sw
  2012-02-01  1:47 ` Indan Zupancic
@ 2012-02-01  9:46 ` Russell King - ARM Linux
  1 sibling, 0 replies; 11+ messages in thread
From: Russell King - ARM Linux @ 2012-02-01  9:46 UTC (permalink / raw)
  To: takuo.koguchi.sw
  Cc: linux-kernel, masami.hiramatsu.pt, rostedt, fweisbec, mingo,
	jbaron, yrl.pp-manager.tt

On Thu, Dec 01, 2011 at 08:01:32PM +0900, takuo.koguchi.sw@hitachi.com wrote:
> diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
> index 4a11237..f4eac2d 100644
> --- a/arch/arm/include/asm/unistd.h
> +++ b/arch/arm/include/asm/unistd.h
> @@ -405,6 +405,9 @@
>  #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
>  #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
>  
> +#ifndef __ASSEMBLY__
> +#define NR_syscalls		378
> +#endif

So, we have 380 syscalls in the assembly code.  You're telling ftrace
that we have 378.  That's just great, because it means userspace can
trigger this trivially:

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
                return -ENOSYS;

And what about the ARM private syscalls?

This ftrace NR_syscalls definition would have to be some very large
number to avoid these issuing the above warning.  ftrace really needs
to lose this before ARM can start using it.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-01  1:47 ` Indan Zupancic
  2012-02-01  2:09   ` Steven Rostedt
@ 2012-02-02  9:21   ` Takuo Koguchi
  2012-02-02 11:00     ` Indan Zupancic
  1 sibling, 1 reply; 11+ messages in thread
From: Takuo Koguchi @ 2012-02-02  9:21 UTC (permalink / raw)
  To: Indan Zupancic
  Cc: linux-kernel, masami.hiramatsu.pt, linux, rostedt, fweisbec,
	mingo, jbaron, yrl.pp-manager.tt, mcgrathr

Hello,
I am glad to start getting responses.
(2012/02/01 10:47), Indan Zupancic wrote:
> Hello,
>
> CC'ing Roland.
>
> On Thu, December 1, 2011 12:01, takuo.koguchi.sw@hitachi.com wrote:
>> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
>> index 44789ef..84181b3 100644
>> --- a/arch/arm/Kconfig
>> +++ b/arch/arm/Kconfig
>> @@ -13,6 +13,7 @@ config ARM
>>   	select HAVE_KPROBES if !XIP_KERNEL
>>   	select HAVE_KRETPROBES if (HAVE_KPROBES)
>>   	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
>> +	select HAVE_SYSCALL_TRACEPOINTS
> Your patch totally ignores OABI, it should at least depend on CONFIG_AEABI
> and on !CONFIG_OABI_COMPAT.
Right.  As Russel King suggested, this patch depends on those configs 
until very large NR_syscalls is properly handled by ftrace.

>>   	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
>>   	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
>>   	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
>> diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
>> new file mode 100644
>> index 0000000..cabeb67
>> --- /dev/null
>> +++ b/arch/arm/include/asm/syscall.h
>> @@ -0,0 +1,45 @@
>> +#ifndef _ASM_ARM_SYSCALL_H
>> +#define _ASM_ARM_SYSCALL_H
>> +
>> +extern const unsigned long sys_call_table[];
>> +
>> +#include<linux/sched.h>
>> +
>> +static inline long syscall_get_nr(struct task_struct *task,
>> +				  struct pt_regs *regs)
>> +{
>> +	return regs->ARM_r7;
>> +}
>> +
>> +static inline long syscall_get_return_value(struct task_struct *task,
>> +					    struct pt_regs *regs)
>> +{
>> +	return regs->ARM_r0;
>> +}
>> +
>> +static inline void syscall_get_arguments(struct task_struct *task,
>> +					 struct pt_regs *regs,
>> +					 unsigned int i, unsigned int n,
>> +					 unsigned long *args)
>> +{
>> +	BUG_ON(i);
> This is unacceptable, that would make this function useless.
>
> See Rolands patch:
>
> https://lkml.org/lkml/2009/4/24/399 for a better implementation.
Thanks.  My apology for the lack of investigation.

>> +#endif	/* _ASM_ARM_SYSCALL_H */
>> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
>> index 7b5cc8d..2509028 100644
>> --- a/arch/arm/include/asm/thread_info.h
>> +++ b/arch/arm/include/asm/thread_info.h
>> @@ -139,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
>>   #define TIF_NEED_RESCHED	1
>>   #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
>>   #define TIF_SYSCALL_TRACE	8
>> +#define TIF_SYSCALL_TRACEPOINT	15
>>   #define TIF_POLLING_NRFLAG	16
>>   #define TIF_USING_IWMMXT	17
>>   #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>> @@ -150,11 +151,13 @@ extern void vfp_flush_hwstate(struct thread_info *);
>>   #define _TIF_NEED_RESCHED	(1<<  TIF_NEED_RESCHED)
>>   #define _TIF_NOTIFY_RESUME	(1<<  TIF_NOTIFY_RESUME)
>>   #define _TIF_SYSCALL_TRACE	(1<<  TIF_SYSCALL_TRACE)
>> +#define _TIF_SYSCALL_TRACEPOINT	(1<<  TIF_SYSCALL_TRACEPOINT)
>>   #define _TIF_POLLING_NRFLAG	(1<<  TIF_POLLING_NRFLAG)
>>   #define _TIF_USING_IWMMXT	(1<<  TIF_USING_IWMMXT)
>>   #define _TIF_FREEZE		(1<<  TIF_FREEZE)
>>   #define _TIF_RESTORE_SIGMASK	(1<<  TIF_RESTORE_SIGMASK)
>>   #define _TIF_SECCOMP		(1<<  TIF_SECCOMP)
>> +#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
> What does 'A' stand for?
'A' originally stands for AUDIT.  I should have used a better name as 
there is no _TIF_SYSCALL_AUDIT for ARM.
Probably it is better to define _TIF_WORK_SYSCALL_ENTRY and 
_TIF_WORK_SYSCALL_EXIT properly.

>>   /*
>>    * Change these and you break ASM code in entry-common.S
>> diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
>> index 4a11237..f4eac2d 100644
>> --- a/arch/arm/include/asm/unistd.h
>> +++ b/arch/arm/include/asm/unistd.h
>> @@ -405,6 +405,9 @@
>>   #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
>>   #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
>>
>> +#ifndef __ASSEMBLY__
>> +#define NR_syscalls		378
>> +#endif
>>   /*
>>    * The following SWIs are ARM private.
>>    */
>> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
>> index b2a27b6..a1577c2 100644
>> --- a/arch/arm/kernel/entry-common.S
>> +++ b/arch/arm/kernel/entry-common.S
>> @@ -87,7 +87,7 @@ ENTRY(ret_from_fork)
>>   	get_thread_info tsk
>>   	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
>>   	mov	why, #1
>> -	tst	r1, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
>> +	tst	r1, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>>   	beq	ret_slow_syscall
>>   	mov	r1, sp
>>   	mov	r0, #1				@ trace exit [IP = 1]
>> @@ -443,7 +443,7 @@ ENTRY(vector_swi)
>>   1:
>>   #endif
>>
>> -	tst	r10, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
>> +	tst	r10, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>>   	bne	__sys_trace
>>
>>   	cmp	scno, #NR_syscalls		@ check upper syscall limit
>> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
>> index 483727a..a690c9f 100644
>> --- a/arch/arm/kernel/ptrace.c
>> +++ b/arch/arm/kernel/ptrace.c
>> @@ -28,6 +28,9 @@
>>   #include<asm/system.h>
>>   #include<asm/traps.h>
>>
>> +#define CREATE_TRACE_POINTS
>> +#include<trace/events/syscalls.h>
>> +
>>   #define REG_PC	15
>>   #define REG_PSR	16
>>   /*
>> @@ -906,6 +909,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int
>> scno)
>>   {
>>   	unsigned long ip;
> Not used, you get the same info from 'why'.
Sorry. But I do not understand what you suggest here.  This is the 
existing code.

>
>> +	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) {
>> +		if (why)
>> +			trace_sys_exit(regs, regs->ARM_r0);
>> +		else
>> +			trace_sys_enter(regs, scno);
>> +	}
>> +
>>   	if (!test_thread_flag(TIF_SYSCALL_TRACE))
>>   		return scno;
>>   	if (!(current->ptrace&  PT_PTRACED))
>

Thanks,

Takuo



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-02  9:21   ` Takuo Koguchi
@ 2012-02-02 11:00     ` Indan Zupancic
  2012-02-02 11:10       ` Russell King - ARM Linux
  0 siblings, 1 reply; 11+ messages in thread
From: Indan Zupancic @ 2012-02-02 11:00 UTC (permalink / raw)
  To: Takuo Koguchi
  Cc: linux-kernel, masami.hiramatsu.pt, linux, rostedt, fweisbec,
	mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Thu, February 2, 2012 10:21, Takuo Koguchi wrote:
> Hello,
> I am glad to start getting responses.
> (2012/02/01 10:47), Indan Zupancic wrote:
>> Hello,
>>
>> CC'ing Roland.
>>
>> On Thu, December 1, 2011 12:01, takuo.koguchi.sw@hitachi.com wrote:
>>> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
>>> index 44789ef..84181b3 100644
>>> --- a/arch/arm/Kconfig
>>> +++ b/arch/arm/Kconfig
>>> @@ -13,6 +13,7 @@ config ARM
>>>   	select HAVE_KPROBES if !XIP_KERNEL
>>>   	select HAVE_KRETPROBES if (HAVE_KPROBES)
>>>   	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
>>> +	select HAVE_SYSCALL_TRACEPOINTS
>> Your patch totally ignores OABI, it should at least depend on CONFIG_AEABI
>> and on !CONFIG_OABI_COMPAT.
> Right.  As Russel King suggested, this patch depends on those configs
> until very large NR_syscalls is properly handled by ftrace.

It has nothing to do with large NR_syscalls. Supporting OABI is hard,
e.g. it doesn't put the syscall nr in r7, it's encoded as part of the
syscall instruction. Also the ABI for some system calls is different,
with different arg layouts (alignment of 64 bit args is different).

>
>>>   	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
>>>   	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
>>>   	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
>>> diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
>>> new file mode 100644
>>> index 0000000..cabeb67
>>> --- /dev/null
>>> +++ b/arch/arm/include/asm/syscall.h
>>> @@ -0,0 +1,45 @@
>>> +#ifndef _ASM_ARM_SYSCALL_H
>>> +#define _ASM_ARM_SYSCALL_H
>>> +
>>> +extern const unsigned long sys_call_table[];
>>> +
>>> +#include<linux/sched.h>
>>> +
>>> +static inline long syscall_get_nr(struct task_struct *task,
>>> +				  struct pt_regs *regs)
>>> +{
>>> +	return regs->ARM_r7;
>>> +}
>>> +
>>> +static inline long syscall_get_return_value(struct task_struct *task,
>>> +					    struct pt_regs *regs)
>>> +{
>>> +	return regs->ARM_r0;
>>> +}
>>> +
>>> +static inline void syscall_get_arguments(struct task_struct *task,
>>> +					 struct pt_regs *regs,
>>> +					 unsigned int i, unsigned int n,
>>> +					 unsigned long *args)
>>> +{
>>> +	BUG_ON(i);
>> This is unacceptable, that would make this function useless.
>>
>> See Rolands patch:
>>
>> https://lkml.org/lkml/2009/4/24/399 for a better implementation.
> Thanks.  My apology for the lack of investigation.

I got both links from Will Drewry, I'm just jumping in and passing it on.

>>> +#endif	/* _ASM_ARM_SYSCALL_H */
>>> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
>>> index 7b5cc8d..2509028 100644
>>> --- a/arch/arm/include/asm/thread_info.h
>>> +++ b/arch/arm/include/asm/thread_info.h
>>> @@ -139,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
>>>   #define TIF_NEED_RESCHED	1
>>>   #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
>>>   #define TIF_SYSCALL_TRACE	8
>>> +#define TIF_SYSCALL_TRACEPOINT	15
>>>   #define TIF_POLLING_NRFLAG	16
>>>   #define TIF_USING_IWMMXT	17
>>>   #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>>> @@ -150,11 +151,13 @@ extern void vfp_flush_hwstate(struct thread_info *);
>>>   #define _TIF_NEED_RESCHED	(1<<  TIF_NEED_RESCHED)
>>>   #define _TIF_NOTIFY_RESUME	(1<<  TIF_NOTIFY_RESUME)
>>>   #define _TIF_SYSCALL_TRACE	(1<<  TIF_SYSCALL_TRACE)
>>> +#define _TIF_SYSCALL_TRACEPOINT	(1<<  TIF_SYSCALL_TRACEPOINT)
>>>   #define _TIF_POLLING_NRFLAG	(1<<  TIF_POLLING_NRFLAG)
>>>   #define _TIF_USING_IWMMXT	(1<<  TIF_USING_IWMMXT)
>>>   #define _TIF_FREEZE		(1<<  TIF_FREEZE)
>>>   #define _TIF_RESTORE_SIGMASK	(1<<  TIF_RESTORE_SIGMASK)
>>>   #define _TIF_SECCOMP		(1<<  TIF_SECCOMP)
>>> +#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
>> What does 'A' stand for?
> 'A' originally stands for AUDIT.  I should have used a better name as
> there is no _TIF_SYSCALL_AUDIT for ARM.
> Probably it is better to define _TIF_WORK_SYSCALL_ENTRY and
> _TIF_WORK_SYSCALL_EXIT properly.

That would be much better.

>>>   /*
>>>    * Change these and you break ASM code in entry-common.S
>>> diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
>>> index 4a11237..f4eac2d 100644
>>> --- a/arch/arm/include/asm/unistd.h
>>> +++ b/arch/arm/include/asm/unistd.h
>>> @@ -405,6 +405,9 @@
>>>   #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
>>>   #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
>>>
>>> +#ifndef __ASSEMBLY__
>>> +#define NR_syscalls		378
>>> +#endif
>>>   /*
>>>    * The following SWIs are ARM private.
>>>    */
>>> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
>>> index b2a27b6..a1577c2 100644
>>> --- a/arch/arm/kernel/entry-common.S
>>> +++ b/arch/arm/kernel/entry-common.S
>>> @@ -87,7 +87,7 @@ ENTRY(ret_from_fork)
>>>   	get_thread_info tsk
>>>   	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
>>>   	mov	why, #1
>>> -	tst	r1, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
>>> +	tst	r1, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>>>   	beq	ret_slow_syscall
>>>   	mov	r1, sp
>>>   	mov	r0, #1				@ trace exit [IP = 1]
>>> @@ -443,7 +443,7 @@ ENTRY(vector_swi)
>>>   1:
>>>   #endif
>>>
>>> -	tst	r10, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
>>> +	tst	r10, #_TIF_SYSCALL_T_OR_A	@ are we tracing syscalls?
>>>   	bne	__sys_trace
>>>
>>>   	cmp	scno, #NR_syscalls		@ check upper syscall limit
>>> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
>>> index 483727a..a690c9f 100644
>>> --- a/arch/arm/kernel/ptrace.c
>>> +++ b/arch/arm/kernel/ptrace.c
>>> @@ -28,6 +28,9 @@
>>>   #include<asm/system.h>
>>>   #include<asm/traps.h>
>>>
>>> +#define CREATE_TRACE_POINTS
>>> +#include<trace/events/syscalls.h>
>>> +
>>>   #define REG_PC	15
>>>   #define REG_PSR	16
>>>   /*
>>> @@ -906,6 +909,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int
>>> scno)
>>>   {
>>>   	unsigned long ip;
>> Not used, you get the same info from 'why'.
> Sorry. But I do not understand what you suggest here.  This is the
> existing code.

Sorry, I missed that.

>>
>>> +	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) {
>>> +		if (why)
>>> +			trace_sys_exit(regs, regs->ARM_r0);
>>> +		else
>>> +			trace_sys_enter(regs, scno);
>>> +	}
>>> +
>>>   	if (!test_thread_flag(TIF_SYSCALL_TRACE))
>>>   		return scno;
>>>   	if (!(current->ptrace&  PT_PTRACED))
>>
>
> Thanks,
>
> Takuo
>

Greetings,

Indan



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-02 11:00     ` Indan Zupancic
@ 2012-02-02 11:10       ` Russell King - ARM Linux
  2012-02-02 23:38         ` Indan Zupancic
  0 siblings, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2012-02-02 11:10 UTC (permalink / raw)
  To: Indan Zupancic
  Cc: Takuo Koguchi, linux-kernel, masami.hiramatsu.pt, rostedt,
	fweisbec, mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Thu, Feb 02, 2012 at 12:00:30PM +0100, Indan Zupancic wrote:
> On Thu, February 2, 2012 10:21, Takuo Koguchi wrote:
> > Right.  As Russel King suggested, this patch depends on those configs
> > until very large NR_syscalls is properly handled by ftrace.
> 
> It has nothing to do with large NR_syscalls. Supporting OABI is hard,

That's rubbish if you're doing things correctly, where correctly is
defined as 'not assuming that the syscall number is in r7, but reading
it from the thread_info->syscall member.

> e.g. it doesn't put the syscall nr in r7, it's encoded as part of the
> syscall instruction. Also the ABI for some system calls is different,
> with different arg layouts (alignment of 64 bit args is different).

OABI is a lot more simple because you know how the args are layed out
without needing a table to work out where the padding is.  You know
if you have a 64-bit argument that it follows immediately after a
32-bit argument without needing any alignment.

So:

next_arg_reg(current_arg_reg, next_size, oabi)
{
	if (oabi) {
		/* OABI case */
		next_arg_reg = current_arg_reg + 1;
	} else {
		/* EABI case */
		next_arg_reg = current_arg_reg + 1;
		if (next_size == 64 && next_arg_reg & 1)
			next_arg_reg++;
	}
	return next_arg_reg;
}

Notice how the EABI case is a lot more complicated by the alignment
rules than the OABI - not only do you need something like the above
but also you need a table to describe the size of the arguments for
every syscall in the system.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-02 11:10       ` Russell King - ARM Linux
@ 2012-02-02 23:38         ` Indan Zupancic
  2012-02-02 23:41           ` Roland McGrath
  2012-02-03  0:32           ` Russell King - ARM Linux
  0 siblings, 2 replies; 11+ messages in thread
From: Indan Zupancic @ 2012-02-02 23:38 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Takuo Koguchi, linux-kernel, masami.hiramatsu.pt, rostedt,
	fweisbec, mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Thu, February 2, 2012 12:10, Russell King - ARM Linux wrote:
> On Thu, Feb 02, 2012 at 12:00:30PM +0100, Indan Zupancic wrote:
>> On Thu, February 2, 2012 10:21, Takuo Koguchi wrote:
>> > Right.  As Russel King suggested, this patch depends on those configs
>> > until very large NR_syscalls is properly handled by ftrace.
>>
>> It has nothing to do with large NR_syscalls. Supporting OABI is hard,
>
> That's rubbish if you're doing things correctly, where correctly is
> defined as 'not assuming that the syscall number is in r7, but reading
> it from the thread_info->syscall member.

It was my impression that thread_info->syscall is only set in the ptrace path.

Of course this can be changed, but it's tricky to do without adding
instructions to the syscall entry path. One way would be to have a
flag somewhere saying whether r7 or thread_info->syscall should be
used, and also set thread_info->syscall for OABI calls. That at least
won't slow down the EABI path.

>> e.g. it doesn't put the syscall nr in r7, it's encoded as part of the
>> syscall instruction. Also the ABI for some system calls is different,
>> with different arg layouts (alignment of 64 bit args is different).
>
> OABI is a lot more simple because you know how the args are layed out
> without needing a table to work out where the padding is.  You know
> if you have a 64-bit argument that it follows immediately after a
> 32-bit argument without needing any alignment.
>
> So:
>
> next_arg_reg(current_arg_reg, next_size, oabi)
> {
> 	if (oabi) {
> 		/* OABI case */
> 		next_arg_reg = current_arg_reg + 1;
> 	} else {
> 		/* EABI case */
> 		next_arg_reg = current_arg_reg + 1;
> 		if (next_size == 64 && next_arg_reg & 1)
> 			next_arg_reg++;
> 	}
> 	return next_arg_reg;
> }
>
> Notice how the EABI case is a lot more complicated by the alignment
> rules than the OABI - not only do you need something like the above

Only when you go through the args sequentially like that.

> but also you need a table to describe the size of the arguments for
> every syscall in the system.

You need that anyway if you want to handle 64-bit data as one arg
instead of two 32-bit args, no matter if it is OABI or EABI.

Like Roland said in his reply to this issue, just return the registers
and let the caller interpret the data. So ignore 64 bit arguments,
just pretend they are two 32 bit values, which they actually are anyway.
And yes you have unused padding args then, but so what? The argument
layout and meaning is syscall specific anyway, so the caller needs
specific knowledge already.

You can't return whole 64-bit args anyway except if you pretend all
arguments are 64-bit, which seems like a bad idea.

So if you have something like:

int sys_foo(int x, uint64 y);

x is arg 1, and y is arg 3 + 4 in EABI. But pretending that y is just
arg 2 makes no sense in generic syscall handling. And as you need some
syscall specific table anyway, it doesn't matter much if you need to
account for alignment or not.

If only EABI is supported everything is simple, because everyone knows
what to expect. If OABI is also supported then more changes are needed:
The above, but also some way to tell ptrace and other users if it was
an EABI or OABI system call. And currently with ptrace there is no race
free way of figuring out the OABI system call number from user space.
All in all starting with just EABI support and avoiding all the OABI
problems seems the best option.

Greetings,

Indan



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-02 23:38         ` Indan Zupancic
@ 2012-02-02 23:41           ` Roland McGrath
  2012-02-03  0:32           ` Russell King - ARM Linux
  1 sibling, 0 replies; 11+ messages in thread
From: Roland McGrath @ 2012-02-02 23:41 UTC (permalink / raw)
  To: Indan Zupancic
  Cc: Russell King - ARM Linux, Takuo Koguchi, linux-kernel,
	masami.hiramatsu.pt, rostedt, fweisbec, mingo, jbaron,
	yrl.pp-manager.tt

On Thu, Feb 2, 2012 at 3:38 PM, Indan Zupancic <indan@nul.nu> wrote:
> It was my impression that thread_info->syscall is only set in the ptrace path.

The asm-generic/syscall.h kerneldoc says these functions should only be
used in the trace/audit path, not the fast path.  I honestly don't recall
any more why I made that a requirement of the interface, but some machines
having this sort of constraint was probably it.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-02 23:38         ` Indan Zupancic
  2012-02-02 23:41           ` Roland McGrath
@ 2012-02-03  0:32           ` Russell King - ARM Linux
  2012-02-03  1:58             ` Indan Zupancic
  1 sibling, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2012-02-03  0:32 UTC (permalink / raw)
  To: Indan Zupancic
  Cc: Takuo Koguchi, linux-kernel, masami.hiramatsu.pt, rostedt,
	fweisbec, mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Fri, Feb 03, 2012 at 12:38:58AM +0100, Indan Zupancic wrote:
> On Thu, February 2, 2012 12:10, Russell King - ARM Linux wrote:
> > On Thu, Feb 02, 2012 at 12:00:30PM +0100, Indan Zupancic wrote:
> >> On Thu, February 2, 2012 10:21, Takuo Koguchi wrote:
> >> > Right.  As Russel King suggested, this patch depends on those configs
> >> > until very large NR_syscalls is properly handled by ftrace.
> >>
> >> It has nothing to do with large NR_syscalls. Supporting OABI is hard,
> >
> > That's rubbish if you're doing things correctly, where correctly is
> > defined as 'not assuming that the syscall number is in r7, but reading
> > it from the thread_info->syscall member.
> 
> It was my impression that thread_info->syscall is only set in the ptrace
> path.

Well, as ptrace is the only syscall tracing we have at the moment in
the kernel, then that's how its done.

What we don't have there for ptrace is a method to read that, so
tools such as strace have had to fiddle about to discover the syscall
number.  That's something I have had a patch for some time to 'fix'
(a PTRACE_GET_SYSCALL to complement PTRACE_SET_SYSCALL) but haven't
had the motivation to try to fix that.

> Of course this can be changed, but it's tricky to do without adding
> instructions to the syscall entry path. One way would be to have a
> flag somewhere saying whether r7 or thread_info->syscall should be
> used, and also set thread_info->syscall for OABI calls. That at least
> won't slow down the EABI path.

Why would you need to change the entry path?  We already have a hook
out of the syscall path for doing tracing (via syscall_trace()) but
the fact that it sits in ptrace.c isn't an argument to create something
new.

> > Notice how the EABI case is a lot more complicated by the alignment
> > rules than the OABI - not only do you need something like the above
> 
> Only when you go through the args sequentially like that.

If you don't go through the args sequentially, then your only way of
deciding EABI args is via a table which describes the location of each
argument in the register set.

> If only EABI is supported everything is simple, because everyone knows
> what to expect. If OABI is also supported then more changes are needed:
> The above, but also some way to tell ptrace and other users if it was
> an EABI or OABI system call. And currently with ptrace there is no race
> free way of figuring out the OABI system call number from user space.

Absolute tosh, that really is.  Of course there's a way of figuring it
out.  Tools such as strace have been doing it for _years_ and have been
doing it extremely well.

Sure, some other thread may stamp over the syscall after you've entered
the kernel, but that's a bug in any case - if programs are doing that
then they're racy, and can't predict what system call they're going to
invoke.  So really that kind of race is not one to be concerned about.

And, in any case, using what's already there in syscall_trace() already
gives you a way to store and manipulate the syscall number.  So really
there's no argument over obtaining the syscall number from OABI at all.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS
  2012-02-03  0:32           ` Russell King - ARM Linux
@ 2012-02-03  1:58             ` Indan Zupancic
  0 siblings, 0 replies; 11+ messages in thread
From: Indan Zupancic @ 2012-02-03  1:58 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Takuo Koguchi, linux-kernel, masami.hiramatsu.pt, rostedt,
	fweisbec, mingo, jbaron, yrl.pp-manager.tt, mcgrathr

On Fri, February 3, 2012 01:32, Russell King - ARM Linux wrote:
> On Fri, Feb 03, 2012 at 12:38:58AM +0100, Indan Zupancic wrote:
>> On Thu, February 2, 2012 12:10, Russell King - ARM Linux wrote:
>> > On Thu, Feb 02, 2012 at 12:00:30PM +0100, Indan Zupancic wrote:
>> >> On Thu, February 2, 2012 10:21, Takuo Koguchi wrote:
>> >> > Right.  As Russel King suggested, this patch depends on those configs
>> >> > until very large NR_syscalls is properly handled by ftrace.
>> >>
>> >> It has nothing to do with large NR_syscalls. Supporting OABI is hard,
>> >
>> > That's rubbish if you're doing things correctly, where correctly is
>> > defined as 'not assuming that the syscall number is in r7, but reading
>> > it from the thread_info->syscall member.
>>
>> It was my impression that thread_info->syscall is only set in the ptrace
>> path.
>
> Well, as ptrace is the only syscall tracing we have at the moment in
> the kernel, then that's how its done.

Fair enough.

> What we don't have there for ptrace is a method to read that, so
> tools such as strace have had to fiddle about to discover the syscall
> number.  That's something I have had a patch for some time to 'fix'
> (a PTRACE_GET_SYSCALL to complement PTRACE_SET_SYSCALL) but haven't
> had the motivation to try to fix that.
>
>> Of course this can be changed, but it's tricky to do without adding
>> instructions to the syscall entry path. One way would be to have a
>> flag somewhere saying whether r7 or thread_info->syscall should be
>> used, and also set thread_info->syscall for OABI calls. That at least
>> won't slow down the EABI path.
>
> Why would you need to change the entry path?  We already have a hook
> out of the syscall path for doing tracing (via syscall_trace()) but
> the fact that it sits in ptrace.c isn't an argument to create something
> new.

Will Drewry has a seccomp BPF syscall filtering patch which needs syscall.h,
and then there's /proc/$PID/syscall, coredumps and ftrace that need a way
to get the syscall number. So yes, right now it's just ptrace, but a few
features would benefit from having a way to know the current syscall number
outside of the ptrace path.

>> > Notice how the EABI case is a lot more complicated by the alignment
>> > rules than the OABI - not only do you need something like the above
>>
>> Only when you go through the args sequentially like that.
>
> If you don't go through the args sequentially, then your only way of
> deciding EABI args is via a table which describes the location of each
> argument in the register set.

But you need something like that anyway, even if you do go sequentially
through the args. If you do anything with the args, you need to know what
they mean, and that is system call specific. If you store the meaning of
an arg then you automatically know its location. Having 64-bit args on a
32-bit system isn't something you can handle automatically and generally
without lookup tables or some other info.

syscall.h is the wrong place to handle 64-bit args specially, it should
just expose the raw syscall interface without interpretation.

>> If only EABI is supported everything is simple, because everyone knows
>> what to expect. If OABI is also supported then more changes are needed:
>> The above, but also some way to tell ptrace and other users if it was
>> an EABI or OABI system call. And currently with ptrace there is no race
>> free way of figuring out the OABI system call number from user space.
>
> Absolute tosh, that really is.  Of course there's a way of figuring it
> out.  Tools such as strace have been doing it for _years_ and have been
> doing it extremely well.

I said "race free". Reading processs memory isn't race free. Other than
that detail, yes it can be done.

> Sure, some other thread may stamp over the syscall after you've entered
> the kernel, but that's a bug in any case - if programs are doing that
> then they're racy, and can't predict what system call they're going to
> invoke.  So really that kind of race is not one to be concerned about.

Except if you use ptrace for anything security sensitive, like process
jailing.

> And, in any case, using what's already there in syscall_trace() already
> gives you a way to store and manipulate the syscall number.  So really
> there's no argument over obtaining the syscall number from OABI at all.

That would at least fix the race: Just read the info from user memory, and
afterwards set the system call to the expected one. Only problem left is
assuring that it is an EABI call or an OABI call. System call arguments
are sometimes different, if they are different in a security sensitive way
then there's still a problem. But looking at the affected system calls it
seems it's fine. So you're right that no extra OABI info needs to be passed
on. For the rare security sensitive software where it would matter it can
always not support CONFIG_OABI_COMPAT kernels.

Greetings,

Indan



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-02-03  1:58 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-01 11:01 [PATCH] ARM: Wire up HAVE_SYSCALL_TRACEPOINTS takuo.koguchi.sw
2012-02-01  1:47 ` Indan Zupancic
2012-02-01  2:09   ` Steven Rostedt
2012-02-02  9:21   ` Takuo Koguchi
2012-02-02 11:00     ` Indan Zupancic
2012-02-02 11:10       ` Russell King - ARM Linux
2012-02-02 23:38         ` Indan Zupancic
2012-02-02 23:41           ` Roland McGrath
2012-02-03  0:32           ` Russell King - ARM Linux
2012-02-03  1:58             ` Indan Zupancic
2012-02-01  9:46 ` Russell King - ARM Linux

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).