* [PATCH v2] m68k: save extra registers on more syscall entry points
@ 2021-06-18 1:27 Michael Schmitz
2021-06-18 17:17 ` Linus Torvalds
2021-06-18 18:39 ` Eric W. Biederman
0 siblings, 2 replies; 12+ messages in thread
From: Michael Schmitz @ 2021-06-18 1:27 UTC (permalink / raw)
To: geert, linux-arch, linux-m68k; +Cc: ebiederm, torvalds, schwab, Michael Schmitz
Multiple syscalls are liable to PTRACE_EVENT_* tracing and thus
require full user context saved on the kernel stack. We only
save those registers not preserved by C code currently.
do_exit() calls ptrace_stop() which may require access to all
saved registers. Add code to save additional registers in the
switch_stack struct for exit and exit_group syscalls (similar
to what is already done for fork, vfork and clone3). According
to Eric's analysis, execve and execveat can be traced as well,
so have been given the same treatment.
I'm not sure what to do about io_uring_setup - added code to
save full context on entry, and some more code to save/restore
additional registers on the switch stack around the kworker thread
call in ret_from_kernel_thread, but this may well be redundant.
I'd need specific test cases to exercise io_uring_setup in
particular, to see whether stack offsets for pt_regs and the
switch stack have been messed up.
Boot tested on ARAnym - earlier version including io_uring entry
save also tested on real hardware unter heavy IO load.
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
--
Changes from v1:
- added exec, execve and io_uring_setup syscalls
- save extra registers around kworker thread calls
---
arch/m68k/kernel/entry.S | 41 ++++++++++++++++++++++++++++++++++-
arch/m68k/kernel/process.c | 39 +++++++++++++++++++++++++++++++++
arch/m68k/kernel/syscalls/syscall.tbl | 10 ++++-----
3 files changed, 84 insertions(+), 6 deletions(-)
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 9dd76fb..02bf2a1 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -76,6 +76,41 @@ ENTRY(__sys_clone3)
lea %sp@(28),%sp
rts
+ENTRY(__sys_exit)
+ SAVE_SWITCH_STACK
+ pea %sp@(SWITCH_STACK_SIZE)
+ jbsr m68k_exit
+ lea %sp@(28),%sp
+ rts
+
+ENTRY(__sys_exit_group)
+ SAVE_SWITCH_STACK
+ pea %sp@(SWITCH_STACK_SIZE)
+ jbsr m68k_exit_group
+ lea %sp@(28),%sp
+ rts
+
+ENTRY(__sys_execve)
+ SAVE_SWITCH_STACK
+ pea %sp@(SWITCH_STACK_SIZE)
+ jbsr m68k_execve
+ lea %sp@(28),%sp
+ rts
+
+ENTRY(__sys_execveat)
+ SAVE_SWITCH_STACK
+ pea %sp@(SWITCH_STACK_SIZE)
+ jbsr m68k_execveat
+ lea %sp@(28),%sp
+ rts
+
+ENTRY(__sys_io_uring_setup)
+ SAVE_SWITCH_STACK
+ pea %sp@(SWITCH_STACK_SIZE)
+ jbsr m68k_io_uring_setup
+ lea %sp@(28),%sp
+ rts
+
ENTRY(sys_sigreturn)
SAVE_SWITCH_STACK
movel %sp,%sp@- | switch_stack pointer
@@ -123,9 +158,13 @@ ENTRY(ret_from_kernel_thread)
| a3 contains the kernel thread payload, d7 - its argument
movel %d1,%sp@-
jsr schedule_tail
- movel %d7,(%sp)
+ addql #4,%sp
+ | kernel threads can be traced!
+ SAVE_SWITCH_STACK
+ movel %d7,%sp@-
jsr %a3@
addql #4,%sp
+ RESTORE_SWITCH_STACK
jra ret_from_exception
#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index da83cc8..298ac99 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -138,6 +138,45 @@ asmlinkage int m68k_clone3(struct pt_regs *regs)
return sys_clone3((struct clone_args __user *)regs->d1, regs->d2);
}
+/*
+ * Because extra registers are saved on the stack after the sys_exit()
+ * arguments, this C wrapper extracts them from pt_regs * and then calls the
+ * generic sys_exit() implementation.
+ */
+asmlinkage int m68k_exit(struct pt_regs *regs)
+{
+ return sys_exit(regs->d1);
+}
+
+/* Same for sys_exit_group ... */
+asmlinkage int m68k_exit_group(struct pt_regs *regs)
+{
+ return sys_exit_group(regs->d1);
+}
+
+/* Same for sys_exit_group ... */
+asmlinkage int m68k_execve(struct pt_regs *regs)
+{
+ return sys_execve((const char __user *)regs->d1,
+ (const char __user *const __user*)regs->d2,
+ (const char __user *const __user*)regs->d3);
+}
+
+/* Same for sys_exit_group ... */
+asmlinkage int m68k_execveat(struct pt_regs *regs)
+{
+ return sys_execveat(regs->d1, (const char __user *)regs->d2,
+ (const char __user *const __user*)regs->d3,
+ (const char __user *const __user*)regs->d4,
+ regs->d5);
+}
+
+/* and for sys_io_uring_create */
+asmlinkage int m68k_io_uring_setup(struct pt_regs *regs)
+{
+ return sys_io_uring_setup(regs->d1,(struct io_uring_params __user *)regs->d2);
+}
+
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct task_struct *p, unsigned long tls)
{
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 0dd019d..4a1ba1d 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -8,7 +8,7 @@
# The <abi> is always "common" for this file
#
0 common restart_syscall sys_restart_syscall
-1 common exit sys_exit
+1 common exit __sys_exit
2 common fork __sys_fork
3 common read sys_read
4 common write sys_write
@@ -18,7 +18,7 @@
8 common creat sys_creat
9 common link sys_link
10 common unlink sys_unlink
-11 common execve sys_execve
+11 common execve __sys_execve
12 common chdir sys_chdir
13 common time sys_time32
14 common mknod sys_mknod
@@ -254,7 +254,7 @@
244 common io_submit sys_io_submit
245 common io_cancel sys_io_cancel
246 common fadvise64 sys_fadvise64
-247 common exit_group sys_exit_group
+247 common exit_group __sys_exit_group
248 common lookup_dcookie sys_lookup_dcookie
249 common epoll_create sys_epoll_create
250 common epoll_ctl sys_epoll_ctl
@@ -362,7 +362,7 @@
352 common getrandom sys_getrandom
353 common memfd_create sys_memfd_create
354 common bpf sys_bpf
-355 common execveat sys_execveat
+355 common execveat __sys_execveat
356 common socket sys_socket
357 common socketpair sys_socketpair
358 common bind sys_bind
@@ -424,7 +424,7 @@
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
424 common pidfd_send_signal sys_pidfd_send_signal
-425 common io_uring_setup sys_io_uring_setup
+425 common io_uring_setup __sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
428 common open_tree sys_open_tree
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 1:27 [PATCH v2] m68k: save extra registers on more syscall entry points Michael Schmitz
@ 2021-06-18 17:17 ` Linus Torvalds
2021-06-18 22:34 ` Michael Schmitz
2021-06-18 18:39 ` Eric W. Biederman
1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2021-06-18 17:17 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
On Thu, Jun 17, 2021 at 6:27 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>
> I'd need specific test cases to exercise io_uring_setup in
> particular, to see whether stack offsets for pt_regs and the
> switch stack have been messed up.
I don't think doing this for io_uring_setup() will help any - the
problem is not in that system call thread itself, it's purely in the
kernel thread that it then starts.
And the fact that io_uring_setup() has the full stack frame won't then
help that kernel thread, for exactly the same reason that was true on
alpha: copy_thread() will actually _create_ the full stack, but when
we switch to it (through switch_to() -> resume()), the resume code in
arch/m68k/kernel/entry.S will switch to that stack, and then do
RESTORE_SWITCH_STACK which will consume it again.
So I think m68k should do the same thing as Eric's patch for alpha: do
the full stack for exit and exit_group, and for kernel thread creation
- or at least PF_IO_WORKER), do an extra stack frame on the kernel
stack, so that even after resume() we'll still have another copy of
the frame.
The alternative would be to do what x86 does: see __switch_to_asm().
Instead of doing that normal kernel entry/exit stack (with
SAVE_SWITCH_STACK and RESTORE_SWITCH_STACK), x86 has it's own very
special "only for task switching" stack frame thing, and leaves the
pt_regs etc entirely alone.
Of course, that "only for task switching" is _kind_of_ what the whole
SAVE_SWITCH_STACK is for - it's part of the name after all - but the
difference is that on alpha and m68k, it's also (and primarily) the
"full state" stack frame, used not just for task switching, but for
signal handling state and for ptrace too.
So in theory, it would be good to split this up:
(a) have the signal handling and ptrace stack be one thing (maybe
rename the "SWITCH" part of the operations to something else, like
"EXTRA" or "SIGNAL" or whatever)
(b) make a separate "for task switching only" stack frame, which is
used by that switch_to() -> resume() sequence, and that copy_thread()
has a "struct inactive_task_frame" thing for..
That way, the pt_regs/extra_regs stack frame that copy_thread()
creates wouldn't then be eaten up by the task switch.
But while that sounds like the right thing to do, it would be a rather
bigger change. I'm not entirely sure it's worth it.
Eric, comments?
Linus
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 1:27 [PATCH v2] m68k: save extra registers on more syscall entry points Michael Schmitz
2021-06-18 17:17 ` Linus Torvalds
@ 2021-06-18 18:39 ` Eric W. Biederman
2021-06-18 19:06 ` Michael Schmitz
1 sibling, 1 reply; 12+ messages in thread
From: Eric W. Biederman @ 2021-06-18 18:39 UTC (permalink / raw)
To: Michael Schmitz; +Cc: geert, linux-arch, linux-m68k, torvalds, schwab
Michael Schmitz <schmitzmic@gmail.com> writes:
> Multiple syscalls are liable to PTRACE_EVENT_* tracing and thus
> require full user context saved on the kernel stack. We only
> save those registers not preserved by C code currently.
>
> do_exit() calls ptrace_stop() which may require access to all
> saved registers. Add code to save additional registers in the
> switch_stack struct for exit and exit_group syscalls (similar
> to what is already done for fork, vfork and clone3). According
> to Eric's analysis, execve and execveat can be traced as well,
> so have been given the same treatment.
>
> I'm not sure what to do about io_uring_setup - added code to
> save full context on entry, and some more code to save/restore
> additional registers on the switch stack around the kworker thread
> call in ret_from_kernel_thread, but this may well be redundant.
>
> I'd need specific test cases to exercise io_uring_setup in
> particular, to see whether stack offsets for pt_regs and the
> switch stack have been messed up.
>
> Boot tested on ARAnym - earlier version including io_uring entry
> save also tested on real hardware unter heavy IO load.
Are the registers that SAVE_SWITCH_STACK saves on m68k caller saved
registers?
The fact that the registers are saved somewhere on the stack on alpha is
what forces the registers to be saved before calling exit and exec.
Eric
> CC: Eric W. Biederman <ebiederm@xmission.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Andreas Schwab <schwab@linux-m68k.org>
> Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
>
> --
> Changes from v1:
>
> - added exec, execve and io_uring_setup syscalls
> - save extra registers around kworker thread calls
> ---
> arch/m68k/kernel/entry.S | 41 ++++++++++++++++++++++++++++++++++-
> arch/m68k/kernel/process.c | 39 +++++++++++++++++++++++++++++++++
> arch/m68k/kernel/syscalls/syscall.tbl | 10 ++++-----
> 3 files changed, 84 insertions(+), 6 deletions(-)
>
> diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
> index 9dd76fb..02bf2a1 100644
> --- a/arch/m68k/kernel/entry.S
> +++ b/arch/m68k/kernel/entry.S
> @@ -76,6 +76,41 @@ ENTRY(__sys_clone3)
> lea %sp@(28),%sp
> rts
>
> +ENTRY(__sys_exit)
> + SAVE_SWITCH_STACK
> + pea %sp@(SWITCH_STACK_SIZE)
> + jbsr m68k_exit
> + lea %sp@(28),%sp
> + rts
> +
> +ENTRY(__sys_exit_group)
> + SAVE_SWITCH_STACK
> + pea %sp@(SWITCH_STACK_SIZE)
> + jbsr m68k_exit_group
> + lea %sp@(28),%sp
> + rts
> +
> +ENTRY(__sys_execve)
> + SAVE_SWITCH_STACK
> + pea %sp@(SWITCH_STACK_SIZE)
> + jbsr m68k_execve
> + lea %sp@(28),%sp
> + rts
> +
> +ENTRY(__sys_execveat)
> + SAVE_SWITCH_STACK
> + pea %sp@(SWITCH_STACK_SIZE)
> + jbsr m68k_execveat
> + lea %sp@(28),%sp
> + rts
> +
> +ENTRY(__sys_io_uring_setup)
> + SAVE_SWITCH_STACK
> + pea %sp@(SWITCH_STACK_SIZE)
> + jbsr m68k_io_uring_setup
> + lea %sp@(28),%sp
> + rts
> +
> ENTRY(sys_sigreturn)
> SAVE_SWITCH_STACK
> movel %sp,%sp@- | switch_stack pointer
> @@ -123,9 +158,13 @@ ENTRY(ret_from_kernel_thread)
> | a3 contains the kernel thread payload, d7 - its argument
> movel %d1,%sp@-
> jsr schedule_tail
> - movel %d7,(%sp)
> + addql #4,%sp
> + | kernel threads can be traced!
> + SAVE_SWITCH_STACK
> + movel %d7,%sp@-
> jsr %a3@
> addql #4,%sp
> + RESTORE_SWITCH_STACK
> jra ret_from_exception
>
> #if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
> diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
> index da83cc8..298ac99 100644
> --- a/arch/m68k/kernel/process.c
> +++ b/arch/m68k/kernel/process.c
> @@ -138,6 +138,45 @@ asmlinkage int m68k_clone3(struct pt_regs *regs)
> return sys_clone3((struct clone_args __user *)regs->d1, regs->d2);
> }
>
> +/*
> + * Because extra registers are saved on the stack after the sys_exit()
> + * arguments, this C wrapper extracts them from pt_regs * and then calls the
> + * generic sys_exit() implementation.
> + */
> +asmlinkage int m68k_exit(struct pt_regs *regs)
> +{
> + return sys_exit(regs->d1);
> +}
> +
> +/* Same for sys_exit_group ... */
> +asmlinkage int m68k_exit_group(struct pt_regs *regs)
> +{
> + return sys_exit_group(regs->d1);
> +}
> +
> +/* Same for sys_exit_group ... */
> +asmlinkage int m68k_execve(struct pt_regs *regs)
> +{
> + return sys_execve((const char __user *)regs->d1,
> + (const char __user *const __user*)regs->d2,
> + (const char __user *const __user*)regs->d3);
> +}
> +
> +/* Same for sys_exit_group ... */
> +asmlinkage int m68k_execveat(struct pt_regs *regs)
> +{
> + return sys_execveat(regs->d1, (const char __user *)regs->d2,
> + (const char __user *const __user*)regs->d3,
> + (const char __user *const __user*)regs->d4,
> + regs->d5);
> +}
> +
> +/* and for sys_io_uring_create */
> +asmlinkage int m68k_io_uring_setup(struct pt_regs *regs)
> +{
> + return sys_io_uring_setup(regs->d1,(struct io_uring_params __user *)regs->d2);
> +}
> +
> int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> struct task_struct *p, unsigned long tls)
> {
> diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
> index 0dd019d..4a1ba1d 100644
> --- a/arch/m68k/kernel/syscalls/syscall.tbl
> +++ b/arch/m68k/kernel/syscalls/syscall.tbl
> @@ -8,7 +8,7 @@
> # The <abi> is always "common" for this file
> #
> 0 common restart_syscall sys_restart_syscall
> -1 common exit sys_exit
> +1 common exit __sys_exit
> 2 common fork __sys_fork
> 3 common read sys_read
> 4 common write sys_write
> @@ -18,7 +18,7 @@
> 8 common creat sys_creat
> 9 common link sys_link
> 10 common unlink sys_unlink
> -11 common execve sys_execve
> +11 common execve __sys_execve
> 12 common chdir sys_chdir
> 13 common time sys_time32
> 14 common mknod sys_mknod
> @@ -254,7 +254,7 @@
> 244 common io_submit sys_io_submit
> 245 common io_cancel sys_io_cancel
> 246 common fadvise64 sys_fadvise64
> -247 common exit_group sys_exit_group
> +247 common exit_group __sys_exit_group
> 248 common lookup_dcookie sys_lookup_dcookie
> 249 common epoll_create sys_epoll_create
> 250 common epoll_ctl sys_epoll_ctl
> @@ -362,7 +362,7 @@
> 352 common getrandom sys_getrandom
> 353 common memfd_create sys_memfd_create
> 354 common bpf sys_bpf
> -355 common execveat sys_execveat
> +355 common execveat __sys_execveat
> 356 common socket sys_socket
> 357 common socketpair sys_socketpair
> 358 common bind sys_bind
> @@ -424,7 +424,7 @@
> 422 common futex_time64 sys_futex
> 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
> 424 common pidfd_send_signal sys_pidfd_send_signal
> -425 common io_uring_setup sys_io_uring_setup
> +425 common io_uring_setup __sys_io_uring_setup
> 426 common io_uring_enter sys_io_uring_enter
> 427 common io_uring_register sys_io_uring_register
> 428 common open_tree sys_open_tree
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 18:39 ` Eric W. Biederman
@ 2021-06-18 19:06 ` Michael Schmitz
0 siblings, 0 replies; 12+ messages in thread
From: Michael Schmitz @ 2021-06-18 19:06 UTC (permalink / raw)
To: Eric W. Biederman; +Cc: geert, linux-arch, linux-m68k, torvalds, schwab
Hi Eric,
Am 19.06.2021 um 06:39 schrieb Eric W. Biederman:
> Michael Schmitz <schmitzmic@gmail.com> writes:
>
>> Multiple syscalls are liable to PTRACE_EVENT_* tracing and thus
>> require full user context saved on the kernel stack. We only
>> save those registers not preserved by C code currently.
>>
>> do_exit() calls ptrace_stop() which may require access to all
>> saved registers. Add code to save additional registers in the
>> switch_stack struct for exit and exit_group syscalls (similar
>> to what is already done for fork, vfork and clone3). According
>> to Eric's analysis, execve and execveat can be traced as well,
>> so have been given the same treatment.
>>
>> I'm not sure what to do about io_uring_setup - added code to
>> save full context on entry, and some more code to save/restore
>> additional registers on the switch stack around the kworker thread
>> call in ret_from_kernel_thread, but this may well be redundant.
>>
>> I'd need specific test cases to exercise io_uring_setup in
>> particular, to see whether stack offsets for pt_regs and the
>> switch stack have been messed up.
>>
>> Boot tested on ARAnym - earlier version including io_uring entry
>> save also tested on real hardware unter heavy IO load.
>
>
> Are the registers that SAVE_SWITCH_STACK saves on m68k caller saved
> registers?
Yes, see comments in arch/m68k/include/asm/entry.h. From my
understanding of arch/alpha/kernel/entry.S, that's the same on alpha.
Cheers,
Michael
> The fact that the registers are saved somewhere on the stack on alpha is
> what forces the registers to be saved before calling exit and exec.
>
> Eric
>
>
>> CC: Eric W. Biederman <ebiederm@xmission.com>
>> CC: Linus Torvalds <torvalds@linux-foundation.org>
>> CC: Andreas Schwab <schwab@linux-m68k.org>
>> Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
>>
>> --
>> Changes from v1:
>>
>> - added exec, execve and io_uring_setup syscalls
>> - save extra registers around kworker thread calls
>> ---
>> arch/m68k/kernel/entry.S | 41 ++++++++++++++++++++++++++++++++++-
>> arch/m68k/kernel/process.c | 39 +++++++++++++++++++++++++++++++++
>> arch/m68k/kernel/syscalls/syscall.tbl | 10 ++++-----
>> 3 files changed, 84 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
>> index 9dd76fb..02bf2a1 100644
>> --- a/arch/m68k/kernel/entry.S
>> +++ b/arch/m68k/kernel/entry.S
>> @@ -76,6 +76,41 @@ ENTRY(__sys_clone3)
>> lea %sp@(28),%sp
>> rts
>>
>> +ENTRY(__sys_exit)
>> + SAVE_SWITCH_STACK
>> + pea %sp@(SWITCH_STACK_SIZE)
>> + jbsr m68k_exit
>> + lea %sp@(28),%sp
>> + rts
>> +
>> +ENTRY(__sys_exit_group)
>> + SAVE_SWITCH_STACK
>> + pea %sp@(SWITCH_STACK_SIZE)
>> + jbsr m68k_exit_group
>> + lea %sp@(28),%sp
>> + rts
>> +
>> +ENTRY(__sys_execve)
>> + SAVE_SWITCH_STACK
>> + pea %sp@(SWITCH_STACK_SIZE)
>> + jbsr m68k_execve
>> + lea %sp@(28),%sp
>> + rts
>> +
>> +ENTRY(__sys_execveat)
>> + SAVE_SWITCH_STACK
>> + pea %sp@(SWITCH_STACK_SIZE)
>> + jbsr m68k_execveat
>> + lea %sp@(28),%sp
>> + rts
>> +
>> +ENTRY(__sys_io_uring_setup)
>> + SAVE_SWITCH_STACK
>> + pea %sp@(SWITCH_STACK_SIZE)
>> + jbsr m68k_io_uring_setup
>> + lea %sp@(28),%sp
>> + rts
>> +
>> ENTRY(sys_sigreturn)
>> SAVE_SWITCH_STACK
>> movel %sp,%sp@- | switch_stack pointer
>> @@ -123,9 +158,13 @@ ENTRY(ret_from_kernel_thread)
>> | a3 contains the kernel thread payload, d7 - its argument
>> movel %d1,%sp@-
>> jsr schedule_tail
>> - movel %d7,(%sp)
>> + addql #4,%sp
>> + | kernel threads can be traced!
>> + SAVE_SWITCH_STACK
>> + movel %d7,%sp@-
>> jsr %a3@
>> addql #4,%sp
>> + RESTORE_SWITCH_STACK
>> jra ret_from_exception
>>
>> #if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
>> diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
>> index da83cc8..298ac99 100644
>> --- a/arch/m68k/kernel/process.c
>> +++ b/arch/m68k/kernel/process.c
>> @@ -138,6 +138,45 @@ asmlinkage int m68k_clone3(struct pt_regs *regs)
>> return sys_clone3((struct clone_args __user *)regs->d1, regs->d2);
>> }
>>
>> +/*
>> + * Because extra registers are saved on the stack after the sys_exit()
>> + * arguments, this C wrapper extracts them from pt_regs * and then calls the
>> + * generic sys_exit() implementation.
>> + */
>> +asmlinkage int m68k_exit(struct pt_regs *regs)
>> +{
>> + return sys_exit(regs->d1);
>> +}
>> +
>> +/* Same for sys_exit_group ... */
>> +asmlinkage int m68k_exit_group(struct pt_regs *regs)
>> +{
>> + return sys_exit_group(regs->d1);
>> +}
>> +
>> +/* Same for sys_exit_group ... */
>> +asmlinkage int m68k_execve(struct pt_regs *regs)
>> +{
>> + return sys_execve((const char __user *)regs->d1,
>> + (const char __user *const __user*)regs->d2,
>> + (const char __user *const __user*)regs->d3);
>> +}
>> +
>> +/* Same for sys_exit_group ... */
>> +asmlinkage int m68k_execveat(struct pt_regs *regs)
>> +{
>> + return sys_execveat(regs->d1, (const char __user *)regs->d2,
>> + (const char __user *const __user*)regs->d3,
>> + (const char __user *const __user*)regs->d4,
>> + regs->d5);
>> +}
>> +
>> +/* and for sys_io_uring_create */
>> +asmlinkage int m68k_io_uring_setup(struct pt_regs *regs)
>> +{
>> + return sys_io_uring_setup(regs->d1,(struct io_uring_params __user *)regs->d2);
>> +}
>> +
>> int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
>> struct task_struct *p, unsigned long tls)
>> {
>> diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
>> index 0dd019d..4a1ba1d 100644
>> --- a/arch/m68k/kernel/syscalls/syscall.tbl
>> +++ b/arch/m68k/kernel/syscalls/syscall.tbl
>> @@ -8,7 +8,7 @@
>> # The <abi> is always "common" for this file
>> #
>> 0 common restart_syscall sys_restart_syscall
>> -1 common exit sys_exit
>> +1 common exit __sys_exit
>> 2 common fork __sys_fork
>> 3 common read sys_read
>> 4 common write sys_write
>> @@ -18,7 +18,7 @@
>> 8 common creat sys_creat
>> 9 common link sys_link
>> 10 common unlink sys_unlink
>> -11 common execve sys_execve
>> +11 common execve __sys_execve
>> 12 common chdir sys_chdir
>> 13 common time sys_time32
>> 14 common mknod sys_mknod
>> @@ -254,7 +254,7 @@
>> 244 common io_submit sys_io_submit
>> 245 common io_cancel sys_io_cancel
>> 246 common fadvise64 sys_fadvise64
>> -247 common exit_group sys_exit_group
>> +247 common exit_group __sys_exit_group
>> 248 common lookup_dcookie sys_lookup_dcookie
>> 249 common epoll_create sys_epoll_create
>> 250 common epoll_ctl sys_epoll_ctl
>> @@ -362,7 +362,7 @@
>> 352 common getrandom sys_getrandom
>> 353 common memfd_create sys_memfd_create
>> 354 common bpf sys_bpf
>> -355 common execveat sys_execveat
>> +355 common execveat __sys_execveat
>> 356 common socket sys_socket
>> 357 common socketpair sys_socketpair
>> 358 common bind sys_bind
>> @@ -424,7 +424,7 @@
>> 422 common futex_time64 sys_futex
>> 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
>> 424 common pidfd_send_signal sys_pidfd_send_signal
>> -425 common io_uring_setup sys_io_uring_setup
>> +425 common io_uring_setup __sys_io_uring_setup
>> 426 common io_uring_enter sys_io_uring_enter
>> 427 common io_uring_register sys_io_uring_register
>> 428 common open_tree sys_open_tree
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 17:17 ` Linus Torvalds
@ 2021-06-18 22:34 ` Michael Schmitz
2021-06-18 23:38 ` Linus Torvalds
0 siblings, 1 reply; 12+ messages in thread
From: Michael Schmitz @ 2021-06-18 22:34 UTC (permalink / raw)
To: Linus Torvalds
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
Hi Linus,
Am 19.06.2021 um 05:17 schrieb Linus Torvalds:
> On Thu, Jun 17, 2021 at 6:27 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>>
>> I'd need specific test cases to exercise io_uring_setup in
>> particular, to see whether stack offsets for pt_regs and the
>> switch stack have been messed up.
>
> I don't think doing this for io_uring_setup() will help any - the
> problem is not in that system call thread itself, it's purely in the
> kernel thread that it then starts.
>
> And the fact that io_uring_setup() has the full stack frame won't then
> help that kernel thread, for exactly the same reason that was true on
> alpha: copy_thread() will actually _create_ the full stack, but when
> we switch to it (through switch_to() -> resume()), the resume code in
> arch/m68k/kernel/entry.S will switch to that stack, and then do
> RESTORE_SWITCH_STACK which will consume it again.
Now I see ... I had glossed over the point in resume() where the stack
pointer was switched to the new task.
> So I think m68k should do the same thing as Eric's patch for alpha: do
> the full stack for exit and exit_group, and for kernel thread creation
> - or at least PF_IO_WORKER), do an extra stack frame on the kernel
> stack, so that even after resume() we'll still have another copy of
> the frame.
Is your patch to copy_thread() to add the extra stack frame still needed?
(Eric added switch stack save/restore around the worker thread call in
ret_from_exception, which pushes back the missing stack contents as far
as I understand ... )
There seems to be no expectation for kernel worker threads to have valid
saved user context, just space on the stack for the tracer to read. I'll
drop saving full context for io_uring_setup in v3.
> The alternative would be to do what x86 does: see __switch_to_asm().
> Instead of doing that normal kernel entry/exit stack (with
> SAVE_SWITCH_STACK and RESTORE_SWITCH_STACK), x86 has it's own very
> special "only for task switching" stack frame thing, and leaves the
> pt_regs etc entirely alone.
>
> Of course, that "only for task switching" is _kind_of_ what the whole
> SAVE_SWITCH_STACK is for - it's part of the name after all - but the
> difference is that on alpha and m68k, it's also (and primarily) the
> "full state" stack frame, used not just for task switching, but for
> signal handling state and for ptrace too.
>
> So in theory, it would be good to split this up:
>
> (a) have the signal handling and ptrace stack be one thing (maybe
> rename the "SWITCH" part of the operations to something else, like
> "EXTRA" or "SIGNAL" or whatever)
We can rename the macros, but we also expose struct switch_stack in
uapi/asm/ptrace.h - hard to rename that one.
> (b) make a separate "for task switching only" stack frame, which is
> used by that switch_to() -> resume() sequence, and that copy_thread()
> has a "struct inactive_task_frame" thing for..
That's more than a bit beyond my m68k assembly level, sorry. Maybe
Andreas can help if we decide to go that way?
Cheers,
Michael
>
> That way, the pt_regs/extra_regs stack frame that copy_thread()
> creates wouldn't then be eaten up by the task switch.
>
> But while that sounds like the right thing to do, it would be a rather
> bigger change. I'm not entirely sure it's worth it.
>
> Eric, comments?
>
> Linus
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 22:34 ` Michael Schmitz
@ 2021-06-18 23:38 ` Linus Torvalds
2021-06-18 23:59 ` Michael Schmitz
2021-06-19 1:32 ` Michael Schmitz
0 siblings, 2 replies; 12+ messages in thread
From: Linus Torvalds @ 2021-06-18 23:38 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
[-- Attachment #1: Type: text/plain, Size: 654 bytes --]
On Fri, Jun 18, 2021 at 3:34 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>
> Is your patch to copy_thread() to add the extra stack frame still needed?
So it's been a long time since I did any m68k assembly, but I think
the m68k patch for the PF_IO_WORKER thread case should look something
like the attached.
Note: my only m68k work was ever on the 68008, and used the Motorola
syntax, not the odd Sun assembler syntax, so my m68k asm skills really
aren't good.
Put another way: I'd be surprised if the attached patch actually
works, but I think it's fairly close. I tried to add comments to
explain the code at least a bit.
Hmm?
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 2100 bytes --]
arch/m68k/kernel/entry.S | 10 ++++++++++
arch/m68k/kernel/process.c | 14 +++++++++-----
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 9dd76fbb7c6b..499f14d79640 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -119,6 +119,15 @@ ENTRY(ret_from_fork)
addql #4,%sp
jra ret_from_exception
+ | A kernel thread will jump here directly from resume,
+ | with the stack containing the full register state
+ | (pt_regs and switch_stack).
+ |
+ | The argument will be in d7, and the kernel function
+ | to call will be in a3.
+ |
+ | If the kernel function returns, we want to return
+ | to user space - it has done a kernel_execve().
ENTRY(ret_from_kernel_thread)
| a3 contains the kernel thread payload, d7 - its argument
movel %d1,%sp@-
@@ -126,6 +135,7 @@ ENTRY(ret_from_kernel_thread)
movel %d7,(%sp)
jsr %a3@
addql #4,%sp
+ RESTORE_SWITCH_STACK
jra ret_from_exception
#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index da83cc83e791..0705f14871a3 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -158,13 +158,17 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
p->thread.fs = get_fs().seg;
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
- /* kernel thread */
- memset(frame, 0, sizeof(struct fork_frame));
+ struct switch_stack *kstp = &frame->sw - 1;
+
+ /* kernel thread - a kernel-side switch-stack and the full user fork_frame */
+ memset(kstp, 0, sizeof(struct switch_stack) + sizeof(struct fork_frame));
+
frame->regs.sr = PS_S;
- frame->sw.a3 = usp; /* function */
- frame->sw.d7 = arg;
- frame->sw.retpc = (unsigned long)ret_from_kernel_thread;
+ kstp->a3 = usp; /* function */
+ kstp->d7 = arg;
+ kstp->retpc = (unsigned long)ret_from_kernel_thread;
p->thread.usp = 0;
+ p->thread.ksp = (unsigned long)kstp;
return 0;
}
memcpy(frame, container_of(current_pt_regs(), struct fork_frame, regs),
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 23:38 ` Linus Torvalds
@ 2021-06-18 23:59 ` Michael Schmitz
2021-06-19 1:32 ` Michael Schmitz
1 sibling, 0 replies; 12+ messages in thread
From: Michael Schmitz @ 2021-06-18 23:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
Hi Linus,
Am 19.06.2021 um 11:38 schrieb Linus Torvalds:
> On Fri, Jun 18, 2021 at 3:34 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>>
>> Is your patch to copy_thread() to add the extra stack frame still needed?
>
> So it's been a long time since I did any m68k assembly, but I think
> the m68k patch for the PF_IO_WORKER thread case should look something
> like the attached.
OK, that answers my question, thanks!
>
> Note: my only m68k work was ever on the 68008, and used the Motorola
> syntax, not the odd Sun assembler syntax, so my m68k asm skills really
> aren't good.
>
> Put another way: I'd be surprised if the attached patch actually
> works, but I think it's fairly close. I tried to add comments to
> explain the code at least a bit.
>
> Hmm?
I'll give it a spin (on the emulator).
Cheers,
Michael
>
> Linus
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-18 23:38 ` Linus Torvalds
2021-06-18 23:59 ` Michael Schmitz
@ 2021-06-19 1:32 ` Michael Schmitz
2021-06-19 1:54 ` Linus Torvalds
1 sibling, 1 reply; 12+ messages in thread
From: Michael Schmitz @ 2021-06-19 1:32 UTC (permalink / raw)
To: Linus Torvalds
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
Hi Linus,
Am 19.06.2021 um 11:38 schrieb Linus Torvalds:
> On Fri, Jun 18, 2021 at 3:34 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>>
>> Is your patch to copy_thread() to add the extra stack frame still needed?
>
> So it's been a long time since I did any m68k assembly, but I think
> the m68k patch for the PF_IO_WORKER thread case should look something
> like the attached.
>
> Note: my only m68k work was ever on the 68008, and used the Motorola
> syntax, not the odd Sun assembler syntax, so my m68k asm skills really
> aren't good.
>
> Put another way: I'd be surprised if the attached patch actually
> works, but I think it's fairly close. I tried to add comments to
> explain the code at least a bit.
That went well:
*** FORMAT ERROR *** FORMAT=0
Current process id is 1
BAD KERNEL TRAP: 00000000
Modules linked in:
PC: [<00002af0>] resume_userspace+0x14/0x16
SR: 2204 SP: (ptrval) a2: 00000000
d0: 00000000 d1: 00000000 d2: 00000000 d3: 00000000
d4: 00000000 d5: 00000000 a0: 00000000 a1: 00000000
Process init (pid: 1, task=(ptrval))
Frame format=0
Stack from 0081bffc:
19bc0000
Call Trace:
Code: 1029 0007 660c 4cdf 073e 201f 588f dfdf <4e73> 254f 03ec e308 660a
487a ffe0 60ff 002a f6ba 598f 48e7 031e 486f 001c 61ff
Disabling lock debugging due to kernel taint
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
Looks like the zeroed frame was restored where we'd have expected an
actual save frame?
I'll next try and apply your solution to IO worker threads only ...
Cheers,
Michael
>
> Hmm?
>
> Linus
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-19 1:32 ` Michael Schmitz
@ 2021-06-19 1:54 ` Linus Torvalds
2021-06-19 2:13 ` Linus Torvalds
2021-06-19 2:17 ` Michael Schmitz
0 siblings, 2 replies; 12+ messages in thread
From: Linus Torvalds @ 2021-06-19 1:54 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
On Fri, Jun 18, 2021 at 6:32 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>
> *** FORMAT ERROR *** FORMAT=0
> Current process id is 1
> BAD KERNEL TRAP: 00000000
> Modules linked in:
> PC: [<00002af0>] resume_userspace+0x14/0x16
> SR: 2204 SP: (ptrval) a2: 00000000
> d0: 00000000 d1: 00000000 d2: 00000000 d3: 00000000
> d4: 00000000 d5: 00000000 a0: 00000000 a1: 00000000
Yeah, so that's presumably the rte that causes an exception due to
garbage on the stack.
The registers being zero at that point is actually expected, so that's
not much of a hint. But yeah, clearly I got some stack initialization
offset or something wrong there, and I don't know modern m68k nearly
well enough to even guess where I screwed up.
Linus
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-19 1:54 ` Linus Torvalds
@ 2021-06-19 2:13 ` Linus Torvalds
2021-06-19 2:52 ` Michael Schmitz
2021-06-19 2:17 ` Michael Schmitz
1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2021-06-19 2:13 UTC (permalink / raw)
To: Michael Schmitz
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
[-- Attachment #1: Type: text/plain, Size: 2068 bytes --]
On Fri, Jun 18, 2021 at 6:54 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> The registers being zero at that point is actually expected, so that's
> not much of a hint. But yeah, clearly I got some stack initialization
> offset or something wrong there, and I don't know modern m68k nearly
> well enough to even guess where I screwed up.
Oh. I think I might have an idea.
In that ret_from_kernel_thread code, after it returns from thge kernel
thread, I did
addql #4,%sp
RESTORE_SWITCH_STACK
jra ret_from_exception
where that "RESTORE_SWITCH_STACK" loads the user space registers from
the user-space switch stack.
BUT.
The switch stack also has that "retpc", and that one should just be popped.
So I think that code should read
addql #4,%sp
RESTORE_SWITCH_STACK
addql #4,%sp
jra ret_from_exception
because we want to restore the switch stack registers (they'll all be
0) but we then want to get rid of retpc too before we jump to
ret_from_exception, which will do the RESTORE_ALL.
(The first 'addql' is to remove the argument we've pushed on the stack
earlier in ret_from_kernel_thread, the second addql is to remove
retpc).
All the *normal* uses of RESTORE_SWITCH_STACK will just do "rts", but
that's because they were called from the shallower stack. The
ret_from_kernel_thread case is kind of special, because it had that
stack frame layout built up manually, rather than have a call to it.
In that sense, it's a bit more like the 'do_trace_entry/exit' code,
which builds that switch stack using
subql #4,%sp
SAVE_SWITCH_STACK
and then undoes it using that same
RESTORE_SWITCH_STACK
addql #4,%sp
sequence that I _should_ have done in ret_from_kernel_thread. (Also,
see ret_from_signal)
I've attached an updated patch just in case my incoherent ramblings
above didn't explain what I meant. It's the same as the previous
patch, it just has that one extra stack update there.
Does _this_ one perhaps work?
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 2116 bytes --]
arch/m68k/kernel/entry.S | 11 +++++++++++
arch/m68k/kernel/process.c | 14 +++++++++-----
2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 9dd76fbb7c6b..88fe0e1a3793 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -119,6 +119,15 @@ ENTRY(ret_from_fork)
addql #4,%sp
jra ret_from_exception
+ | A kernel thread will jump here directly from resume,
+ | with the stack containing the full register state
+ | (pt_regs and switch_stack).
+ |
+ | The argument will be in d7, and the kernel function
+ | to call will be in a3.
+ |
+ | If the kernel function returns, we want to return
+ | to user space - it has done a kernel_execve().
ENTRY(ret_from_kernel_thread)
| a3 contains the kernel thread payload, d7 - its argument
movel %d1,%sp@-
@@ -126,6 +135,8 @@ ENTRY(ret_from_kernel_thread)
movel %d7,(%sp)
jsr %a3@
addql #4,%sp
+ RESTORE_SWITCH_STACK
+ addql #4,%sp
jra ret_from_exception
#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index da83cc83e791..0705f14871a3 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -158,13 +158,17 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
p->thread.fs = get_fs().seg;
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
- /* kernel thread */
- memset(frame, 0, sizeof(struct fork_frame));
+ struct switch_stack *kstp = &frame->sw - 1;
+
+ /* kernel thread - a kernel-side switch-stack and the full user fork_frame */
+ memset(kstp, 0, sizeof(struct switch_stack) + sizeof(struct fork_frame));
+
frame->regs.sr = PS_S;
- frame->sw.a3 = usp; /* function */
- frame->sw.d7 = arg;
- frame->sw.retpc = (unsigned long)ret_from_kernel_thread;
+ kstp->a3 = usp; /* function */
+ kstp->d7 = arg;
+ kstp->retpc = (unsigned long)ret_from_kernel_thread;
p->thread.usp = 0;
+ p->thread.ksp = (unsigned long)kstp;
return 0;
}
memcpy(frame, container_of(current_pt_regs(), struct fork_frame, regs),
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-19 1:54 ` Linus Torvalds
2021-06-19 2:13 ` Linus Torvalds
@ 2021-06-19 2:17 ` Michael Schmitz
1 sibling, 0 replies; 12+ messages in thread
From: Michael Schmitz @ 2021-06-19 2:17 UTC (permalink / raw)
To: Linus Torvalds
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
Hi Linus,
Am 19.06.2021 um 13:54 schrieb Linus Torvalds:
> On Fri, Jun 18, 2021 at 6:32 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
>>
>> *** FORMAT ERROR *** FORMAT=0
>> Current process id is 1
>> BAD KERNEL TRAP: 00000000
>> Modules linked in:
>> PC: [<00002af0>] resume_userspace+0x14/0x16
>> SR: 2204 SP: (ptrval) a2: 00000000
>> d0: 00000000 d1: 00000000 d2: 00000000 d3: 00000000
>> d4: 00000000 d5: 00000000 a0: 00000000 a1: 00000000
>
> Yeah, so that's presumably the rte that causes an exception due to
> garbage on the stack.
>
> The registers being zero at that point is actually expected, so that's
> not much of a hint. But yeah, clearly I got some stack initialization
> offset or something wrong there, and I don't know modern m68k nearly
> well enough to even guess where I screwed up.
It might have been me screwing up - I hand applied the patch on top of
my last one and fat fingered one bit (forgot to remove the addql #4,sp@
I had added before the switch stack save).
With the patch correctly applied, I get this dump:
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
CPU: 0 PID: 1 Comm: init Not tainted 5.13.0-rc1-atari-fpuemu-exitfix+ #1126
Stack from 0081be08:
0081be08 003363d8 003363d8 002aebaa 000000ff 00000001 0002fa64
00818a90
0081a000 0000000b 0081be70 00028610 0032df62 0000000b 0000000b
0002e0d2
0002fa64 00000001 001a978c 0000000b 0081bf30 0081daf8 0081bf44
00000000
00000000 00000000 0081ec40 00029458 0000000b 0081a007 00030504
0000000b
00000000 00000000 00000000 00000000 00818550 00000000 0081bf90
0081bf30
00000000 0081bf68 00030066 0081da30 000042c4 0081bf30 00000000
00000000
Call Trace: [<002aebaa>] panic+0xc0/0x282
[<0002fa64>] do_signal_stop+0x0/0x14a
[<00028610>] do_exit+0x152/0x6f4
[<0002e0d2>] recalc_sigpending+0x0/0x1e
[<0002fa64>] do_signal_stop+0x0/0x14a
[<001a978c>] memcpy+0x0/0x88
[<00029458>] do_group_exit+0x40/0x7e
[<00030504>] get_signal+0x22c/0x510
[<00030066>] force_sig_info_to_task+0x7e/0x8a
[<000042c4>] do_notify_resume+0x3c/0x484
[<000302b2>] force_sig_fault_to_task+0x30/0x3c
[<000302d2>] force_sig_fault+0x14/0x1a
[<00005eb8>] send_fault_sig+0x24/0x86
[<00002b14>] do_signal_return+0x10/0x1a
[<00007008>] atari_reset+0x90/0xbc
[<0000c000>] clr_mant+0x8/0x14
No registers dumped at all - no idea how that happened.
I'll try your latest patch next ... bear with me, got a 9-year old
chewing my ear off to entertain in between tests.
Cheers,
Michael
>
> Linus
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] m68k: save extra registers on more syscall entry points
2021-06-19 2:13 ` Linus Torvalds
@ 2021-06-19 2:52 ` Michael Schmitz
0 siblings, 0 replies; 12+ messages in thread
From: Michael Schmitz @ 2021-06-19 2:52 UTC (permalink / raw)
To: Linus Torvalds
Cc: Geert Uytterhoeven, linux-arch, linux-m68k, Eric W. Biederman,
Andreas Schwab
Hi Linus,
that one boots OK now, thanks (applied on top of mine but that should
not matter). I'll run a test on the actual hardware once the previous
one is done.
Cheers,
Michael
Am 19.06.2021 um 14:13 schrieb Linus Torvalds:
> On Fri, Jun 18, 2021 at 6:54 PM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>>
>> The registers being zero at that point is actually expected, so that's
>> not much of a hint. But yeah, clearly I got some stack initialization
>> offset or something wrong there, and I don't know modern m68k nearly
>> well enough to even guess where I screwed up.
>
> Oh. I think I might have an idea.
>
> In that ret_from_kernel_thread code, after it returns from thge kernel
> thread, I did
>
> addql #4,%sp
> RESTORE_SWITCH_STACK
> jra ret_from_exception
>
> where that "RESTORE_SWITCH_STACK" loads the user space registers from
> the user-space switch stack.
>
> BUT.
>
> The switch stack also has that "retpc", and that one should just be popped.
>
> So I think that code should read
>
> addql #4,%sp
> RESTORE_SWITCH_STACK
> addql #4,%sp
> jra ret_from_exception
>
> because we want to restore the switch stack registers (they'll all be
> 0) but we then want to get rid of retpc too before we jump to
> ret_from_exception, which will do the RESTORE_ALL.
>
> (The first 'addql' is to remove the argument we've pushed on the stack
> earlier in ret_from_kernel_thread, the second addql is to remove
> retpc).
>
> All the *normal* uses of RESTORE_SWITCH_STACK will just do "rts", but
> that's because they were called from the shallower stack. The
> ret_from_kernel_thread case is kind of special, because it had that
> stack frame layout built up manually, rather than have a call to it.
>
> In that sense, it's a bit more like the 'do_trace_entry/exit' code,
> which builds that switch stack using
>
> subql #4,%sp
> SAVE_SWITCH_STACK
>
> and then undoes it using that same
>
> RESTORE_SWITCH_STACK
> addql #4,%sp
>
> sequence that I _should_ have done in ret_from_kernel_thread. (Also,
> see ret_from_signal)
>
> I've attached an updated patch just in case my incoherent ramblings
> above didn't explain what I meant. It's the same as the previous
> patch, it just has that one extra stack update there.
>
> Does _this_ one perhaps work?
>
> Linus
>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-06-19 2:53 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-18 1:27 [PATCH v2] m68k: save extra registers on more syscall entry points Michael Schmitz
2021-06-18 17:17 ` Linus Torvalds
2021-06-18 22:34 ` Michael Schmitz
2021-06-18 23:38 ` Linus Torvalds
2021-06-18 23:59 ` Michael Schmitz
2021-06-19 1:32 ` Michael Schmitz
2021-06-19 1:54 ` Linus Torvalds
2021-06-19 2:13 ` Linus Torvalds
2021-06-19 2:52 ` Michael Schmitz
2021-06-19 2:17 ` Michael Schmitz
2021-06-18 18:39 ` Eric W. Biederman
2021-06-18 19:06 ` Michael Schmitz
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.