* [PATCH v2 0/4] powerpc/64: syscalls in C
@ 2019-08-27 13:55 Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 1/4] powerpc: convert to copy_thread_tls Nicholas Piggin
` (4 more replies)
0 siblings, 5 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-27 13:55 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Accounted for some feedback.
Nicholas Piggin (4):
powerpc: convert to copy_thread_tls
powerpc/64: remove support for kernel-mode syscalls
powerpc/64: system call remove non-volatile GPR save optimisation
powerpc/64: system call implement the bulk of the logic in C
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/asm-prototypes.h | 11 -
.../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
arch/powerpc/include/asm/cputime.h | 22 +
arch/powerpc/include/asm/ptrace.h | 3 +
arch/powerpc/include/asm/signal.h | 2 +
arch/powerpc/include/asm/switch_to.h | 5 +
arch/powerpc/include/asm/time.h | 3 +
arch/powerpc/kernel/Makefile | 3 +-
arch/powerpc/kernel/entry_64.S | 421 +++---------------
arch/powerpc/kernel/exceptions-64s.S | 2 -
arch/powerpc/kernel/process.c | 9 +-
arch/powerpc/kernel/signal.h | 2 -
arch/powerpc/kernel/syscall_64.c | 177 ++++++++
arch/powerpc/kernel/syscalls/syscall.tbl | 22 +-
15 files changed, 307 insertions(+), 388 deletions(-)
create mode 100644 arch/powerpc/kernel/syscall_64.c
--
2.22.0
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v2 1/4] powerpc: convert to copy_thread_tls
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
@ 2019-08-27 13:55 ` Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 2/4] powerpc/64: remove support for kernel-mode syscalls Nicholas Piggin
` (3 subsequent siblings)
4 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-27 13:55 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Commit 3033f14ab78c3 ("clone: support passing tls argument via C rather
than pt_regs magic") introduced the HAVE_COPY_THREAD_TLS option. Use it
to avoid a subtle assumption about the argument ordering of clone type
syscalls.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
No change since v1.
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/process.c | 9 +++++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8dcd8820369..7477a3263225 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -182,6 +182,7 @@ config PPC
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
+ select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8fc4de0d22b4..24621e7e5033 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1600,8 +1600,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1642,10 +1643,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
--
2.22.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v2 2/4] powerpc/64: remove support for kernel-mode syscalls
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 1/4] powerpc: convert to copy_thread_tls Nicholas Piggin
@ 2019-08-27 13:55 ` Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation Nicholas Piggin
` (2 subsequent siblings)
4 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-27 13:55 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
There is support for the kernel to execute the 'sc 0' instruction and
make a system call to itself. This is a relic that is unused in the
tree, therefore untested. It's also highly questionable for modules to
be doing this.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
No change since v1.
arch/powerpc/kernel/entry_64.S | 21 ++++++---------------
arch/powerpc/kernel/exceptions-64s.S | 2 --
2 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0a0b5310f54a..6467bdab8d40 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6ba3cc2ef8ab..768f133de4f1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1521,8 +1521,6 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
--
2.22.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 1/4] powerpc: convert to copy_thread_tls Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 2/4] powerpc/64: remove support for kernel-mode syscalls Nicholas Piggin
@ 2019-08-27 13:55 ` Nicholas Piggin
2019-08-28 9:02 ` Christophe Leroy
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
2019-08-28 9:06 ` [PATCH v2 0/4] powerpc/64: syscalls " Christophe Leroy
4 siblings, 1 reply; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-27 13:55 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
powerpc has an optimisation where interrupts avoid saving the
non-volatile (or callee saved) registers to the interrupt stack frame if
they are not required.
Two problems with this are that an interrupt does not always know
whether it will need non-volatiles; and if it does need them, they can
only be saved from the entry-scoped asm code (because we don't control
what the C compiler does with these registers).
System calls are the most difficult: some system calls always require
all registers (e.g., fork, to copy regs into the child). Sometimes
registers are only required under certain conditions (e.g., tracing,
signal delivery). These cases require ugly logic in the call chains
(e.g., ppc_fork), and require a lot of logic to be implemented in asm.
So remove the optimisation for system calls, and always save NVGPRs on
entry. Modern high performance CPUs are not so sensitive, because the
stores are dense in cache and can be hidden by other expensive work in
the syscall path -- the null syscall selftests benchmark on POWER9 is
not slowed (124.40ns before and 123.64ns after, i.e., within the noise).
Other interrupts retain the NVGPR optimisation for now.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Changes since v1:
- Improve changelog
- Fix clone3 spu table entry (Segher)
arch/powerpc/kernel/entry_64.S | 72 +++++-------------------
arch/powerpc/kernel/syscalls/syscall.tbl | 22 +++++---
2 files changed, 28 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..5a3e0b5c9ad1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
std r11,_XER(r1)
std r11,_CTR(r1)
std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
mflr r10
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
rldimi r2,r11,28,(63-28)
- li r11,0xc01
+ li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r3,ORIG_GPR3(r1)
@@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Traced system call support */
.Lsyscall_dotrace:
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_enter
@@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtmsrd r10,1
#endif /* CONFIG_PPC_BOOK3E */
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_leave
b ret_from_except
@@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
_ASM_NOKPROBE_SYMBOL(system_call_common);
_ASM_NOKPROBE_SYMBOL(system_call_exit);
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
-
_GLOBAL(ret_from_fork)
bl schedule_tail
REST_NVGPRS(r1)
@@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
li r3,0
b .Lsyscall_exit
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ beqlr-
+ SAVE_NVGPRS(r1)
+ clrrdi r0,r11,1
+ std r0,_TRAP(r1)
+ blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..d899bcb5343e 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,7 +9,9 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 nospu fork ppc_fork
+2 32 fork ppc_fork sys_fork
+2 64 fork sys_fork
+2 spu fork sys_ni_syscall
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -158,7 +160,9 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 nospu clone ppc_clone
+120 32 clone ppc_clone sys_clone
+120 64 clone sys_clone
+120 spu clone sys_ni_syscall
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -240,7 +244,9 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 nospu vfork ppc_vfork
+189 32 vfork ppc_vfork sys_vfork
+189 64 vfork sys_vfork
+189 spu vfork sys_ni_syscall
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 common readahead sys_readahead compat_sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
@@ -316,8 +322,8 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext ppc32_swapcontext
-249 64 swapcontext ppc64_swapcontext
+249 32 swapcontext ppc_swapcontext compat_sys_swapcontext
+249 64 swapcontext sys_swapcontext
249 spu swapcontext sys_ni_syscall
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
@@ -456,7 +462,7 @@
361 common bpf sys_bpf
362 nospu execveat sys_execveat compat_sys_execveat
363 32 switch_endian sys_ni_syscall
-363 64 switch_endian ppc_switch_endian
+363 64 switch_endian sys_switch_endian
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
@@ -516,4 +522,6 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 nospu clone3 ppc_clone3
+435 32 clone3 ppc_clone3 sys_clone3
+435 64 clone3 sys_clone3
+435 spu clone3 sys_ni_syscall
--
2.22.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
` (2 preceding siblings ...)
2019-08-27 13:55 ` [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation Nicholas Piggin
@ 2019-08-27 13:55 ` Nicholas Piggin
2019-08-28 6:51 ` Christophe Leroy
` (2 more replies)
2019-08-28 9:06 ` [PATCH v2 0/4] powerpc/64: syscalls " Christophe Leroy
4 siblings, 3 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-27 13:55 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
System call entry and particularly exit code is beyond the limit of what
is reasonable to implement in asm.
This conversion moves all conditional branches out of the asm code,
except for the case that all GPRs should be restored at exit.
Null syscall test is about 5% faster after this patch, because the exit
work is handled under local_irq_disable, and the hard mask and pending
interrupt replay is handled after that, which avoids games with MSR.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Changes since v1:
- Improve changelog
- Lot of code cleanups, moving helpers out to proper header locations,
etc (Christophe).
- Split unnecessary change that affected ppc32 out. I will submit it
independently (Christophe).
arch/powerpc/include/asm/asm-prototypes.h | 11 -
.../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
arch/powerpc/include/asm/cputime.h | 22 ++
arch/powerpc/include/asm/ptrace.h | 3 +
arch/powerpc/include/asm/signal.h | 2 +
arch/powerpc/include/asm/switch_to.h | 5 +
arch/powerpc/include/asm/time.h | 3 +
arch/powerpc/kernel/Makefile | 3 +-
arch/powerpc/kernel/entry_64.S | 340 +++---------------
arch/powerpc/kernel/signal.h | 2 -
arch/powerpc/kernel/syscall_64.c | 177 +++++++++
11 files changed, 273 insertions(+), 307 deletions(-)
create mode 100644 arch/powerpc/kernel/syscall_64.c
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index ec1c97a8e8cb..f00ef8924a99 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -92,14 +92,6 @@ long sys_switch_endian(void);
notrace unsigned int __check_irq_replay(void);
void notrace restore_interrupts(void);
-/* ptrace */
-long do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_leave(struct pt_regs *regs);
-
-/* process */
-void restore_math(struct pt_regs *regs);
-void restore_tm_state(struct pt_regs *regs);
-
/* prom_init (OpenFirmware) */
unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unsigned long pp,
@@ -110,9 +102,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
void __init early_setup(unsigned long dt_ptr);
void early_setup_secondary(void);
-/* time */
-void accumulate_stolen_time(void);
-
/* misc runtime */
extern u64 __bswapdi2(u64);
extern s64 __lshrdi3(s64, int);
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..ef2e65ea8a73 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
#include <linux/const.h>
+#include <asm/reg.h>
#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
@@ -56,7 +57,16 @@
#ifdef CONFIG_PPC_KUAP
-#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/ptrace.h>
+
+static inline void kuap_check_amr(void)
+{
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
+#endif
+}
/*
* We support individually allowing read or write, but we don't support nesting
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 2431b4ada2fa..f3aa9db1a3cc 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -60,6 +60,28 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
}
#endif
+static inline void account_cpu_user_entry(void)
+{
+ unsigned long tb = mftb();
+
+ get_accounting(current)->utime += (tb - get_accounting(current)->starttime_user);
+ get_accounting(current)->starttime = tb;
+}
+static inline void account_cpu_user_exit(void)
+{
+ unsigned long tb = mftb();
+
+ get_accounting(current)->stime += (tb - get_accounting(current)->starttime);
+ get_accounting(current)->starttime_user = tb;
+}
+
#endif /* __KERNEL__ */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void account_cpu_user_entry(void)
+{
+}
+static inline void account_cpu_user_exit(void)
+{
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index feee1b21bbd5..af363086403a 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs);
#define profile_pc(regs) instruction_pointer(regs)
#endif
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
#define kernel_stack_pointer(regs) ((regs)->gpr[1])
static inline int is_syscall_success(struct pt_regs *regs)
{
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 0803ca8b9149..0113be8dcb59 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -6,4 +6,6 @@
#include <uapi/asm/signal.h>
#include <uapi/asm/ptrace.h>
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
+
#endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 5b03d8a82409..476008bc3d08 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -5,6 +5,7 @@
#ifndef _ASM_POWERPC_SWITCH_TO_H
#define _ASM_POWERPC_SWITCH_TO_H
+#include <linux/sched.h>
#include <asm/reg.h>
struct thread_struct;
@@ -22,6 +23,10 @@ extern void switch_booke_debug_regs(struct debug_reg *new_debug);
extern int emulate_altivec(struct pt_regs *);
+void restore_math(struct pt_regs *regs);
+
+void restore_tm_state(struct pt_regs *regs);
+
extern void flush_all_to_thread(struct task_struct *);
extern void giveup_all(struct task_struct *);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54f4ec1f9fab..ba97858c9d76 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -199,5 +199,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
+/* SPLPAR */
+void accumulate_stolen_time(void);
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..7f53cc07f933 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -52,7 +52,8 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o \
+ syscall_64.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5a3e0b5c9ad1..6bdcfa21ea7b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -63,12 +63,6 @@ exception_marker:
.globl system_call_common
system_call_common:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
@@ -76,350 +70,115 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+ std r2,GPR2(r1)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
- ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
- std r2,GPR2(r1)
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
std r3,GPR3(r1)
- mfcr r2
std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
std r7,GPR7(r1)
std r8,GPR8(r1)
- li r11,0
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
- std r11,_XER(r1)
- std r11,_CTR(r1)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
mflr r10
+
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
- rldimi r2,r11,28,(63-28)
+ rldimi r12,r11,28,(63-28)
li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
+ std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
+ addi r10,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-
- kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- /* see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd r11,r10
- beq+ 33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- lbz r10,PACAIRQSOFTMASK(r13)
-1: tdnei r10,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r11,MSR_RI
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call: /* label this so stack traces look sane */
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,IRQS_ENABLED
- std r10,SOFTE(r1)
+ std r11,-16(r10) /* "regshere" marker */
- ld r11, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne .Lsyscall_dotrace /* does not return */
- cmpldi 0,r0,NR_syscalls
- bge- .Lsyscall_enosys
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andis. r10,r10,_TIF_32BIT@h
- beq 15f
- ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,3
-
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .Lsyscall_enosys above has
- * committed.
- */
-
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
-
- /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
- std r3,RESULT(r1)
-
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- ld r3,RESULT(r1)
-#endif
-
- ld r12, PACA_THREAD_INFO(r13)
-
- ld r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- *
- * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
- * could fault on the load of the TI_FLAGS below.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r11,MSR_RI
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ bl syscall_exit_prepare
- ld r9,TI_FLAGS(r12)
- li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- .Lsyscall_exit_work
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ ld r6,_LINK(r1)
- andi. r0,r8,MSR_FP
- beq 2f
-#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
-
-3: cmpld r3,r11
- ld r5,_CCR(r1)
- bge- .Lsyscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
-
- kuap_check_amr r10, r11
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
- * this later, but testing shows that doing it here causes less slow
- * down than doing it closer to the rfid.
- */
- li r11,0
- mtmsrd r11,1
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ std r5,PACATMSCRATCH(r13)
#endif
+ mtlr r6
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
+ cmpdi r3,0
+ bne syscall_restore_regs
+.Lsyscall_restore_regs_cont:
BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- std r8, PACATMSCRATCH(r13)
-#endif
-
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ mtcr r2
ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
RFI_TO_USER
b . /* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(system_call_common);
-1: /* exit to kernel */
- kuap_restore_amr r2
-
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-.Lsyscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-.Lsyscall_dotrace:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
-
- /*
- * We use the return value of do_syscall_trace_enter() as the syscall
- * number. If the syscall was rejected for any reason do_syscall_trace_enter()
- * returns an invalid syscall number and the test below against
- * NR_syscalls will fail.
- */
- mr r0,r3
-
- /* Restore argument registers just clobbered and/or possibly changed. */
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
-
- /* Repopulate r9 and r10 for the syscall path */
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r10, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r10)
-
- cmpldi r0,NR_syscalls
- blt+ .Lsyscall
-
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- b .Lsyscall_exit
-
-
-.Lsyscall_enosys:
- li r3,-ENOSYS
- b .Lsyscall_exit
-
-.Lsyscall_exit_work:
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
+syscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
-BEGIN_FTR_SECTION
- lis r3,DEFAULT_PPR@highest /* Set default PPR */
- sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,_PPR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r10,MSR_RI
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
+ mtctr r3
+ mtspr SPRN_XER,r4
+ ld r0,GPR0(r1)
+ REST_8GPRS(4, r1)
+ ld r12,GPR12(r1)
+ b .Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-.Ltabort_syscall:
+ .globl tabort_syscall
+tabort_syscall:
/* Firstly we need to enable TM in the kernel */
mfmsr r10
li r9, 1
rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
mtmsrd r10, 0
+ ld r11,_NIP(r13)
+ ld r12,_MSR(r13)
+
/* tabort, this dooms the transaction, nothing else */
li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
TABORT(R9)
@@ -438,9 +197,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common);
-_ASM_NOKPROBE_SYMBOL(system_call_exit);
-
_GLOBAL(ret_from_fork)
bl schedule_tail
REST_NVGPRS(r1)
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 800433685888..d396efca4068 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,6 @@
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
size_t frame_size, int is_32);
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
new file mode 100644
index 000000000000..d42519b86ddd
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -0,0 +1,177 @@
+#include <linux/err.h>
+#include <asm/book3s/64/kup-radix.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+
+extern void __noreturn tabort_syscall(void);
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+ syscall_fn f;
+
+ BUG_ON(!(regs->msr & MSR_PR));
+
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely(regs->msr & MSR_TS_T))
+ tabort_syscall();
+
+ account_cpu_user_entry();
+
+#ifdef CONFIG_PPC_SPLPAR
+ if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = get_lppaca();
+
+ if (unlikely(local_paca->dtl_ridx != lp->dtl_idx))
+ accumulate_stolen_time();
+ }
+#endif
+
+ kuap_check_amr();
+
+ /*
+ * A syscall should always be called with interrupts enabled
+ * so we just unconditionally hard-enable here. When some kind
+ * of irq tracing is used, we additionally check that condition
+ * is correct
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON(irq_soft_mask_return() != IRQS_ENABLED);
+ WARN_ON(local_paca->irq_happened);
+ }
+
+ __hard_irq_enable();
+
+ /*
+ * We do need to set SOFTE in the stack frame or the return
+ * from interrupt will be painful
+ */
+ regs->softe = IRQS_ENABLED;
+
+ ti_flags = current_thread_info()->flags;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test below against NR_syscalls will fail.
+ */
+ r0 = do_syscall_trace_enter(regs);
+ }
+
+ if (unlikely(r0 >= NR_syscalls))
+ return -ENOSYS;
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ if (unlikely(ti_flags & _TIF_32BIT)) {
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 &= 0x00000000ffffffffULL;
+ r4 &= 0x00000000ffffffffULL;
+ r5 &= 0x00000000ffffffffULL;
+ r6 &= 0x00000000ffffffffULL;
+ r7 &= 0x00000000ffffffffULL;
+ r8 &= 0x00000000ffffffffULL;
+
+ } else {
+ f = (void *)sys_call_table[r0];
+ }
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = *ti_flagsp;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE))
+ do_syscall_trace_leave(regs);
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+again:
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & _TIF_USER_WORK_MASK)) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_FPU)) {
+ unsigned long mathflags = MSR_FP;
+
+ if (IS_ENABLED(CONFIG_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+
+ /* This pattern matches prep_irq_for_idle */
+ __mtmsrd(0, 1); /* Disable MSR_EE and MSR_RI */
+ if (unlikely(lazy_irq_pending())) {
+ __mtmsrd(MSR_RI, 1);
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ /* Took an interrupt which may have more exit work to do. */
+ goto again;
+ }
+ trace_hardirqs_on();
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
--
2.22.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
@ 2019-08-28 6:51 ` Christophe Leroy
2019-08-28 9:41 ` Nicholas Piggin
2019-08-28 15:30 ` Michal Suchánek
2019-08-30 18:48 ` kbuild test robot
2 siblings, 1 reply; 22+ messages in thread
From: Christophe Leroy @ 2019-08-28 6:51 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
> System call entry and particularly exit code is beyond the limit of what
> is reasonable to implement in asm.
>
> This conversion moves all conditional branches out of the asm code,
> except for the case that all GPRs should be restored at exit.
>
> Null syscall test is about 5% faster after this patch, because the exit
> work is handled under local_irq_disable, and the hard mask and pending
> interrupt replay is handled after that, which avoids games with MSR.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> Changes since v1:
> - Improve changelog
> - Lot of code cleanups, moving helpers out to proper header locations,
> etc (Christophe).
> - Split unnecessary change that affected ppc32 out. I will submit it
> independently (Christophe).
>
> arch/powerpc/include/asm/asm-prototypes.h | 11 -
> .../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
> arch/powerpc/include/asm/cputime.h | 22 ++
> arch/powerpc/include/asm/ptrace.h | 3 +
> arch/powerpc/include/asm/signal.h | 2 +
> arch/powerpc/include/asm/switch_to.h | 5 +
> arch/powerpc/include/asm/time.h | 3 +
> arch/powerpc/kernel/Makefile | 3 +-
> arch/powerpc/kernel/entry_64.S | 340 +++---------------
> arch/powerpc/kernel/signal.h | 2 -
> arch/powerpc/kernel/syscall_64.c | 177 +++++++++
> 11 files changed, 273 insertions(+), 307 deletions(-)
> create mode 100644 arch/powerpc/kernel/syscall_64.c
>
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
> index ec1c97a8e8cb..f00ef8924a99 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -92,14 +92,6 @@ long sys_switch_endian(void);
> notrace unsigned int __check_irq_replay(void);
> void notrace restore_interrupts(void);
>
> -/* ptrace */
> -long do_syscall_trace_enter(struct pt_regs *regs);
> -void do_syscall_trace_leave(struct pt_regs *regs);
> -
> -/* process */
> -void restore_math(struct pt_regs *regs);
> -void restore_tm_state(struct pt_regs *regs);
> -
> /* prom_init (OpenFirmware) */
> unsigned long __init prom_init(unsigned long r3, unsigned long r4,
> unsigned long pp,
> @@ -110,9 +102,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
> void __init early_setup(unsigned long dt_ptr);
> void early_setup_secondary(void);
>
> -/* time */
> -void accumulate_stolen_time(void);
> -
> /* misc runtime */
> extern u64 __bswapdi2(u64);
> extern s64 __lshrdi3(s64, int);
> diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> index f254de956d6a..ef2e65ea8a73 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> @@ -3,6 +3,7 @@
> #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
>
> #include <linux/const.h>
> +#include <asm/reg.h>
>
> #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
> #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
> @@ -56,7 +57,16 @@
>
> #ifdef CONFIG_PPC_KUAP
>
> -#include <asm/reg.h>
> +#include <asm/mmu.h>
> +#include <asm/ptrace.h>
> +
> +static inline void kuap_check_amr(void)
> +{
> +#ifdef CONFIG_PPC_KUAP_DEBUG
> + if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
Better:
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) &&
mmu_has_feature(MMU_FTR_RADIX_KUAP))
> + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
> +#endif
> +}
>
> /*
> * We support individually allowing read or write, but we don't support nesting
> diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
> index 2431b4ada2fa..f3aa9db1a3cc 100644
> --- a/arch/powerpc/include/asm/cputime.h
> +++ b/arch/powerpc/include/asm/cputime.h
> @@ -60,6 +60,28 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
> }
> #endif
>
> +static inline void account_cpu_user_entry(void)
> +{
> + unsigned long tb = mftb();
> +
> + get_accounting(current)->utime += (tb - get_accounting(current)->starttime_user);
> + get_accounting(current)->starttime = tb;
> +}
Can you check the generated assembly ? I remember having bad result with
get_accouting() being used several times in a arch_vtime_task_switch()
before commit 60f1d2893ee6 ("powerpc/time: inline
arch_vtime_task_switch()")
Regardless, I think it would look better as:
static inline void account_cpu_user_entry(void)
{
unsigned long tb = mftb();
struct cpu_accounting_data *acct = get_accounting(current);
acct->utime += (tb - acct->starttime_user);
acct->starttime = tb;
}
> +static inline void account_cpu_user_exit(void)
> +{
> + unsigned long tb = mftb();
> +
> + get_accounting(current)->stime += (tb - get_accounting(current)->starttime);
> + get_accounting(current)->starttime_user = tb;
> +}
Same here.
Christophe
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation
2019-08-27 13:55 ` [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation Nicholas Piggin
@ 2019-08-28 9:02 ` Christophe Leroy
2019-08-28 9:32 ` Nicholas Piggin
0 siblings, 1 reply; 22+ messages in thread
From: Christophe Leroy @ 2019-08-28 9:02 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
> powerpc has an optimisation where interrupts avoid saving the
> non-volatile (or callee saved) registers to the interrupt stack frame if
> they are not required.
>
> Two problems with this are that an interrupt does not always know
> whether it will need non-volatiles; and if it does need them, they can
> only be saved from the entry-scoped asm code (because we don't control
> what the C compiler does with these registers).
>
> system calls are the most difficult: some system calls always require
> all registers (e.g., fork, to copy regs into the child). Sometimes
> registers are only required under certain conditions (e.g., tracing,
> signal delivery). These cases require ugly logic in the call chains
> (e.g., ppc_fork), and require a lot of logic to be implemented in asm.
Do you really find it ugly to just call function nvgprs() before calling
sys_fork() ? I guess there are things a lot uglier.
>
> So remove the optimisation for system calls, and always save NVGPRs on
> entry. Modern high performance CPUs are not so sensitive, because the
> stores are dense in cache and can be hidden by other expensive work in
> the syscall path -- the null syscall selftests benchmark on POWER9 is
> not slowed (124.40ns before and 123.64ns after, i.e., within the noise).
I did the test on PPC32:
On an 885, null_syscall reports 2227ns (132MHz)
If saving non-volatile regs, it goes to 2419, ie +8.6%
On an 8321, null_syscall reports 1021ns (333MHz)
If saving non-volatile regs, it goes to 1100, ie +7.7%
So unless going to C compensates this degradation, I guess it is not
worth it on PPC32.
>
> Other interrupts retain the NVGPR optimisation for now.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> Changes since v1:
> - Improve changelog
> - Fix clone3 spu table entry (Segher)
>
> arch/powerpc/kernel/entry_64.S | 72 +++++-------------------
> arch/powerpc/kernel/syscalls/syscall.tbl | 22 +++++---
> 2 files changed, 28 insertions(+), 66 deletions(-)
>
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 6467bdab8d40..5a3e0b5c9ad1 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
> std r11,_XER(r1)
> std r11,_CTR(r1)
> std r9,GPR13(r1)
> + SAVE_NVGPRS(r1)
> mflr r10
> /*
> * This clears CR0.SO (bit 28), which is the error indication on
> * return from this system call.
> */
> rldimi r2,r11,28,(63-28)
> - li r11,0xc01
> + li r11,0xc00
> std r10,_LINK(r1)
> std r11,_TRAP(r1)
> std r3,ORIG_GPR3(r1)
> @@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>
> /* Traced system call support */
> .Lsyscall_dotrace:
> - bl save_nvgprs
> addi r3,r1,STACK_FRAME_OVERHEAD
> bl do_syscall_trace_enter
>
> @@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> mtmsrd r10,1
> #endif /* CONFIG_PPC_BOOK3E */
>
> - bl save_nvgprs
> addi r3,r1,STACK_FRAME_OVERHEAD
> bl do_syscall_trace_leave
> b ret_from_except
> @@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> _ASM_NOKPROBE_SYMBOL(system_call_common);
> _ASM_NOKPROBE_SYMBOL(system_call_exit);
>
> -/* Save non-volatile GPRs, if not already saved. */
> -_GLOBAL(save_nvgprs)
> - ld r11,_TRAP(r1)
> - andi. r0,r11,1
> - beqlr-
> - SAVE_NVGPRS(r1)
> - clrrdi r0,r11,1
> - std r0,_TRAP(r1)
> - blr
> -_ASM_NOKPROBE_SYMBOL(save_nvgprs);
I see it is added back somewhere below. Why don't you leave it where it is ?
> -
> -
> -/*
> - * The sigsuspend and rt_sigsuspend system calls can call do_signal
> - * and thus put the process into the stopped state where we might
> - * want to examine its user state with ptrace. Therefore we need
> - * to save all the nonvolatile registers (r14 - r31) before calling
> - * the C code. Similarly, fork, vfork and clone need the full
> - * register state on the stack so that it can be copied to the child.
> - */
> -
> -_GLOBAL(ppc_fork)
> - bl save_nvgprs
> - bl sys_fork
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc_vfork)
> - bl save_nvgprs
> - bl sys_vfork
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc_clone)
> - bl save_nvgprs
> - bl sys_clone
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc_clone3)
> - bl save_nvgprs
> - bl sys_clone3
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc32_swapcontext)
> - bl save_nvgprs
> - bl compat_sys_swapcontext
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc64_swapcontext)
> - bl save_nvgprs
> - bl sys_swapcontext
> - b .Lsyscall_exit
> -
> -_GLOBAL(ppc_switch_endian)
> - bl save_nvgprs
> - bl sys_switch_endian
> - b .Lsyscall_exit
> -
> _GLOBAL(ret_from_fork)
> bl schedule_tail
> REST_NVGPRS(r1)
> @@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
> li r3,0
> b .Lsyscall_exit
>
> +/* Save non-volatile GPRs, if not already saved. */
> +_GLOBAL(save_nvgprs)
> + ld r11,_TRAP(r1)
> + andi. r0,r11,1
> + beqlr-
> + SAVE_NVGPRS(r1)
> + clrrdi r0,r11,1
> + std r0,_TRAP(r1)
> + blr
> +_ASM_NOKPROBE_SYMBOL(save_nvgprs);
> +
Moved here.
> #ifdef CONFIG_PPC_BOOK3S_64
>
> #define FLUSH_COUNT_CACHE \
Christophe
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
` (3 preceding siblings ...)
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
@ 2019-08-28 9:06 ` Christophe Leroy
2019-08-28 9:49 ` Nicholas Piggin
4 siblings, 1 reply; 22+ messages in thread
From: Christophe Leroy @ 2019-08-28 9:06 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
> Accounted for some feedback.
>
> Nicholas Piggin (4):
> powerpc: convert to copy_thread_tls
> powerpc/64: remove support for kernel-mode syscalls
> powerpc/64: system call remove non-volatile GPR save optimisation
> powerpc/64: system call implement the bulk of the logic in C
Would it be possible to split in the following parts:
1/ Implement in C whatever can be implemented without removing
non-volatile GPR save optimisation
2/ Remove non-volatile GPR save optimisation
3/ Implement in C everything else
Christophe
>
> arch/powerpc/Kconfig | 1 +
> arch/powerpc/include/asm/asm-prototypes.h | 11 -
> .../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
> arch/powerpc/include/asm/cputime.h | 22 +
> arch/powerpc/include/asm/ptrace.h | 3 +
> arch/powerpc/include/asm/signal.h | 2 +
> arch/powerpc/include/asm/switch_to.h | 5 +
> arch/powerpc/include/asm/time.h | 3 +
> arch/powerpc/kernel/Makefile | 3 +-
> arch/powerpc/kernel/entry_64.S | 421 +++---------------
> arch/powerpc/kernel/exceptions-64s.S | 2 -
> arch/powerpc/kernel/process.c | 9 +-
> arch/powerpc/kernel/signal.h | 2 -
> arch/powerpc/kernel/syscall_64.c | 177 ++++++++
> arch/powerpc/kernel/syscalls/syscall.tbl | 22 +-
> 15 files changed, 307 insertions(+), 388 deletions(-)
> create mode 100644 arch/powerpc/kernel/syscall_64.c
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation
2019-08-28 9:02 ` Christophe Leroy
@ 2019-08-28 9:32 ` Nicholas Piggin
0 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-28 9:32 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Christophe Leroy's on August 28, 2019 7:02 pm:
>
>
> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>> powerpc has an optimisation where interrupts avoid saving the
>> non-volatile (or callee saved) registers to the interrupt stack frame if
>> they are not required.
>>
>> Two problems with this are that an interrupt does not always know
>> whether it will need non-volatiles; and if it does need them, they can
>> only be saved from the entry-scoped asm code (because we don't control
>> what the C compiler does with these registers).
>>
>> system calls are the most difficult: some system calls always require
>> all registers (e.g., fork, to copy regs into the child). Sometimes
>> registers are only required under certain conditions (e.g., tracing,
>> signal delivery). These cases require ugly logic in the call chains
>> (e.g., ppc_fork), and require a lot of logic to be implemented in asm.
>
> Do you really find it ugly to just call function nvgprs() before calling
> sys_fork() ? I guess there are things a lot uglier.
That's not the ugly part, the ugly part is trashing the link register
and then branching directly to where it was supposed to return, which
is bad for any CPU which has a return predictor so we try to eliminate
it from the ppc64 kernel.
>> So remove the optimisation for system calls, and always save NVGPRs on
>> entry. Modern high performance CPUs are not so sensitive, because the
>> stores are dense in cache and can be hidden by other expensive work in
>> the syscall path -- the null syscall selftests benchmark on POWER9 is
>> not slowed (124.40ns before and 123.64ns after, i.e., within the noise).
>
> I did the test on PPC32:
>
> On an 885, null_syscall reports 2227ns (132MHz)
> If saving non-volatile regs, it goes to 2419, ie +8.6%
>
> On an 8321, null_syscall reports 1021ns (333MHz)
> If saving non-volatile regs, it goes to 1100, ie +7.7%
>
> So unless going to C compensates this degradation, I guess it is not
> worth it on PPC32.
Yeah that's unfortunate. It is a good optimization for small cores.
I doubt going to C would help for PPC32, probably be even slower.
>>
>> -/* Save non-volatile GPRs, if not already saved. */
>> -_GLOBAL(save_nvgprs)
>> - ld r11,_TRAP(r1)
>> - andi. r0,r11,1
>> - beqlr-
>> - SAVE_NVGPRS(r1)
>> - clrrdi r0,r11,1
>> - std r0,_TRAP(r1)
>> - blr
>> -_ASM_NOKPROBE_SYMBOL(save_nvgprs);
>
> I see it is added back somewhere below. Why don't you leave it where it is ?
No longer used by syscalls so I it out from between other syscall
related code to improve icache.
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-28 6:51 ` Christophe Leroy
@ 2019-08-28 9:41 ` Nicholas Piggin
0 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-28 9:41 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Christophe Leroy's on August 28, 2019 4:51 pm:
>
>
> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>> -#include <asm/reg.h>
>> +#include <asm/mmu.h>
>> +#include <asm/ptrace.h>
>> +
>> +static inline void kuap_check_amr(void)
>> +{
>> +#ifdef CONFIG_PPC_KUAP_DEBUG
>> + if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
>
> Better:
>
> if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) &&
> mmu_has_feature(MMU_FTR_RADIX_KUAP))
That is better.
>> + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
>> +#endif
>> +}
>>
>> /*
>> * We support individually allowing read or write, but we don't support nesting
>> diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
>> index 2431b4ada2fa..f3aa9db1a3cc 100644
>> --- a/arch/powerpc/include/asm/cputime.h
>> +++ b/arch/powerpc/include/asm/cputime.h
>> @@ -60,6 +60,28 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
>> }
>> #endif
>>
>> +static inline void account_cpu_user_entry(void)
>> +{
>> + unsigned long tb = mftb();
>> +
>> + get_accounting(current)->utime += (tb - get_accounting(current)->starttime_user);
>> + get_accounting(current)->starttime = tb;
>> +}
>
> Can you check the generated assembly ? I remember having bad result with
> get_accouting() being used several times in a arch_vtime_task_switch()
> before commit 60f1d2893ee6 ("powerpc/time: inline
> arch_vtime_task_switch()")
It's fine on 64s but it's accounting is a constant offset from r13 so
simple load/store can be done.
> Regardless, I think it would look better as:
>
> static inline void account_cpu_user_entry(void)
> {
> unsigned long tb = mftb();
> struct cpu_accounting_data *acct = get_accounting(current);
>
> acct->utime += (tb - acct->starttime_user);
> acct->starttime = tb;
> }
Yeah that's nicer.
>
>> +static inline void account_cpu_user_exit(void)
>> +{
>> + unsigned long tb = mftb();
>> +
>> + get_accounting(current)->stime += (tb - get_accounting(current)->starttime);
>> + get_accounting(current)->starttime_user = tb;
>> +}
>
> Same here.
Will do.
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-28 9:06 ` [PATCH v2 0/4] powerpc/64: syscalls " Christophe Leroy
@ 2019-08-28 9:49 ` Nicholas Piggin
2019-08-28 9:55 ` Christophe Leroy
0 siblings, 1 reply; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-28 9:49 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Christophe Leroy's on August 28, 2019 7:06 pm:
>
>
> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>> Accounted for some feedback.
>>
>> Nicholas Piggin (4):
>> powerpc: convert to copy_thread_tls
>> powerpc/64: remove support for kernel-mode syscalls
>> powerpc/64: system call remove non-volatile GPR save optimisation
>> powerpc/64: system call implement the bulk of the logic in C
>
> Would it be possible to split in the following parts:
>
> 1/ Implement in C whatever can be implemented without removing
> non-volatile GPR save optimisation
> 2/ Remove non-volatile GPR save optimisation
> 3/ Implement in C everything else
Hmm. I'll have a look but I would rather not go back and add the
intermediate state I was hoping to avoid. I'll think about it and
if it's not too difficult I will try to add something. I have an
idea.
With your nvregs performance test on ppc32, are you doing the
nvgpr restore? The fast path should be able to avoid that.
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-28 9:49 ` Nicholas Piggin
@ 2019-08-28 9:55 ` Christophe Leroy
2019-08-29 9:38 ` Nicholas Piggin
0 siblings, 1 reply; 22+ messages in thread
From: Christophe Leroy @ 2019-08-28 9:55 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Le 28/08/2019 à 11:49, Nicholas Piggin a écrit :
> Christophe Leroy's on August 28, 2019 7:06 pm:
>>
>>
>> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>>> Accounted for some feedback.
>>>
>>> Nicholas Piggin (4):
>>> powerpc: convert to copy_thread_tls
>>> powerpc/64: remove support for kernel-mode syscalls
>>> powerpc/64: system call remove non-volatile GPR save optimisation
>>> powerpc/64: system call implement the bulk of the logic in C
>>
>> Would it be possible to split in the following parts:
>>
>> 1/ Implement in C whatever can be implemented without removing
>> non-volatile GPR save optimisation
>> 2/ Remove non-volatile GPR save optimisation
>> 3/ Implement in C everything else
>
> Hmm. I'll have a look but I would rather not go back and add the
> intermediate state I was hoping to avoid. I'll think about it and
> if it's not too difficult I will try to add something. I have an
> idea.
>
> With your nvregs performance test on ppc32, are you doing the
> nvgpr restore? The fast path should be able to avoid that.
I only added the SAVE_NVGPRS call in the syscall entry macro just after
the saving of volatile regs, and changed the trap from \trapno+1 to \trapno
Christophe
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
2019-08-28 6:51 ` Christophe Leroy
@ 2019-08-28 15:30 ` Michal Suchánek
2019-08-28 22:19 ` Nicholas Piggin
2019-08-30 18:48 ` kbuild test robot
2 siblings, 1 reply; 22+ messages in thread
From: Michal Suchánek @ 2019-08-28 15:30 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
On Tue, 27 Aug 2019 23:55:48 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> System call entry and particularly exit code is beyond the limit of what
> is reasonable to implement in asm.
>
> This conversion moves all conditional branches out of the asm code,
> except for the case that all GPRs should be restored at exit.
>
> Null syscall test is about 5% faster after this patch, because the exit
> work is handled under local_irq_disable, and the hard mask and pending
> interrupt replay is handled after that, which avoids games with MSR.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> Changes since v1:
> - Improve changelog
> - Lot of code cleanups, moving helpers out to proper header locations,
> etc (Christophe).
> - Split unnecessary change that affected ppc32 out. I will submit it
> independently (Christophe).
>
> arch/powerpc/include/asm/asm-prototypes.h | 11 -
> .../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
> arch/powerpc/include/asm/cputime.h | 22 ++
> arch/powerpc/include/asm/ptrace.h | 3 +
> arch/powerpc/include/asm/signal.h | 2 +
> arch/powerpc/include/asm/switch_to.h | 5 +
> arch/powerpc/include/asm/time.h | 3 +
> arch/powerpc/kernel/Makefile | 3 +-
> arch/powerpc/kernel/entry_64.S | 340 +++---------------
> arch/powerpc/kernel/signal.h | 2 -
> arch/powerpc/kernel/syscall_64.c | 177 +++++++++
> 11 files changed, 273 insertions(+), 307 deletions(-)
> create mode 100644 arch/powerpc/kernel/syscall_64.c
>
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
> index ec1c97a8e8cb..f00ef8924a99 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -92,14 +92,6 @@ long sys_switch_endian(void);
> notrace unsigned int __check_irq_replay(void);
> void notrace restore_interrupts(void);
>
> -/* ptrace */
> -long do_syscall_trace_enter(struct pt_regs *regs);
> -void do_syscall_trace_leave(struct pt_regs *regs);
> -
> -/* process */
> -void restore_math(struct pt_regs *regs);
> -void restore_tm_state(struct pt_regs *regs);
> -
> /* prom_init (OpenFirmware) */
> unsigned long __init prom_init(unsigned long r3, unsigned long r4,
> unsigned long pp,
> @@ -110,9 +102,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
> void __init early_setup(unsigned long dt_ptr);
> void early_setup_secondary(void);
>
> -/* time */
> -void accumulate_stolen_time(void);
> -
> /* misc runtime */
> extern u64 __bswapdi2(u64);
> extern s64 __lshrdi3(s64, int);
> diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> index f254de956d6a..ef2e65ea8a73 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> @@ -3,6 +3,7 @@
> #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
>
> #include <linux/const.h>
> +#include <asm/reg.h>
>
> #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
> #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
> @@ -56,7 +57,16 @@
>
> #ifdef CONFIG_PPC_KUAP
>
> -#include <asm/reg.h>
> +#include <asm/mmu.h>
> +#include <asm/ptrace.h>
> +
> +static inline void kuap_check_amr(void)
> +{
> +#ifdef CONFIG_PPC_KUAP_DEBUG
> + if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
> + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
> +#endif
> +}
>
> /*
> * We support individually allowing read or write, but we don't support nesting
> diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
> index 2431b4ada2fa..f3aa9db1a3cc 100644
> --- a/arch/powerpc/include/asm/cputime.h
> +++ b/arch/powerpc/include/asm/cputime.h
> @@ -60,6 +60,28 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
> }
> #endif
>
> +static inline void account_cpu_user_entry(void)
> +{
> + unsigned long tb = mftb();
> +
> + get_accounting(current)->utime += (tb - get_accounting(current)->starttime_user);
> + get_accounting(current)->starttime = tb;
> +}
> +static inline void account_cpu_user_exit(void)
> +{
> + unsigned long tb = mftb();
> +
> + get_accounting(current)->stime += (tb - get_accounting(current)->starttime);
> + get_accounting(current)->starttime_user = tb;
> +}
> +
> #endif /* __KERNEL__ */
> +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
> +static inline void account_cpu_user_entry(void)
> +{
> +}
> +static inline void account_cpu_user_exit(void)
> +{
> +}
> #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
> #endif /* __POWERPC_CPUTIME_H */
> diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
> index feee1b21bbd5..af363086403a 100644
> --- a/arch/powerpc/include/asm/ptrace.h
> +++ b/arch/powerpc/include/asm/ptrace.h
> @@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs);
> #define profile_pc(regs) instruction_pointer(regs)
> #endif
>
> +long do_syscall_trace_enter(struct pt_regs *regs);
> +void do_syscall_trace_leave(struct pt_regs *regs);
> +
> #define kernel_stack_pointer(regs) ((regs)->gpr[1])
> static inline int is_syscall_success(struct pt_regs *regs)
> {
> diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
> index 0803ca8b9149..0113be8dcb59 100644
> --- a/arch/powerpc/include/asm/signal.h
> +++ b/arch/powerpc/include/asm/signal.h
> @@ -6,4 +6,6 @@
> #include <uapi/asm/signal.h>
> #include <uapi/asm/ptrace.h>
>
> +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
> +
/srv/kernel/arch/powerpc/include/asm/signal.h:9:30: warning: ‘struct pt_regs’ declared inside parameter list will not be visible outside of this definition or declaration
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
uapi/asm/ptrace.h defines user_pt_regs and asm/ptrace.h pt_regs.
I am not really sure which you wanted.
Thanks
Michal
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-28 15:30 ` Michal Suchánek
@ 2019-08-28 22:19 ` Nicholas Piggin
0 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-28 22:19 UTC (permalink / raw)
To: Michal Suchánek; +Cc: linuxppc-dev
Michal Suchánek's on August 29, 2019 1:30 am:
> On Tue, 27 Aug 2019 23:55:48 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
>
>> System call entry and particularly exit code is beyond the limit of what
>> is reasonable to implement in asm.
>>
>> This conversion moves all conditional branches out of the asm code,
>> except for the case that all GPRs should be restored at exit.
>>
>> Null syscall test is about 5% faster after this patch, because the exit
>> work is handled under local_irq_disable, and the hard mask and pending
>> interrupt replay is handled after that, which avoids games with MSR.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> Changes since v1:
>> - Improve changelog
>> - Lot of code cleanups, moving helpers out to proper header locations,
>> etc (Christophe).
>> - Split unnecessary change that affected ppc32 out. I will submit it
>> independently (Christophe).
>>
>> arch/powerpc/include/asm/asm-prototypes.h | 11 -
>> .../powerpc/include/asm/book3s/64/kup-radix.h | 12 +-
>> arch/powerpc/include/asm/cputime.h | 22 ++
>> arch/powerpc/include/asm/ptrace.h | 3 +
>> arch/powerpc/include/asm/signal.h | 2 +
>> arch/powerpc/include/asm/switch_to.h | 5 +
>> arch/powerpc/include/asm/time.h | 3 +
>> arch/powerpc/kernel/Makefile | 3 +-
>> arch/powerpc/kernel/entry_64.S | 340 +++---------------
>> arch/powerpc/kernel/signal.h | 2 -
>> arch/powerpc/kernel/syscall_64.c | 177 +++++++++
>> 11 files changed, 273 insertions(+), 307 deletions(-)
>> create mode 100644 arch/powerpc/kernel/syscall_64.c
>>
>> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
>> index ec1c97a8e8cb..f00ef8924a99 100644
>> --- a/arch/powerpc/include/asm/asm-prototypes.h
>> +++ b/arch/powerpc/include/asm/asm-prototypes.h
>> @@ -92,14 +92,6 @@ long sys_switch_endian(void);
>> notrace unsigned int __check_irq_replay(void);
>> void notrace restore_interrupts(void);
>>
>> -/* ptrace */
>> -long do_syscall_trace_enter(struct pt_regs *regs);
>> -void do_syscall_trace_leave(struct pt_regs *regs);
>> -
>> -/* process */
>> -void restore_math(struct pt_regs *regs);
>> -void restore_tm_state(struct pt_regs *regs);
>> -
>> /* prom_init (OpenFirmware) */
>> unsigned long __init prom_init(unsigned long r3, unsigned long r4,
>> unsigned long pp,
>> @@ -110,9 +102,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
>> void __init early_setup(unsigned long dt_ptr);
>> void early_setup_secondary(void);
>>
>> -/* time */
>> -void accumulate_stolen_time(void);
>> -
>> /* misc runtime */
>> extern u64 __bswapdi2(u64);
>> extern s64 __lshrdi3(s64, int);
>> diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
>> index f254de956d6a..ef2e65ea8a73 100644
>> --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
>> +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
>> @@ -3,6 +3,7 @@
>> #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
>>
>> #include <linux/const.h>
>> +#include <asm/reg.h>
>>
>> #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
>> #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
>> @@ -56,7 +57,16 @@
>>
>> #ifdef CONFIG_PPC_KUAP
>>
>> -#include <asm/reg.h>
>> +#include <asm/mmu.h>
>> +#include <asm/ptrace.h>
>> +
>> +static inline void kuap_check_amr(void)
>> +{
>> +#ifdef CONFIG_PPC_KUAP_DEBUG
>> + if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
>> + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
>> +#endif
>> +}
>>
>> /*
>> * We support individually allowing read or write, but we don't support nesting
>> diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
>> index 2431b4ada2fa..f3aa9db1a3cc 100644
>> --- a/arch/powerpc/include/asm/cputime.h
>> +++ b/arch/powerpc/include/asm/cputime.h
>> @@ -60,6 +60,28 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
>> }
>> #endif
>>
>> +static inline void account_cpu_user_entry(void)
>> +{
>> + unsigned long tb = mftb();
>> +
>> + get_accounting(current)->utime += (tb - get_accounting(current)->starttime_user);
>> + get_accounting(current)->starttime = tb;
>> +}
>> +static inline void account_cpu_user_exit(void)
>> +{
>> + unsigned long tb = mftb();
>> +
>> + get_accounting(current)->stime += (tb - get_accounting(current)->starttime);
>> + get_accounting(current)->starttime_user = tb;
>> +}
>> +
>> #endif /* __KERNEL__ */
>> +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
>> +static inline void account_cpu_user_entry(void)
>> +{
>> +}
>> +static inline void account_cpu_user_exit(void)
>> +{
>> +}
>> #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
>> #endif /* __POWERPC_CPUTIME_H */
>> diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
>> index feee1b21bbd5..af363086403a 100644
>> --- a/arch/powerpc/include/asm/ptrace.h
>> +++ b/arch/powerpc/include/asm/ptrace.h
>> @@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs);
>> #define profile_pc(regs) instruction_pointer(regs)
>> #endif
>>
>> +long do_syscall_trace_enter(struct pt_regs *regs);
>> +void do_syscall_trace_leave(struct pt_regs *regs);
>> +
>> #define kernel_stack_pointer(regs) ((regs)->gpr[1])
>> static inline int is_syscall_success(struct pt_regs *regs)
>> {
>> diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
>> index 0803ca8b9149..0113be8dcb59 100644
>> --- a/arch/powerpc/include/asm/signal.h
>> +++ b/arch/powerpc/include/asm/signal.h
>> @@ -6,4 +6,6 @@
>> #include <uapi/asm/signal.h>
>> #include <uapi/asm/ptrace.h>
>>
>> +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
>> +
> /srv/kernel/arch/powerpc/include/asm/signal.h:9:30: warning: ‘struct pt_regs’ declared inside parameter list will not be visible outside of this definition or declaration
> void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
Thanks I didn't see that.
> uapi/asm/ptrace.h defines user_pt_regs and asm/ptrace.h pt_regs.
>
> I am not really sure which you wanted.
pt_regs. That is the struct that gets saved on the kernel stack by
interrupts. user_pt_regs is part of the ptrace user ABI.
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-28 9:55 ` Christophe Leroy
@ 2019-08-29 9:38 ` Nicholas Piggin
2019-08-29 10:45 ` Nicholas Piggin
2019-08-29 11:51 ` Segher Boessenkool
0 siblings, 2 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-29 9:38 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Christophe Leroy's on August 28, 2019 7:55 pm:
>
>
> Le 28/08/2019 à 11:49, Nicholas Piggin a écrit :
>> Christophe Leroy's on August 28, 2019 7:06 pm:
>>>
>>>
>>> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>>>> Accounted for some feedback.
>>>>
>>>> Nicholas Piggin (4):
>>>> powerpc: convert to copy_thread_tls
>>>> powerpc/64: remove support for kernel-mode syscalls
>>>> powerpc/64: system call remove non-volatile GPR save optimisation
>>>> powerpc/64: system call implement the bulk of the logic in C
>>>
>>> Would it be possible to split in the following parts:
>>>
>>> 1/ Implement in C whatever can be implemented without removing
>>> non-volatile GPR save optimisation
>>> 2/ Remove non-volatile GPR save optimisation
>>> 3/ Implement in C everything else
>>
>> Hmm. I'll have a look but I would rather not go back and add the
>> intermediate state I was hoping to avoid. I'll think about it and
>> if it's not too difficult I will try to add something. I have an
>> idea.
>>
>> With your nvregs performance test on ppc32, are you doing the
>> nvgpr restore? The fast path should be able to avoid that.
>
> I only added the SAVE_NVGPRS call in the syscall entry macro just after
> the saving of volatile regs, and changed the trap from \trapno+1 to \trapno
So... this actually seems to work. Haven't booted it, but the compiler
seems to do what we want.
This may be the way to go for ppc32 I think. I had a look at various
ways you could save nvgprs with some early tests and returning early
from the C call if it hits trouble without all registers saved. Most of
it becomes quite ugly.
Thanks,
Nick
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index d42519b86ddd..b11346447882 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -14,6 +14,52 @@ extern void __noreturn tabort_syscall(void);
typedef long (*syscall_fn)(long, long, long, long, long, long);
+register unsigned long r31 asm("r31");
+register unsigned long r30 asm("r30");
+register unsigned long r29 asm("r29");
+register unsigned long r28 asm("r28");
+register unsigned long r27 asm("r27");
+register unsigned long r26 asm("r26");
+register unsigned long r25 asm("r25");
+register unsigned long r24 asm("r24");
+register unsigned long r23 asm("r23");
+register unsigned long r22 asm("r22");
+register unsigned long r21 asm("r21");
+register unsigned long r20 asm("r20");
+register unsigned long r19 asm("r19");
+register unsigned long r18 asm("r18");
+register unsigned long r17 asm("r17");
+register unsigned long r16 asm("r16");
+register unsigned long r15 asm("r15");
+register unsigned long r14 asm("r14");
+
+static inline void save_nvgprs(struct pt_regs *regs)
+{
+ if (!(regs->trap & 1))
+ return;
+
+ regs->gpr[14] = r14;
+ regs->gpr[15] = r15;
+ regs->gpr[16] = r16;
+ regs->gpr[17] = r17;
+ regs->gpr[18] = r18;
+ regs->gpr[19] = r19;
+ regs->gpr[20] = r20;
+ regs->gpr[21] = r21;
+ regs->gpr[22] = r22;
+ regs->gpr[23] = r23;
+ regs->gpr[24] = r24;
+ regs->gpr[25] = r25;
+ regs->gpr[26] = r26;
+ regs->gpr[27] = r27;
+ regs->gpr[28] = r28;
+ regs->gpr[29] = r29;
+ regs->gpr[30] = r30;
+ regs->gpr[31] = r31;
+
+ regs->trap &= 0x1;
+}
+
long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs)
{
unsigned long ti_flags;
@@ -66,6 +112,7 @@ long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
* do_syscall_trace_enter() returns an invalid syscall number
* and the test below against NR_syscalls will fail.
*/
+ save_nvgprs(regs);
r0 = do_syscall_trace_enter(regs);
}
@@ -132,6 +179,7 @@ unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
if (ti_flags & _TIF_NEED_RESCHED) {
schedule();
} else {
+ save_nvgprs(regs);
/*
* SIGPENDING must restore signal handler function
* argument GPRs, and some non-volatiles (e.g., r1).
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-29 9:38 ` Nicholas Piggin
@ 2019-08-29 10:45 ` Nicholas Piggin
2019-08-29 11:51 ` Segher Boessenkool
1 sibling, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-29 10:45 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Nicholas Piggin's on August 29, 2019 7:38 pm:
> Christophe Leroy's on August 28, 2019 7:55 pm:
>>
>>
>> Le 28/08/2019 à 11:49, Nicholas Piggin a écrit :
>>> Christophe Leroy's on August 28, 2019 7:06 pm:
>>>>
>>>>
>>>> Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
>>>>> Accounted for some feedback.
>>>>>
>>>>> Nicholas Piggin (4):
>>>>> powerpc: convert to copy_thread_tls
>>>>> powerpc/64: remove support for kernel-mode syscalls
>>>>> powerpc/64: system call remove non-volatile GPR save optimisation
>>>>> powerpc/64: system call implement the bulk of the logic in C
>>>>
>>>> Would it be possible to split in the following parts:
>>>>
>>>> 1/ Implement in C whatever can be implemented without removing
>>>> non-volatile GPR save optimisation
>>>> 2/ Remove non-volatile GPR save optimisation
>>>> 3/ Implement in C everything else
>>>
>>> Hmm. I'll have a look but I would rather not go back and add the
>>> intermediate state I was hoping to avoid. I'll think about it and
>>> if it's not too difficult I will try to add something. I have an
>>> idea.
>>>
>>> With your nvregs performance test on ppc32, are you doing the
>>> nvgpr restore? The fast path should be able to avoid that.
>>
>> I only added the SAVE_NVGPRS call in the syscall entry macro just after
>> the saving of volatile regs, and changed the trap from \trapno+1 to \trapno
>
> So... this actually seems to work. Haven't booted it, but the compiler
> seems to do what we want.
>
Here's a really quick start for ppc32. The interrupt handling is
different enough it may be hard to merge entirely with ppc64, but
it's not really much code anyway.
Unfortunately can't restore full registers using the same method,
because we have some others like lr and cr, so the exit still must
return a code to asm.
---
arch/powerpc/kernel/Makefile | 2 +-
arch/powerpc/kernel/syscall_32.c | 167 +++++++++++++++++++++++++++++++
2 files changed, 168 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/kernel/syscall_32.c
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7f53cc07f933..83d5808654ec 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -107,7 +107,7 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o syscall_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
diff --git a/arch/powerpc/kernel/syscall_32.c b/arch/powerpc/kernel/syscall_32.c
new file mode 100644
index 000000000000..ff37edac76c8
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_32.c
@@ -0,0 +1,167 @@
+#include <linux/err.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+register unsigned long r31 asm("r31");
+register unsigned long r30 asm("r30");
+register unsigned long r29 asm("r29");
+register unsigned long r28 asm("r28");
+register unsigned long r27 asm("r27");
+register unsigned long r26 asm("r26");
+register unsigned long r25 asm("r25");
+register unsigned long r24 asm("r24");
+register unsigned long r23 asm("r23");
+register unsigned long r22 asm("r22");
+register unsigned long r21 asm("r21");
+register unsigned long r20 asm("r20");
+register unsigned long r19 asm("r19");
+register unsigned long r18 asm("r18");
+register unsigned long r17 asm("r17");
+register unsigned long r16 asm("r16");
+register unsigned long r15 asm("r15");
+register unsigned long r14 asm("r14");
+register unsigned long r13 asm("r13");
+
+static void save_nvgprs(struct pt_regs *regs)
+{
+ if (!(regs->trap & 1))
+ return;
+ regs->trap &= ~0x1;
+
+ asm volatile("stmw 13, %0" : : "m"(regs->gpr[13]) : "memory");
+}
+
+long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+ syscall_fn f;
+
+ BUG_ON(irqs_disabled());
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!(regs->msr & MSR_EE));
+
+ ti_flags = current_thread_info()->flags;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test below against NR_syscalls will fail.
+ */
+ save_nvgprs(regs);
+ r0 = do_syscall_trace_enter(regs);
+ }
+
+ if (unlikely(r0 >= NR_syscalls))
+ return -ENOSYS;
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ f = (void *)sys_call_table[r0];
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+static inline void load_dbcr0(void)
+{
+ /* blah */
+}
+
+unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = *ti_flagsp;
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ save_nvgprs(regs);
+ do_syscall_trace_leave(regs);
+ }
+
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & _TIF_USER_WORK_MASK)) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ save_nvgprs(regs);
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ WARN_ON(!(regs->msr & MSR_EE)); /* don't do this */
+
+ /* Tell lockdep IRQs are being enabled when we RFI */
+ trace_hardirqs_on();
+
+#if 0
+ if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) {
+ struct thread_struct *t = &current->thread;
+
+ /*
+ * If the process has its own DBCR0 value, load it up. The
+ * internal debug mode bit tells us that dbcr0 should be
+ * loaded.
+ */
+ if (unlikely(t->debug.dbcr0 & DBCR0_IDM))
+ load_dbcr0();
+ }
+
+ if (IS_ENABLED(CONFIG_4xx) && !mmu_has_feature(MMU_FTR_TYPE_47x)) {
+ if (unlikely(icache_44x_need_flush))
+ flush_icache_44x();
+ }
+
+ if (IS_ENABLED(PPC_BOOK3S_32))
+ kuep_unlock();
+
+ kuap_check();
+#endif
+
+ account_cpu_user_exit();
+
+ return ret;
+}
--
2.22.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-29 9:38 ` Nicholas Piggin
2019-08-29 10:45 ` Nicholas Piggin
@ 2019-08-29 11:51 ` Segher Boessenkool
2019-08-29 15:49 ` Nicholas Piggin
1 sibling, 1 reply; 22+ messages in thread
From: Segher Boessenkool @ 2019-08-29 11:51 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
On Thu, Aug 29, 2019 at 07:38:01PM +1000, Nicholas Piggin wrote:
> So... this actually seems to work. Haven't booted it, but the compiler
> seems to do what we want.
From the GCC manual:
After defining a global register variable, for the current compilation
unit:
* If the register is a call-saved register, call ABI is affected: the
register will not be restored in function epilogue sequences after
the variable has been assigned. Therefore, functions cannot safely
return to callers that assume standard ABI.
and
* Accesses to the variable may be optimized as usual and the register
remains available for allocation and use in any computations,
provided that observable values of the variable are not affected.
This doesn't do what you think, or what you want, or what you think you
want ;-)
(And if you make all those regs -ffixed-* you are in for a world of hurt).
Segher
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-29 11:51 ` Segher Boessenkool
@ 2019-08-29 15:49 ` Nicholas Piggin
2019-08-29 22:56 ` Nicholas Piggin
0 siblings, 1 reply; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-29 15:49 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: linuxppc-dev
Segher Boessenkool's on August 29, 2019 9:51 pm:
> On Thu, Aug 29, 2019 at 07:38:01PM +1000, Nicholas Piggin wrote:
>> So... this actually seems to work. Haven't booted it, but the compiler
>> seems to do what we want.
>
> From the GCC manual:
>
> After defining a global register variable, for the current compilation
> unit:
>
> * If the register is a call-saved register, call ABI is affected: the
> register will not be restored in function epilogue sequences after
> the variable has been assigned. Therefore, functions cannot safely
> return to callers that assume standard ABI.
>
> and
>
> * Accesses to the variable may be optimized as usual and the register
> remains available for allocation and use in any computations,
> provided that observable values of the variable are not affected.
>
> This doesn't do what you think, or what you want, or what you think you
> want ;-)
After reading gcc docs from gcc 4 to 9, I think it does.
We want this to apply to all functions in the compilation unit. It's
fine to use the regs temporarily, and so it's fine for called functions
in other units to call them (because they will be restored), and we
don't want them restored for our caller.
> (And if you make all those regs -ffixed-* you are in for a world of hurt).
From the look of it, -ffixed would be a little bit stronger in that it
will never allow the register to be used, whereas the global register
variable allows it to be allocated and used elsewhere so long as its
observable value is not affected. The latter is actually preferred
because it's fine for the compiler to use the regs if it wants to. It
does not even have to use the r15 register when I reference the r15 variable,
only whatever value it had.
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 0/4] powerpc/64: syscalls in C
2019-08-29 15:49 ` Nicholas Piggin
@ 2019-08-29 22:56 ` Nicholas Piggin
0 siblings, 0 replies; 22+ messages in thread
From: Nicholas Piggin @ 2019-08-29 22:56 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: linuxppc-dev
Nicholas Piggin's on August 30, 2019 1:49 am:
>> This doesn't do what you think, or what you want, or what you think you
>> want ;-)
>
> After reading gcc docs from gcc 4 to 9, I think it does.
>
> We want this to apply to all functions in the compilation unit. It's
> fine to use the regs temporarily, and so it's fine for called functions
> in other units to call them (because they will be restored), and we
> don't want them restored for our caller.
>
>> (And if you make all those regs -ffixed-* you are in for a world of hurt).
>
> From the look of it, -ffixed would be a little bit stronger in that it
> will never allow the register to be used, whereas the global register
> variable allows it to be allocated and used elsewhere so long as its
> observable value is not affected. The latter is actually preferred
> because it's fine for the compiler to use the regs if it wants to. It
> does not even have to use r15 register when I reference r15 variable,
> only whatever value it had.
Ah, of course that means all of them need to be inputs to the stmw
asm (which makes sense and is documented in the gcc-9 doc, which is when
the compiler was allowed to allocate them for other things).
Thanks,
Nick
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
2019-08-28 6:51 ` Christophe Leroy
2019-08-28 15:30 ` Michal Suchánek
@ 2019-08-30 18:48 ` kbuild test robot
2019-08-30 20:04 ` Michal Suchánek
2 siblings, 1 reply; 22+ messages in thread
From: kbuild test robot @ 2019-08-30 18:48 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev, kbuild-all, Nicholas Piggin
[-- Attachment #1: Type: text/plain, Size: 1258 bytes --]
Hi Nicholas,
I love your patch! Yet something to improve:
[auto build test ERROR on linus/master]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-syscalls-in-C/20190828-064221
config: powerpc64-defconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 7.4.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.4.0 make.cross ARCH=powerpc64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
powerpc64-linux-ld: warning: orphan section `.gnu.hash' from `linker stubs' being placed in section `.gnu.hash'.
arch/powerpc/kernel/syscall_64.o: In function `.system_call_exception':
>> syscall_64.c:(.text+0x180): undefined reference to `.tabort_syscall'
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25347 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-30 18:48 ` kbuild test robot
@ 2019-08-30 20:04 ` Michal Suchánek
2019-09-02 10:20 ` Michael Ellerman
0 siblings, 1 reply; 22+ messages in thread
From: Michal Suchánek @ 2019-08-30 20:04 UTC (permalink / raw)
To: kbuild test robot; +Cc: linuxppc-dev, kbuild-all, Nicholas Piggin
On Sat, 31 Aug 2019 02:48:26 +0800
kbuild test robot <lkp@intel.com> wrote:
> Hi Nicholas,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on linus/master]
> [cannot apply to v5.3-rc6 next-20190830]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-syscalls-in-C/20190828-064221
> config: powerpc64-defconfig (attached as .config)
> compiler: powerpc64-linux-gcc (GCC) 7.4.0
> reproduce:
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> GCC_VERSION=7.4.0 make.cross ARCH=powerpc64
>
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
> powerpc64-linux-ld: warning: orphan section `.gnu.hash' from `linker stubs' being placed in section `.gnu.hash'.
> arch/powerpc/kernel/syscall_64.o: In function `.system_call_exception':
> >> syscall_64.c:(.text+0x180): undefined reference to `.tabort_syscall'
Interestingly it builds and boots for me. Is this something about
dotted vs dotless symbols depending on some config options?
I see there is _GLOBAL(ret_from_fork) just below so maybe we need
_GLOBAL(tabort_syscall) instead of .globl tabort_syscall as well.
Thanks
Michal
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C
2019-08-30 20:04 ` Michal Suchánek
@ 2019-09-02 10:20 ` Michael Ellerman
0 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2019-09-02 10:20 UTC (permalink / raw)
To: Michal Suchánek, kbuild test robot
Cc: linuxppc-dev, kbuild-all, Nicholas Piggin
Michal Suchánek <msuchanek@suse.de> writes:
> On Sat, 31 Aug 2019 02:48:26 +0800
> kbuild test robot <lkp@intel.com> wrote:
>
>> Hi Nicholas,
>>
>> I love your patch! Yet something to improve:
>>
>> [auto build test ERROR on linus/master]
>> [cannot apply to v5.3-rc6 next-20190830]
>> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>>
>> url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-syscalls-in-C/20190828-064221
>> config: powerpc64-defconfig (attached as .config)
>> compiler: powerpc64-linux-gcc (GCC) 7.4.0
>> reproduce:
>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>> chmod +x ~/bin/make.cross
>> # save the attached .config to linux build tree
>> GCC_VERSION=7.4.0 make.cross ARCH=powerpc64
>>
>> If you fix the issue, kindly add following tag
>> Reported-by: kbuild test robot <lkp@intel.com>
>>
>> All errors (new ones prefixed by >>):
>>
>> powerpc64-linux-ld: warning: orphan section `.gnu.hash' from `linker stubs' being placed in section `.gnu.hash'.
>> arch/powerpc/kernel/syscall_64.o: In function `.system_call_exception':
>> >> syscall_64.c:(.text+0x180): undefined reference to `.tabort_syscall'
>
> Interestingly it builds and boots for me. Is this something about
> dotted vs dotless symbols depending on some config options?
It's the big endian build that fails, which is ELFv1, and the linker is
saying it can't find a function called `.tabort_syscall` - which is
understandable because there isn't one. There's a text address called
`tabort_syscall` but it has no function descriptor so you can't call it
normally from C.
> I see there is _GLOBAL(ret_from_fork) just below so maybe we need
> _GLOBAL(tabort_syscall) instead of .globl tabort_syscall as well.
Yes, on ELFv1 the _GLOBAL macros creates a function descriptor for you.
This fixes it for me:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 82bcb9a68172..8f2735da205d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -170,8 +170,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_common);
b .Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- .globl tabort_syscall
-tabort_syscall:
+_GLOBAL(tabort_syscall)
/* Firstly we need to enable TM in the kernel */
mfmsr r10
li r9, 1
cheers
^ permalink raw reply related [flat|nested] 22+ messages in thread
end of thread, other threads:[~2019-09-02 10:22 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-27 13:55 [PATCH v2 0/4] powerpc/64: syscalls in C Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 1/4] powerpc: convert to copy_thread_tls Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 2/4] powerpc/64: remove support for kernel-mode syscalls Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation Nicholas Piggin
2019-08-28 9:02 ` Christophe Leroy
2019-08-28 9:32 ` Nicholas Piggin
2019-08-27 13:55 ` [PATCH v2 4/4] powerpc/64: system call implement the bulk of the logic in C Nicholas Piggin
2019-08-28 6:51 ` Christophe Leroy
2019-08-28 9:41 ` Nicholas Piggin
2019-08-28 15:30 ` Michal Suchánek
2019-08-28 22:19 ` Nicholas Piggin
2019-08-30 18:48 ` kbuild test robot
2019-08-30 20:04 ` Michal Suchánek
2019-09-02 10:20 ` Michael Ellerman
2019-08-28 9:06 ` [PATCH v2 0/4] powerpc/64: syscalls " Christophe Leroy
2019-08-28 9:49 ` Nicholas Piggin
2019-08-28 9:55 ` Christophe Leroy
2019-08-29 9:38 ` Nicholas Piggin
2019-08-29 10:45 ` Nicholas Piggin
2019-08-29 11:51 ` Segher Boessenkool
2019-08-29 15:49 ` Nicholas Piggin
2019-08-29 22:56 ` Nicholas Piggin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).