linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C
@ 2020-03-31 15:22 Christophe Leroy
  2020-04-01 11:48 ` Christophe Leroy
  0 siblings, 1 reply; 4+ messages in thread
From: Christophe Leroy @ 2020-03-31 15:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, msuchanek
  Cc: linux-kernel, linuxppc-dev

That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
I've do the minimum to get it work. I have not reworked calls
to sys_fork() and friends for instance.

For the time being, it seems to work more or less but:
- ping reports EINVAL on recvfrom
- strace shows NULL instead of strings in call like open() for instance.
- the performance is definitively bad

On an 8xx, null_syscall test is about 30% slower after this patch:
- Without the patch: 284 cycles
- With the patch: 371 cycles

@nick and others, any suggestion to fix and improve ?

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/book3s/32/kup.h      |  21 ++
 .../powerpc/include/asm/book3s/64/kup-radix.h |  12 +-
 arch/powerpc/include/asm/hw_irq.h             |  15 +
 arch/powerpc/include/asm/kup.h                |   2 +
 arch/powerpc/include/asm/nohash/32/kup-8xx.h  |  13 +
 arch/powerpc/kernel/Makefile                  |   5 +-
 arch/powerpc/kernel/entry_32.S                | 259 ++----------------
 arch/powerpc/kernel/head_32.h                 |   3 +-
 .../kernel/{syscall_64.c => syscall.c}        |  25 +-
 9 files changed, 102 insertions(+), 253 deletions(-)
 rename arch/powerpc/kernel/{syscall_64.c => syscall.c} (97%)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c0ba22dc360..c85bc5b56366 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,6 +102,27 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 	isync();	/* Context sync required after mtsrin() */
 }
 
+static inline void kuap_restore(struct pt_regs *regs)
+{
+	u32 kuap = current->thread.kuap;
+	u32 addr = kuap & 0xf0000000;
+	u32 end = kuap << 28;
+
+	if (unlikely(!kuap))
+		return;
+
+	current->thread.kuap = 0;
+	kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);	/* Clear Ks */
+}
+
+static inline void kuap_check(void)
+{
+	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		return;
+
+	WARN_ON_ONCE(current->thread.kuap != 0);
+}
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
 					      u32 size, unsigned long dir)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 3bcef989a35d..1f2716a0dcd8 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,13 +60,13 @@
 #include <asm/mmu.h>
 #include <asm/ptrace.h>
 
-static inline void kuap_restore_amr(struct pt_regs *regs)
+static inline void kuap_restore(struct pt_regs *regs)
 {
 	if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
 		mtspr(SPRN_AMR, regs->kuap);
 }
 
-static inline void kuap_check_amr(void)
+static inline void kuap_check(void)
 {
 	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
 		WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
@@ -141,14 +141,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
 		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
 }
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
 #endif /* CONFIG_PPC_KUAP */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index e0e71777961f..6ccf07de6665 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -321,6 +321,16 @@ static inline void arch_local_irq_disable(void)
 		mtmsr(mfmsr() & ~MSR_EE);
 }
 
+static inline void arch_local_recovery_disable(void)
+{
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(0);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_NRI);
+	else
+		mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
+}
+
 static inline void arch_local_irq_enable(void)
 {
 	if (IS_ENABLED(CONFIG_BOOKE))
@@ -343,6 +353,11 @@ static inline bool arch_irqs_disabled(void)
 
 #define hard_irq_disable()		arch_local_irq_disable()
 
+#define __hard_irq_enable()		arch_local_irq_enable()
+#define __hard_irq_disable()		arch_local_irq_disable()
+#define __hard_EE_RI_disable()		arch_local_recovery_disable()
+#define __hard_RI_enable()		arch_local_irq_disable()
+
 static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
 {
 	return !(regs->msr & MSR_EE);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 92bcd1a26d73..1100c13b6d9e 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,6 +62,8 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return false;
 }
+static inline void kuap_restore(struct pt_regs *regs) { }
+static inline void kuap_check(void) { }
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 85ed2390fb99..1918d2e55da3 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,6 +34,19 @@
 
 #include <asm/reg.h>
 
+static inline void kuap_restore(struct pt_regs *regs)
+{
+	mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline void kuap_check(void)
+{
+	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		return;
+
+	WARN_ON_ONCE((mfspr(SPRN_MD_AP) & 0xffff0000) != (MD_APG_KUAP & 0xffff0000));
+}
+
 static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size, unsigned long dir)
 {
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 570660efbb3d..e4be425b7718 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -45,11 +45,10 @@ obj-y				:= cputable.o syscalls.o \
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
-				   of_platform.o prom_parse.o
+				   of_platform.o prom_parse.o syscall.o
 obj-y				+= ptrace/
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o signal_64.o \
-				   paca.o nvram_64.o firmware.o note.o \
-				   syscall_64.o
+				   paca.o nvram_64.o firmware.o note.o
 obj-$(CONFIG_VDSO32)		+= vdso32/
 obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a6371fb8f761..103f5158bc44 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -315,162 +315,37 @@ stack_ovf:
 	RFI
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-trace_syscall_entry_irq_off:
-	/*
-	 * Syscall shouldn't happen while interrupts are disabled,
-	 * so let's do a warning here.
-	 */
-0:	trap
-	EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-	bl	trace_hardirqs_on
-
-	/* Now enable for real */
-	LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
-	mtmsr	r10
-
-	REST_GPR(0, r1)
-	REST_4GPRS(3, r1)
-	REST_2GPRS(7, r1)
-	b	DoSyscall
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
 	.globl	transfer_to_syscall
 transfer_to_syscall:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	andi.	r12,r9,MSR_EE
-	beq-	trace_syscall_entry_irq_off
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-/*
- * Handle a system call.
- */
-	.stabs	"arch/powerpc/kernel/",N_SO,0,0,0f
-	.stabs	"entry_32.S",N_SO,0,0,0f
-0:
-
-_GLOBAL(DoSyscall)
-	stw	r3,ORIG_GPR3(r1)
-	li	r12,0
-	stw	r12,RESULT(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* Make sure interrupts are enabled */
-	mfmsr	r11
-	andi.	r12,r11,MSR_EE
-	/* We came in with interrupts disabled, we WARN and mark them enabled
-	 * for lockdep now */
-0:	tweqi	r12, 0
-	EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-#endif /* CONFIG_TRACE_IRQFLAGS */
-	lwz	r11,TI_FLAGS(r2)
-	andi.	r11,r11,_TIF_SYSCALL_DOTRACE
-	bne-	syscall_dotrace
-syscall_dotrace_cont:
-	cmplwi	0,r0,NR_syscalls
-	lis	r10,sys_call_table@h
-	ori	r10,r10,sys_call_table@l
-	slwi	r0,r0,2
-	bge-	66f
-
-	barrier_nospec_asm
-	/*
-	 * Prevent the load of the handler below (based on the user-passed
-	 * system call number) being speculatively executed until the test
-	 * against NR_syscalls and branch to .66f above has
-	 * committed.
-	 */
-
-	lwzx	r10,r10,r0	/* Fetch system call handler [ptr] */
-	mtlr	r10
-	addi	r9,r1,STACK_FRAME_OVERHEAD
-	PPC440EP_ERR42
-	blrl			/* Call handler */
-	.globl	ret_from_syscall
+	mr	r9, r0
+	addi	r10, r1, STACK_FRAME_OVERHEAD
+	bl	system_call_exception
 ret_from_syscall:
-#ifdef CONFIG_DEBUG_RSEQ
-	/* Check whether the syscall is issued inside a restartable sequence */
-	stw	r3,GPR3(r1)
-	addi    r3,r1,STACK_FRAME_OVERHEAD
-	bl      rseq_syscall
-	lwz	r3,GPR3(r1)
-#endif
-	mr	r6,r3
-	/* disable interrupts so current_thread_info()->flags can't change */
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
-	/* Note: We don't bother telling lockdep about it */
-	SYNC
-	mtmsr	r10
-	lwz	r9,TI_FLAGS(r2)
-	li	r8,-MAX_ERRNO
-	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
-	bne-	syscall_exit_work
-	cmplw	0,r3,r8
-	blt+	syscall_exit_cont
-	lwz	r11,_CCR(r1)			/* Load CR */
-	neg	r3,r3
-	oris	r11,r11,0x1000	/* Set SO bit in CR */
-	stw	r11,_CCR(r1)
-syscall_exit_cont:
-	lwz	r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* If we are going to return from the syscall with interrupts
-	 * off, we trace that here. It shouldn't normally happen.
-	 */
-	andi.	r10,r8,MSR_EE
-	bne+	1f
-	stw	r3,GPR3(r1)
-	bl      trace_hardirqs_off
-	lwz	r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	/* If the process has its own DBCR0 value, load it up.  The internal
-	   debug mode bit tells us that dbcr0 should be loaded. */
-	lwz	r0,THREAD+THREAD_DBCR0(r2)
-	andis.	r10,r0,DBCR0_IDM@h
-	bnel-	load_dbcr0
-#endif
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
-	lis	r4,icache_44x_need_flush@ha
-	lwz	r5,icache_44x_need_flush@l(r4)
-	cmplwi	cr0,r5,0
-	bne-	2f
-1:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
-#endif /* CONFIG_44x */
-BEGIN_FTR_SECTION
-	lwarx	r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
-	stwcx.	r0,0,r1			/* to clear the reservation */
-	ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_unlock r5, r7
-#endif
-	kuap_check r2, r4
-	lwz	r4,_LINK(r1)
-	lwz	r5,_CCR(r1)
-	mtlr	r4
-	mtcr	r5
-	lwz	r7,_NIP(r1)
-	lwz	r2,GPR2(r1)
-	lwz	r1,GPR1(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-	mtspr	SPRN_NRI, r0
-#endif
-	mtspr	SPRN_SRR0,r7
-	mtspr	SPRN_SRR1,r8
-	SYNC
-	RFI
-#ifdef CONFIG_44x
-2:	li	r7,0
-	iccci	r0,r0
-	stw	r7,icache_44x_need_flush@l(r4)
+	addi    r4, r1, STACK_FRAME_OVERHEAD
+	bl	syscall_exit_prepare
+	lwz	r2, _CCR(r1)
+	lwz	r4, _NIP(r1)
+	lwz	r5, _MSR(r1)
+	lwz	r6, _LINK(r1)
+	mtspr	SPRN_SRR0, r4
+	mtspr	SPRN_SRR1, r5
+	mtlr	r6
+	cmpwi	r3, 0
+	bne	2f
+1:	mtcr	r2
+	REST_GPR(2, r1)
+	REST_GPR(3, r1)
+	REST_GPR(1, r1)
+	rfi
+2:	lwz	r3, _CTR(r1)
+	lwz	r4, _XER(r1)
+	REST_NVGPRS(r1)
+	mtctr	r3
+	mtspr	SPRN_XER, r4
+	REST_GPR(0, r1)
+	REST_8GPRS(4, r1)
+	REST_GPR(12, r1)
 	b	1b
-#endif  /* CONFIG_44x */
-
-66:	li	r3,-ENOSYS
-	b	ret_from_syscall
 
 	.globl	ret_from_fork
 ret_from_fork:
@@ -490,86 +365,6 @@ ret_from_kernel_thread:
 	li	r3,0
 	b	ret_from_syscall
 
-/* Traced system call support */
-syscall_dotrace:
-	SAVE_NVGPRS(r1)
-	li	r0,0xc00
-	stw	r0,_TRAP(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_syscall_trace_enter
-	/*
-	 * Restore argument registers possibly just changed.
-	 * We use the return value of do_syscall_trace_enter
-	 * for call number to look up in the table (r0).
-	 */
-	mr	r0,r3
-	lwz	r3,GPR3(r1)
-	lwz	r4,GPR4(r1)
-	lwz	r5,GPR5(r1)
-	lwz	r6,GPR6(r1)
-	lwz	r7,GPR7(r1)
-	lwz	r8,GPR8(r1)
-	REST_NVGPRS(r1)
-
-	cmplwi	r0,NR_syscalls
-	/* Return code is already in r3 thanks to do_syscall_trace_enter() */
-	bge-	ret_from_syscall
-	b	syscall_dotrace_cont
-
-syscall_exit_work:
-	andi.	r0,r9,_TIF_RESTOREALL
-	beq+	0f
-	REST_NVGPRS(r1)
-	b	2f
-0:	cmplw	0,r3,r8
-	blt+	1f
-	andi.	r0,r9,_TIF_NOERROR
-	bne-	1f
-	lwz	r11,_CCR(r1)			/* Load CR */
-	neg	r3,r3
-	oris	r11,r11,0x1000	/* Set SO bit in CR */
-	stw	r11,_CCR(r1)
-
-1:	stw	r6,RESULT(r1)	/* Save result */
-	stw	r3,GPR3(r1)	/* Update return value */
-2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
-	beq	4f
-
-	/* Clear per-syscall TIF flags if any are set.  */
-
-	li	r11,_TIF_PERSYSCALL_MASK
-	addi	r12,r2,TI_FLAGS
-3:	lwarx	r8,0,r12
-	andc	r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
-	dcbt	0,r12
-#endif
-	stwcx.	r8,0,r12
-	bne-	3b
-	
-4:	/* Anything which requires enabling interrupts? */
-	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
-	beq	ret_from_except
-
-	/* Re-enable interrupts. There is no need to trace that with
-	 * lockdep as we are supposed to have IRQs on at this point
-	 */
-	ori	r10,r10,MSR_EE
-	SYNC
-	mtmsr	r10
-
-	/* Save NVGPRS if they're not saved already */
-	lwz	r4,_TRAP(r1)
-	andi.	r4,r4,1
-	beq	5f
-	SAVE_NVGPRS(r1)
-	li	r4,0xc00
-	stw	r4,_TRAP(r1)
-5:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_syscall_trace_leave
-	b	ret_from_except_full
-
 	/*
 	 * System call was called from kernel. We get here with SRR1 in r9.
 	 * Mark the exception as recoverable once we have retrieved SRR0,
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9abec6cd099c..97691931a306 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -174,12 +174,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 	stw	r2,GPR2(r11)
 	addi	r10,r10,STACK_FRAME_REGS_MARKER@l
 	stw	r9,_MSR(r11)
-	li	r2, \trapno + 1
+	li	r2, \trapno
 	stw	r10,8(r11)
 	stw	r2,_TRAP(r11)
 	SAVE_GPR(0, r11)
 	SAVE_4GPRS(3, r11)
 	SAVE_2GPRS(7, r11)
+	SAVE_NVGPRS(r11)
 	addi	r11,r1,STACK_FRAME_OVERHEAD
 	addi	r2,r12,-THREAD
 	stw	r11,PT_REGS(r12)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall.c
similarity index 97%
rename from arch/powerpc/kernel/syscall_64.c
rename to arch/powerpc/kernel/syscall.c
index cf06eb443a80..a07702a85f81 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <linux/err.h>
+#include <linux/compat.h>
+
 #include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
 #include <asm/cputime.h>
 #include <asm/hw_irq.h>
 #include <asm/kprobes.h>
@@ -25,16 +26,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	unsigned long ti_flags;
 	syscall_fn f;
 
+#ifdef CONFIG_PPC64
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+#endif
 
 	trace_hardirqs_off(); /* finish reconciling */
 
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
 		BUG_ON(!(regs->msr & MSR_RI));
 	BUG_ON(!(regs->msr & MSR_PR));
 	BUG_ON(!FULL_REGS(regs));
+#ifdef CONFIG_PPC64
 	BUG_ON(regs->softe != IRQS_ENABLED);
+#endif
 
 	account_cpu_user_entry();
 
@@ -48,7 +53,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	}
 #endif
 
-	kuap_check_amr();
+	kuap_check();
 
 	/*
 	 * This is not required for the syscall exit path, but makes the
@@ -56,7 +61,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	 * frame, or if the unwinder was taught the first stack frame always
 	 * returns to user with IRQS_ENABLED, this store could be avoided!
 	 */
+#ifdef CONFIG_PPC64
 	regs->softe = IRQS_ENABLED;
+#endif
 
 	local_irq_enable();
 
@@ -86,7 +93,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	/* May be faster to do array_index_nospec? */
 	barrier_nospec();
 
-	if (unlikely(ti_flags & _TIF_32BIT)) {
+	if (is_compat_task()) {
 		f = (void *)compat_sys_call_table[r0];
 
 		r3 &= 0x00000000ffffffffULL;
@@ -148,7 +155,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 		ret |= _TIF_RESTOREALL;
 	}
 
+#ifdef CONFIG_PPC64
 again:
+#endif
 	local_irq_disable();
 	ti_flags = READ_ONCE(*ti_flagsp);
 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
@@ -191,6 +200,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	/* This pattern matches prep_irq_for_idle */
 	__hard_EE_RI_disable();
+#ifdef CONFIG_PPC64
 	if (unlikely(lazy_irq_pending())) {
 		__hard_RI_enable();
 		trace_hardirqs_off();
@@ -201,12 +211,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	}
 	local_paca->irq_happened = 0;
 	irq_soft_mask_set(IRQS_ENABLED);
+#endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	local_paca->tm_scratch = regs->msr;
 #endif
 
-	kuap_check_amr();
+	kuap_check();
 
 	account_cpu_user_exit();
 
@@ -294,7 +305,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	local_paca->tm_scratch = regs->msr;
 #endif
 
-	kuap_check_amr();
+	kuap_check();
 
 	account_cpu_user_exit();
 
@@ -372,7 +383,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 	 * We don't need to restore AMR on the way back to userspace for KUAP.
 	 * The value of AMR only matters while we're in the kernel.
 	 */
-	kuap_restore_amr(regs);
+	kuap_restore(regs);
 
 	return ret;
 }
-- 
2.25.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C
  2020-03-31 15:22 [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C Christophe Leroy
@ 2020-04-01 11:48 ` Christophe Leroy
  2020-04-03  7:33   ` Nicholas Piggin
  0 siblings, 1 reply; 4+ messages in thread
From: Christophe Leroy @ 2020-04-01 11:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, msuchanek
  Cc: linuxppc-dev, linux-kernel



Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
> I've do the minimum to get it work. I have not reworked calls
> to sys_fork() and friends for instance.
> 
> For the time being, it seems to work more or less but:
> - ping reports EINVAL on recvfrom
> - strace shows NULL instead of strings in call like open() for instance.

For the two above problems, that's because system_call_exception() 
doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that 
only done on PPC32 ?

With the following line at the begining of system_call_exception(), it 
works perfectly:

	regs->orig_gpr3 = r3;

I will now focus on performance to see if we can do something about it.

Christophe

> - the performance is definitively bad
> 
> On an 8xx, null_syscall test is about 30% slower after this patch:
> - Without the patch: 284 cycles
> - With the patch: 371 cycles
> 
> @nick and others, any suggestion to fix and improve ?
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>   arch/powerpc/include/asm/book3s/32/kup.h      |  21 ++
>   .../powerpc/include/asm/book3s/64/kup-radix.h |  12 +-
>   arch/powerpc/include/asm/hw_irq.h             |  15 +
>   arch/powerpc/include/asm/kup.h                |   2 +
>   arch/powerpc/include/asm/nohash/32/kup-8xx.h  |  13 +
>   arch/powerpc/kernel/Makefile                  |   5 +-
>   arch/powerpc/kernel/entry_32.S                | 259 ++----------------
>   arch/powerpc/kernel/head_32.h                 |   3 +-
>   .../kernel/{syscall_64.c => syscall.c}        |  25 +-
>   9 files changed, 102 insertions(+), 253 deletions(-)
>   rename arch/powerpc/kernel/{syscall_64.c => syscall.c} (97%)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
> index 3c0ba22dc360..c85bc5b56366 100644
> --- a/arch/powerpc/include/asm/book3s/32/kup.h
> +++ b/arch/powerpc/include/asm/book3s/32/kup.h
> @@ -102,6 +102,27 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
>   	isync();	/* Context sync required after mtsrin() */
>   }
>   
> +static inline void kuap_restore(struct pt_regs *regs)
> +{
> +	u32 kuap = current->thread.kuap;
> +	u32 addr = kuap & 0xf0000000;
> +	u32 end = kuap << 28;
> +
> +	if (unlikely(!kuap))
> +		return;
> +
> +	current->thread.kuap = 0;
> +	kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);	/* Clear Ks */
> +}
> +
> +static inline void kuap_check(void)
> +{
> +	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
> +		return;
> +
> +	WARN_ON_ONCE(current->thread.kuap != 0);
> +}
> +
>   static __always_inline void allow_user_access(void __user *to, const void __user *from,
>   					      u32 size, unsigned long dir)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> index 3bcef989a35d..1f2716a0dcd8 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
> @@ -60,13 +60,13 @@
>   #include <asm/mmu.h>
>   #include <asm/ptrace.h>
>   
> -static inline void kuap_restore_amr(struct pt_regs *regs)
> +static inline void kuap_restore(struct pt_regs *regs)
>   {
>   	if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
>   		mtspr(SPRN_AMR, regs->kuap);
>   }
>   
> -static inline void kuap_check_amr(void)
> +static inline void kuap_check(void)
>   {
>   	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP))
>   		WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
> @@ -141,14 +141,6 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
>   		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
>   		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
>   }
> -#else /* CONFIG_PPC_KUAP */
> -static inline void kuap_restore_amr(struct pt_regs *regs)
> -{
> -}
> -
> -static inline void kuap_check_amr(void)
> -{
> -}
>   #endif /* CONFIG_PPC_KUAP */
>   
>   #endif /* __ASSEMBLY__ */
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index e0e71777961f..6ccf07de6665 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -321,6 +321,16 @@ static inline void arch_local_irq_disable(void)
>   		mtmsr(mfmsr() & ~MSR_EE);
>   }
>   
> +static inline void arch_local_recovery_disable(void)
> +{
> +	if (IS_ENABLED(CONFIG_BOOKE))
> +		wrtee(0);
> +	else if (IS_ENABLED(CONFIG_PPC_8xx))
> +		wrtspr(SPRN_NRI);
> +	else
> +		mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
> +}
> +
>   static inline void arch_local_irq_enable(void)
>   {
>   	if (IS_ENABLED(CONFIG_BOOKE))
> @@ -343,6 +353,11 @@ static inline bool arch_irqs_disabled(void)
>   
>   #define hard_irq_disable()		arch_local_irq_disable()
>   
> +#define __hard_irq_enable()		arch_local_irq_enable()
> +#define __hard_irq_disable()		arch_local_irq_disable()
> +#define __hard_EE_RI_disable()		arch_local_recovery_disable()
> +#define __hard_RI_enable()		arch_local_irq_disable()
> +
>   static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
>   {
>   	return !(regs->msr & MSR_EE);
> diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
> index 92bcd1a26d73..1100c13b6d9e 100644
> --- a/arch/powerpc/include/asm/kup.h
> +++ b/arch/powerpc/include/asm/kup.h
> @@ -62,6 +62,8 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
>   {
>   	return false;
>   }
> +static inline void kuap_restore(struct pt_regs *regs) { }
> +static inline void kuap_check(void) { }
>   #endif /* CONFIG_PPC_KUAP */
>   
>   static inline void allow_read_from_user(const void __user *from, unsigned long size)
> diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> index 85ed2390fb99..1918d2e55da3 100644
> --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
> @@ -34,6 +34,19 @@
>   
>   #include <asm/reg.h>
>   
> +static inline void kuap_restore(struct pt_regs *regs)
> +{
> +	mtspr(SPRN_MD_AP, regs->kuap);
> +}
> +
> +static inline void kuap_check(void)
> +{
> +	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
> +		return;
> +
> +	WARN_ON_ONCE((mfspr(SPRN_MD_AP) & 0xffff0000) != (MD_APG_KUAP & 0xffff0000));
> +}
> +
>   static inline void allow_user_access(void __user *to, const void __user *from,
>   				     unsigned long size, unsigned long dir)
>   {
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 570660efbb3d..e4be425b7718 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -45,11 +45,10 @@ obj-y				:= cputable.o syscalls.o \
>   				   signal.o sysfs.o cacheinfo.o time.o \
>   				   prom.o traps.o setup-common.o \
>   				   udbg.o misc.o io.o misc_$(BITS).o \
> -				   of_platform.o prom_parse.o
> +				   of_platform.o prom_parse.o syscall.o
>   obj-y				+= ptrace/
>   obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o signal_64.o \
> -				   paca.o nvram_64.o firmware.o note.o \
> -				   syscall_64.o
> +				   paca.o nvram_64.o firmware.o note.o
>   obj-$(CONFIG_VDSO32)		+= vdso32/
>   obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
>   obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index a6371fb8f761..103f5158bc44 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -315,162 +315,37 @@ stack_ovf:
>   	RFI
>   #endif
>   
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -trace_syscall_entry_irq_off:
> -	/*
> -	 * Syscall shouldn't happen while interrupts are disabled,
> -	 * so let's do a warning here.
> -	 */
> -0:	trap
> -	EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
> -	bl	trace_hardirqs_on
> -
> -	/* Now enable for real */
> -	LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
> -	mtmsr	r10
> -
> -	REST_GPR(0, r1)
> -	REST_4GPRS(3, r1)
> -	REST_2GPRS(7, r1)
> -	b	DoSyscall
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -
>   	.globl	transfer_to_syscall
>   transfer_to_syscall:
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -	andi.	r12,r9,MSR_EE
> -	beq-	trace_syscall_entry_irq_off
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -
> -/*
> - * Handle a system call.
> - */
> -	.stabs	"arch/powerpc/kernel/",N_SO,0,0,0f
> -	.stabs	"entry_32.S",N_SO,0,0,0f
> -0:
> -
> -_GLOBAL(DoSyscall)
> -	stw	r3,ORIG_GPR3(r1)
> -	li	r12,0
> -	stw	r12,RESULT(r1)
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -	/* Make sure interrupts are enabled */
> -	mfmsr	r11
> -	andi.	r12,r11,MSR_EE
> -	/* We came in with interrupts disabled, we WARN and mark them enabled
> -	 * for lockdep now */
> -0:	tweqi	r12, 0
> -	EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -	lwz	r11,TI_FLAGS(r2)
> -	andi.	r11,r11,_TIF_SYSCALL_DOTRACE
> -	bne-	syscall_dotrace
> -syscall_dotrace_cont:
> -	cmplwi	0,r0,NR_syscalls
> -	lis	r10,sys_call_table@h
> -	ori	r10,r10,sys_call_table@l
> -	slwi	r0,r0,2
> -	bge-	66f
> -
> -	barrier_nospec_asm
> -	/*
> -	 * Prevent the load of the handler below (based on the user-passed
> -	 * system call number) being speculatively executed until the test
> -	 * against NR_syscalls and branch to .66f above has
> -	 * committed.
> -	 */
> -
> -	lwzx	r10,r10,r0	/* Fetch system call handler [ptr] */
> -	mtlr	r10
> -	addi	r9,r1,STACK_FRAME_OVERHEAD
> -	PPC440EP_ERR42
> -	blrl			/* Call handler */
> -	.globl	ret_from_syscall
> +	mr	r9, r0
> +	addi	r10, r1, STACK_FRAME_OVERHEAD
> +	bl	system_call_exception
>   ret_from_syscall:
> -#ifdef CONFIG_DEBUG_RSEQ
> -	/* Check whether the syscall is issued inside a restartable sequence */
> -	stw	r3,GPR3(r1)
> -	addi    r3,r1,STACK_FRAME_OVERHEAD
> -	bl      rseq_syscall
> -	lwz	r3,GPR3(r1)
> -#endif
> -	mr	r6,r3
> -	/* disable interrupts so current_thread_info()->flags can't change */
> -	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
> -	/* Note: We don't bother telling lockdep about it */
> -	SYNC
> -	mtmsr	r10
> -	lwz	r9,TI_FLAGS(r2)
> -	li	r8,-MAX_ERRNO
> -	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> -	bne-	syscall_exit_work
> -	cmplw	0,r3,r8
> -	blt+	syscall_exit_cont
> -	lwz	r11,_CCR(r1)			/* Load CR */
> -	neg	r3,r3
> -	oris	r11,r11,0x1000	/* Set SO bit in CR */
> -	stw	r11,_CCR(r1)
> -syscall_exit_cont:
> -	lwz	r8,_MSR(r1)
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -	/* If we are going to return from the syscall with interrupts
> -	 * off, we trace that here. It shouldn't normally happen.
> -	 */
> -	andi.	r10,r8,MSR_EE
> -	bne+	1f
> -	stw	r3,GPR3(r1)
> -	bl      trace_hardirqs_off
> -	lwz	r3,GPR3(r1)
> -1:
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
> -	/* If the process has its own DBCR0 value, load it up.  The internal
> -	   debug mode bit tells us that dbcr0 should be loaded. */
> -	lwz	r0,THREAD+THREAD_DBCR0(r2)
> -	andis.	r10,r0,DBCR0_IDM@h
> -	bnel-	load_dbcr0
> -#endif
> -#ifdef CONFIG_44x
> -BEGIN_MMU_FTR_SECTION
> -	lis	r4,icache_44x_need_flush@ha
> -	lwz	r5,icache_44x_need_flush@l(r4)
> -	cmplwi	cr0,r5,0
> -	bne-	2f
> -1:
> -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
> -#endif /* CONFIG_44x */
> -BEGIN_FTR_SECTION
> -	lwarx	r7,0,r1
> -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
> -	stwcx.	r0,0,r1			/* to clear the reservation */
> -	ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
> -#ifdef CONFIG_PPC_BOOK3S_32
> -	kuep_unlock r5, r7
> -#endif
> -	kuap_check r2, r4
> -	lwz	r4,_LINK(r1)
> -	lwz	r5,_CCR(r1)
> -	mtlr	r4
> -	mtcr	r5
> -	lwz	r7,_NIP(r1)
> -	lwz	r2,GPR2(r1)
> -	lwz	r1,GPR1(r1)
> -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
> -	mtspr	SPRN_NRI, r0
> -#endif
> -	mtspr	SPRN_SRR0,r7
> -	mtspr	SPRN_SRR1,r8
> -	SYNC
> -	RFI
> -#ifdef CONFIG_44x
> -2:	li	r7,0
> -	iccci	r0,r0
> -	stw	r7,icache_44x_need_flush@l(r4)
> +	addi    r4, r1, STACK_FRAME_OVERHEAD
> +	bl	syscall_exit_prepare
> +	lwz	r2, _CCR(r1)
> +	lwz	r4, _NIP(r1)
> +	lwz	r5, _MSR(r1)
> +	lwz	r6, _LINK(r1)
> +	mtspr	SPRN_SRR0, r4
> +	mtspr	SPRN_SRR1, r5
> +	mtlr	r6
> +	cmpwi	r3, 0
> +	bne	2f
> +1:	mtcr	r2
> +	REST_GPR(2, r1)
> +	REST_GPR(3, r1)
> +	REST_GPR(1, r1)
> +	rfi
> +2:	lwz	r3, _CTR(r1)
> +	lwz	r4, _XER(r1)
> +	REST_NVGPRS(r1)
> +	mtctr	r3
> +	mtspr	SPRN_XER, r4
> +	REST_GPR(0, r1)
> +	REST_8GPRS(4, r1)
> +	REST_GPR(12, r1)
>   	b	1b
> -#endif  /* CONFIG_44x */
> -
> -66:	li	r3,-ENOSYS
> -	b	ret_from_syscall
>   
>   	.globl	ret_from_fork
>   ret_from_fork:
> @@ -490,86 +365,6 @@ ret_from_kernel_thread:
>   	li	r3,0
>   	b	ret_from_syscall
>   
> -/* Traced system call support */
> -syscall_dotrace:
> -	SAVE_NVGPRS(r1)
> -	li	r0,0xc00
> -	stw	r0,_TRAP(r1)
> -	addi	r3,r1,STACK_FRAME_OVERHEAD
> -	bl	do_syscall_trace_enter
> -	/*
> -	 * Restore argument registers possibly just changed.
> -	 * We use the return value of do_syscall_trace_enter
> -	 * for call number to look up in the table (r0).
> -	 */
> -	mr	r0,r3
> -	lwz	r3,GPR3(r1)
> -	lwz	r4,GPR4(r1)
> -	lwz	r5,GPR5(r1)
> -	lwz	r6,GPR6(r1)
> -	lwz	r7,GPR7(r1)
> -	lwz	r8,GPR8(r1)
> -	REST_NVGPRS(r1)
> -
> -	cmplwi	r0,NR_syscalls
> -	/* Return code is already in r3 thanks to do_syscall_trace_enter() */
> -	bge-	ret_from_syscall
> -	b	syscall_dotrace_cont
> -
> -syscall_exit_work:
> -	andi.	r0,r9,_TIF_RESTOREALL
> -	beq+	0f
> -	REST_NVGPRS(r1)
> -	b	2f
> -0:	cmplw	0,r3,r8
> -	blt+	1f
> -	andi.	r0,r9,_TIF_NOERROR
> -	bne-	1f
> -	lwz	r11,_CCR(r1)			/* Load CR */
> -	neg	r3,r3
> -	oris	r11,r11,0x1000	/* Set SO bit in CR */
> -	stw	r11,_CCR(r1)
> -
> -1:	stw	r6,RESULT(r1)	/* Save result */
> -	stw	r3,GPR3(r1)	/* Update return value */
> -2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
> -	beq	4f
> -
> -	/* Clear per-syscall TIF flags if any are set.  */
> -
> -	li	r11,_TIF_PERSYSCALL_MASK
> -	addi	r12,r2,TI_FLAGS
> -3:	lwarx	r8,0,r12
> -	andc	r8,r8,r11
> -#ifdef CONFIG_IBM405_ERR77
> -	dcbt	0,r12
> -#endif
> -	stwcx.	r8,0,r12
> -	bne-	3b
> -	
> -4:	/* Anything which requires enabling interrupts? */
> -	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
> -	beq	ret_from_except
> -
> -	/* Re-enable interrupts. There is no need to trace that with
> -	 * lockdep as we are supposed to have IRQs on at this point
> -	 */
> -	ori	r10,r10,MSR_EE
> -	SYNC
> -	mtmsr	r10
> -
> -	/* Save NVGPRS if they're not saved already */
> -	lwz	r4,_TRAP(r1)
> -	andi.	r4,r4,1
> -	beq	5f
> -	SAVE_NVGPRS(r1)
> -	li	r4,0xc00
> -	stw	r4,_TRAP(r1)
> -5:
> -	addi	r3,r1,STACK_FRAME_OVERHEAD
> -	bl	do_syscall_trace_leave
> -	b	ret_from_except_full
> -
>   	/*
>   	 * System call was called from kernel. We get here with SRR1 in r9.
>   	 * Mark the exception as recoverable once we have retrieved SRR0,
> diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
> index 9abec6cd099c..97691931a306 100644
> --- a/arch/powerpc/kernel/head_32.h
> +++ b/arch/powerpc/kernel/head_32.h
> @@ -174,12 +174,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
>   	stw	r2,GPR2(r11)
>   	addi	r10,r10,STACK_FRAME_REGS_MARKER@l
>   	stw	r9,_MSR(r11)
> -	li	r2, \trapno + 1
> +	li	r2, \trapno
>   	stw	r10,8(r11)
>   	stw	r2,_TRAP(r11)
>   	SAVE_GPR(0, r11)
>   	SAVE_4GPRS(3, r11)
>   	SAVE_2GPRS(7, r11)
> +	SAVE_NVGPRS(r11)
>   	addi	r11,r1,STACK_FRAME_OVERHEAD
>   	addi	r2,r12,-THREAD
>   	stw	r11,PT_REGS(r12)
> diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall.c
> similarity index 97%
> rename from arch/powerpc/kernel/syscall_64.c
> rename to arch/powerpc/kernel/syscall.c
> index cf06eb443a80..a07702a85f81 100644
> --- a/arch/powerpc/kernel/syscall_64.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -1,8 +1,9 @@
>   // SPDX-License-Identifier: GPL-2.0-or-later
>   
>   #include <linux/err.h>
> +#include <linux/compat.h>
> +
>   #include <asm/asm-prototypes.h>
> -#include <asm/book3s/64/kup-radix.h>
>   #include <asm/cputime.h>
>   #include <asm/hw_irq.h>
>   #include <asm/kprobes.h>
> @@ -25,16 +26,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
>   	unsigned long ti_flags;
>   	syscall_fn f;
>   
> +#ifdef CONFIG_PPC64
>   	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>   		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> +#endif
>   
>   	trace_hardirqs_off(); /* finish reconciling */
>   
> -	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
> +	if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
>   		BUG_ON(!(regs->msr & MSR_RI));
>   	BUG_ON(!(regs->msr & MSR_PR));
>   	BUG_ON(!FULL_REGS(regs));
> +#ifdef CONFIG_PPC64
>   	BUG_ON(regs->softe != IRQS_ENABLED);
> +#endif
>   
>   	account_cpu_user_entry();
>   
> @@ -48,7 +53,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>   	}
>   #endif
>   
> -	kuap_check_amr();
> +	kuap_check();
>   
>   	/*
>   	 * This is not required for the syscall exit path, but makes the
> @@ -56,7 +61,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
>   	 * frame, or if the unwinder was taught the first stack frame always
>   	 * returns to user with IRQS_ENABLED, this store could be avoided!
>   	 */
> +#ifdef CONFIG_PPC64
>   	regs->softe = IRQS_ENABLED;
> +#endif
>   
>   	local_irq_enable();
>   
> @@ -86,7 +93,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>   	/* May be faster to do array_index_nospec? */
>   	barrier_nospec();
>   
> -	if (unlikely(ti_flags & _TIF_32BIT)) {
> +	if (is_compat_task()) {
>   		f = (void *)compat_sys_call_table[r0];
>   
>   		r3 &= 0x00000000ffffffffULL;
> @@ -148,7 +155,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   		ret |= _TIF_RESTOREALL;
>   	}
>   
> +#ifdef CONFIG_PPC64
>   again:
> +#endif
>   	local_irq_disable();
>   	ti_flags = READ_ONCE(*ti_flagsp);
>   	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
> @@ -191,6 +200,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   
>   	/* This pattern matches prep_irq_for_idle */
>   	__hard_EE_RI_disable();
> +#ifdef CONFIG_PPC64
>   	if (unlikely(lazy_irq_pending())) {
>   		__hard_RI_enable();
>   		trace_hardirqs_off();
> @@ -201,12 +211,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   	}
>   	local_paca->irq_happened = 0;
>   	irq_soft_mask_set(IRQS_ENABLED);
> +#endif
>   
>   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>   	local_paca->tm_scratch = regs->msr;
>   #endif
>   
> -	kuap_check_amr();
> +	kuap_check();
>   
>   	account_cpu_user_exit();
>   
> @@ -294,7 +305,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
>   	local_paca->tm_scratch = regs->msr;
>   #endif
>   
> -	kuap_check_amr();
> +	kuap_check();
>   
>   	account_cpu_user_exit();
>   
> @@ -372,7 +383,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
>   	 * We don't need to restore AMR on the way back to userspace for KUAP.
>   	 * The value of AMR only matters while we're in the kernel.
>   	 */
> -	kuap_restore_amr(regs);
> +	kuap_restore(regs);
>   
>   	return ret;
>   }
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C
  2020-04-01 11:48 ` Christophe Leroy
@ 2020-04-03  7:33   ` Nicholas Piggin
  2020-04-05 18:53     ` Christophe Leroy
  0 siblings, 1 reply; 4+ messages in thread
From: Nicholas Piggin @ 2020-04-03  7:33 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev

Christophe Leroy's on April 1, 2020 9:48 pm:
> 
> 
> Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
>> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
>> I've do the minimum to get it work. I have not reworked calls
>> to sys_fork() and friends for instance.
>> 
>> For the time being, it seems to work more or less but:
>> - ping reports EINVAL on recvfrom
>> - strace shows NULL instead of strings in call like open() for instance.
> 
> For the two above problems, that's because system_call_exception() 
> doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that 
> only done on PPC32 ?
> 
> With the following line at the begining of system_call_exception(), it 
> works perfectly:
> 
> 	regs->orig_gpr3 = r3;

Oh great, nice work. We should be able to make some simple helpers or
move some things a bit to reduce the amount of ifdefs in the C code.
It doesn't look too bad though.

> I will now focus on performance to see if we can do something about it.

What's the performance difference between current asm code just with
always saving NVGPRS vs C?

Thanks,
Nick


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C
  2020-04-03  7:33   ` Nicholas Piggin
@ 2020-04-05 18:53     ` Christophe Leroy
  0 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2020-04-05 18:53 UTC (permalink / raw)
  To: Nicholas Piggin, Benjamin Herrenschmidt, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev



Le 03/04/2020 à 09:33, Nicholas Piggin a écrit :
> Christophe Leroy's on April 1, 2020 9:48 pm:
>>
>>
>> Le 31/03/2020 à 17:22, Christophe Leroy a écrit :
>>> That's first try to port PPC64 syscall entry/exit logic in C to PPC32.
>>> I've do the minimum to get it work. I have not reworked calls
>>> to sys_fork() and friends for instance.
>>>
>>> For the time being, it seems to work more or less but:
>>> - ping reports EINVAL on recvfrom
>>> - strace shows NULL instead of strings in call like open() for instance.
>>
>> For the two above problems, that's because system_call_exception()
>> doesn't set orig_gpr3 whereas DoSycall() does in entry_32.S . Is that
>> only done on PPC32 ?
>>
>> With the following line at the begining of system_call_exception(), it
>> works perfectly:
>>
>> 	regs->orig_gpr3 = r3;
> 
> Oh great, nice work. We should be able to make some simple helpers or
> move some things a bit to reduce the amount of ifdefs in the C code.
> It doesn't look too bad though.
> 
>> I will now focus on performance to see if we can do something about it.
> 
> What's the performance difference between current asm code just with
> always saving NVGPRS vs C?

Done new measurement and sent a series. lower values now because the 
time accounting was done twice as it was still in the ASM part.

Before the series, 311 cycles for a null_syscall
If adding SAVE_NVGPRS to the entry macro, 335 cycles

First patch: 353 cycles ie +13,5%

After a few changes, including conditional saving of non volatile 
registers, I get 325 cycles that is only +4,5%. I thing that's acceptable.

Do you see a problem with still saving non volatile registers only when 
required ?

Christophe

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-04-05 18:53 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-31 15:22 [RFC WIP PATCH] powerpc/32: system call implement entry/exit logic in C Christophe Leroy
2020-04-01 11:48 ` Christophe Leroy
2020-04-03  7:33   ` Nicholas Piggin
2020-04-05 18:53     ` Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).