All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
@ 2022-06-01  5:48 Rohan McLure
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
                   ` (6 more replies)
  0 siblings, 7 replies; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, npiggin

Macros for restoring saving registers to and from the stack exist.
Provide a macro for simply zeroing a range of gprs, or an individual
gpr.

Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/include/asm/ppc_asm.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 4dea2d963738..3fb37a9767f7 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -33,6 +33,19 @@
 	.endr
 .endm
 
+/*
+ * Simplification of OP_REGS, for an arbitrary right hand operand.
+ *
+ *   op  reg, rhs
+ */
+.macro BINOP_REGS op, rhs, start, end
+	.Lreg=\start
+	.rept (\end - \start + 1)
+	\op .Lreg, \rhs
+	.Lreg=.Lreg+1
+	.endr
+.endm
+
 /*
  * Macros for storing registers into and loading registers from
  * exception frames.
@@ -49,6 +62,10 @@
 #define REST_NVGPRS(base)		REST_GPRS(13, 31, base)
 #endif
 
+#define ZERO_GPRS(start, end)		BINOP_REGS li, 0, start, end
+#define ZERO_NVGPRS()			ZERO_GPRS(14, 31)
+#define ZERO_GPR(n)			ZERO_GPRS(n, n)
+
 #define SAVE_GPR(n, base)		SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)		REST_GPRS(n, n, base)
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
@ 2022-06-01  5:48 ` Rohan McLure
  2022-06-01  8:29   ` Christophe Leroy
                     ` (5 more replies)
  2022-06-01  5:48 ` [PATCH 3/6] powerpc: Make syscalls save and restore gprs Rohan McLure
                   ` (5 subsequent siblings)
  6 siblings, 6 replies; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, Andrew Donnellan, npiggin

Syscall wrapper implemented as per s390, x86, arm64, providing the
option for gprs to be cleared on entry to the kernel, reducing caller
influence influence on speculation within syscall routine. The wrapper
is a macro that emits syscall handler implementations with parameters
passed by stack pointer.

For platforms supporting this syscall wrapper, emit symbols with usual
in-register parameters (`sys...`) to support calls to syscall handlers
from within the kernel.

Syscalls are wrapped on all platforms except Cell processor. SPUs require
access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
enabled.

Co-developed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/Kconfig                       |  1 +
 arch/powerpc/include/asm/interrupt.h       |  3 +-
 arch/powerpc/include/asm/syscall_wrapper.h | 92 ++++++++++++++++++++++
 arch/powerpc/include/asm/syscalls.h        | 83 +++++++++++++------
 arch/powerpc/kernel/entry_32.S             |  6 +-
 arch/powerpc/kernel/interrupt.c            | 35 ++++----
 arch/powerpc/kernel/interrupt_64.S         | 30 +++----
 arch/powerpc/kernel/sys_ppc32.c            | 50 +++++++-----
 arch/powerpc/kernel/syscalls.c             | 19 +++--
 arch/powerpc/kernel/systbl.S               | 21 +++++
 arch/powerpc/kernel/vdso.c                 |  2 +
 11 files changed, 255 insertions(+), 87 deletions(-)
 create mode 100644 arch/powerpc/include/asm/syscall_wrapper.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 174edabb74fa..e58287a70061 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
 	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
 	select ARCH_HAS_STRICT_KERNEL_RWX	if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE
 	select ARCH_HAS_STRICT_MODULE_RWX	if ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_SYSCALL_WRAPPER		if !SPU_BASE
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UACCESS_FLUSHCACHE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index f964ef5c57d8..8e8949e4db7a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -636,8 +636,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 		local_irq_enable();
 }
 
-long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
-			   unsigned long r0, struct pt_regs *regs);
+long system_call_exception(unsigned long r0, struct pt_regs *regs);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..23da22b081e4
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...)				\
+	__MAP(x,__SC_ARGS					\
+	      ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]	\
+	      ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)						\
+	asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs);		\
+	ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO);			\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs)		\
+	{										\
+		return __se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}										\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{										\
+		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
+	}										\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define COMPAT_SYSCALL_DEFINE0(sname)							\
+	asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused);	\
+	ALLOW_ERROR_INJECTION(__powerpc_compat_sys_##sname, ERRNO);			\
+	asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL_COMPAT(name)							\
+	asmlinkage long __weak __powerpc_compat_sys_##name(const struct pt_regs *regs)	\
+	{										\
+		return sys_ni_syscall();						\
+	}
+#define COMPAT_SYS_NI(name) \
+	SYSCALL_ALIAS(__powerpc_compat_sys_##name, sys_ni_posix_timers);
+
+#endif /* CONFIG_COMPAT */
+
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);	\
+	ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);			\
+	long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));				\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)		\
+	{									\
+		return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
+	long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))				\
+	{									\
+		return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
+	}									\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{									\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__MAP(x,__SC_TEST,__VA_ARGS__);					\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
+		return ret;							\
+	}									\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define SYSCALL_DEFINE0(sname)							\
+	SYSCALL_METADATA(_##sname, 0);						\
+	long sys_##name(void);							\
+	asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);	\
+	ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);			\
+	long sys_##sname(void)							\
+	{									\
+		return __powerpc_sys_##sname(NULL);				\
+	}									\
+	asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name)							\
+	asmlinkage long __weak __powerpc_sys_##name(const struct pt_regs *regs)	\
+	{									\
+		return sys_ni_syscall();					\
+	}
+
+#define SYS_NI(name) SYSCALL_ALIAS(__powerpc_sys_##name, sys_ni_posix_timers);
+
+#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index a2b13e55254f..75d8e1822caf 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -8,14 +8,47 @@
 #include <linux/types.h>
 #include <linux/compat.h>
 
+/*
+ * For PowerPC specific syscall implementations, wrapper takes exact name and
+ * return type for a given function.
+ */
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#define PPC_SYSCALL_DEFINE(x, type, name, ...)					\
+	asmlinkage type __powerpc_##name(const struct pt_regs *regs);		\
+	ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO);				\
+	type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__));			\
+	static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__));		\
+	asmlinkage type __powerpc_##name(const struct pt_regs *regs)		\
+	{									\
+		return __se_##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
+	type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__))				\
+	{									\
+		return __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
+	}									\
+	static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__))			\
+	{									\
+		type ret = __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
+		__MAP(x,__SC_TEST,__VA_ARGS__);					\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
+		return ret;							\
+	}									\
+	static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#else
+#define PPC_SYSCALL_DEFINE(x, type, name, ...)					\
+	type name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#endif
+
 struct rtas_args;
 
 asmlinkage long sys_mmap(unsigned long addr, size_t len,
-		unsigned long prot, unsigned long flags,
-		unsigned long fd, off_t offset);
+			 unsigned long prot, unsigned long flags,
+			 unsigned long fd, off_t offset);
 asmlinkage long sys_mmap2(unsigned long addr, size_t len,
-		unsigned long prot, unsigned long flags,
-		unsigned long fd, unsigned long pgoff);
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff);
 asmlinkage long ppc64_personality(unsigned long personality);
 asmlinkage long sys_rtas(struct rtas_args __user *uargs);
 int ppc_select(int n, fd_set __user *inp, fd_set __user *outp,
@@ -24,32 +57,38 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
 		      u32 len_high, u32 len_low);
 
 #ifdef CONFIG_COMPAT
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
-			       unsigned long prot, unsigned long flags,
-			       unsigned long fd, unsigned long pgoff);
+asmlinkage unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
+					  unsigned long prot, unsigned long flags,
+					  unsigned long fd, unsigned long pgoff);
 
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
-				  u32 reg6, u32 pos1, u32 pos2);
+asmlinkage compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf,
+					     compat_size_t count,
+					     u32 reg6, u32 pos1, u32 pos2);
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
-				   u32 reg6, u32 pos1, u32 pos2);
+asmlinkage compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf,
+					      compat_size_t count,
+					      u32 reg6, u32 pos1, u32 pos2);
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
+asmlinkage compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1,
+					       u32 offset2, u32 count);
 
-int compat_sys_truncate64(const char __user *path, u32 reg4,
-			  unsigned long len1, unsigned long len2);
+asmlinkage int compat_sys_truncate64(const char __user *path, u32 reg4,
+				     unsigned long len1, unsigned long len2);
 
-long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
+asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1,
+				     u32 offset2, u32 len1, u32 len2);
 
-int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
-			   unsigned long len2);
+asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4,
+				      unsigned long len1, unsigned long len2);
 
-long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
-		     size_t len, int advice);
+asmlinkage long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, 
+				size_t len, int advice);
 
-long compat_sys_sync_file_range2(int fd, unsigned int flags,
-				 unsigned int offset1, unsigned int offset2,
-				 unsigned int nbytes1, unsigned int nbytes2);
+asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
+					    unsigned int offset1,
+					    unsigned int offset2,
+					    unsigned int nbytes1,
+					    unsigned int nbytes2);
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7748c278d13c..2343c577b971 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -121,9 +121,9 @@ transfer_to_syscall:
 	SAVE_NVGPRS(r1)
 	kuep_lock
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	addi	r10,r1,STACK_FRAME_OVERHEAD
-	mr	r9,r0
+	/* Calling convention has r3 = orig r0, r4 = regs */
+	addi	r4,r1,STACK_FRAME_OVERHEAD
+	mr	r3,r0
 	bl	system_call_exception
 
 ret_from_syscall:
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 784ea3289c84..a38329ede1a1 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -24,7 +24,11 @@
 unsigned long global_dbcr0[NR_CPUS];
 #endif
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+typedef long (*syscall_fn)(struct pt_regs *);
+#else
 typedef long (*syscall_fn)(long, long, long, long, long, long);
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
@@ -74,15 +78,13 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 }
 
 /* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
-				   long r6, long r7, long r8,
-				   unsigned long r0, struct pt_regs *regs)
+notrace long system_call_exception(unsigned long r0, struct pt_regs *regs)
 {
 	syscall_fn f;
 
 	kuap_lock();
 
-	regs->orig_gpr3 = r3;
+	regs->orig_gpr3 = regs->gpr[3];
 
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
@@ -196,12 +198,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 		r0 = do_syscall_trace_enter(regs);
 		if (unlikely(r0 >= NR_syscalls))
 			return regs->gpr[3];
-		r3 = regs->gpr[3];
-		r4 = regs->gpr[4];
-		r5 = regs->gpr[5];
-		r6 = regs->gpr[6];
-		r7 = regs->gpr[7];
-		r8 = regs->gpr[8];
 
 	} else if (unlikely(r0 >= NR_syscalls)) {
 		if (unlikely(trap_is_unsupported_scv(regs))) {
@@ -218,18 +214,23 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	if (unlikely(is_compat_task())) {
 		f = (void *)compat_sys_call_table[r0];
 
-		r3 &= 0x00000000ffffffffULL;
-		r4 &= 0x00000000ffffffffULL;
-		r5 &= 0x00000000ffffffffULL;
-		r6 &= 0x00000000ffffffffULL;
-		r7 &= 0x00000000ffffffffULL;
-		r8 &= 0x00000000ffffffffULL;
+		regs->gpr[3] &= 0x00000000ffffffffULL;
+		regs->gpr[4] &= 0x00000000ffffffffULL;
+		regs->gpr[5] &= 0x00000000ffffffffULL;
+		regs->gpr[6] &= 0x00000000ffffffffULL;
+		regs->gpr[7] &= 0x00000000ffffffffULL;
+		regs->gpr[8] &= 0x00000000ffffffffULL;
 
 	} else {
 		f = (void *)sys_call_table[r0];
 	}
 
-	return f(r3, r4, r5, r6, r7, r8);
+	#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+	return f(regs);
+	#else
+	return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+		 regs->gpr[6], regs->gpr[7], regs->gpr[8]);
+	#endif
 }
 
 static notrace void booke_load_dbcr0(void)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 7bab2d7de372..b11c2bd84827 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -91,9 +91,9 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 	li	r11,\trapnr
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
-	addi	r10,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
-	std	r11,-16(r10)		/* "regshere" marker */
+	std	r11,-16(r4)		/* "regshere" marker */
 
 BEGIN_FTR_SECTION
 	HMT_MEDIUM
@@ -108,8 +108,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 * but this is the best we can do.
 	 */
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	mr	r9,r0
+	/* Calling convention has r3 = orig r0, r4 = regs */
+	mr	r3,r0
 	bl	system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -285,9 +285,9 @@ END_BTB_FLUSH_SECTION
 	std	r10,_LINK(r1)
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
-	addi	r10,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
-	std	r11,-16(r10)		/* "regshere" marker */
+	std	r11,-16(r4)		/* "regshere" marker */
 
 #ifdef CONFIG_PPC_BOOK3S
 	li	r11,1
@@ -308,8 +308,8 @@ END_BTB_FLUSH_SECTION
 	wrteei	1
 #endif
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	mr	r9,r0
+	/* Calling convention has r3 = orig r0, r4 = regs */
+	mr	r3,r0
 	bl	system_call_exception
 
 .Lsyscall_exit:
@@ -353,16 +353,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	cmpdi	r3,0
 	bne	.Lsyscall_restore_regs
 	/* Zero volatile regs that may contain sensitive kernel data */
-	li	r0,0
-	li	r4,0
-	li	r5,0
-	li	r6,0
-	li	r7,0
-	li	r8,0
-	li	r9,0
-	li	r10,0
-	li	r11,0
-	li	r12,0
+	ZERO_GPR(0)
+	ZERO_GPRS(4, 12)
 	mtctr	r0
 	mtspr	SPRN_XER,r0
 .Lsyscall_restore_regs_cont:
@@ -388,7 +380,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	REST_NVGPRS(r1)
 	mtctr	r3
 	mtspr	SPRN_XER,r4
-	ld	r0,GPR0(r1)
+	REST_GPR(0, r1)
 	REST_GPRS(4, 12, r1)
 	b	.Lsyscall_restore_regs_cont
 .Lsyscall_rst_end:
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 16ff0399a257..aa7869dbef36 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -48,9 +48,10 @@
 #include <asm/syscalls.h>
 #include <asm/switch_to.h>
 
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
+PPC_SYSCALL_DEFINE(6, unsigned long, compat_sys_mmap2,
+		   unsigned long, addr, size_t, len,
+		   unsigned long, prot, unsigned long, flags,
+		   unsigned long, fd, unsigned long, pgoff)
 {
 	/* This should remain 12 even if PAGE_SIZE changes */
 	return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
@@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
 #define merge_64(high, low) ((u64)high << 32) | low
 #endif
 
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
-			     u32 reg6, u32 pos1, u32 pos2)
+PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
+		   unsigned int, fd,
+		   char __user *, ubuf, compat_size_t, count,
+		   u32, reg6, u32, pos1, u32, pos2)
 {
 	return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
-			      u32 reg6, u32 pos1, u32 pos2)
+PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pwrite64,
+		   unsigned int, fd,
+		   const char __user *, ubuf, compat_size_t, count,
+		   u32, reg6, u32, pos1, u32, pos2)
 {
 	return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
+PPC_SYSCALL_DEFINE(5, compat_ssize_t, compat_sys_readahead,
+		   int, fd, u32, r4,
+		   u32, offset1, u32, offset2, u32, count)
 {
 	return ksys_readahead(fd, merge_64(offset1, offset2), count);
 }
 
-asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
-				unsigned long len1, unsigned long len2)
+PPC_SYSCALL_DEFINE(4, int, compat_sys_truncate64,
+		   const char __user *, path, u32, reg4,
+		   unsigned long, len1, unsigned long, len2)
 {
 	return ksys_truncate(path, merge_64(len1, len2));
 }
 
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
-				     u32 len1, u32 len2)
+PPC_SYSCALL_DEFINE(6, long, compat_sys_fallocate,
+		   int, fd, int, mode, u32, offset1, u32, offset2,
+		   u32, len1, u32, len2)
 {
 	return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
 			     merge_64(len1, len2));
 }
 
-asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
-				 unsigned long len2)
+PPC_SYSCALL_DEFINE(4, int, compat_sys_ftruncate64,
+		   unsigned int, fd, u32, reg4, unsigned long, len1,
+		   unsigned long, len2)
 {
 	return ksys_ftruncate(fd, merge_64(len1, len2));
 }
 
-long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
-		     size_t len, int advice)
+PPC_SYSCALL_DEFINE(6, long, ppc32_fadvise64,
+		   int, fd, u32, unused, u32, offset1, u32, offset2,
+		   size_t, len, int, advice)
 {
 	return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
 				 advice);
 }
 
-asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
-				   unsigned offset1, unsigned offset2,
-				   unsigned nbytes1, unsigned nbytes2)
+PPC_SYSCALL_DEFINE(6, long, compat_sys_sync_file_range2, int, fd,
+		   unsigned int, flags,
+		   unsigned int, offset1, unsigned int, offset2,
+		   unsigned int, nbytes1, unsigned int, nbytes2)
 {
 	loff_t offset = merge_64(offset1, offset2);
 	loff_t nbytes = merge_64(nbytes1, nbytes2);
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index c4f5b4ce926f..c64cdb5c4435 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -64,14 +64,20 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 }
 
 #ifdef CONFIG_PPC32
+asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
+asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+			   fd_set __user *exp,
+			   struct __kernel_old_timeval __user *tvp);
+
 /*
  * Due to some executables calling the wrong select we sometimes
  * get wrong args.  This determines how the args are being passed
  * (a single ptr to them all args passed) then calls
  * sys_select() with the appropriate args. -- Cort
  */
-int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
+PPC_SYSCALL_DEFINE(5, long, ppc_select,
+		   int, n, fd_set __user *, inp, fd_set __user *, outp,
+		   fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
 {
 	if ( (unsigned long)n >= 4096 )
 		return sys_old_select((void __user *)n);
@@ -81,7 +87,9 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 #endif
 
 #ifdef CONFIG_PPC64
-long ppc64_personality(unsigned long personality)
+asmlinkage long sys_personality(unsigned int personality);
+
+PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
 {
 	long ret;
 
@@ -95,8 +103,9 @@ long ppc64_personality(unsigned long personality)
 }
 #endif
 
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
-		      u32 len_high, u32 len_low)
+PPC_SYSCALL_DEFINE(6, long, ppc_fadvise64_64,
+		   int, fd, int, advice, u32, offset_high, u32, offset_low,
+		   u32, len_high, u32, len_low)
 {
 	return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
 				 (u64)len_high << 32 | len_low, advice);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index cb3358886203..70cc7120fce2 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -16,11 +16,32 @@
 
 #ifdef CONFIG_PPC64
 	.p2align	3
+#endif
+
+/* Where the syscall wrapper macro is used, handler functions expect parameters
+   to reside on the stack. We still emit symbols with register-mapped
+   parameters, which are distinguished by a prefix. */
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+
+#define __powerpc_sys_ni_syscall sys_ni_syscall
+
+#ifdef CONFIG_PPC64
+#define __SYSCALL(nr, entry)	.8byte __powerpc_##entry
+#else
+#define __SYSCALL(nr, entry)	.long __powerpc_##entry
+#endif
+
+#else
+
+#ifdef CONFIG_PPC64
 #define __SYSCALL(nr, entry)	.8byte entry
 #else
 #define __SYSCALL(nr, entry)	.long entry
 #endif
 
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 #define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
 .globl sys_call_table
 sys_call_table:
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 717f2c9a7573..dcf57c07dbad 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -40,6 +40,8 @@
 extern char vdso32_start, vdso32_end;
 extern char vdso64_start, vdso64_end;
 
+asmlinkage long sys_ni_syscall(void);
+
 /*
  * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
  * Once the early boot kernel code no longer needs to muck around
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/6] powerpc: Make syscalls save and restore gprs
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
@ 2022-06-01  5:48 ` Rohan McLure
  2022-06-01  8:33   ` Christophe Leroy
  2022-06-01  5:48 ` [PATCH 4/6] powerpc: Fix comment, use clear and restore macros Rohan McLure
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, npiggin

Clears user state in gprs to reduce the influence of user registers on
speculation within kernel syscall handlers.

Remove conditional branches on result of `syscall_exit_prepare` to
restore non-volatile gprs, as these registers are always cleared and
hence always must be restored.

Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/kernel/interrupt_64.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index b11c2bd84827..e601ed999798 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -108,6 +108,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 * but this is the best we can do.
 	 */
 
+	ZERO_GPRS(5, 12)
+	ZERO_NVGPRS()
+
 	/* Calling convention has r3 = orig r0, r4 = regs */
 	mr	r3,r0
 	bl	system_call_exception
@@ -138,6 +141,7 @@ BEGIN_FTR_SECTION
 	HMT_MEDIUM_LOW
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
+	REST_NVGPRS(r1)
 	cmpdi	r3,0
 	bne	.Lsyscall_vectored_\name\()_restore_regs
 
@@ -180,7 +184,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r4,_LINK(r1)
 	ld	r5,_XER(r1)
 
-	REST_NVGPRS(r1)
 	ld	r0,GPR0(r1)
 	mtcr	r2
 	mtctr	r3
@@ -308,6 +311,9 @@ END_BTB_FLUSH_SECTION
 	wrteei	1
 #endif
 
+	ZERO_GPRS(5, 12)
+	ZERO_NVGPRS()
+
 	/* Calling convention has r3 = orig r0, r4 = regs */
 	mr	r3,r0
 	bl	system_call_exception
@@ -350,6 +356,7 @@ BEGIN_FTR_SECTION
 	stdcx.	r0,0,r1			/* to clear the reservation */
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
+	REST_NVGPRS(r1)
 	cmpdi	r3,0
 	bne	.Lsyscall_restore_regs
 	/* Zero volatile regs that may contain sensitive kernel data */
@@ -377,7 +384,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 .Lsyscall_restore_regs:
 	ld	r3,_CTR(r1)
 	ld	r4,_XER(r1)
-	REST_NVGPRS(r1)
 	mtctr	r3
 	mtspr	SPRN_XER,r4
 	REST_GPR(0, r1)
@@ -445,7 +451,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	bl	interrupt_exit_user_prepare
 	cmpdi	r3,0
 	bne-	.Lrestore_nvgprs_\srr
-.Lrestore_nvgprs_\srr\()_cont:
+	.Lrestore_nvgprs_\srr\()_cont:
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/6] powerpc: Fix comment, use clear and restore macros
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
  2022-06-01  5:48 ` [PATCH 3/6] powerpc: Make syscalls save and restore gprs Rohan McLure
@ 2022-06-01  5:48 ` Rohan McLure
  2022-06-01  5:48 ` [PATCH 5/6] powerpc: Move syscall handler prototypes to header Rohan McLure
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, npiggin

Only r10 is saved to the PACA. Reflect this in the inline comment.

Replace instructions for restoring gprs from the stack and clearing them
with the REST_GPRS and ZERO_GPRS convenience macros.

Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |  2 +-
 arch/powerpc/kernel/interrupt_64.S   | 13 +++----------
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index b66dd6f775a4..102896fc6a86 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -281,7 +281,7 @@ BEGIN_FTR_SECTION
 	mfspr	r9,SPRN_PPR
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	HMT_MEDIUM
-	std	r10,IAREA+EX_R10(r13)		/* save r10 - r12 */
+	std	r10,IAREA+EX_R10(r13)		/* save r10 */
 	.if ICFAR
 BEGIN_FTR_SECTION
 	mfspr	r10,SPRN_CFAR
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index e601ed999798..92740d9889a3 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -152,17 +152,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Could zero these as per ABI, but we may consider a stricter ABI
 	 * which preserves these if libc implementations can benefit, so
 	 * restore them for now until further measurement is done. */
-	ld	r0,GPR0(r1)
-	ld	r4,GPR4(r1)
-	ld	r5,GPR5(r1)
-	ld	r6,GPR6(r1)
-	ld	r7,GPR7(r1)
-	ld	r8,GPR8(r1)
+	REST_GPR(0, r1)
+	REST_GPRS(4, 8, r1)
 	/* Zero volatile regs that may contain sensitive kernel data */
-	li	r9,0
-	li	r10,0
-	li	r11,0
-	li	r12,0
+	ZERO_GPRS(9, 12)
 	mtspr	SPRN_XER,r0
 
 	/*
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 5/6] powerpc: Move syscall handler prototypes to header
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
                   ` (2 preceding siblings ...)
  2022-06-01  5:48 ` [PATCH 4/6] powerpc: Fix comment, use clear and restore macros Rohan McLure
@ 2022-06-01  5:48 ` Rohan McLure
  2022-06-01  5:48 ` [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry Rohan McLure
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, npiggin

Since some power syscall handlers call into other syscall handlers with
the usual in-register calling convention, emit symbols for both
conventions when required. The prototypes for handlers supporting
in-register parameters should exist in a header rather than immediately
preceding their usage.

Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/include/asm/syscalls.h | 16 ++++++++++++++++
 arch/powerpc/kernel/syscalls.c      |  7 -------
 arch/powerpc/kernel/vdso.c          |  3 +--
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 75d8e1822caf..1e5f2ddcabe0 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -43,6 +43,22 @@
 
 struct rtas_args;
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#include <linux/syscalls.h>
+asmlinkage long sys_ni_syscall(void);
+#ifdef CONFIG_PPC32
+asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
+asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+			   fd_set __user *exp,
+			   struct __kernel_old_timeval __user *tvp);
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+asmlinkage long sys_personality(unsigned int personality);
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 asmlinkage long sys_mmap(unsigned long addr, size_t len,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long fd, off_t offset);
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index c64cdb5c4435..6107bdd5dad1 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -64,11 +64,6 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 }
 
 #ifdef CONFIG_PPC32
-asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
-asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
-			   fd_set __user *exp,
-			   struct __kernel_old_timeval __user *tvp);
-
 /*
  * Due to some executables calling the wrong select we sometimes
  * get wrong args.  This determines how the args are being passed
@@ -87,8 +82,6 @@ PPC_SYSCALL_DEFINE(5, long, ppc_select,
 #endif
 
 #ifdef CONFIG_PPC64
-asmlinkage long sys_personality(unsigned int personality);
-
 PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
 {
 	long ret;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index dcf57c07dbad..8a56e290fcaf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
 #include <vdso/datapage.h>
 
 #include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -40,8 +41,6 @@
 extern char vdso32_start, vdso32_end;
 extern char vdso64_start, vdso64_end;
 
-asmlinkage long sys_ni_syscall(void);
-
 /*
  * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
  * Once the early boot kernel code no longer needs to muck around
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
                   ` (3 preceding siblings ...)
  2022-06-01  5:48 ` [PATCH 5/6] powerpc: Move syscall handler prototypes to header Rohan McLure
@ 2022-06-01  5:48 ` Rohan McLure
  2022-06-01  8:37   ` Christophe Leroy
  2022-06-01  7:45 ` [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Christophe Leroy
  2022-06-01 16:00 ` Segher Boessenkool
  6 siblings, 1 reply; 31+ messages in thread
From: Rohan McLure @ 2022-06-01  5:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Rohan McLure, npiggin

Zero GPRS r0, r2-r11, r14-r31, on entry into the kernel for all
other interrupt sources to limit influence of user-space values
in potential speculation gadgets. The remaining gprs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored. Remove explicit REST_NVGPRS
invocations on interrupt entry and simplify exit logic.

Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 19 +++++++------------
 arch/powerpc/kernel/interrupt_64.S   |  9 ++-------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 102896fc6a86..8e2c1c924a4d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -502,6 +502,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
 	std	r10,0(r1)		/* make stack chain pointer	*/
 	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
 	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
+	ZERO_GPR(0)
 
 	/* Mark our [H]SRRs valid for return */
 	li	r10,1
@@ -538,14 +539,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r10,IAREA+EX_R10(r13)
 	std	r9,GPR9(r1)
 	std	r10,GPR10(r1)
+	ZERO_GPRS(9, 10)
 	ld	r9,IAREA+EX_R11(r13)	/* move r11 - r13 to stackframe	*/
 	ld	r10,IAREA+EX_R12(r13)
 	ld	r11,IAREA+EX_R13(r13)
 	std	r9,GPR11(r1)
 	std	r10,GPR12(r1)
 	std	r11,GPR13(r1)
+	ZERO_GPR(11) /* keep r12 ([H]SRR1/MSR), r13 (PACA) for interrupt routine */
 
 	SAVE_NVGPRS(r1)
+	ZERO_NVGPRS()
 
 	.if IDAR
 	.if IISIDE
@@ -577,8 +581,9 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r10,IAREA+EX_CTR(r13)
 	std	r10,_CTR(r1)
-	std	r2,GPR2(r1)		/* save r2 in stackframe	*/
+	SAVE_GPR(2, r1)			/* save r2 in stackframe	*/
 	SAVE_GPRS(3, 8, r1)		/* save r3 - r8 in stackframe   */
+	ZERO_GPRS(2, 8)
 	mflr	r9			/* Get LR, later save to stack	*/
 	ld	r2,PACATOC(r13)		/* get kernel TOC into r2	*/
 	std	r9,_LINK(r1)
@@ -696,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	mtlr	r9
 	ld	r9,_CCR(r1)
 	mtcr	r9
+	REST_NVGPRS(r1)
 	REST_GPRS(2, 13, r1)
 	REST_GPR(0, r1)
 	/* restore original r1. */
@@ -1368,11 +1374,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	b	interrupt_return_srr
 
 1:	bl	do_break
-	/*
-	 * do_break() may have changed the NV GPRS while handling a breakpoint.
-	 * If so, we need to restore them with their updated values.
-	 */
-	REST_NVGPRS(r1)
 	b	interrupt_return_srr
 
 
@@ -1598,7 +1599,6 @@ EXC_COMMON_BEGIN(alignment_common)
 	GEN_COMMON alignment
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	alignment_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -1708,7 +1708,6 @@ EXC_COMMON_BEGIN(program_check_common)
 .Ldo_program_check:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	program_check_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -2139,7 +2138,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
 	GEN_COMMON emulation_assist
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	emulation_assist_interrupt
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_hsrr
 
 
@@ -2457,7 +2455,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
 	GEN_COMMON facility_unavailable
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	facility_unavailable_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -2485,7 +2482,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
 	GEN_COMMON h_facility_unavailable
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	facility_unavailable_exception
-	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
 	b	interrupt_return_hsrr
 
 
@@ -2711,7 +2707,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_ALTIVEC
 	bl	altivec_assist_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 #else
 	bl	unknown_exception
 #endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 92740d9889a3..3c742c07f4b6 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -442,9 +442,6 @@ interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	interrupt_exit_user_prepare
-	cmpdi	r3,0
-	bne-	.Lrestore_nvgprs_\srr
-	.Lrestore_nvgprs_\srr\()_cont:
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
@@ -458,6 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 
 .Lfast_user_interrupt_return_\srr\():
+	REST_NVGPRS(r1)
 #ifdef CONFIG_PPC_BOOK3S
 	.ifc \srr,srr
 	lbz	r4,PACASRR_VALID(r13)
@@ -527,10 +525,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	b	.	/* prevent speculative execution */
 .Linterrupt_return_\srr\()_user_rst_end:
 
-.Lrestore_nvgprs_\srr\():
-	REST_NVGPRS(r1)
-	b	.Lrestore_nvgprs_\srr\()_cont
-
 #ifdef CONFIG_PPC_BOOK3S
 interrupt_return_\srr\()_user_restart:
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
@@ -571,6 +565,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
 1:
 
 .Lfast_kernel_interrupt_return_\srr\():
+	REST_NVGPRS(r1)
 	cmpdi	cr1,r3,0
 #ifdef CONFIG_PPC_BOOK3S
 	.ifc \srr,srr
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
                   ` (4 preceding siblings ...)
  2022-06-01  5:48 ` [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry Rohan McLure
@ 2022-06-01  7:45 ` Christophe Leroy
  2022-06-01 16:00 ` Segher Boessenkool
  6 siblings, 0 replies; 31+ messages in thread
From: Christophe Leroy @ 2022-06-01  7:45 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: npiggin



Le 01/06/2022 à 07:48, Rohan McLure a écrit :
> [You don't often get email from rmclure@linux.ibm.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
> 
> Macros for restoring saving registers to and from the stack exist.
> Provide a macro for simply zeroing a range of gprs, or an individual
> gpr.
> 
> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/ppc_asm.h | 17 +++++++++++++++++
>   1 file changed, 17 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
> index 4dea2d963738..3fb37a9767f7 100644
> --- a/arch/powerpc/include/asm/ppc_asm.h
> +++ b/arch/powerpc/include/asm/ppc_asm.h
> @@ -33,6 +33,19 @@
>          .endr
>   .endm
> 
> +/*
> + * Simplification of OP_REGS, for an arbitrary right hand operand.
> + *
> + *   op  reg, rhs

You call it "BINOP" but it is rare to have a binary operation with only 
two operands. Another name could be more appropriate ?
Or keep it as BINOP if you see another future use ? In that case have 
the 3 operands, then 'li r, 0' is same as 'addi r, 0, 0'

> + */
> +.macro BINOP_REGS op, rhs, start, end
> +       .Lreg=\start
> +       .rept (\end - \start + 1)
> +       \op .Lreg, \rhs
> +       .Lreg=.Lreg+1
> +       .endr
> +.endm
> +
>   /*
>    * Macros for storing registers into and loading registers from
>    * exception frames.
> @@ -49,6 +62,10 @@
>   #define REST_NVGPRS(base)              REST_GPRS(13, 31, base)
>   #endif
> 
> +#define ZERO_GPRS(start, end)          BINOP_REGS li, 0, start, end
> +#define ZERO_NVGPRS()                  ZERO_GPRS(14, 31)
> +#define ZERO_GPR(n)                    ZERO_GPRS(n, n)

Maybe it would be more explicit to use ZEROIZE_

> +
>   #define SAVE_GPR(n, base)              SAVE_GPRS(n, n, base)
>   #define REST_GPR(n, base)              REST_GPRS(n, n, base)
> 
> --
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
@ 2022-06-01  8:29   ` Christophe Leroy
  2022-06-09 13:06     ` Christophe Leroy
  2022-06-01  8:59   ` kernel test robot
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 31+ messages in thread
From: Christophe Leroy @ 2022-06-01  8:29 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: Andrew Donnellan, npiggin



Le 01/06/2022 à 07:48, Rohan McLure a écrit :
> [Vous ne recevez pas souvent de courriers de la part de rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à l'adresse https://aka.ms/LearnAboutSenderIdentification.]
> 
> Syscall wrapper implemented as per s390, x86, arm64, providing the
> option for gprs to be cleared on entry to the kernel, reducing caller
> influence influence on speculation within syscall routine. The wrapper
> is a macro that emits syscall handler implementations with parameters
> passed by stack pointer.

Passing parameters by stack is going to be sub-optimal. Did you make any 
measurement of the implied performance degradation ? We usually use the 
null_syscall selftest for that everytime we touch syscall entries/exits.

Why going via stack ? The main advantage of a RISC processor like 
powerpc is that, unlike x86, there are enough registers to avoid going 
through memory. RISC processors are optimised with three operands 
operations and many registers, and usually have slow memory in return.

> 
> For platforms supporting this syscall wrapper, emit symbols with usual
> in-register parameters (`sys...`) to support calls to syscall handlers
> from within the kernel.
> 
> Syscalls are wrapped on all platforms except Cell processor. SPUs require
> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
> enabled.
This commit message isn't very clear, please describe in more details 
what is done, how and why.

> 
> Co-developed-by: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
> ---
>   arch/powerpc/Kconfig                       |  1 +
>   arch/powerpc/include/asm/interrupt.h       |  3 +-
>   arch/powerpc/include/asm/syscall_wrapper.h | 92 ++++++++++++++++++++++
>   arch/powerpc/include/asm/syscalls.h        | 83 +++++++++++++------
>   arch/powerpc/kernel/entry_32.S             |  6 +-
>   arch/powerpc/kernel/interrupt.c            | 35 ++++----
>   arch/powerpc/kernel/interrupt_64.S         | 30 +++----
>   arch/powerpc/kernel/sys_ppc32.c            | 50 +++++++-----
>   arch/powerpc/kernel/syscalls.c             | 19 +++--
>   arch/powerpc/kernel/systbl.S               | 21 +++++
>   arch/powerpc/kernel/vdso.c                 |  2 +
>   11 files changed, 255 insertions(+), 87 deletions(-)
>   create mode 100644 arch/powerpc/include/asm/syscall_wrapper.h
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 174edabb74fa..e58287a70061 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -137,6 +137,7 @@ config PPC
>          select ARCH_HAS_STRICT_KERNEL_RWX       if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
>          select ARCH_HAS_STRICT_KERNEL_RWX       if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE
>          select ARCH_HAS_STRICT_MODULE_RWX       if ARCH_HAS_STRICT_KERNEL_RWX
> +       select ARCH_HAS_SYSCALL_WRAPPER         if !SPU_BASE
>          select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
>          select ARCH_HAS_UACCESS_FLUSHCACHE
>          select ARCH_HAS_UBSAN_SANITIZE_ALL
> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
> index f964ef5c57d8..8e8949e4db7a 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -636,8 +636,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
>                  local_irq_enable();
>   }
> 
> -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
> -                          unsigned long r0, struct pt_regs *regs);
> +long system_call_exception(unsigned long r0, struct pt_regs *regs);
>   notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
>   notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
>   notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
> diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
> new file mode 100644
> index 000000000000..23da22b081e4
> --- /dev/null
> +++ b/arch/powerpc/include/asm/syscall_wrapper.h
> @@ -0,0 +1,92 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
> + *
> + * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
> + */
> +
> +#ifndef __ASM_SYSCALL_WRAPPER_H
> +#define __ASM_SYSCALL_WRAPPER_H
> +
> +struct pt_regs;
> +
> +#define SC_POWERPC_REGS_TO_ARGS(x, ...)                                \
> +       __MAP(x,__SC_ARGS                                       \
> +             ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]        \
> +             ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
> +
> +#ifdef CONFIG_COMPAT
> +
> +#define COMPAT_SYSCALL_DEFINEx(x, name, ...)                                           \
> +       asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs);         \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO);                       \
> +       static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));              \
> +       static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));       \
> +       asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs)          \
> +       {                                                                               \
> +               return __se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));   \
> +       }                                                                               \
> +       static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))               \
> +       {                                                                               \
> +               return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));        \
> +       }                                                                               \
> +       static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define COMPAT_SYSCALL_DEFINE0(sname)                                                  \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused);   \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys_##sname, ERRNO);                     \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused)
> +
> +#define COND_SYSCALL_COMPAT(name)                                                      \
> +       asmlinkage long __weak __powerpc_compat_sys_##name(const struct pt_regs *regs)  \
> +       {                                                                               \
> +               return sys_ni_syscall();                                                \
> +       }
> +#define COMPAT_SYS_NI(name) \
> +       SYSCALL_ALIAS(__powerpc_compat_sys_##name, sys_ni_posix_timers);
> +
> +#endif /* CONFIG_COMPAT */
> +
> +#define __SYSCALL_DEFINEx(x, name, ...)                                                \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);        \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);                      \
> +       long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));                         \
> +       static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));             \
> +       static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));      \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)         \
> +       {                                                                       \
> +               return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));  \
> +       }                                                                       \
> +       long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))                          \
> +       {                                                                       \
> +               return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));          \
> +       }                                                                       \
> +       static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))              \
> +       {                                                                       \
> +               long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));      \
> +               __MAP(x,__SC_TEST,__VA_ARGS__);                                 \
> +               __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));               \
> +               return ret;                                                     \
> +       }                                                                       \
> +       static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define SYSCALL_DEFINE0(sname)                                                 \
> +       SYSCALL_METADATA(_##sname, 0);                                          \
> +       long sys_##name(void);                                                  \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);  \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);                    \
> +       long sys_##sname(void)                                                  \
> +       {                                                                       \
> +               return __powerpc_sys_##sname(NULL);                             \
> +       }                                                                       \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
> +
> +#define COND_SYSCALL(name)                                                     \
> +       asmlinkage long __weak __powerpc_sys_##name(const struct pt_regs *regs) \
> +       {                                                                       \
> +               return sys_ni_syscall();                                        \
> +       }
> +
> +#define SYS_NI(name) SYSCALL_ALIAS(__powerpc_sys_##name, sys_ni_posix_timers);
> +
> +#endif /* __ASM_SYSCALL_WRAPPER_H */
> diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
> index a2b13e55254f..75d8e1822caf 100644
> --- a/arch/powerpc/include/asm/syscalls.h
> +++ b/arch/powerpc/include/asm/syscalls.h
> @@ -8,14 +8,47 @@
>   #include <linux/types.h>
>   #include <linux/compat.h>
> 
> +/*
> + * For PowerPC specific syscall implementations, wrapper takes exact name and
> + * return type for a given function.
> + */
> +
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs);           \
> +       ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO);                         \
> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__));                        \
> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__));                \
> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__));         \
> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs)            \
> +       {                                                                       \
> +               return __se_##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));     \
> +       }                                                                       \
> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__))                         \
> +       {                                                                       \
> +               return __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));             \
> +       }                                                                       \
> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__))                 \
> +       {                                                                       \
> +               type ret = __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));         \
> +               __MAP(x,__SC_TEST,__VA_ARGS__);                                 \
> +               __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));               \
> +               return ret;                                                     \
> +       }                                                                       \
> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +#else
> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
> +       type name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +#endif
> +
>   struct rtas_args;
> 
>   asmlinkage long sys_mmap(unsigned long addr, size_t len,
> -               unsigned long prot, unsigned long flags,
> -               unsigned long fd, off_t offset);
> +                        unsigned long prot, unsigned long flags,
> +                        unsigned long fd, off_t offset);

This is a cleanup that is not part of the change. Please move in a 
preparatory patch.


>   asmlinkage long sys_mmap2(unsigned long addr, size_t len,
> -               unsigned long prot, unsigned long flags,
> -               unsigned long fd, unsigned long pgoff);
> +                         unsigned long prot, unsigned long flags,
> +                         unsigned long fd, unsigned long pgoff);

Same.

>   asmlinkage long ppc64_personality(unsigned long personality);
>   asmlinkage long sys_rtas(struct rtas_args __user *uargs);
>   int ppc_select(int n, fd_set __user *inp, fd_set __user *outp,
> @@ -24,32 +57,38 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
>                        u32 len_high, u32 len_low);
> 
>   #ifdef CONFIG_COMPAT
> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> -                              unsigned long prot, unsigned long flags,
> -                              unsigned long fd, unsigned long pgoff);
> +asmlinkage unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> +                                         unsigned long prot, unsigned long flags,
> +                                         unsigned long fd, unsigned long pgoff);

There was some discussion in the past that asmlinkage is useless. Is 
that change and the ones below really necessary ?

If really needed, is that directly linked to this patch or should it be 
in a preparation patch ?

> 
> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
> -                                 u32 reg6, u32 pos1, u32 pos2);
> +asmlinkage compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf,
> +                                            compat_size_t count,
> +                                            u32 reg6, u32 pos1, u32 pos2);
> 
> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
> -                                  u32 reg6, u32 pos1, u32 pos2);
> +asmlinkage compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf,
> +                                             compat_size_t count,
> +                                             u32 reg6, u32 pos1, u32 pos2);
> 
> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
> +asmlinkage compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1,
> +                                              u32 offset2, u32 count);
> 
> -int compat_sys_truncate64(const char __user *path, u32 reg4,
> -                         unsigned long len1, unsigned long len2);
> +asmlinkage int compat_sys_truncate64(const char __user *path, u32 reg4,
> +                                    unsigned long len1, unsigned long len2);
> 
> -long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
> +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1,
> +                                    u32 offset2, u32 len1, u32 len2);
> 
> -int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
> -                          unsigned long len2);
> +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4,
> +                                     unsigned long len1, unsigned long len2);
> 
> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> -                    size_t len, int advice);
> +asmlinkage long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> +                               size_t len, int advice);
> 
> -long compat_sys_sync_file_range2(int fd, unsigned int flags,
> -                                unsigned int offset1, unsigned int offset2,
> -                                unsigned int nbytes1, unsigned int nbytes2);
> +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
> +                                           unsigned int offset1,
> +                                           unsigned int offset2,
> +                                           unsigned int nbytes1,
> +                                           unsigned int nbytes2);
>   #endif
> 
>   #endif /* __KERNEL__ */
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index 7748c278d13c..2343c577b971 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -121,9 +121,9 @@ transfer_to_syscall:
>          SAVE_NVGPRS(r1)
>          kuep_lock
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   ret_from_syscall:
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 784ea3289c84..a38329ede1a1 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -24,7 +24,11 @@
>   unsigned long global_dbcr0[NR_CPUS];
>   #endif
> 
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +typedef long (*syscall_fn)(struct pt_regs *);
> +#else
>   typedef long (*syscall_fn)(long, long, long, long, long, long);
> +#endif
> 
>   #ifdef CONFIG_PPC_BOOK3S_64
>   DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
> @@ -74,15 +78,13 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
>   }
> 
>   /* Has to run notrace because it is entered not completely "reconciled" */
> -notrace long system_call_exception(long r3, long r4, long r5,
> -                                  long r6, long r7, long r8,
> -                                  unsigned long r0, struct pt_regs *regs)
> +notrace long system_call_exception(unsigned long r0, struct pt_regs *regs)
>   {
>          syscall_fn f;
> 
>          kuap_lock();
> 
> -       regs->orig_gpr3 = r3;
> +       regs->orig_gpr3 = regs->gpr[3];
> 
>          if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>                  BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> @@ -196,12 +198,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
>                  r0 = do_syscall_trace_enter(regs);
>                  if (unlikely(r0 >= NR_syscalls))
>                          return regs->gpr[3];
> -               r3 = regs->gpr[3];
> -               r4 = regs->gpr[4];
> -               r5 = regs->gpr[5];
> -               r6 = regs->gpr[6];
> -               r7 = regs->gpr[7];
> -               r8 = regs->gpr[8];
> 
>          } else if (unlikely(r0 >= NR_syscalls)) {
>                  if (unlikely(trap_is_unsupported_scv(regs))) {
> @@ -218,18 +214,23 @@ notrace long system_call_exception(long r3, long r4, long r5,
>          if (unlikely(is_compat_task())) {
>                  f = (void *)compat_sys_call_table[r0];
> 
> -               r3 &= 0x00000000ffffffffULL;
> -               r4 &= 0x00000000ffffffffULL;
> -               r5 &= 0x00000000ffffffffULL;
> -               r6 &= 0x00000000ffffffffULL;
> -               r7 &= 0x00000000ffffffffULL;
> -               r8 &= 0x00000000ffffffffULL;
> +               regs->gpr[3] &= 0x00000000ffffffffULL;
> +               regs->gpr[4] &= 0x00000000ffffffffULL;
> +               regs->gpr[5] &= 0x00000000ffffffffULL;
> +               regs->gpr[6] &= 0x00000000ffffffffULL;
> +               regs->gpr[7] &= 0x00000000ffffffffULL;
> +               regs->gpr[8] &= 0x00000000ffffffffULL;
> 
>          } else {
>                  f = (void *)sys_call_table[r0];
>          }
> 
> -       return f(r3, r4, r5, r6, r7, r8);
> +       #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +       return f(regs);
> +       #else
> +       return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
> +                regs->gpr[6], regs->gpr[7], regs->gpr[8]);
> +       #endif
>   }
> 
>   static notrace void booke_load_dbcr0(void)
> diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
> index 7bab2d7de372..b11c2bd84827 100644
> --- a/arch/powerpc/kernel/interrupt_64.S
> +++ b/arch/powerpc/kernel/interrupt_64.S
> @@ -91,9 +91,9 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
>          li      r11,\trapnr
>          std     r11,_TRAP(r1)
>          std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>          ld      r11,exception_marker@toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
> 
>   BEGIN_FTR_SECTION
>          HMT_MEDIUM
> @@ -108,8 +108,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>           * but this is the best we can do.
>           */
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   .Lsyscall_vectored_\name\()_exit:
> @@ -285,9 +285,9 @@ END_BTB_FLUSH_SECTION
>          std     r10,_LINK(r1)
>          std     r11,_TRAP(r1)
>          std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>          ld      r11,exception_marker@toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
> 
>   #ifdef CONFIG_PPC_BOOK3S
>          li      r11,1
> @@ -308,8 +308,8 @@ END_BTB_FLUSH_SECTION
>          wrteei  1
>   #endif
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   .Lsyscall_exit:
> @@ -353,16 +353,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>          cmpdi   r3,0
>          bne     .Lsyscall_restore_regs
>          /* Zero volatile regs that may contain sensitive kernel data */
> -       li      r0,0
> -       li      r4,0
> -       li      r5,0
> -       li      r6,0
> -       li      r7,0
> -       li      r8,0
> -       li      r9,0
> -       li      r10,0
> -       li      r11,0
> -       li      r12,0
> +       ZERO_GPR(0)
> +       ZERO_GPRS(4, 12)
>          mtctr   r0
>          mtspr   SPRN_XER,r0
>   .Lsyscall_restore_regs_cont:
> @@ -388,7 +380,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>          REST_NVGPRS(r1)
>          mtctr   r3
>          mtspr   SPRN_XER,r4
> -       ld      r0,GPR0(r1)
> +       REST_GPR(0, r1)
>          REST_GPRS(4, 12, r1)
>          b       .Lsyscall_restore_regs_cont
>   .Lsyscall_rst_end:
> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
> index 16ff0399a257..aa7869dbef36 100644
> --- a/arch/powerpc/kernel/sys_ppc32.c
> +++ b/arch/powerpc/kernel/sys_ppc32.c
> @@ -48,9 +48,10 @@
>   #include <asm/syscalls.h>
>   #include <asm/switch_to.h>
> 
> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> -                         unsigned long prot, unsigned long flags,
> -                         unsigned long fd, unsigned long pgoff)
> +PPC_SYSCALL_DEFINE(6, unsigned long, compat_sys_mmap2,
> +                  unsigned long, addr, size_t, len,
> +                  unsigned long, prot, unsigned long, flags,
> +                  unsigned long, fd, unsigned long, pgoff)

This could be done in a separate patch, as it also addresses other 
topics like the syscall ftracing. Same for all syscalls in this file.

>   {
>          /* This should remain 12 even if PAGE_SIZE changes */
>          return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
> @@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>   #define merge_64(high, low) ((u64)high << 32) | low
>   #endif
> 
> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
> -                            u32 reg6, u32 pos1, u32 pos2)
> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
> +                  unsigned int, fd,
> +                  char __user *, ubuf, compat_size_t, count,
> +                  u32, reg6, u32, pos1, u32, pos2)
>   {
>          return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
>   }
> 
> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
> -                             u32 reg6, u32 pos1, u32 pos2)
> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pwrite64,
> +                  unsigned int, fd,
> +                  const char __user *, ubuf, compat_size_t, count,
> +                  u32, reg6, u32, pos1, u32, pos2)
>   {
>          return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
>   }
> 
> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
> +PPC_SYSCALL_DEFINE(5, compat_ssize_t, compat_sys_readahead,
> +                  int, fd, u32, r4,
> +                  u32, offset1, u32, offset2, u32, count)
>   {
>          return ksys_readahead(fd, merge_64(offset1, offset2), count);
>   }
> 
> -asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
> -                               unsigned long len1, unsigned long len2)
> +PPC_SYSCALL_DEFINE(4, int, compat_sys_truncate64,
> +                  const char __user *, path, u32, reg4,
> +                  unsigned long, len1, unsigned long, len2)
>   {
>          return ksys_truncate(path, merge_64(len1, len2));
>   }
> 
> -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
> -                                    u32 len1, u32 len2)
> +PPC_SYSCALL_DEFINE(6, long, compat_sys_fallocate,
> +                  int, fd, int, mode, u32, offset1, u32, offset2,
> +                  u32, len1, u32, len2)
>   {
>          return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
>                               merge_64(len1, len2));
>   }
> 
> -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
> -                                unsigned long len2)
> +PPC_SYSCALL_DEFINE(4, int, compat_sys_ftruncate64,
> +                  unsigned int, fd, u32, reg4, unsigned long, len1,
> +                  unsigned long, len2)
>   {
>          return ksys_ftruncate(fd, merge_64(len1, len2));
>   }
> 
> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> -                    size_t len, int advice)
> +PPC_SYSCALL_DEFINE(6, long, ppc32_fadvise64,
> +                  int, fd, u32, unused, u32, offset1, u32, offset2,
> +                  size_t, len, int, advice)
>   {
>          return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
>                                   advice);
>   }
> 
> -asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
> -                                  unsigned offset1, unsigned offset2,
> -                                  unsigned nbytes1, unsigned nbytes2)
> +PPC_SYSCALL_DEFINE(6, long, compat_sys_sync_file_range2, int, fd,
> +                  unsigned int, flags,
> +                  unsigned int, offset1, unsigned int, offset2,
> +                  unsigned int, nbytes1, unsigned int, nbytes2)
>   {
>          loff_t offset = merge_64(offset1, offset2);
>          loff_t nbytes = merge_64(nbytes1, nbytes2);
> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
> index c4f5b4ce926f..c64cdb5c4435 100644
> --- a/arch/powerpc/kernel/syscalls.c
> +++ b/arch/powerpc/kernel/syscalls.c
> @@ -64,14 +64,20 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
>   }
> 
>   #ifdef CONFIG_PPC32
> +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
> +asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> +                          fd_set __user *exp,
> +                          struct __kernel_old_timeval __user *tvp);
> +

Those prototypes should go in a header file. I see you do it in a later 
patch. Should be done here directly.

>   /*
>    * Due to some executables calling the wrong select we sometimes
>    * get wrong args.  This determines how the args are being passed
>    * (a single ptr to them all args passed) then calls
>    * sys_select() with the appropriate args. -- Cort
>    */
> -int
> -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
> +PPC_SYSCALL_DEFINE(5, long, ppc_select,
> +                  int, n, fd_set __user *, inp, fd_set __user *, outp,
> +                  fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
>   {
>          if ( (unsigned long)n >= 4096 )
>                  return sys_old_select((void __user *)n);
> @@ -81,7 +87,9 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
>   #endif
> 
>   #ifdef CONFIG_PPC64
> -long ppc64_personality(unsigned long personality)
> +asmlinkage long sys_personality(unsigned int personality);

Same.

> +
> +PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
>   {
>          long ret;
> 
> @@ -95,8 +103,9 @@ long ppc64_personality(unsigned long personality)
>   }
>   #endif
> 
> -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
> -                     u32 len_high, u32 len_low)
> +PPC_SYSCALL_DEFINE(6, long, ppc_fadvise64_64,
> +                  int, fd, int, advice, u32, offset_high, u32, offset_low,
> +                  u32, len_high, u32, len_low)
>   {
>          return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
>                                   (u64)len_high << 32 | len_low, advice);
> diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
> index cb3358886203..70cc7120fce2 100644
> --- a/arch/powerpc/kernel/systbl.S
> +++ b/arch/powerpc/kernel/systbl.S
> @@ -16,11 +16,32 @@
> 
>   #ifdef CONFIG_PPC64
>          .p2align        3
> +#endif
> +
> +/* Where the syscall wrapper macro is used, handler functions expect parameters
> +   to reside on the stack. We still emit symbols with register-mapped
> +   parameters, which are distinguished by a prefix. */
> +
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +
> +#define __powerpc_sys_ni_syscall sys_ni_syscall
> +
> +#ifdef CONFIG_PPC64
> +#define __SYSCALL(nr, entry)   .8byte __powerpc_##entry

Isn't .8bytes the same as .long on PPC64 ?

> +#else
> +#define __SYSCALL(nr, entry)   .long __powerpc_##entry
> +#endif
> +
> +#else
> +
> +#ifdef CONFIG_PPC64
>   #define __SYSCALL(nr, entry)   .8byte entry
>   #else
>   #define __SYSCALL(nr, entry)   .long entry
>   #endif
> 
> +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
> +
>   #define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr, native)
>   .globl sys_call_table
>   sys_call_table:
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index 717f2c9a7573..dcf57c07dbad 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -40,6 +40,8 @@
>   extern char vdso32_start, vdso32_end;
>   extern char vdso64_start, vdso64_end;
> 
> +asmlinkage long sys_ni_syscall(void);
> +

Prototypes should go in a header file.

>   /*
>    * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
>    * Once the early boot kernel code no longer needs to muck around
> --
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/6] powerpc: Make syscalls save and restore gprs
  2022-06-01  5:48 ` [PATCH 3/6] powerpc: Make syscalls save and restore gprs Rohan McLure
@ 2022-06-01  8:33   ` Christophe Leroy
  0 siblings, 0 replies; 31+ messages in thread
From: Christophe Leroy @ 2022-06-01  8:33 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: npiggin



Le 01/06/2022 à 07:48, Rohan McLure a écrit :
> [Vous ne recevez pas souvent de courriers de la part de rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à l'adresse https://aka.ms/LearnAboutSenderIdentification.]
> 
> Clears user state in gprs to reduce the influence of user registers on
> speculation within kernel syscall handlers.
> 
> Remove conditional branches on result of `syscall_exit_prepare` to
> restore non-volatile gprs, as these registers are always cleared and
> hence always must be restored.

Did you measure the implied performance loss ? That must be heavy.

In patch 2 it is said that clearing registers is optional. I can't see 
it as an option here. Some PPC32 are not impacted by speculative 
problems, could we activate that big hammer only when required ?

> 
> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
> ---
>   arch/powerpc/kernel/interrupt_64.S | 12 +++++++++---
>   1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
> index b11c2bd84827..e601ed999798 100644
> --- a/arch/powerpc/kernel/interrupt_64.S
> +++ b/arch/powerpc/kernel/interrupt_64.S
> @@ -108,6 +108,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>           * but this is the best we can do.
>           */
> 
> +       ZERO_GPRS(5, 12)
> +       ZERO_NVGPRS()
> +
>          /* Calling convention has r3 = orig r0, r4 = regs */
>          mr      r3,r0
>          bl      system_call_exception
> @@ -138,6 +141,7 @@ BEGIN_FTR_SECTION
>          HMT_MEDIUM_LOW
>   END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> 
> +       REST_NVGPRS(r1)
>          cmpdi   r3,0
>          bne     .Lsyscall_vectored_\name\()_restore_regs
> 
> @@ -180,7 +184,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>          ld      r4,_LINK(r1)
>          ld      r5,_XER(r1)
> 
> -       REST_NVGPRS(r1)
>          ld      r0,GPR0(r1)
>          mtcr    r2
>          mtctr   r3
> @@ -308,6 +311,9 @@ END_BTB_FLUSH_SECTION
>          wrteei  1
>   #endif
> 
> +       ZERO_GPRS(5, 12)
> +       ZERO_NVGPRS()
> +
>          /* Calling convention has r3 = orig r0, r4 = regs */
>          mr      r3,r0
>          bl      system_call_exception
> @@ -350,6 +356,7 @@ BEGIN_FTR_SECTION
>          stdcx.  r0,0,r1                 /* to clear the reservation */
>   END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> 
> +       REST_NVGPRS(r1)
>          cmpdi   r3,0
>          bne     .Lsyscall_restore_regs
>          /* Zero volatile regs that may contain sensitive kernel data */
> @@ -377,7 +384,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   .Lsyscall_restore_regs:
>          ld      r3,_CTR(r1)
>          ld      r4,_XER(r1)
> -       REST_NVGPRS(r1)
>          mtctr   r3
>          mtspr   SPRN_XER,r4
>          REST_GPR(0, r1)
> @@ -445,7 +451,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
>          bl      interrupt_exit_user_prepare
>          cmpdi   r3,0
>          bne-    .Lrestore_nvgprs_\srr
> -.Lrestore_nvgprs_\srr\()_cont:
> +       .Lrestore_nvgprs_\srr\()_cont:
>          std     r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
>   #ifdef CONFIG_PPC_BOOK3S
>   .Linterrupt_return_\srr\()_user_rst_start:
> --
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry
  2022-06-01  5:48 ` [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry Rohan McLure
@ 2022-06-01  8:37   ` Christophe Leroy
  0 siblings, 0 replies; 31+ messages in thread
From: Christophe Leroy @ 2022-06-01  8:37 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: npiggin



Le 01/06/2022 à 07:48, Rohan McLure a écrit :
> Zero GPRS r0, r2-r11, r14-r31, on entry into the kernel for all
> other interrupt sources to limit influence of user-space values
> in potential speculation gadgets. The remaining gprs are overwritten by
> entry macros to interrupt handlers, irrespective of whether or not a
> given handler consumes these register values.
> 
> Prior to this commit, r14-r31 are restored on a per-interrupt basis at
> exit, but now they are always restored. Remove explicit REST_NVGPRS
> invocations on interrupt entry and simplify exit logic.
> 
> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
> ---
>   arch/powerpc/kernel/exceptions-64s.S | 19 +++++++------------
>   arch/powerpc/kernel/interrupt_64.S   |  9 ++-------
>   2 files changed, 9 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 102896fc6a86..8e2c1c924a4d 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -502,6 +502,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
>   	std	r10,0(r1)		/* make stack chain pointer	*/
>   	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
>   	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
> +	ZERO_GPR(0)
>   
>   	/* Mark our [H]SRRs valid for return */
>   	li	r10,1
> @@ -538,14 +539,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   	ld	r10,IAREA+EX_R10(r13)
>   	std	r9,GPR9(r1)
>   	std	r10,GPR10(r1)
> +	ZERO_GPRS(9, 10)
>   	ld	r9,IAREA+EX_R11(r13)	/* move r11 - r13 to stackframe	*/
>   	ld	r10,IAREA+EX_R12(r13)
>   	ld	r11,IAREA+EX_R13(r13)
>   	std	r9,GPR11(r1)
>   	std	r10,GPR12(r1)
>   	std	r11,GPR13(r1)
> +	ZERO_GPR(11) /* keep r12 ([H]SRR1/MSR), r13 (PACA) for interrupt routine */
>   
>   	SAVE_NVGPRS(r1)
> +	ZERO_NVGPRS()
>   
>   	.if IDAR
>   	.if IISIDE
> @@ -577,8 +581,9 @@ BEGIN_FTR_SECTION
>   END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
>   	ld	r10,IAREA+EX_CTR(r13)
>   	std	r10,_CTR(r1)
> -	std	r2,GPR2(r1)		/* save r2 in stackframe	*/
> +	SAVE_GPR(2, r1)			/* save r2 in stackframe	*/
>   	SAVE_GPRS(3, 8, r1)		/* save r3 - r8 in stackframe   */
> +	ZERO_GPRS(2, 8)

What's the point in clearing r2 whereas it will be overwritten just below ?

>   	mflr	r9			/* Get LR, later save to stack	*/
>   	ld	r2,PACATOC(r13)		/* get kernel TOC into r2	*/
>   	std	r9,_LINK(r1)
> @@ -696,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
>   	mtlr	r9
>   	ld	r9,_CCR(r1)
>   	mtcr	r9
> +	REST_NVGPRS(r1)
>   	REST_GPRS(2, 13, r1)
>   	REST_GPR(0, r1)
>   	/* restore original r1. */
> @@ -1368,11 +1374,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
>   	b	interrupt_return_srr
>   
>   1:	bl	do_break
> -	/*
> -	 * do_break() may have changed the NV GPRS while handling a breakpoint.
> -	 * If so, we need to restore them with their updated values.
> -	 */
> -	REST_NVGPRS(r1)
>   	b	interrupt_return_srr
>   
>   
> @@ -1598,7 +1599,6 @@ EXC_COMMON_BEGIN(alignment_common)
>   	GEN_COMMON alignment
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	alignment_exception
> -	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
>   	b	interrupt_return_srr
>   
>   
> @@ -1708,7 +1708,6 @@ EXC_COMMON_BEGIN(program_check_common)
>   .Ldo_program_check:
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	program_check_exception
> -	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
>   	b	interrupt_return_srr
>   
>   
> @@ -2139,7 +2138,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
>   	GEN_COMMON emulation_assist
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	emulation_assist_interrupt
> -	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
>   	b	interrupt_return_hsrr
>   
>   
> @@ -2457,7 +2455,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
>   	GEN_COMMON facility_unavailable
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	facility_unavailable_exception
> -	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
>   	b	interrupt_return_srr
>   
>   
> @@ -2485,7 +2482,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
>   	GEN_COMMON h_facility_unavailable
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	facility_unavailable_exception
> -	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
>   	b	interrupt_return_hsrr
>   
>   
> @@ -2711,7 +2707,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   #ifdef CONFIG_ALTIVEC
>   	bl	altivec_assist_exception
> -	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
>   #else
>   	bl	unknown_exception
>   #endif
> diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
> index 92740d9889a3..3c742c07f4b6 100644
> --- a/arch/powerpc/kernel/interrupt_64.S
> +++ b/arch/powerpc/kernel/interrupt_64.S
> @@ -442,9 +442,6 @@ interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
>   _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	interrupt_exit_user_prepare

Then should we make interrupt_exit_user_prepare() a void function if we 
don't use the returned value anymore ?

> -	cmpdi	r3,0
> -	bne-	.Lrestore_nvgprs_\srr
> -	.Lrestore_nvgprs_\srr\()_cont:
>   	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
>   #ifdef CONFIG_PPC_BOOK3S
>   .Linterrupt_return_\srr\()_user_rst_start:
> @@ -458,6 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
>   	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
>   
>   .Lfast_user_interrupt_return_\srr\():
> +	REST_NVGPRS(r1)
>   #ifdef CONFIG_PPC_BOOK3S
>   	.ifc \srr,srr
>   	lbz	r4,PACASRR_VALID(r13)
> @@ -527,10 +525,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>   	b	.	/* prevent speculative execution */
>   .Linterrupt_return_\srr\()_user_rst_end:
>   
> -.Lrestore_nvgprs_\srr\():
> -	REST_NVGPRS(r1)
> -	b	.Lrestore_nvgprs_\srr\()_cont
> -
>   #ifdef CONFIG_PPC_BOOK3S
>   interrupt_return_\srr\()_user_restart:
>   _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
> @@ -571,6 +565,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
>   1:
>   
>   .Lfast_kernel_interrupt_return_\srr\():
> +	REST_NVGPRS(r1)
>   	cmpdi	cr1,r3,0
>   #ifdef CONFIG_PPC_BOOK3S
>   	.ifc \srr,srr

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
  2022-06-01  8:29   ` Christophe Leroy
@ 2022-06-01  8:59   ` kernel test robot
  2022-06-01  9:35   ` kernel test robot
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: kernel test robot @ 2022-06-01  8:59 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev
  Cc: Rohan McLure, kbuild-all, Andrew Donnellan, npiggin

Hi Rohan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on v5.18]
[also build test WARNING on next-20220601]
[cannot apply to powerpc/next scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
base:    4b0986a3613c92f4ec1bdc7f60ec66fea135991f
config: powerpc-allyesconfig (https://download.01.org/0day-ci/archive/20220601/202206011624.sRj0DSyf-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/c3efdfac99806b0d7ef4ee781283404448addc69
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
        git checkout c3efdfac99806b0d7ef4ee781283404448addc69
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/syscalls.h:98,
                    from arch/powerpc/kernel/syscalls.c:19:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_switch_endian' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   arch/powerpc/kernel/syscalls.c:114:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     114 | SYSCALL_DEFINE0(switch_endian)
         | ^~~~~~~~~~~~~~~


vim +/sys_switch_endian +78 arch/powerpc/include/asm/syscall_wrapper.h

    49	
    50	#define __SYSCALL_DEFINEx(x, name, ...)						\
    51		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);	\
    52		ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);			\
    53		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));				\
    54		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
    55		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
    56		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)		\
    57		{									\
    58			return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
    59		}									\
    60		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))				\
    61		{									\
    62			return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
    63		}									\
    64		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
    65		{									\
    66			long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
    67			__MAP(x,__SC_TEST,__VA_ARGS__);					\
    68			__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
    69			return ret;							\
    70		}									\
    71		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
    72	
    73	#define SYSCALL_DEFINE0(sname)							\
    74		SYSCALL_METADATA(_##sname, 0);						\
    75		long sys_##name(void);							\
    76		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);	\
    77		ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);			\
  > 78		long sys_##sname(void)							\
    79		{									\
    80			return __powerpc_sys_##sname(NULL);				\
    81		}									\
    82		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
    83	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
  2022-06-01  8:29   ` Christophe Leroy
  2022-06-01  8:59   ` kernel test robot
@ 2022-06-01  9:35   ` kernel test robot
  2022-06-01 12:23   ` kernel test robot
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: kernel test robot @ 2022-06-01  9:35 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev
  Cc: Rohan McLure, kbuild-all, Andrew Donnellan, npiggin

Hi Rohan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.18]
[also build test ERROR on next-20220601]
[cannot apply to powerpc/next scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
base:    4b0986a3613c92f4ec1bdc7f60ec66fea135991f
config: powerpc-allnoconfig (https://download.01.org/0day-ci/archive/20220601/202206011740.FugDNwNm-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/c3efdfac99806b0d7ef4ee781283404448addc69
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
        git checkout c3efdfac99806b0d7ef4ee781283404448addc69
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/ fs/ kernel/ mm/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/syscalls.h:98,
                    from arch/powerpc/kernel/syscalls.c:19:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: error: no previous prototype for 'sys_switch_endian' [-Werror=missing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   arch/powerpc/kernel/syscalls.c:114:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     114 | SYSCALL_DEFINE0(switch_endian)
         | ^~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors
--
   kernel/fork.c:163:13: warning: no previous prototype for 'arch_release_task_struct' [-Wmissing-prototypes]
     163 | void __weak arch_release_task_struct(struct task_struct *tsk)
         |             ^~~~~~~~~~~~~~~~~~~~~~~~
   kernel/fork.c:854:20: warning: no previous prototype for 'arch_task_cache_init' [-Wmissing-prototypes]
     854 | void __init __weak arch_task_cache_init(void) { }
         |                    ^~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/syscalls.h:98,
                    from kernel/fork.c:55:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_fork' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/fork.c:2695:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    2695 | SYSCALL_DEFINE0(fork)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_vfork' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/fork.c:2711:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    2711 | SYSCALL_DEFINE0(vfork)
         | ^~~~~~~~~~~~~~~
--
   In file included from include/linux/syscalls.h:98,
                    from kernel/signal.c:30:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_restart_syscall' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/signal.c:3004:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    3004 | SYSCALL_DEFINE0(restart_syscall)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_sgetmask' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/signal.c:4568:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    4568 | SYSCALL_DEFINE0(sgetmask)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_pause' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/signal.c:4607:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    4607 | SYSCALL_DEFINE0(pause)
         | ^~~~~~~~~~~~~~~
--
   In file included from include/linux/compat.h:34,
                    from kernel/sys.c:47:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getpid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:933:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     933 | SYSCALL_DEFINE0(getpid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_gettid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:939:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     939 | SYSCALL_DEFINE0(gettid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getppid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:950:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     950 | SYSCALL_DEFINE0(getppid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getuid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:961:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     961 | SYSCALL_DEFINE0(getuid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_geteuid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:967:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     967 | SYSCALL_DEFINE0(geteuid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getgid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:973:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     973 | SYSCALL_DEFINE0(getgid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getegid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:979:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     979 | SYSCALL_DEFINE0(getegid)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_getpgrp' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:1154:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    1154 | SYSCALL_DEFINE0(getpgrp)
         | ^~~~~~~~~~~~~~~
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_setsid' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sys.c:1233:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    1233 | SYSCALL_DEFINE0(setsid)
         | ^~~~~~~~~~~~~~~
--
   In file included from include/linux/syscalls.h:98,
                    from mm/mlock.c:19:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_munlockall' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   mm/mlock.c:725:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     725 | SYSCALL_DEFINE0(munlockall)
         | ^~~~~~~~~~~~~~~
--
   In file included from include/linux/syscalls.h:98,
                    from fs/open.c:27:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_vhangup' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   fs/open.c:1375:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    1375 | SYSCALL_DEFINE0(vhangup)
         | ^~~~~~~~~~~~~~~
--
   In file included from include/linux/syscalls.h:98,
                    from fs/sync.c:15:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_sync' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   fs/sync.c:111:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     111 | SYSCALL_DEFINE0(sync)
         | ^~~~~~~~~~~~~~~
--
   kernel/sched/core.c:5235:20: warning: no previous prototype for 'task_sched_runtime' [-Wmissing-prototypes]
    5235 | unsigned long long task_sched_runtime(struct task_struct *p)
         |                    ^~~~~~~~~~~~~~~~~~
   In file included from include/linux/syscalls.h:98,
                    from include/linux/syscalls_api.h:1,
                    from kernel/sched/core.c:13:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: warning: no previous prototype for 'sys_sched_yield' [-Wmissing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   kernel/sched/core.c:8150:1: note: in expansion of macro 'SYSCALL_DEFINE0'
    8150 | SYSCALL_DEFINE0(sched_yield)
         | ^~~~~~~~~~~~~~~
   kernel/sched/core.c:9426:13: warning: no previous prototype for 'sched_init_smp' [-Wmissing-prototypes]
    9426 | void __init sched_init_smp(void)
         |             ^~~~~~~~~~~~~~
   kernel/sched/core.c:9454:13: warning: no previous prototype for 'sched_init' [-Wmissing-prototypes]
    9454 | void __init sched_init(void)
         |             ^~~~~~~~~~


vim +/sys_switch_endian +78 arch/powerpc/include/asm/syscall_wrapper.h

    49	
    50	#define __SYSCALL_DEFINEx(x, name, ...)						\
    51		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);	\
    52		ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);			\
    53		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));				\
    54		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
    55		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
    56		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)		\
    57		{									\
    58			return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
    59		}									\
    60		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))				\
    61		{									\
    62			return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
    63		}									\
    64		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
    65		{									\
    66			long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
    67			__MAP(x,__SC_TEST,__VA_ARGS__);					\
    68			__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
    69			return ret;							\
    70		}									\
    71		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
    72	
    73	#define SYSCALL_DEFINE0(sname)							\
    74		SYSCALL_METADATA(_##sname, 0);						\
    75		long sys_##name(void);							\
    76		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);	\
    77		ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);			\
  > 78		long sys_##sname(void)							\
    79		{									\
    80			return __powerpc_sys_##sname(NULL);				\
    81		}									\
    82		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
    83	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
                     ` (2 preceding siblings ...)
  2022-06-01  9:35   ` kernel test robot
@ 2022-06-01 12:23   ` kernel test robot
  2022-06-01 14:33   ` Christophe Leroy
  2022-06-03  9:04   ` Arnd Bergmann
  5 siblings, 0 replies; 31+ messages in thread
From: kernel test robot @ 2022-06-01 12:23 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev
  Cc: Rohan McLure, kbuild-all, Andrew Donnellan, npiggin

Hi Rohan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.18]
[also build test ERROR on next-20220601]
[cannot apply to powerpc/next scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
base:    4b0986a3613c92f4ec1bdc7f60ec66fea135991f
config: powerpc64-allnoconfig (https://download.01.org/0day-ci/archive/20220601/202206012020.14R31W6c-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/c3efdfac99806b0d7ef4ee781283404448addc69
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Rohan-McLure/powerpc-Add-ZERO_GPRS-macros-for-register-clears/20220601-135400
        git checkout c3efdfac99806b0d7ef4ee781283404448addc69
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/syscalls.h:98,
                    from arch/powerpc/kernel/signal_64.c:23:
>> arch/powerpc/include/asm/syscall_wrapper.h:78:14: error: no previous prototype for 'sys_rt_sigreturn' [-Werror=missing-prototypes]
      78 |         long sys_##sname(void)                                                  \
         |              ^~~~
   arch/powerpc/kernel/signal_64.c:736:1: note: in expansion of macro 'SYSCALL_DEFINE0'
     736 | SYSCALL_DEFINE0(rt_sigreturn)
         | ^~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors


vim +/sys_rt_sigreturn +78 arch/powerpc/include/asm/syscall_wrapper.h

    49	
    50	#define __SYSCALL_DEFINEx(x, name, ...)						\
    51		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);	\
    52		ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);			\
    53		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));				\
    54		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
    55		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
    56		asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)		\
    57		{									\
    58			return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
    59		}									\
    60		long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))				\
    61		{									\
    62			return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));		\
    63		}									\
    64		static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
    65		{									\
    66			long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
    67			__MAP(x,__SC_TEST,__VA_ARGS__);					\
    68			__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
    69			return ret;							\
    70		}									\
    71		static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
    72	
    73	#define SYSCALL_DEFINE0(sname)							\
    74		SYSCALL_METADATA(_##sname, 0);						\
    75		long sys_##name(void);							\
    76		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);	\
    77		ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);			\
  > 78		long sys_##sname(void)							\
    79		{									\
    80			return __powerpc_sys_##sname(NULL);				\
    81		}									\
    82		asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
    83	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
                     ` (3 preceding siblings ...)
  2022-06-01 12:23   ` kernel test robot
@ 2022-06-01 14:33   ` Christophe Leroy
  2022-06-03  3:24     ` Rohan McLure
  2022-06-03  9:04   ` Arnd Bergmann
  5 siblings, 1 reply; 31+ messages in thread
From: Christophe Leroy @ 2022-06-01 14:33 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: Andrew Donnellan, npiggin



Le 01/06/2022 à 07:48, Rohan McLure a écrit :
> [Vous ne recevez pas souvent de courriers de la part de rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à l'adresse https://aka.ms/LearnAboutSenderIdentification.]
> 
> Syscall wrapper implemented as per s390, x86, arm64, providing the
> option for gprs to be cleared on entry to the kernel, reducing caller
> influence influence on speculation within syscall routine. The wrapper
> is a macro that emits syscall handler implementations with parameters
> passed by stack pointer.
> 
> For platforms supporting this syscall wrapper, emit symbols with usual
> in-register parameters (`sys...`) to support calls to syscall handlers
> from within the kernel.
> 
> Syscalls are wrapped on all platforms except Cell processor. SPUs require
> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
> enabled.
> 

Also wondering why I get duplicated syscall functions. Shouldn't the 
sys_ ones go away once we implement the __powerpc_sys_ ones ?

c001e9a0 <sys_fork>:
c001e9a0:	94 21 ff b0 	stwu    r1,-80(r1)
c001e9a4:	7c 08 02 a6 	mflr    r0
c001e9a8:	38 a0 00 40 	li      r5,64
c001e9ac:	38 80 00 00 	li      r4,0
c001e9b0:	38 61 00 08 	addi    r3,r1,8
c001e9b4:	90 01 00 54 	stw     r0,84(r1)
c001e9b8:	4b ff 6d 55 	bl      c001570c <memset>
c001e9bc:	38 61 00 08 	addi    r3,r1,8
c001e9c0:	39 20 00 11 	li      r9,17
c001e9c4:	91 21 00 1c 	stw     r9,28(r1)
c001e9c8:	4b ff fb 31 	bl      c001e4f8 <kernel_clone>
c001e9cc:	80 01 00 54 	lwz     r0,84(r1)
c001e9d0:	38 21 00 50 	addi    r1,r1,80
c001e9d4:	7c 08 03 a6 	mtlr    r0
c001e9d8:	4e 80 00 20 	blr

c001e9dc <__powerpc_sys_fork>:
c001e9dc:	94 21 ff b0 	stwu    r1,-80(r1)
c001e9e0:	7c 08 02 a6 	mflr    r0
c001e9e4:	38 a0 00 40 	li      r5,64
c001e9e8:	38 80 00 00 	li      r4,0
c001e9ec:	38 61 00 08 	addi    r3,r1,8
c001e9f0:	90 01 00 54 	stw     r0,84(r1)
c001e9f4:	4b ff 6d 19 	bl      c001570c <memset>
c001e9f8:	38 61 00 08 	addi    r3,r1,8
c001e9fc:	39 20 00 11 	li      r9,17
c001ea00:	91 21 00 1c 	stw     r9,28(r1)
c001ea04:	4b ff fa f5 	bl      c001e4f8 <kernel_clone>
c001ea08:	80 01 00 54 	lwz     r0,84(r1)
c001ea0c:	38 21 00 50 	addi    r1,r1,80
c001ea10:	7c 08 03 a6 	mtlr    r0
c001ea14:	4e 80 00 20 	blr

c001ea18 <sys_vfork>:
c001ea18:	94 21 ff b0 	stwu    r1,-80(r1)
c001ea1c:	7c 08 02 a6 	mflr    r0
c001ea20:	38 a0 00 38 	li      r5,56
c001ea24:	38 80 00 00 	li      r4,0
c001ea28:	38 61 00 10 	addi    r3,r1,16
c001ea2c:	90 01 00 54 	stw     r0,84(r1)
c001ea30:	4b ff 6c dd 	bl      c001570c <memset>
c001ea34:	38 61 00 08 	addi    r3,r1,8
c001ea38:	39 40 00 00 	li      r10,0
c001ea3c:	39 60 41 00 	li      r11,16640
c001ea40:	39 20 00 11 	li      r9,17
c001ea44:	91 41 00 08 	stw     r10,8(r1)
c001ea48:	91 61 00 0c 	stw     r11,12(r1)
c001ea4c:	91 21 00 1c 	stw     r9,28(r1)
c001ea50:	4b ff fa a9 	bl      c001e4f8 <kernel_clone>
c001ea54:	80 01 00 54 	lwz     r0,84(r1)
c001ea58:	38 21 00 50 	addi    r1,r1,80
c001ea5c:	7c 08 03 a6 	mtlr    r0
c001ea60:	4e 80 00 20 	blr

c001ea64 <__powerpc_sys_vfork>:
c001ea64:	94 21 ff b0 	stwu    r1,-80(r1)
c001ea68:	7c 08 02 a6 	mflr    r0
c001ea6c:	38 a0 00 38 	li      r5,56
c001ea70:	38 80 00 00 	li      r4,0
c001ea74:	38 61 00 10 	addi    r3,r1,16
c001ea78:	90 01 00 54 	stw     r0,84(r1)
c001ea7c:	4b ff 6c 91 	bl      c001570c <memset>
c001ea80:	38 61 00 08 	addi    r3,r1,8
c001ea84:	39 40 00 00 	li      r10,0
c001ea88:	39 60 41 00 	li      r11,16640
c001ea8c:	39 20 00 11 	li      r9,17
c001ea90:	91 41 00 08 	stw     r10,8(r1)
c001ea94:	91 61 00 0c 	stw     r11,12(r1)
c001ea98:	91 21 00 1c 	stw     r9,28(r1)
c001ea9c:	4b ff fa 5d 	bl      c001e4f8 <kernel_clone>
c001eaa0:	80 01 00 54 	lwz     r0,84(r1)
c001eaa4:	38 21 00 50 	addi    r1,r1,80
c001eaa8:	7c 08 03 a6 	mtlr    r0
c001eaac:	4e 80 00 20 	blr



Christophe


> Co-developed-by: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
> ---
>   arch/powerpc/Kconfig                       |  1 +
>   arch/powerpc/include/asm/interrupt.h       |  3 +-
>   arch/powerpc/include/asm/syscall_wrapper.h | 92 ++++++++++++++++++++++
>   arch/powerpc/include/asm/syscalls.h        | 83 +++++++++++++------
>   arch/powerpc/kernel/entry_32.S             |  6 +-
>   arch/powerpc/kernel/interrupt.c            | 35 ++++----
>   arch/powerpc/kernel/interrupt_64.S         | 30 +++----
>   arch/powerpc/kernel/sys_ppc32.c            | 50 +++++++-----
>   arch/powerpc/kernel/syscalls.c             | 19 +++--
>   arch/powerpc/kernel/systbl.S               | 21 +++++
>   arch/powerpc/kernel/vdso.c                 |  2 +
>   11 files changed, 255 insertions(+), 87 deletions(-)
>   create mode 100644 arch/powerpc/include/asm/syscall_wrapper.h
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 174edabb74fa..e58287a70061 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -137,6 +137,7 @@ config PPC
>          select ARCH_HAS_STRICT_KERNEL_RWX       if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
>          select ARCH_HAS_STRICT_KERNEL_RWX       if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE
>          select ARCH_HAS_STRICT_MODULE_RWX       if ARCH_HAS_STRICT_KERNEL_RWX
> +       select ARCH_HAS_SYSCALL_WRAPPER         if !SPU_BASE
>          select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
>          select ARCH_HAS_UACCESS_FLUSHCACHE
>          select ARCH_HAS_UBSAN_SANITIZE_ALL
> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
> index f964ef5c57d8..8e8949e4db7a 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -636,8 +636,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
>                  local_irq_enable();
>   }
> 
> -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
> -                          unsigned long r0, struct pt_regs *regs);
> +long system_call_exception(unsigned long r0, struct pt_regs *regs);
>   notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
>   notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
>   notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
> diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
> new file mode 100644
> index 000000000000..23da22b081e4
> --- /dev/null
> +++ b/arch/powerpc/include/asm/syscall_wrapper.h
> @@ -0,0 +1,92 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
> + *
> + * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
> + */
> +
> +#ifndef __ASM_SYSCALL_WRAPPER_H
> +#define __ASM_SYSCALL_WRAPPER_H
> +
> +struct pt_regs;
> +
> +#define SC_POWERPC_REGS_TO_ARGS(x, ...)                                \
> +       __MAP(x,__SC_ARGS                                       \
> +             ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]        \
> +             ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
> +
> +#ifdef CONFIG_COMPAT
> +
> +#define COMPAT_SYSCALL_DEFINEx(x, name, ...)                                           \
> +       asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs);         \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO);                       \
> +       static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));              \
> +       static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));       \
> +       asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs)          \
> +       {                                                                               \
> +               return __se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));   \
> +       }                                                                               \
> +       static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))               \
> +       {                                                                               \
> +               return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));        \
> +       }                                                                               \
> +       static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define COMPAT_SYSCALL_DEFINE0(sname)                                                  \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused);   \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys_##sname, ERRNO);                     \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused)
> +
> +#define COND_SYSCALL_COMPAT(name)                                                      \
> +       asmlinkage long __weak __powerpc_compat_sys_##name(const struct pt_regs *regs)  \
> +       {                                                                               \
> +               return sys_ni_syscall();                                                \
> +       }
> +#define COMPAT_SYS_NI(name) \
> +       SYSCALL_ALIAS(__powerpc_compat_sys_##name, sys_ni_posix_timers);
> +
> +#endif /* CONFIG_COMPAT */
> +
> +#define __SYSCALL_DEFINEx(x, name, ...)                                                \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs *regs);        \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO);                      \
> +       long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));                         \
> +       static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));             \
> +       static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));      \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs *regs)         \
> +       {                                                                       \
> +               return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));  \
> +       }                                                                       \
> +       long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))                          \
> +       {                                                                       \
> +               return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));          \
> +       }                                                                       \
> +       static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))              \
> +       {                                                                       \
> +               long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));      \
> +               __MAP(x,__SC_TEST,__VA_ARGS__);                                 \
> +               __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));               \
> +               return ret;                                                     \
> +       }                                                                       \
> +       static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define SYSCALL_DEFINE0(sname)                                                 \
> +       SYSCALL_METADATA(_##sname, 0);                                          \
> +       long sys_##name(void);                                                  \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused);  \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO);                    \
> +       long sys_##sname(void)                                                  \
> +       {                                                                       \
> +               return __powerpc_sys_##sname(NULL);                             \
> +       }                                                                       \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
> +
> +#define COND_SYSCALL(name)                                                     \
> +       asmlinkage long __weak __powerpc_sys_##name(const struct pt_regs *regs) \
> +       {                                                                       \
> +               return sys_ni_syscall();                                        \
> +       }
> +
> +#define SYS_NI(name) SYSCALL_ALIAS(__powerpc_sys_##name, sys_ni_posix_timers);
> +
> +#endif /* __ASM_SYSCALL_WRAPPER_H */
> diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
> index a2b13e55254f..75d8e1822caf 100644
> --- a/arch/powerpc/include/asm/syscalls.h
> +++ b/arch/powerpc/include/asm/syscalls.h
> @@ -8,14 +8,47 @@
>   #include <linux/types.h>
>   #include <linux/compat.h>
> 
> +/*
> + * For PowerPC specific syscall implementations, wrapper takes exact name and
> + * return type for a given function.
> + */
> +
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs);           \
> +       ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO);                         \
> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__));                        \
> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__));                \
> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__));         \
> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs)            \
> +       {                                                                       \
> +               return __se_##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));     \
> +       }                                                                       \
> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__))                         \
> +       {                                                                       \
> +               return __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));             \
> +       }                                                                       \
> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__))                 \
> +       {                                                                       \
> +               type ret = __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__));         \
> +               __MAP(x,__SC_TEST,__VA_ARGS__);                                 \
> +               __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));               \
> +               return ret;                                                     \
> +       }                                                                       \
> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +#else
> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
> +       type name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +#endif
> +
>   struct rtas_args;
> 
>   asmlinkage long sys_mmap(unsigned long addr, size_t len,
> -               unsigned long prot, unsigned long flags,
> -               unsigned long fd, off_t offset);
> +                        unsigned long prot, unsigned long flags,
> +                        unsigned long fd, off_t offset);
>   asmlinkage long sys_mmap2(unsigned long addr, size_t len,
> -               unsigned long prot, unsigned long flags,
> -               unsigned long fd, unsigned long pgoff);
> +                         unsigned long prot, unsigned long flags,
> +                         unsigned long fd, unsigned long pgoff);
>   asmlinkage long ppc64_personality(unsigned long personality);
>   asmlinkage long sys_rtas(struct rtas_args __user *uargs);
>   int ppc_select(int n, fd_set __user *inp, fd_set __user *outp,
> @@ -24,32 +57,38 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
>                        u32 len_high, u32 len_low);
> 
>   #ifdef CONFIG_COMPAT
> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> -                              unsigned long prot, unsigned long flags,
> -                              unsigned long fd, unsigned long pgoff);
> +asmlinkage unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> +                                         unsigned long prot, unsigned long flags,
> +                                         unsigned long fd, unsigned long pgoff);
> 
> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
> -                                 u32 reg6, u32 pos1, u32 pos2);
> +asmlinkage compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf,
> +                                            compat_size_t count,
> +                                            u32 reg6, u32 pos1, u32 pos2);
> 
> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
> -                                  u32 reg6, u32 pos1, u32 pos2);
> +asmlinkage compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf,
> +                                             compat_size_t count,
> +                                             u32 reg6, u32 pos1, u32 pos2);
> 
> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
> +asmlinkage compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1,
> +                                              u32 offset2, u32 count);
> 
> -int compat_sys_truncate64(const char __user *path, u32 reg4,
> -                         unsigned long len1, unsigned long len2);
> +asmlinkage int compat_sys_truncate64(const char __user *path, u32 reg4,
> +                                    unsigned long len1, unsigned long len2);
> 
> -long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
> +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1,
> +                                    u32 offset2, u32 len1, u32 len2);
> 
> -int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
> -                          unsigned long len2);
> +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4,
> +                                     unsigned long len1, unsigned long len2);
> 
> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> -                    size_t len, int advice);
> +asmlinkage long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> +                               size_t len, int advice);
> 
> -long compat_sys_sync_file_range2(int fd, unsigned int flags,
> -                                unsigned int offset1, unsigned int offset2,
> -                                unsigned int nbytes1, unsigned int nbytes2);
> +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
> +                                           unsigned int offset1,
> +                                           unsigned int offset2,
> +                                           unsigned int nbytes1,
> +                                           unsigned int nbytes2);
>   #endif
> 
>   #endif /* __KERNEL__ */
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index 7748c278d13c..2343c577b971 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -121,9 +121,9 @@ transfer_to_syscall:
>          SAVE_NVGPRS(r1)
>          kuep_lock
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   ret_from_syscall:
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 784ea3289c84..a38329ede1a1 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -24,7 +24,11 @@
>   unsigned long global_dbcr0[NR_CPUS];
>   #endif
> 
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +typedef long (*syscall_fn)(struct pt_regs *);
> +#else
>   typedef long (*syscall_fn)(long, long, long, long, long, long);
> +#endif
> 
>   #ifdef CONFIG_PPC_BOOK3S_64
>   DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
> @@ -74,15 +78,13 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
>   }
> 
>   /* Has to run notrace because it is entered not completely "reconciled" */
> -notrace long system_call_exception(long r3, long r4, long r5,
> -                                  long r6, long r7, long r8,
> -                                  unsigned long r0, struct pt_regs *regs)
> +notrace long system_call_exception(unsigned long r0, struct pt_regs *regs)
>   {
>          syscall_fn f;
> 
>          kuap_lock();
> 
> -       regs->orig_gpr3 = r3;
> +       regs->orig_gpr3 = regs->gpr[3];
> 
>          if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>                  BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> @@ -196,12 +198,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
>                  r0 = do_syscall_trace_enter(regs);
>                  if (unlikely(r0 >= NR_syscalls))
>                          return regs->gpr[3];
> -               r3 = regs->gpr[3];
> -               r4 = regs->gpr[4];
> -               r5 = regs->gpr[5];
> -               r6 = regs->gpr[6];
> -               r7 = regs->gpr[7];
> -               r8 = regs->gpr[8];
> 
>          } else if (unlikely(r0 >= NR_syscalls)) {
>                  if (unlikely(trap_is_unsupported_scv(regs))) {
> @@ -218,18 +214,23 @@ notrace long system_call_exception(long r3, long r4, long r5,
>          if (unlikely(is_compat_task())) {
>                  f = (void *)compat_sys_call_table[r0];
> 
> -               r3 &= 0x00000000ffffffffULL;
> -               r4 &= 0x00000000ffffffffULL;
> -               r5 &= 0x00000000ffffffffULL;
> -               r6 &= 0x00000000ffffffffULL;
> -               r7 &= 0x00000000ffffffffULL;
> -               r8 &= 0x00000000ffffffffULL;
> +               regs->gpr[3] &= 0x00000000ffffffffULL;
> +               regs->gpr[4] &= 0x00000000ffffffffULL;
> +               regs->gpr[5] &= 0x00000000ffffffffULL;
> +               regs->gpr[6] &= 0x00000000ffffffffULL;
> +               regs->gpr[7] &= 0x00000000ffffffffULL;
> +               regs->gpr[8] &= 0x00000000ffffffffULL;
> 
>          } else {
>                  f = (void *)sys_call_table[r0];
>          }
> 
> -       return f(r3, r4, r5, r6, r7, r8);
> +       #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +       return f(regs);
> +       #else
> +       return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
> +                regs->gpr[6], regs->gpr[7], regs->gpr[8]);
> +       #endif
>   }
> 
>   static notrace void booke_load_dbcr0(void)
> diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
> index 7bab2d7de372..b11c2bd84827 100644
> --- a/arch/powerpc/kernel/interrupt_64.S
> +++ b/arch/powerpc/kernel/interrupt_64.S
> @@ -91,9 +91,9 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
>          li      r11,\trapnr
>          std     r11,_TRAP(r1)
>          std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>          ld      r11,exception_marker@toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
> 
>   BEGIN_FTR_SECTION
>          HMT_MEDIUM
> @@ -108,8 +108,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>           * but this is the best we can do.
>           */
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   .Lsyscall_vectored_\name\()_exit:
> @@ -285,9 +285,9 @@ END_BTB_FLUSH_SECTION
>          std     r10,_LINK(r1)
>          std     r11,_TRAP(r1)
>          std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>          ld      r11,exception_marker@toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
> 
>   #ifdef CONFIG_PPC_BOOK3S
>          li      r11,1
> @@ -308,8 +308,8 @@ END_BTB_FLUSH_SECTION
>          wrteei  1
>   #endif
> 
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>          bl      system_call_exception
> 
>   .Lsyscall_exit:
> @@ -353,16 +353,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>          cmpdi   r3,0
>          bne     .Lsyscall_restore_regs
>          /* Zero volatile regs that may contain sensitive kernel data */
> -       li      r0,0
> -       li      r4,0
> -       li      r5,0
> -       li      r6,0
> -       li      r7,0
> -       li      r8,0
> -       li      r9,0
> -       li      r10,0
> -       li      r11,0
> -       li      r12,0
> +       ZERO_GPR(0)
> +       ZERO_GPRS(4, 12)
>          mtctr   r0
>          mtspr   SPRN_XER,r0
>   .Lsyscall_restore_regs_cont:
> @@ -388,7 +380,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>          REST_NVGPRS(r1)
>          mtctr   r3
>          mtspr   SPRN_XER,r4
> -       ld      r0,GPR0(r1)
> +       REST_GPR(0, r1)
>          REST_GPRS(4, 12, r1)
>          b       .Lsyscall_restore_regs_cont
>   .Lsyscall_rst_end:
> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
> index 16ff0399a257..aa7869dbef36 100644
> --- a/arch/powerpc/kernel/sys_ppc32.c
> +++ b/arch/powerpc/kernel/sys_ppc32.c
> @@ -48,9 +48,10 @@
>   #include <asm/syscalls.h>
>   #include <asm/switch_to.h>
> 
> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
> -                         unsigned long prot, unsigned long flags,
> -                         unsigned long fd, unsigned long pgoff)
> +PPC_SYSCALL_DEFINE(6, unsigned long, compat_sys_mmap2,
> +                  unsigned long, addr, size_t, len,
> +                  unsigned long, prot, unsigned long, flags,
> +                  unsigned long, fd, unsigned long, pgoff)
>   {
>          /* This should remain 12 even if PAGE_SIZE changes */
>          return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
> @@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>   #define merge_64(high, low) ((u64)high << 32) | low
>   #endif
> 
> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
> -                            u32 reg6, u32 pos1, u32 pos2)
> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
> +                  unsigned int, fd,
> +                  char __user *, ubuf, compat_size_t, count,
> +                  u32, reg6, u32, pos1, u32, pos2)
>   {
>          return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
>   }
> 
> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
> -                             u32 reg6, u32 pos1, u32 pos2)
> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pwrite64,
> +                  unsigned int, fd,
> +                  const char __user *, ubuf, compat_size_t, count,
> +                  u32, reg6, u32, pos1, u32, pos2)
>   {
>          return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
>   }
> 
> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
> +PPC_SYSCALL_DEFINE(5, compat_ssize_t, compat_sys_readahead,
> +                  int, fd, u32, r4,
> +                  u32, offset1, u32, offset2, u32, count)
>   {
>          return ksys_readahead(fd, merge_64(offset1, offset2), count);
>   }
> 
> -asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
> -                               unsigned long len1, unsigned long len2)
> +PPC_SYSCALL_DEFINE(4, int, compat_sys_truncate64,
> +                  const char __user *, path, u32, reg4,
> +                  unsigned long, len1, unsigned long, len2)
>   {
>          return ksys_truncate(path, merge_64(len1, len2));
>   }
> 
> -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
> -                                    u32 len1, u32 len2)
> +PPC_SYSCALL_DEFINE(6, long, compat_sys_fallocate,
> +                  int, fd, int, mode, u32, offset1, u32, offset2,
> +                  u32, len1, u32, len2)
>   {
>          return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
>                               merge_64(len1, len2));
>   }
> 
> -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
> -                                unsigned long len2)
> +PPC_SYSCALL_DEFINE(4, int, compat_sys_ftruncate64,
> +                  unsigned int, fd, u32, reg4, unsigned long, len1,
> +                  unsigned long, len2)
>   {
>          return ksys_ftruncate(fd, merge_64(len1, len2));
>   }
> 
> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
> -                    size_t len, int advice)
> +PPC_SYSCALL_DEFINE(6, long, ppc32_fadvise64,
> +                  int, fd, u32, unused, u32, offset1, u32, offset2,
> +                  size_t, len, int, advice)
>   {
>          return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
>                                   advice);
>   }
> 
> -asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
> -                                  unsigned offset1, unsigned offset2,
> -                                  unsigned nbytes1, unsigned nbytes2)
> +PPC_SYSCALL_DEFINE(6, long, compat_sys_sync_file_range2, int, fd,
> +                  unsigned int, flags,
> +                  unsigned int, offset1, unsigned int, offset2,
> +                  unsigned int, nbytes1, unsigned int, nbytes2)
>   {
>          loff_t offset = merge_64(offset1, offset2);
>          loff_t nbytes = merge_64(nbytes1, nbytes2);
> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
> index c4f5b4ce926f..c64cdb5c4435 100644
> --- a/arch/powerpc/kernel/syscalls.c
> +++ b/arch/powerpc/kernel/syscalls.c
> @@ -64,14 +64,20 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
>   }
> 
>   #ifdef CONFIG_PPC32
> +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
> +asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> +                          fd_set __user *exp,
> +                          struct __kernel_old_timeval __user *tvp);
> +
>   /*
>    * Due to some executables calling the wrong select we sometimes
>    * get wrong args.  This determines how the args are being passed
>    * (a single ptr to them all args passed) then calls
>    * sys_select() with the appropriate args. -- Cort
>    */
> -int
> -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
> +PPC_SYSCALL_DEFINE(5, long, ppc_select,
> +                  int, n, fd_set __user *, inp, fd_set __user *, outp,
> +                  fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
>   {
>          if ( (unsigned long)n >= 4096 )
>                  return sys_old_select((void __user *)n);
> @@ -81,7 +87,9 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
>   #endif
> 
>   #ifdef CONFIG_PPC64
> -long ppc64_personality(unsigned long personality)
> +asmlinkage long sys_personality(unsigned int personality);
> +
> +PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
>   {
>          long ret;
> 
> @@ -95,8 +103,9 @@ long ppc64_personality(unsigned long personality)
>   }
>   #endif
> 
> -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
> -                     u32 len_high, u32 len_low)
> +PPC_SYSCALL_DEFINE(6, long, ppc_fadvise64_64,
> +                  int, fd, int, advice, u32, offset_high, u32, offset_low,
> +                  u32, len_high, u32, len_low)
>   {
>          return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
>                                   (u64)len_high << 32 | len_low, advice);
> diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
> index cb3358886203..70cc7120fce2 100644
> --- a/arch/powerpc/kernel/systbl.S
> +++ b/arch/powerpc/kernel/systbl.S
> @@ -16,11 +16,32 @@
> 
>   #ifdef CONFIG_PPC64
>          .p2align        3
> +#endif
> +
> +/* Where the syscall wrapper macro is used, handler functions expect parameters
> +   to reside on the stack. We still emit symbols with register-mapped
> +   parameters, which are distinguished by a prefix. */
> +
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +
> +#define __powerpc_sys_ni_syscall sys_ni_syscall
> +
> +#ifdef CONFIG_PPC64
> +#define __SYSCALL(nr, entry)   .8byte __powerpc_##entry
> +#else
> +#define __SYSCALL(nr, entry)   .long __powerpc_##entry
> +#endif
> +
> +#else
> +
> +#ifdef CONFIG_PPC64
>   #define __SYSCALL(nr, entry)   .8byte entry
>   #else
>   #define __SYSCALL(nr, entry)   .long entry
>   #endif
> 
> +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
> +
>   #define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr, native)
>   .globl sys_call_table
>   sys_call_table:
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index 717f2c9a7573..dcf57c07dbad 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -40,6 +40,8 @@
>   extern char vdso32_start, vdso32_end;
>   extern char vdso64_start, vdso64_end;
> 
> +asmlinkage long sys_ni_syscall(void);
> +
>   /*
>    * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
>    * Once the early boot kernel code no longer needs to muck around
> --
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
                   ` (5 preceding siblings ...)
  2022-06-01  7:45 ` [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Christophe Leroy
@ 2022-06-01 16:00 ` Segher Boessenkool
  2022-06-10  3:32   ` Rohan McLure
  6 siblings, 1 reply; 31+ messages in thread
From: Segher Boessenkool @ 2022-06-01 16:00 UTC (permalink / raw)
  To: Rohan McLure; +Cc: linuxppc-dev, npiggin

Hi!

On Wed, Jun 01, 2022 at 03:48:45PM +1000, Rohan McLure wrote:
> +.macro BINOP_REGS op, rhs, start, end
> +	.Lreg=\start
> +	.rept (\end - \start + 1)
> +	\op .Lreg, \rhs
> +	.Lreg=.Lreg+1
> +	.endr
> +.endm

This is for unary operations, not binary operations (there is only one
item on the RHS).  You can in principle put a string "a,b" in the rhs
parameter, but in practice you need a or b to depend on the loop counter
as well, so even such trickiness won't do.  Make the naming less
confusing, maybe?  Or don't have an unused extra level of abstraction in
the first place :-)


Segher

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01 14:33   ` Christophe Leroy
@ 2022-06-03  3:24     ` Rohan McLure
  2022-06-03  7:09       ` Andrew Donnellan
  0 siblings, 1 reply; 31+ messages in thread
From: Rohan McLure @ 2022-06-03  3:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Andrew Donnellan, npiggin

Thanks for your comment. I comment briefly here on how files like
arch/powerpc/kernel/syscalls.c implement ppc specialisations of 
various syscalls, with implementations that themselves call syscall
symbols.

>> For platforms supporting this syscall wrapper, emit symbols with usual
>> in-register parameters (`sys...`) to support calls to syscall handlers
>> from within the kernel.


The implementation of ppc_personality can be immediately reworked to
call ksys_personality, but I can’t do the same for sys_old_select for
example, which is required to implement ppc_select. As such we emit both 
symbols (sys_... and __powerpc...) so that ppc implementations can 
efficiently call other implementations without needing to allocate
another pt_regs onto the stack.

This does admittedly introduce some code bloat into the binary. I see
a couple ways of fixing this:
1. Remove all calls to syscall implementations via sys_... symbols from
   within the kernel.
2. Restrict which syscall implementations receive a sys_... symbol to
   limit the number of duplicated functions.

Do you have any suggestions on how I might fix this? 

> On 2 Jun 2022, at 12:33 am, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> 
> 
> 
> Le 01/06/2022 à 07:48, Rohan McLure a écrit :
>> [Vous ne recevez pas souvent de courriers de la part de rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à l'adresse https://aka.ms/LearnAboutSenderIdentification.]
>> 
>> Syscall wrapper implemented as per s390, x86, arm64, providing the
>> option for gprs to be cleared on entry to the kernel, reducing caller
>> influence influence on speculation within syscall routine. The wrapper
>> is a macro that emits syscall handler implementations with parameters
>> passed by stack pointer.
>> 
>> For platforms supporting this syscall wrapper, emit symbols with usual
>> in-register parameters (`sys...`) to support calls to syscall handlers
>> from within the kernel.
>> 
>> Syscalls are wrapped on all platforms except Cell processor. SPUs require
>> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
>> enabled.
>> 
> 
> Also wondering why I get duplicated syscall functions. Shouldn't the 
> sys_ ones go away once we implement the __powerpc_sys_ ones ?
> 
> c001e9a0 <sys_fork>:
> c001e9a0:	94 21 ff b0 	stwu    r1,-80(r1)
> c001e9a4:	7c 08 02 a6 	mflr    r0
> c001e9a8:	38 a0 00 40 	li      r5,64
> c001e9ac:	38 80 00 00 	li      r4,0
> c001e9b0:	38 61 00 08 	addi    r3,r1,8
> c001e9b4:	90 01 00 54 	stw     r0,84(r1)
> c001e9b8:	4b ff 6d 55 	bl      c001570c <memset>
> c001e9bc:	38 61 00 08 	addi    r3,r1,8
> c001e9c0:	39 20 00 11 	li      r9,17
> c001e9c4:	91 21 00 1c 	stw     r9,28(r1)
> c001e9c8:	4b ff fb 31 	bl      c001e4f8 <kernel_clone>
> c001e9cc:	80 01 00 54 	lwz     r0,84(r1)
> c001e9d0:	38 21 00 50 	addi    r1,r1,80
> c001e9d4:	7c 08 03 a6 	mtlr    r0
> c001e9d8:	4e 80 00 20 	blr
> 
> c001e9dc <__powerpc_sys_fork>:
> c001e9dc:	94 21 ff b0 	stwu    r1,-80(r1)
> c001e9e0:	7c 08 02 a6 	mflr    r0
> c001e9e4:	38 a0 00 40 	li      r5,64
> c001e9e8:	38 80 00 00 	li      r4,0
> c001e9ec:	38 61 00 08 	addi    r3,r1,8
> c001e9f0:	90 01 00 54 	stw     r0,84(r1)
> c001e9f4:	4b ff 6d 19 	bl      c001570c <memset>
> c001e9f8:	38 61 00 08 	addi    r3,r1,8
> c001e9fc:	39 20 00 11 	li      r9,17
> c001ea00:	91 21 00 1c 	stw     r9,28(r1)
> c001ea04:	4b ff fa f5 	bl      c001e4f8 <kernel_clone>
> c001ea08:	80 01 00 54 	lwz     r0,84(r1)
> c001ea0c:	38 21 00 50 	addi    r1,r1,80
> c001ea10:	7c 08 03 a6 	mtlr    r0
> c001ea14:	4e 80 00 20 	blr
> 
> c001ea18 <sys_vfork>:
> c001ea18:	94 21 ff b0 	stwu    r1,-80(r1)
> c001ea1c:	7c 08 02 a6 	mflr    r0
> c001ea20:	38 a0 00 38 	li      r5,56
> c001ea24:	38 80 00 00 	li      r4,0
> c001ea28:	38 61 00 10 	addi    r3,r1,16
> c001ea2c:	90 01 00 54 	stw     r0,84(r1)
> c001ea30:	4b ff 6c dd 	bl      c001570c <memset>
> c001ea34:	38 61 00 08 	addi    r3,r1,8
> c001ea38:	39 40 00 00 	li      r10,0
> c001ea3c:	39 60 41 00 	li      r11,16640
> c001ea40:	39 20 00 11 	li      r9,17
> c001ea44:	91 41 00 08 	stw     r10,8(r1)
> c001ea48:	91 61 00 0c 	stw     r11,12(r1)
> c001ea4c:	91 21 00 1c 	stw     r9,28(r1)
> c001ea50:	4b ff fa a9 	bl      c001e4f8 <kernel_clone>
> c001ea54:	80 01 00 54 	lwz     r0,84(r1)
> c001ea58:	38 21 00 50 	addi    r1,r1,80
> c001ea5c:	7c 08 03 a6 	mtlr    r0
> c001ea60:	4e 80 00 20 	blr
> 
> c001ea64 <__powerpc_sys_vfork>:
> c001ea64:	94 21 ff b0 	stwu    r1,-80(r1)
> c001ea68:	7c 08 02 a6 	mflr    r0
> c001ea6c:	38 a0 00 38 	li      r5,56
> c001ea70:	38 80 00 00 	li      r4,0
> c001ea74:	38 61 00 10 	addi    r3,r1,16
> c001ea78:	90 01 00 54 	stw     r0,84(r1)
> c001ea7c:	4b ff 6c 91 	bl      c001570c <memset>
> c001ea80:	38 61 00 08 	addi    r3,r1,8
> c001ea84:	39 40 00 00 	li      r10,0
> c001ea88:	39 60 41 00 	li      r11,16640
> c001ea8c:	39 20 00 11 	li      r9,17
> c001ea90:	91 41 00 08 	stw     r10,8(r1)
> c001ea94:	91 61 00 0c 	stw     r11,12(r1)
> c001ea98:	91 21 00 1c 	stw     r9,28(r1)
> c001ea9c:	4b ff fa 5d 	bl      c001e4f8 <kernel_clone>
> c001eaa0:	80 01 00 54 	lwz     r0,84(r1)
> c001eaa4:	38 21 00 50 	addi    r1,r1,80
> c001eaa8:	7c 08 03 a6 	mtlr    r0
> c001eaac:	4e 80 00 20 	blr
> 
> 
> 
> Christophe
> 
> 
>> Co-developed-by: Andrew Donnellan <ajd@linux.ibm.com>
>> Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
>> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
>> ---
>> arch/powerpc/Kconfig | 1 +
>> arch/powerpc/include/asm/interrupt.h | 3 +-
>> arch/powerpc/include/asm/syscall_wrapper.h | 92 ++++++++++++++++++++++
>> arch/powerpc/include/asm/syscalls.h | 83 +++++++++++++------
>> arch/powerpc/kernel/entry_32.S | 6 +-
>> arch/powerpc/kernel/interrupt.c | 35 ++++----
>> arch/powerpc/kernel/interrupt_64.S | 30 +++----
>> arch/powerpc/kernel/sys_ppc32.c | 50 +++++++-----
>> arch/powerpc/kernel/syscalls.c | 19 +++--
>> arch/powerpc/kernel/systbl.S | 21 +++++
>> arch/powerpc/kernel/vdso.c | 2 +
>> 11 files changed, 255 insertions(+), 87 deletions(-)
>> create mode 100644 arch/powerpc/include/asm/syscall_wrapper.h
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 174edabb74fa..e58287a70061 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -137,6 +137,7 @@ config PPC
>> select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
>> select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE
>> select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>> + select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE
>> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>> select ARCH_HAS_UACCESS_FLUSHCACHE
>> select ARCH_HAS_UBSAN_SANITIZE_ALL
>> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
>> index f964ef5c57d8..8e8949e4db7a 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -636,8 +636,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
>> local_irq_enable();
>> }
>> 
>> -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
>> - unsigned long r0, struct pt_regs *regs);
>> +long system_call_exception(unsigned long r0, struct pt_regs *regs);
>> notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
>> notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
>> notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
>> diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
>> new file mode 100644
>> index 000000000000..23da22b081e4
>> --- /dev/null
>> +++ b/arch/powerpc/include/asm/syscall_wrapper.h
>> @@ -0,0 +1,92 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
>> + *
>> + * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
>> + */
>> +
>> +#ifndef __ASM_SYSCALL_WRAPPER_H
>> +#define __ASM_SYSCALL_WRAPPER_H
>> +
>> +struct pt_regs;
>> +
>> +#define SC_POWERPC_REGS_TO_ARGS(x, ...) \
>> + __MAP(x,__SC_ARGS \
>> + ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5] \
>> + ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
>> +
>> +#ifdef CONFIG_COMPAT
>> +
>> +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
>> + asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs); \
>> + ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO); \
>> + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
>> + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
>> + asmlinkage long __powerpc_compat_sys##name(const struct pt_regs *regs) \
>> + { \
>> + return __se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__)); \
>> + } \
>> + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
>> + { \
>> + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
>> + } \
>> + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
>> +
>> +#define COMPAT_SYSCALL_DEFINE0(sname) \
>> + asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused); \
>> + ALLOW_ERROR_INJECTION(__powerpc_compat_sys_##sname, ERRNO); \
>> + asmlinkage long __powerpc_compat_sys_##sname(const struct pt_regs *__unused)
>> +
>> +#define COND_SYSCALL_COMPAT(name) \
>> + asmlinkage long __weak __powerpc_compat_sys_##name(const struct pt_regs *regs) \
>> + { \
>> + return sys_ni_syscall(); \
>> + }
>> +#define COMPAT_SYS_NI(name) \
>> + SYSCALL_ALIAS(__powerpc_compat_sys_##name, sys_ni_posix_timers);
>> +
>> +#endif /* CONFIG_COMPAT */
>> +
>> +#define __SYSCALL_DEFINEx(x, name, ...) \
>> + asmlinkage long __powerpc_sys##name(const struct pt_regs *regs); \
>> + ALLOW_ERROR_INJECTION(__powerpc_sys##name, ERRNO); \
>> + long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
>> + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
>> + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
>> + asmlinkage long __powerpc_sys##name(const struct pt_regs *regs) \
>> + { \
>> + return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__)); \
>> + } \
>> + long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
>> + { \
>> + return __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
>> + } \
>> + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
>> + { \
>> + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
>> + __MAP(x,__SC_TEST,__VA_ARGS__); \
>> + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
>> + return ret; \
>> + } \
>> + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
>> +
>> +#define SYSCALL_DEFINE0(sname) \
>> + SYSCALL_METADATA(_##sname, 0); \
>> + long sys_##name(void); \
>> + asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused); \
>> + ALLOW_ERROR_INJECTION(__powerpc_sys_##sname, ERRNO); \
>> + long sys_##sname(void) \
>> + { \
>> + return __powerpc_sys_##sname(NULL); \
>> + } \
>> + asmlinkage long __powerpc_sys_##sname(const struct pt_regs *__unused)
>> +
>> +#define COND_SYSCALL(name) \
>> + asmlinkage long __weak __powerpc_sys_##name(const struct pt_regs *regs) \
>> + { \
>> + return sys_ni_syscall(); \
>> + }
>> +
>> +#define SYS_NI(name) SYSCALL_ALIAS(__powerpc_sys_##name, sys_ni_posix_timers);
>> +
>> +#endif /* __ASM_SYSCALL_WRAPPER_H */
>> diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
>> index a2b13e55254f..75d8e1822caf 100644
>> --- a/arch/powerpc/include/asm/syscalls.h
>> +++ b/arch/powerpc/include/asm/syscalls.h
>> @@ -8,14 +8,47 @@
>> #include <linux/types.h>
>> #include <linux/compat.h>
>> 
>> +/*
>> + * For PowerPC specific syscall implementations, wrapper takes exact name and
>> + * return type for a given function.
>> + */
>> +
>> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>> +#define PPC_SYSCALL_DEFINE(x, type, name, ...) \
>> + asmlinkage type __powerpc_##name(const struct pt_regs *regs); \
>> + ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO); \
>> + type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
>> + static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
>> + static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
>> + asmlinkage type __powerpc_##name(const struct pt_regs *regs) \
>> + { \
>> + return __se_##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__)); \
>> + } \
>> + type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
>> + { \
>> + return __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
>> + } \
>> + static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
>> + { \
>> + type ret = __do_##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
>> + __MAP(x,__SC_TEST,__VA_ARGS__); \
>> + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
>> + return ret; \
>> + } \
>> + static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__))
>> +#else
>> +#define PPC_SYSCALL_DEFINE(x, type, name, ...) \
>> + type name(__MAP(x,__SC_DECL,__VA_ARGS__))
>> +#endif
>> +
>> struct rtas_args;
>> 
>> asmlinkage long sys_mmap(unsigned long addr, size_t len,
>> - unsigned long prot, unsigned long flags,
>> - unsigned long fd, off_t offset);
>> + unsigned long prot, unsigned long flags,
>> + unsigned long fd, off_t offset);
>> asmlinkage long sys_mmap2(unsigned long addr, size_t len,
>> - unsigned long prot, unsigned long flags,
>> - unsigned long fd, unsigned long pgoff);
>> + unsigned long prot, unsigned long flags,
>> + unsigned long fd, unsigned long pgoff);
>> asmlinkage long ppc64_personality(unsigned long personality);
>> asmlinkage long sys_rtas(struct rtas_args __user *uargs);
>> int ppc_select(int n, fd_set __user *inp, fd_set __user *outp,
>> @@ -24,32 +57,38 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
>> u32 len_high, u32 len_low);
>> 
>> #ifdef CONFIG_COMPAT
>> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>> - unsigned long prot, unsigned long flags,
>> - unsigned long fd, unsigned long pgoff);
>> +asmlinkage unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>> + unsigned long prot, unsigned long flags,
>> + unsigned long fd, unsigned long pgoff);
>> 
>> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
>> - u32 reg6, u32 pos1, u32 pos2);
>> +asmlinkage compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf,
>> + compat_size_t count,
>> + u32 reg6, u32 pos1, u32 pos2);
>> 
>> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
>> - u32 reg6, u32 pos1, u32 pos2);
>> +asmlinkage compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf,
>> + compat_size_t count,
>> + u32 reg6, u32 pos1, u32 pos2);
>> 
>> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
>> +asmlinkage compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1,
>> + u32 offset2, u32 count);
>> 
>> -int compat_sys_truncate64(const char __user *path, u32 reg4,
>> - unsigned long len1, unsigned long len2);
>> +asmlinkage int compat_sys_truncate64(const char __user *path, u32 reg4,
>> + unsigned long len1, unsigned long len2);
>> 
>> -long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
>> +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1,
>> + u32 offset2, u32 len1, u32 len2);
>> 
>> -int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
>> - unsigned long len2);
>> +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4,
>> + unsigned long len1, unsigned long len2);
>> 
>> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
>> - size_t len, int advice);
>> +asmlinkage long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
>> + size_t len, int advice);
>> 
>> -long compat_sys_sync_file_range2(int fd, unsigned int flags,
>> - unsigned int offset1, unsigned int offset2,
>> - unsigned int nbytes1, unsigned int nbytes2);
>> +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
>> + unsigned int offset1,
>> + unsigned int offset2,
>> + unsigned int nbytes1,
>> + unsigned int nbytes2);
>> #endif
>> 
>> #endif /* __KERNEL__ */
>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>> index 7748c278d13c..2343c577b971 100644
>> --- a/arch/powerpc/kernel/entry_32.S
>> +++ b/arch/powerpc/kernel/entry_32.S
>> @@ -121,9 +121,9 @@ transfer_to_syscall:
>> SAVE_NVGPRS(r1)
>> kuep_lock
>> 
>> - /* Calling convention has r9 = orig r0, r10 = regs */
>> - addi r10,r1,STACK_FRAME_OVERHEAD
>> - mr r9,r0
>> + /* Calling convention has r3 = orig r0, r4 = regs */
>> + addi r4,r1,STACK_FRAME_OVERHEAD
>> + mr r3,r0
>> bl system_call_exception
>> 
>> ret_from_syscall:
>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>> index 784ea3289c84..a38329ede1a1 100644
>> --- a/arch/powerpc/kernel/interrupt.c
>> +++ b/arch/powerpc/kernel/interrupt.c
>> @@ -24,7 +24,11 @@
>> unsigned long global_dbcr0[NR_CPUS];
>> #endif
>> 
>> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>> +typedef long (*syscall_fn)(struct pt_regs *);
>> +#else
>> typedef long (*syscall_fn)(long, long, long, long, long, long);
>> +#endif
>> 
>> #ifdef CONFIG_PPC_BOOK3S_64
>> DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
>> @@ -74,15 +78,13 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
>> }
>> 
>> /* Has to run notrace because it is entered not completely "reconciled" */
>> -notrace long system_call_exception(long r3, long r4, long r5,
>> - long r6, long r7, long r8,
>> - unsigned long r0, struct pt_regs *regs)
>> +notrace long system_call_exception(unsigned long r0, struct pt_regs *regs)
>> {
>> syscall_fn f;
>> 
>> kuap_lock();
>> 
>> - regs->orig_gpr3 = r3;
>> + regs->orig_gpr3 = regs->gpr[3];
>> 
>> if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>> BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
>> @@ -196,12 +198,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
>> r0 = do_syscall_trace_enter(regs);
>> if (unlikely(r0 >= NR_syscalls))
>> return regs->gpr[3];
>> - r3 = regs->gpr[3];
>> - r4 = regs->gpr[4];
>> - r5 = regs->gpr[5];
>> - r6 = regs->gpr[6];
>> - r7 = regs->gpr[7];
>> - r8 = regs->gpr[8];
>> 
>> } else if (unlikely(r0 >= NR_syscalls)) {
>> if (unlikely(trap_is_unsupported_scv(regs))) {
>> @@ -218,18 +214,23 @@ notrace long system_call_exception(long r3, long r4, long r5,
>> if (unlikely(is_compat_task())) {
>> f = (void *)compat_sys_call_table[r0];
>> 
>> - r3 &= 0x00000000ffffffffULL;
>> - r4 &= 0x00000000ffffffffULL;
>> - r5 &= 0x00000000ffffffffULL;
>> - r6 &= 0x00000000ffffffffULL;
>> - r7 &= 0x00000000ffffffffULL;
>> - r8 &= 0x00000000ffffffffULL;
>> + regs->gpr[3] &= 0x00000000ffffffffULL;
>> + regs->gpr[4] &= 0x00000000ffffffffULL;
>> + regs->gpr[5] &= 0x00000000ffffffffULL;
>> + regs->gpr[6] &= 0x00000000ffffffffULL;
>> + regs->gpr[7] &= 0x00000000ffffffffULL;
>> + regs->gpr[8] &= 0x00000000ffffffffULL;
>> 
>> } else {
>> f = (void *)sys_call_table[r0];
>> }
>> 
>> - return f(r3, r4, r5, r6, r7, r8);
>> + #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>> + return f(regs);
>> + #else
>> + return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
>> + regs->gpr[6], regs->gpr[7], regs->gpr[8]);
>> + #endif
>> }
>> 
>> static notrace void booke_load_dbcr0(void)
>> diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
>> index 7bab2d7de372..b11c2bd84827 100644
>> --- a/arch/powerpc/kernel/interrupt_64.S
>> +++ b/arch/powerpc/kernel/interrupt_64.S
>> @@ -91,9 +91,9 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
>> li r11,\trapnr
>> std r11,_TRAP(r1)
>> std r12,_CCR(r1)
>> - addi r10,r1,STACK_FRAME_OVERHEAD
>> + addi r4,r1,STACK_FRAME_OVERHEAD
>> ld r11,exception_marker@toc(r2)
>> - std r11,-16(r10) /* "regshere" marker */
>> + std r11,-16(r4) /* "regshere" marker */
>> 
>> BEGIN_FTR_SECTION
>> HMT_MEDIUM
>> @@ -108,8 +108,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>> * but this is the best we can do.
>> */
>> 
>> - /* Calling convention has r9 = orig r0, r10 = regs */
>> - mr r9,r0
>> + /* Calling convention has r3 = orig r0, r4 = regs */
>> + mr r3,r0
>> bl system_call_exception
>> 
>> .Lsyscall_vectored_\name\()_exit:
>> @@ -285,9 +285,9 @@ END_BTB_FLUSH_SECTION
>> std r10,_LINK(r1)
>> std r11,_TRAP(r1)
>> std r12,_CCR(r1)
>> - addi r10,r1,STACK_FRAME_OVERHEAD
>> + addi r4,r1,STACK_FRAME_OVERHEAD
>> ld r11,exception_marker@toc(r2)
>> - std r11,-16(r10) /* "regshere" marker */
>> + std r11,-16(r4) /* "regshere" marker */
>> 
>> #ifdef CONFIG_PPC_BOOK3S
>> li r11,1
>> @@ -308,8 +308,8 @@ END_BTB_FLUSH_SECTION
>> wrteei 1
>> #endif
>> 
>> - /* Calling convention has r9 = orig r0, r10 = regs */
>> - mr r9,r0
>> + /* Calling convention has r3 = orig r0, r4 = regs */
>> + mr r3,r0
>> bl system_call_exception
>> 
>> .Lsyscall_exit:
>> @@ -353,16 +353,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>> cmpdi r3,0
>> bne .Lsyscall_restore_regs
>> /* Zero volatile regs that may contain sensitive kernel data */
>> - li r0,0
>> - li r4,0
>> - li r5,0
>> - li r6,0
>> - li r7,0
>> - li r8,0
>> - li r9,0
>> - li r10,0
>> - li r11,0
>> - li r12,0
>> + ZERO_GPR(0)
>> + ZERO_GPRS(4, 12)
>> mtctr r0
>> mtspr SPRN_XER,r0
>> .Lsyscall_restore_regs_cont:
>> @@ -388,7 +380,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>> REST_NVGPRS(r1)
>> mtctr r3
>> mtspr SPRN_XER,r4
>> - ld r0,GPR0(r1)
>> + REST_GPR(0, r1)
>> REST_GPRS(4, 12, r1)
>> b .Lsyscall_restore_regs_cont
>> .Lsyscall_rst_end:
>> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
>> index 16ff0399a257..aa7869dbef36 100644
>> --- a/arch/powerpc/kernel/sys_ppc32.c
>> +++ b/arch/powerpc/kernel/sys_ppc32.c
>> @@ -48,9 +48,10 @@
>> #include <asm/syscalls.h>
>> #include <asm/switch_to.h>
>> 
>> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>> - unsigned long prot, unsigned long flags,
>> - unsigned long fd, unsigned long pgoff)
>> +PPC_SYSCALL_DEFINE(6, unsigned long, compat_sys_mmap2,
>> + unsigned long, addr, size_t, len,
>> + unsigned long, prot, unsigned long, flags,
>> + unsigned long, fd, unsigned long, pgoff)
>> {
>> /* This should remain 12 even if PAGE_SIZE changes */
>> return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
>> @@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>> #define merge_64(high, low) ((u64)high << 32) | low
>> #endif
>> 
>> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
>> - u32 reg6, u32 pos1, u32 pos2)
>> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
>> + unsigned int, fd,
>> + char __user *, ubuf, compat_size_t, count,
>> + u32, reg6, u32, pos1, u32, pos2)
>> {
>> return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
>> }
>> 
>> -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
>> - u32 reg6, u32 pos1, u32 pos2)
>> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pwrite64,
>> + unsigned int, fd,
>> + const char __user *, ubuf, compat_size_t, count,
>> + u32, reg6, u32, pos1, u32, pos2)
>> {
>> return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
>> }
>> 
>> -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
>> +PPC_SYSCALL_DEFINE(5, compat_ssize_t, compat_sys_readahead,
>> + int, fd, u32, r4,
>> + u32, offset1, u32, offset2, u32, count)
>> {
>> return ksys_readahead(fd, merge_64(offset1, offset2), count);
>> }
>> 
>> -asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
>> - unsigned long len1, unsigned long len2)
>> +PPC_SYSCALL_DEFINE(4, int, compat_sys_truncate64,
>> + const char __user *, path, u32, reg4,
>> + unsigned long, len1, unsigned long, len2)
>> {
>> return ksys_truncate(path, merge_64(len1, len2));
>> }
>> 
>> -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
>> - u32 len1, u32 len2)
>> +PPC_SYSCALL_DEFINE(6, long, compat_sys_fallocate,
>> + int, fd, int, mode, u32, offset1, u32, offset2,
>> + u32, len1, u32, len2)
>> {
>> return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
>> merge_64(len1, len2));
>> }
>> 
>> -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
>> - unsigned long len2)
>> +PPC_SYSCALL_DEFINE(4, int, compat_sys_ftruncate64,
>> + unsigned int, fd, u32, reg4, unsigned long, len1,
>> + unsigned long, len2)
>> {
>> return ksys_ftruncate(fd, merge_64(len1, len2));
>> }
>> 
>> -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
>> - size_t len, int advice)
>> +PPC_SYSCALL_DEFINE(6, long, ppc32_fadvise64,
>> + int, fd, u32, unused, u32, offset1, u32, offset2,
>> + size_t, len, int, advice)
>> {
>> return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
>> advice);
>> }
>> 
>> -asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
>> - unsigned offset1, unsigned offset2,
>> - unsigned nbytes1, unsigned nbytes2)
>> +PPC_SYSCALL_DEFINE(6, long, compat_sys_sync_file_range2, int, fd,
>> + unsigned int, flags,
>> + unsigned int, offset1, unsigned int, offset2,
>> + unsigned int, nbytes1, unsigned int, nbytes2)
>> {
>> loff_t offset = merge_64(offset1, offset2);
>> loff_t nbytes = merge_64(nbytes1, nbytes2);
>> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
>> index c4f5b4ce926f..c64cdb5c4435 100644
>> --- a/arch/powerpc/kernel/syscalls.c
>> +++ b/arch/powerpc/kernel/syscalls.c
>> @@ -64,14 +64,20 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
>> }
>> 
>> #ifdef CONFIG_PPC32
>> +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
>> +asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>> + fd_set __user *exp,
>> + struct __kernel_old_timeval __user *tvp);
>> +
>> /*
>> * Due to some executables calling the wrong select we sometimes
>> * get wrong args. This determines how the args are being passed
>> * (a single ptr to them all args passed) then calls
>> * sys_select() with the appropriate args. -- Cort
>> */
>> -int
>> -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
>> +PPC_SYSCALL_DEFINE(5, long, ppc_select,
>> + int, n, fd_set __user *, inp, fd_set __user *, outp,
>> + fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
>> {
>> if ( (unsigned long)n >= 4096 )
>> return sys_old_select((void __user *)n);
>> @@ -81,7 +87,9 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
>> #endif
>> 
>> #ifdef CONFIG_PPC64
>> -long ppc64_personality(unsigned long personality)
>> +asmlinkage long sys_personality(unsigned int personality);
>> +
>> +PPC_SYSCALL_DEFINE(1, long, ppc64_personality, unsigned long, personality)
>> {
>> long ret;
>> 
>> @@ -95,8 +103,9 @@ long ppc64_personality(unsigned long personality)
>> }
>> #endif
>> 
>> -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
>> - u32 len_high, u32 len_low)
>> +PPC_SYSCALL_DEFINE(6, long, ppc_fadvise64_64,
>> + int, fd, int, advice, u32, offset_high, u32, offset_low,
>> + u32, len_high, u32, len_low)
>> {
>> return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
>> (u64)len_high << 32 | len_low, advice);
>> diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
>> index cb3358886203..70cc7120fce2 100644
>> --- a/arch/powerpc/kernel/systbl.S
>> +++ b/arch/powerpc/kernel/systbl.S
>> @@ -16,11 +16,32 @@
>> 
>> #ifdef CONFIG_PPC64
>> .p2align 3
>> +#endif
>> +
>> +/* Where the syscall wrapper macro is used, handler functions expect parameters
>> + to reside on the stack. We still emit symbols with register-mapped
>> + parameters, which are distinguished by a prefix. */
>> +
>> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>> +
>> +#define __powerpc_sys_ni_syscall sys_ni_syscall
>> +
>> +#ifdef CONFIG_PPC64
>> +#define __SYSCALL(nr, entry) .8byte __powerpc_##entry
>> +#else
>> +#define __SYSCALL(nr, entry) .long __powerpc_##entry
>> +#endif
>> +
>> +#else
>> +
>> +#ifdef CONFIG_PPC64
>> #define __SYSCALL(nr, entry) .8byte entry
>> #else
>> #define __SYSCALL(nr, entry) .long entry
>> #endif
>> 
>> +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
>> +
>> #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
>> .globl sys_call_table
>> sys_call_table:
>> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
>> index 717f2c9a7573..dcf57c07dbad 100644
>> --- a/arch/powerpc/kernel/vdso.c
>> +++ b/arch/powerpc/kernel/vdso.c
>> @@ -40,6 +40,8 @@
>> extern char vdso32_start, vdso32_end;
>> extern char vdso64_start, vdso64_end;
>> 
>> +asmlinkage long sys_ni_syscall(void);
>> +
>> /*
>> * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
>> * Once the early boot kernel code no longer needs to muck around
>> --
>> 2.34.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-03  3:24     ` Rohan McLure
@ 2022-06-03  7:09       ` Andrew Donnellan
  2022-06-03  8:39         ` Christophe Leroy
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Donnellan @ 2022-06-03  7:09 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: npiggin

On Fri, 2022-06-03 at 13:24 +1000, Rohan McLure wrote:
> The implementation of ppc_personality can be immediately reworked to
> call ksys_personality, but I can’t do the same for sys_old_select for
> example, which is required to implement ppc_select. As such we emit
> both 

For ppc_select, I suggest we resurrect
https://lore.kernel.org/lkml/5811950d-ef14-d416-35e6-d694ef920a7d@csgroup.eu/T/#u
and just get rid of the hack.


Andrew

-- 
Andrew Donnellan    OzLabs, ADL Canberra
ajd@linux.ibm.com   IBM Australia Limited


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-03  7:09       ` Andrew Donnellan
@ 2022-06-03  8:39         ` Christophe Leroy
  2022-06-14 13:57           ` Andrew Donnellan
  0 siblings, 1 reply; 31+ messages in thread
From: Christophe Leroy @ 2022-06-03  8:39 UTC (permalink / raw)
  To: Andrew Donnellan, Rohan McLure, linuxppc-dev; +Cc: npiggin



Le 03/06/2022 à 09:09, Andrew Donnellan a écrit :
> On Fri, 2022-06-03 at 13:24 +1000, Rohan McLure wrote:
>> The implementation of ppc_personality can be immediately reworked to
>> call ksys_personality, but I can’t do the same for sys_old_select for
>> example, which is required to implement ppc_select. As such we emit
>> both
> 
> For ppc_select, I suggest we resurrect
> https://lore.kernel.org/lkml/5811950d-ef14-d416-35e6-d694ef920a7d@csgroup.eu/T/#u
> and just get rid of the hack.
> 

Not sure I understand, what would you like to resurrect ? You want to 
resurrect the discussion, or revert commit fd69d544b0e7 
("powerpc/syscalls: Use sys_old_select() in ppc_select()") ?

We are talking about a 25 years old ack ....

However calling sys_old_select() from ppc_select() is just a tail call 
so I can't see why it would require duplicating the pt_regs.

c0004ddc <ppc_select>:
c0004ddc:       28 03 0f ff     cmplwi  r3,4095
c0004de0:       41 81 00 08     bgt     c0004de8 <ppc_select+0xc>
c0004de4:       48 15 b9 98     b       c016077c <__se_sys_select>
c0004de8:       48 15 bc c0     b       c0160aa8 <__se_sys_old_select>

Christophe

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
                     ` (4 preceding siblings ...)
  2022-06-01 14:33   ` Christophe Leroy
@ 2022-06-03  9:04   ` Arnd Bergmann
  2022-06-15  1:47     ` Rohan McLure
  5 siblings, 1 reply; 31+ messages in thread
From: Arnd Bergmann @ 2022-06-03  9:04 UTC (permalink / raw)
  To: Rohan McLure; +Cc: linuxppc-dev, Andrew Donnellan, Nicholas Piggin

On Wed, Jun 1, 2022 at 7:48 AM Rohan McLure <rmclure@linux.ibm.com> wrote:
>
> Syscall wrapper implemented as per s390, x86, arm64, providing the
> option for gprs to be cleared on entry to the kernel, reducing caller
> influence influence on speculation within syscall routine. The wrapper
> is a macro that emits syscall handler implementations with parameters
> passed by stack pointer.
>
> For platforms supporting this syscall wrapper, emit symbols with usual
> in-register parameters (`sys...`) to support calls to syscall handlers
> from within the kernel.

Nice work!

> Syscalls are wrapped on all platforms except Cell processor. SPUs require
> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
> enabled.

Right, I think it's ok to leave out the SPU side. In the long run, I
would like to
go back to requiring the prototypes for everything on all architectures, to
enforce type checking, but that's a separate piece of work.

> +/*
> + * For PowerPC specific syscall implementations, wrapper takes exact name and
> + * return type for a given function.
> + */
> +
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs);           \
> +       ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO);                         \
> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__));                        \
> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__));                \
> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__));         \

What is the benefit of having a separate set of macros for this? I think that
adds more complexity than it saves in the end.

> @@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>  #define merge_64(high, low) ((u64)high << 32) | low
>  #endif
>
> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
> -                            u32 reg6, u32 pos1, u32 pos2)
> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
> +                  unsigned int, fd,
> +                  char __user *, ubuf, compat_size_t, count,
> +                  u32, reg6, u32, pos1, u32, pos2)
>  {
>         return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
>  }

We now have generalized versions of most of these system calls, as of 5.19-rc1
with the addition of the riscv compat support. I think it would be
best to try removing
the powerpc private versions wherever possible and use the common version,
modifying it further where necessary.

If this doesn't work for some of the syscalls, can you use the
COMPAT_SYSCALL_DEFINE for those in place of PPC_SYSCALL_DEFINE?

     Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-01  8:29   ` Christophe Leroy
@ 2022-06-09 13:06     ` Christophe Leroy
  2022-06-16  5:42       ` Rohan McLure
  0 siblings, 1 reply; 31+ messages in thread
From: Christophe Leroy @ 2022-06-09 13:06 UTC (permalink / raw)
  To: Rohan McLure, linuxppc-dev; +Cc: Andrew Donnellan, npiggin



Le 01/06/2022 à 10:29, Christophe Leroy a écrit :
> 
> 
> Le 01/06/2022 à 07:48, Rohan McLure a écrit :
>> [Vous ne recevez pas souvent de courriers de la part de 
>> rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à 
>> l'adresse https://aka.ms/LearnAboutSenderIdentification.]
>>
>> Syscall wrapper implemented as per s390, x86, arm64, providing the
>> option for gprs to be cleared on entry to the kernel, reducing caller
>> influence influence on speculation within syscall routine. The wrapper
>> is a macro that emits syscall handler implementations with parameters
>> passed by stack pointer.
> 
> Passing parameters by stack is going to be sub-optimal. Did you make any 
> measurement of the implied performance degradation ? We usually use the 
> null_syscall selftest for that everytime we touch syscall entries/exits.

I did a test with null_syscall on an 8xx. Surprisingly I get more than 
20% improvement with your series.

Looking at the generated code in more details, we see that 
system_call_exception() is lighter as now no stack frame is needed, the 
compiler has enough registers available.

Before the patch:

c000c9ec <system_call_exception>:
c000c9ec:	94 21 ff f0 	stwu    r1,-16(r1)
c000c9f0:	93 e1 00 0c 	stw     r31,12(r1)
c000c9f4:	7d 5f 53 78 	mr      r31,r10
c000c9f8:	81 4a 00 84 	lwz     r10,132(r10)
c000c9fc:	90 7f 00 88 	stw     r3,136(r31)
c000ca00:	71 4b 00 02 	andi.   r11,r10,2
c000ca04:	41 82 00 4c 	beq     c000ca50 <system_call_exception+0x64>
c000ca08:	71 4b 40 00 	andi.   r11,r10,16384
c000ca0c:	41 82 00 50 	beq     c000ca5c <system_call_exception+0x70>
c000ca10:	71 4a 80 00 	andi.   r10,r10,32768
c000ca14:	41 82 00 54 	beq     c000ca68 <system_call_exception+0x7c>
c000ca18:	7c 50 13 a6 	mtspr   80,r2
c000ca1c:	81 42 00 4c 	lwz     r10,76(r2)
c000ca20:	71 4a 84 91 	andi.   r10,r10,33937
c000ca24:	40 82 00 50 	bne     c000ca74 <system_call_exception+0x88>
c000ca28:	28 09 01 c2 	cmplwi  r9,450
c000ca2c:	41 81 00 88 	bgt     c000cab4 <system_call_exception+0xc8>
c000ca30:	3d 40 c0 6f 	lis     r10,-16273
c000ca34:	55 29 10 3a 	rlwinm  r9,r9,2,0,29
c000ca38:	39 4a c1 c5 	addi    r10,r10,-15931
c000ca3c:	7d 2a 48 2e 	lwzx    r9,r10,r9
c000ca40:	83 e1 00 0c 	lwz     r31,12(r1)
c000ca44:	7d 29 03 a6 	mtctr   r9
c000ca48:	38 21 00 10 	addi    r1,r1,16
c000ca4c:	4e 80 04 20 	bctr
...

After the patch:
c000cc94 <system_call_exception>:
c000cc94:	81 24 00 84 	lwz     r9,132(r4)
c000cc98:	81 44 00 0c 	lwz     r10,12(r4)
c000cc9c:	71 28 00 02 	andi.   r8,r9,2
c000cca0:	91 44 00 88 	stw     r10,136(r4)
c000cca4:	41 82 00 48 	beq     c000ccec <system_call_exception+0x58>
c000cca8:	71 2a 40 00 	andi.   r10,r9,16384
c000ccac:	41 82 00 44 	beq     c000ccf0 <system_call_exception+0x5c>
c000ccb0:	71 29 80 00 	andi.   r9,r9,32768
c000ccb4:	41 82 00 40 	beq     c000ccf4 <system_call_exception+0x60>
c000ccb8:	7c 50 13 a6 	mtspr   80,r2
c000ccbc:	81 22 00 4c 	lwz     r9,76(r2)
c000ccc0:	71 29 84 91 	andi.   r9,r9,33937
c000ccc4:	40 82 00 34 	bne     c000ccf8 <system_call_exception+0x64>
c000ccc8:	28 03 01 c2 	cmplwi  r3,450
c000cccc:	41 81 00 78 	bgt     c000cd44 <system_call_exception+0xb0>
c000ccd0:	3d 20 c0 70 	lis     r9,-16272
c000ccd4:	54 63 10 3a 	rlwinm  r3,r3,2,0,29
c000ccd8:	39 29 81 c5 	addi    r9,r9,-32315
c000ccdc:	7d 29 18 2e 	lwzx    r9,r9,r3
c000cce0:	7c 83 23 78 	mr      r3,r4
c000cce4:	7d 29 03 a6 	mtctr   r9
c000cce8:	4e 80 04 20 	bctr
...



> 
> Why going via stack ? The main advantage of a RISC processor like 
> powerpc is that, unlike x86, there are enough registers to avoid going 
> through memory. RISC processors are optimised with three operands 
> operations and many registers, and usually have slow memory in return.

Well, thinking about it once more. In fact registers are saved to the 
stack anyway. At the start of syscall functions they are likely to still 
be hot in the cache, so reading them back is just a few cycles. And it 
eventually provide the compiler the opportunity to organise stuff better.


> 
>>
>> For platforms supporting this syscall wrapper, emit symbols with usual
>> in-register parameters (`sys...`) to support calls to syscall handlers
>> from within the kernel.
>>
>> Syscalls are wrapped on all platforms except Cell processor. SPUs require
>> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
>> enabled.
> This commit message isn't very clear, please describe in more details 
> what is done, how and why.
> 


Christophe

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-01 16:00 ` Segher Boessenkool
@ 2022-06-10  3:32   ` Rohan McLure
  2022-06-10 14:05     ` Segher Boessenkool
  2022-06-11  8:42     ` Christophe Leroy
  0 siblings, 2 replies; 31+ messages in thread
From: Rohan McLure @ 2022-06-10  3:32 UTC (permalink / raw)
  To: Segher Boessenkool, christophe.leroy; +Cc: linuxppc-dev, npiggin

> On 2 Jun 2022, at 2:00 am, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> 
> Hi!
> 
> On Wed, Jun 01, 2022 at 03:48:45PM +1000, Rohan McLure wrote:
>> +.macro BINOP_REGS op, rhs, start, end
>> +	.Lreg=\start
>> +	.rept (\end - \start + 1)
>> +	\op .Lreg, \rhs
>> +	.Lreg=.Lreg+1
>> +	.endr
>> +.endm
> 
> This is for unary operations, not binary operations (there is only one
> item on the RHS).  You can in principle put a string "a,b" in the rhs
> parameter, but in practice you need a or b to depend on the loop counter
> as well, so even such trickiness won't do.  Make the naming less
> confusing, maybe?  Or don't have an unused extra level of abstraction in
> the first place :-)
> 
> 
> Segher

Thanks Segher, Christophe for reviewing this.

Yep I see how having a macro to perform rX = rX <> Y for arbitrary infix <> and operand
is unlikely to find much use outside of ZERO_GPRS. As I resubmit this patch series I
will rename it to ZERO_REGS or similar to be more explicitly coupled to ZERO_GPRS.

Something like this I was thinking:

.macro ZERO_REGS start, end
	.Lreg=\start
	.rept (\end - \start + 1)
	li	.Lreg, 0
	.Lreg=.Lreg+1
	.endr
.endm

Thanks,
Rohan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-10  3:32   ` Rohan McLure
@ 2022-06-10 14:05     ` Segher Boessenkool
  2022-06-11  8:42     ` Christophe Leroy
  1 sibling, 0 replies; 31+ messages in thread
From: Segher Boessenkool @ 2022-06-10 14:05 UTC (permalink / raw)
  To: Rohan McLure; +Cc: linuxppc-dev, npiggin

Hi!

On Fri, Jun 10, 2022 at 01:32:58PM +1000, Rohan McLure wrote:
> > On 2 Jun 2022, at 2:00 am, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> > This is for unary operations, not binary operations (there is only one
> > item on the RHS).  You can in principle put a string "a,b" in the rhs
> > parameter, but in practice you need a or b to depend on the loop counter
> > as well, so even such trickiness won't do.  Make the naming less
> > confusing, maybe?  Or don't have an unused extra level of abstraction in
> > the first place :-)

> Yep I see how having a macro to perform rX = rX <> Y for arbitrary infix <> and operand
> is unlikely to find much use outside of ZERO_GPRS.

Aha.  On PowerPC (like on most RISC-like architectures) all the normal
instructions are rD := rA OP rB, not rD := rD OP rA.  It looks like
that is our disconnect :-)


Segher

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-10  3:32   ` Rohan McLure
  2022-06-10 14:05     ` Segher Boessenkool
@ 2022-06-11  8:42     ` Christophe Leroy
  2022-06-13 18:48       ` Segher Boessenkool
  1 sibling, 1 reply; 31+ messages in thread
From: Christophe Leroy @ 2022-06-11  8:42 UTC (permalink / raw)
  To: Rohan McLure, Segher Boessenkool; +Cc: linuxppc-dev, npiggin



Le 10/06/2022 à 05:32, Rohan McLure a écrit :
>> On 2 Jun 2022, at 2:00 am, Segher Boessenkool <segher@kernel.crashing.org> wrote:
>>
>> Hi!
>>
>> On Wed, Jun 01, 2022 at 03:48:45PM +1000, Rohan McLure wrote:
>>> +.macro BINOP_REGS op, rhs, start, end
>>> +	.Lreg=\start
>>> +	.rept (\end - \start + 1)
>>> +	\op .Lreg, \rhs
>>> +	.Lreg=.Lreg+1
>>> +	.endr
>>> +.endm
>>
>> This is for unary operations, not binary operations (there is only one
>> item on the RHS).  You can in principle put a string "a,b" in the rhs
>> parameter, but in practice you need a or b to depend on the loop counter
>> as well, so even such trickiness won't do.  Make the naming less
>> confusing, maybe?  Or don't have an unused extra level of abstraction in
>> the first place :-)
>>
>>
>> Segher
> 
> Thanks Segher, Christophe for reviewing this.
> 
> Yep I see how having a macro to perform rX = rX <> Y for arbitrary infix <> and operand
> is unlikely to find much use outside of ZERO_GPRS. As I resubmit this patch series I
> will rename it to ZERO_REGS or similar to be more explicitly coupled to ZERO_GPRS.
> 
> Something like this I was thinking:
> 
> .macro ZERO_REGS start, end
> 	.Lreg=\start
> 	.rept (\end - \start + 1)
> 	li	.Lreg, 0
> 	.Lreg=.Lreg+1
> 	.endr
> .endm
> 

I'd have a preference for using a verb, for instance ZEROISE_REGS or 
CLEAR_REGS

Christophe

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-11  8:42     ` Christophe Leroy
@ 2022-06-13 18:48       ` Segher Boessenkool
  2022-06-14  4:31         ` Michael Ellerman
  0 siblings, 1 reply; 31+ messages in thread
From: Segher Boessenkool @ 2022-06-13 18:48 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: Rohan McLure, linuxppc-dev, npiggin

On Sat, Jun 11, 2022 at 08:42:27AM +0000, Christophe Leroy wrote:
> Le 10/06/2022 à 05:32, Rohan McLure a écrit :
> > .macro ZERO_REGS start, end
> > 	.Lreg=\start
> > 	.rept (\end - \start + 1)
> > 	li	.Lreg, 0
> > 	.Lreg=.Lreg+1
> > 	.endr
> > .endm
> 
> I'd have a preference for using a verb, for instance ZEROISE_REGS or 
> CLEAR_REGS

"Zero" is a verb as well (as well as a noun and an adjective) :-)


Segher

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-13 18:48       ` Segher Boessenkool
@ 2022-06-14  4:31         ` Michael Ellerman
  2022-06-14 11:43           ` Segher Boessenkool
  0 siblings, 1 reply; 31+ messages in thread
From: Michael Ellerman @ 2022-06-14  4:31 UTC (permalink / raw)
  To: Segher Boessenkool, Christophe Leroy; +Cc: Rohan McLure, linuxppc-dev, npiggin

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Sat, Jun 11, 2022 at 08:42:27AM +0000, Christophe Leroy wrote:
>> Le 10/06/2022 à 05:32, Rohan McLure a écrit :
>> > .macro ZERO_REGS start, end
>> > 	.Lreg=\start
>> > 	.rept (\end - \start + 1)
>> > 	li	.Lreg, 0
>> > 	.Lreg=.Lreg+1
>> > 	.endr
>> > .endm
>> 
>> I'd have a preference for using a verb, for instance ZEROISE_REGS or 
>> CLEAR_REGS
>
> "Zero" is a verb as well (as well as a noun and an adjective) :-)

And "clear" is also a verb and an adjective, though helpfully the noun
is "clearing" :D

We could use "nullify", that has some existing usage in the kernel,
although I don't really mind, "zeroise" sounds kind of cool :)

cheers

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears
  2022-06-14  4:31         ` Michael Ellerman
@ 2022-06-14 11:43           ` Segher Boessenkool
  0 siblings, 0 replies; 31+ messages in thread
From: Segher Boessenkool @ 2022-06-14 11:43 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Rohan McLure, linuxppc-dev, npiggin

On Tue, Jun 14, 2022 at 02:31:01PM +1000, Michael Ellerman wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> > On Sat, Jun 11, 2022 at 08:42:27AM +0000, Christophe Leroy wrote:
> >> I'd have a preference for using a verb, for instance ZEROISE_REGS or 
> >> CLEAR_REGS
> >
> > "Zero" is a verb as well (as well as a noun and an adjective) :-)
> 
> And "clear" is also a verb and an adjective, though helpfully the noun
> is "clearing" :D

Yeah.  I don't like "clear" here because it isn't as clear what it
actually will do.  The purpose here is to actually makes those registers
hold the number zero, it isn't just to make it harmless some way.  Which
btw is the context in which "zeroisation" is normally used: in crypto
and other security stuff.

But "zero_regs" can be confusing in other ways, like, if it isn't clear
to the reader it is a verb here.

> We could use "nullify", that has some existing usage in the kernel,
> although I don't really mind, "zeroise" sounds kind of cool :)

That is a benefit yes :-)  And it won't be confusing what it does.


Segher

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-03  8:39         ` Christophe Leroy
@ 2022-06-14 13:57           ` Andrew Donnellan
  0 siblings, 0 replies; 31+ messages in thread
From: Andrew Donnellan @ 2022-06-14 13:57 UTC (permalink / raw)
  To: Christophe Leroy, Rohan McLure, linuxppc-dev; +Cc: npiggin

On Fri, 2022-06-03 at 08:39 +0000, Christophe Leroy wrote:
> 
> 
> Le 03/06/2022 à 09:09, Andrew Donnellan a écrit :
> > On Fri, 2022-06-03 at 13:24 +1000, Rohan McLure wrote:
> > > The implementation of ppc_personality can be immediately reworked
> > > to
> > > call ksys_personality, but I can’t do the same for sys_old_select
> > > for
> > > example, which is required to implement ppc_select. As such we
> > > emit
> > > both
> > 
> > For ppc_select, I suggest we resurrect
> >  
> > https://lore.kernel.org/lkml/5811950d-ef14-d416-35e6-d694ef920a7d@csgroup.eu/T/#u
> > and just get rid of the hack.
> > 
> 
> Not sure I understand, what would you like to resurrect ? You want to
> resurrect the discussion, or revert commit fd69d544b0e7 
> ("powerpc/syscalls: Use sys_old_select() in ppc_select()") ?

We should get rid of ppc_select(), if we can. If Arnd's history is
correct, there's little reason to keep it around and we should just get
rid of it.


-- 
Andrew Donnellan    OzLabs, ADL Canberra
ajd@linux.ibm.com   IBM Australia Limited


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-03  9:04   ` Arnd Bergmann
@ 2022-06-15  1:47     ` Rohan McLure
  2022-06-15 10:13       ` Arnd Bergmann
  0 siblings, 1 reply; 31+ messages in thread
From: Rohan McLure @ 2022-06-15  1:47 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linuxppc-dev, Andrew Donnellan, Nicholas Piggin

> On 3 Jun 2022, at 7:04 pm, Arnd Bergmann <arnd@arndb.de> wrote:
> 
> On Wed, Jun 1, 2022 at 7:48 AM Rohan McLure <rmclure@linux.ibm.com> wrote:
>> 
>> Syscall wrapper implemented as per s390, x86, arm64, providing the
>> option for gprs to be cleared on entry to the kernel, reducing caller
>> influence influence on speculation within syscall routine. The wrapper
>> is a macro that emits syscall handler implementations with parameters
>> passed by stack pointer.
>> 
>> For platforms supporting this syscall wrapper, emit symbols with usual
>> in-register parameters (`sys...`) to support calls to syscall handlers
>> from within the kernel.
> 
> Nice work!
> 
>> Syscalls are wrapped on all platforms except Cell processor. SPUs require
>> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
>> enabled.
> 
> Right, I think it's ok to leave out the SPU side. In the long run, I
> would like to
> go back to requiring the prototypes for everything on all architectures, to
> enforce type checking, but that's a separate piece of work.
> 
>> +/*
>> + * For PowerPC specific syscall implementations, wrapper takes exact name and
>> + * return type for a given function.
>> + */
>> +
>> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>> +#define PPC_SYSCALL_DEFINE(x, type, name, ...)                                 \
>> +       asmlinkage type __powerpc_##name(const struct pt_regs *regs);           \
>> +       ALLOW_ERROR_INJECTION(__powerpc_##name, ERRNO);                         \
>> +       type sys_##name(__MAP(x,__SC_DECL,__VA_ARGS__));                        \
>> +       static type __se_##name(__MAP(x,__SC_LONG,__VA_ARGS__));                \
>> +       static inline type __do_##name(__MAP(x,__SC_DECL,__VA_ARGS__));         \
> 
> What is the benefit of having a separate set of macros for this? I think that
> adds more complexity than it saves in the end.
> 
>> @@ -68,52 +69,63 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
>> #define merge_64(high, low) ((u64)high << 32) | low
>> #endif
>> 
>> -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
>> -                            u32 reg6, u32 pos1, u32 pos2)
>> +PPC_SYSCALL_DEFINE(6, compat_ssize_t, compat_sys_pread64,
>> +                  unsigned int, fd,
>> +                  char __user *, ubuf, compat_size_t, count,
>> +                  u32, reg6, u32, pos1, u32, pos2)
>> {
>>       return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
>> }
> 
> We now have generalized versions of most of these system calls, as of 5.19-rc1
> with the addition of the riscv compat support. I think it would be
> best to try removing
> the powerpc private versions wherever possible and use the common version,
> modifying it further where necessary.
> 
> If this doesn't work for some of the syscalls, can you use the
> COMPAT_SYSCALL_DEFINE for those in place of PPC_SYSCALL_DEFINE?
> 
>    Arnd

Hi Arnd,

Thanks for your comments. 

> What is the benefit of having a separate set of macros for this? I think that
> adds more complexity than it saves in the end.

I was unsure whether the exact return types needed to be respected for syscall
handlers or not. I realise that under the existing behaviour,
system_call_exception performs an indirect call, the return type of which is
interpreted as a long, so the return type should be irrelevant. On inspection
PPC_SYSCALL_DEFINE is readily replacable with COMPAT_SYSCALL_DEFINE as you
have suggested.

Before resubmitting this series, I will try for a patch series which modernises
syscall handlers in arch/powerpc, and inspect where powerpc private versions
are strictly necessary, using __ARCH_WANT_... wherever possible.

Rohan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-15  1:47     ` Rohan McLure
@ 2022-06-15 10:13       ` Arnd Bergmann
  0 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2022-06-15 10:13 UTC (permalink / raw)
  To: Rohan McLure
  Cc: linuxppc-dev, Andrew Donnellan, Arnd Bergmann, Nicholas Piggin

On Wed, Jun 15, 2022 at 3:47 AM Rohan McLure <rmclure@linux.ibm.com> wrote:
> > On 3 Jun 2022, at 7:04 pm, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Wed, Jun 1, 2022 at 7:48 AM Rohan McLure <rmclure@linux.ibm.com> wrote:

> > What is the benefit of having a separate set of macros for this? I think that
> > adds more complexity than it saves in the end.
>
> I was unsure whether the exact return types needed to be respected for syscall
> handlers or not. I realise that under the existing behaviour,
> system_call_exception performs an indirect call, the return type of which is
> interpreted as a long, so the return type should be irrelevant. On inspection
> PPC_SYSCALL_DEFINE is readily replacable with COMPAT_SYSCALL_DEFINE as you
> have suggested.
>
> Before resubmitting this series, I will try for a patch series which modernises
> syscall handlers in arch/powerpc, and inspect where powerpc private versions
> are strictly necessary, using __ARCH_WANT_... wherever possible.

Ok, great! The parameter ordering is a bit tricky for some of them. I think
in most cases the version used by risc-v now should be the same as what
you need for powerpc (with the appropriate compat_arg_u64_dual()).
If some don't work, I would suggest modifying the common code so it can
handle both riscv and powerpc instead of keeping a private copy.

       Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-09 13:06     ` Christophe Leroy
@ 2022-06-16  5:42       ` Rohan McLure
  2022-06-16  7:20         ` Arnd Bergmann
  0 siblings, 1 reply; 31+ messages in thread
From: Rohan McLure @ 2022-06-16  5:42 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev, Andrew Donnellan, npiggin

> Le 01/06/2022 à 10:29, Christophe Leroy a écrit :
>> Le 01/06/2022 à 07:48, Rohan McLure a écrit :
>>> [Vous ne recevez pas souvent de courriers de la part de rmclure@linux.ibm.com. Découvrez pourquoi cela peut être important à l'adresse https://aka.ms/LearnAboutSenderIdentification.]
>>> 
>>> Syscall wrapper implemented as per s390, x86, arm64, providing the
>>> option for gprs to be cleared on entry to the kernel, reducing caller
>>> influence influence on speculation within syscall routine. The wrapper
>>> is a macro that emits syscall handler implementations with parameters
>>> passed by stack pointer.
>> Passing parameters by stack is going to be sub-optimal. Did you make any measurement of the implied performance degradation ? We usually use the null_syscall selftest for that everytime we touch syscall entries/exits.
> 
> I did a test with null_syscall on an 8xx. Surprisingly I get more than 20% improvement with your series.
> 
> Looking at the generated code in more details, we see that system_call_exception() is lighter as now no stack frame is needed, the compiler has enough registers available.
> 
> Before the patch:
> 
> c000c9ec <system_call_exception>:
> c000c9ec:	94 21 ff f0 	stwu r1,-16(r1)
> c000c9f0:	93 e1 00 0c 	stw r31,12(r1)
> c000c9f4:	7d 5f 53 78 	mr r31,r10
> c000c9f8:	81 4a 00 84 	lwz r10,132(r10)
> c000c9fc:	90 7f 00 88 	stw r3,136(r31)
> c000ca00:	71 4b 00 02 	andi. r11,r10,2
> c000ca04:	41 82 00 4c 	beq c000ca50 <system_call_exception+0x64>
> c000ca08:	71 4b 40 00 	andi. r11,r10,16384
> c000ca0c:	41 82 00 50 	beq c000ca5c <system_call_exception+0x70>
> c000ca10:	71 4a 80 00 	andi. r10,r10,32768
> c000ca14:	41 82 00 54 	beq c000ca68 <system_call_exception+0x7c>
> c000ca18:	7c 50 13 a6 	mtspr 80,r2
> c000ca1c:	81 42 00 4c 	lwz r10,76(r2)
> c000ca20:	71 4a 84 91 	andi. r10,r10,33937
> c000ca24:	40 82 00 50 	bne c000ca74 <system_call_exception+0x88>
> c000ca28:	28 09 01 c2 	cmplwi r9,450
> c000ca2c:	41 81 00 88 	bgt c000cab4 <system_call_exception+0xc8>
> c000ca30:	3d 40 c0 6f 	lis r10,-16273
> c000ca34:	55 29 10 3a 	rlwinm r9,r9,2,0,29
> c000ca38:	39 4a c1 c5 	addi r10,r10,-15931
> c000ca3c:	7d 2a 48 2e 	lwzx r9,r10,r9
> c000ca40:	83 e1 00 0c 	lwz r31,12(r1)
> c000ca44:	7d 29 03 a6 	mtctr r9
> c000ca48:	38 21 00 10 	addi r1,r1,16
> c000ca4c:	4e 80 04 20 	bctr
> ...
> 
> After the patch:
> c000cc94 <system_call_exception>:
> c000cc94:	81 24 00 84 	lwz r9,132(r4)
> c000cc98:	81 44 00 0c 	lwz r10,12(r4)
> c000cc9c:	71 28 00 02 	andi. r8,r9,2
> c000cca0:	91 44 00 88 	stw r10,136(r4)
> c000cca4:	41 82 00 48 	beq c000ccec <system_call_exception+0x58>
> c000cca8:	71 2a 40 00 	andi. r10,r9,16384
> c000ccac:	41 82 00 44 	beq c000ccf0 <system_call_exception+0x5c>
> c000ccb0:	71 29 80 00 	andi. r9,r9,32768
> c000ccb4:	41 82 00 40 	beq c000ccf4 <system_call_exception+0x60>
> c000ccb8:	7c 50 13 a6 	mtspr 80,r2
> c000ccbc:	81 22 00 4c 	lwz r9,76(r2)
> c000ccc0:	71 29 84 91 	andi. r9,r9,33937
> c000ccc4:	40 82 00 34 	bne c000ccf8 <system_call_exception+0x64>
> c000ccc8:	28 03 01 c2 	cmplwi r3,450
> c000cccc:	41 81 00 78 	bgt c000cd44 <system_call_exception+0xb0>
> c000ccd0:	3d 20 c0 70 	lis r9,-16272
> c000ccd4:	54 63 10 3a 	rlwinm r3,r3,2,0,29
> c000ccd8:	39 29 81 c5 	addi r9,r9,-32315
> c000ccdc:	7d 29 18 2e 	lwzx r9,r9,r3
> c000cce0:	7c 83 23 78 	mr r3,r4
> c000cce4:	7d 29 03 a6 	mtctr r9
> c000cce8:	4e 80 04 20 	bctr
> ...
> 
> 
> 
>> Why going via stack ? The main advantage of a RISC processor like powerpc is that, unlike x86, there are enough registers to avoid going through memory. RISC processors are optimised with three operands operations and many registers, and usually have slow memory in return.
> 
> Well, thinking about it once more. In fact registers are saved to the stack anyway. At the start of syscall functions they are likely to still be hot in the cache, so reading them back is just a few cycles. And it eventually provide the compiler the opportunity to organise stuff better.
> 
> 
>>> 
>>> For platforms supporting this syscall wrapper, emit symbols with usual
>>> in-register parameters (`sys...`) to support calls to syscall handlers
>>> from within the kernel.
>>> 
>>> Syscalls are wrapped on all platforms except Cell processor. SPUs require
>>> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
>>> enabled.
>> This commit message isn't very clear, please describe in more details what is done, how and why.
> 
> 
> Christophe

Thanks for checking this Christophe.

>> Why going via stack ? The main advantage of a RISC processor like powerpc is that, unlike x86, there are enough registers to avoid going through memory. RISC processors are optimised with three operands operations and many registers, and usually have slow memory in return.
> 
> Well, thinking about it once more. In fact registers are saved to the stack anyway. At the start of syscall functions they are likely to still be hot in the cache, so reading them back is just a few cycles. And it eventually provide the compiler the opportunity to organise stuff better.


Sorry for the delay in performance results - took me a while to verify
these results on real hardware. On a Power9 I’m seeing ~5.6% performance
improvement on null_syscall, even with register clearing enabled.

The upshot of the syscall wrapper was primarily to adopt common
behaviour to other architectures, and to enable register clearing. As it
stands, prior to this patch series we already save register state into a
pt_regs struct, and so may as well pass arguments by the stack as
performance results are favourable.

>>> For platforms supporting this syscall wrapper, emit symbols with usual
>>> in-register parameters (`sys...`) to support calls to syscall handlers
>>> from within the kernel.
>>> 
>>> Syscalls are wrapped on all platforms except Cell processor. SPUs require
>>> access syscall prototypes which are omitted with ARCH_HAS_SYSCALL_WRAPPER
>>> enabled.
>> This commit message isn't very clear, please describe in more details what is done, how and why.

Here is a hack which addresses the fact that handlers for the select and
ppc64_personality syscalls will tail-call generic implementations via
their sys_... symbol, passing arguments with the usual in-register
calling convention. This commit resolves this issue by emitting external
linkage functions with both in-register and in-stack conventions
(__powerpc_sys... and sys_... respectively), but the best way forward is
probably to remove dependence on sys_... handlers alltogether. There’s
simply too much code bloat in emitting both, and direct calls to syscall
handlers should in general be avoided. I would much rather remove all such
references in the kernel as arm has done before proceeding.

As for SPU's, the issue here is that include/linux/syscalls.h only
provides prototypes for sys_... handlers. So spu_callbacks.c must
reference these symbols for the translation unit to compile. A solution
may be for spu_syscall_table to be made extern linkage and populated in
the same manner as systbl.S. I suggest we simply omit support for sycall
wrappers with the Cell processor.

Rohan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/6] powerpc: Provide syscall wrapper
  2022-06-16  5:42       ` Rohan McLure
@ 2022-06-16  7:20         ` Arnd Bergmann
  0 siblings, 0 replies; 31+ messages in thread
From: Arnd Bergmann @ 2022-06-16  7:20 UTC (permalink / raw)
  To: Rohan McLure; +Cc: linuxppc-dev, Andrew Donnellan, Nicholas Piggin

On Thu, Jun 16, 2022 at 7:42 AM Rohan McLure <rmclure@linux.ibm.com> wrote:
>
> As for SPU's, the issue here is that include/linux/syscalls.h only
> provides prototypes for sys_... handlers. So spu_callbacks.c must
> reference these symbols for the translation unit to compile. A solution
> may be for spu_syscall_table to be made extern linkage and populated in
> the same manner as systbl.S. I suggest we simply omit support for sycall
> wrappers with the Cell processor.

The compat syscalls should all be declared in linux/compat.h.
I think in general the C based syscall tables are better because
they enforce the presence of a declaration for each syscall, and that
helps with type safety.

         Arnd

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2022-06-16  7:20 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-01  5:48 [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Rohan McLure
2022-06-01  5:48 ` [PATCH 2/6] powerpc: Provide syscall wrapper Rohan McLure
2022-06-01  8:29   ` Christophe Leroy
2022-06-09 13:06     ` Christophe Leroy
2022-06-16  5:42       ` Rohan McLure
2022-06-16  7:20         ` Arnd Bergmann
2022-06-01  8:59   ` kernel test robot
2022-06-01  9:35   ` kernel test robot
2022-06-01 12:23   ` kernel test robot
2022-06-01 14:33   ` Christophe Leroy
2022-06-03  3:24     ` Rohan McLure
2022-06-03  7:09       ` Andrew Donnellan
2022-06-03  8:39         ` Christophe Leroy
2022-06-14 13:57           ` Andrew Donnellan
2022-06-03  9:04   ` Arnd Bergmann
2022-06-15  1:47     ` Rohan McLure
2022-06-15 10:13       ` Arnd Bergmann
2022-06-01  5:48 ` [PATCH 3/6] powerpc: Make syscalls save and restore gprs Rohan McLure
2022-06-01  8:33   ` Christophe Leroy
2022-06-01  5:48 ` [PATCH 4/6] powerpc: Fix comment, use clear and restore macros Rohan McLure
2022-06-01  5:48 ` [PATCH 5/6] powerpc: Move syscall handler prototypes to header Rohan McLure
2022-06-01  5:48 ` [PATCH 6/6] powerpc/64s: Clear gprs on interrupt routine entry Rohan McLure
2022-06-01  8:37   ` Christophe Leroy
2022-06-01  7:45 ` [PATCH 1/6] powerpc: Add ZERO_GPRS macros for register clears Christophe Leroy
2022-06-01 16:00 ` Segher Boessenkool
2022-06-10  3:32   ` Rohan McLure
2022-06-10 14:05     ` Segher Boessenkool
2022-06-11  8:42     ` Christophe Leroy
2022-06-13 18:48       ` Segher Boessenkool
2022-06-14  4:31         ` Michael Ellerman
2022-06-14 11:43           ` Segher Boessenkool

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.