All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-04-17 17:08 ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-04-17 17:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, segher
  Cc: linux-kernel, linuxppc-dev

unsafe_put_user() is designed to take benefit of 'asm goto'.

Instead of using the standard __put_user() approach and branch
based on the returned error, use 'asm goto' and make the
exception code branch directly to the error label. There is
no code anymore in the fixup section.

This change significantly simplifies functions using
unsafe_put_user()

Small exemple of the benefit with the following code:

struct test {
	u32 item1;
	u16 item2;
	u8 item3;
	u64 item4;
};

int set_test_to_user(struct test __user *test, u32 item1, u16 item2, u8 item3, u64 item4)
{
	unsafe_put_user(item1, &test->item1, failed);
	unsafe_put_user(item2, &test->item2, failed);
	unsafe_put_user(item3, &test->item3, failed);
	unsafe_put_user(item4, &test->item4, failed);
	return 0;
failed:
	return -EFAULT;
}

Before the patch:

00000be8 <set_test_to_user>:
 be8:	39 20 00 00 	li      r9,0
 bec:	90 83 00 00 	stw     r4,0(r3)
 bf0:	2f 89 00 00 	cmpwi   cr7,r9,0
 bf4:	40 9e 00 38 	bne     cr7,c2c <set_test_to_user+0x44>
 bf8:	b0 a3 00 04 	sth     r5,4(r3)
 bfc:	2f 89 00 00 	cmpwi   cr7,r9,0
 c00:	40 9e 00 2c 	bne     cr7,c2c <set_test_to_user+0x44>
 c04:	98 c3 00 06 	stb     r6,6(r3)
 c08:	2f 89 00 00 	cmpwi   cr7,r9,0
 c0c:	40 9e 00 20 	bne     cr7,c2c <set_test_to_user+0x44>
 c10:	90 e3 00 08 	stw     r7,8(r3)
 c14:	91 03 00 0c 	stw     r8,12(r3)
 c18:	21 29 00 00 	subfic  r9,r9,0
 c1c:	7d 29 49 10 	subfe   r9,r9,r9
 c20:	38 60 ff f2 	li      r3,-14
 c24:	7d 23 18 38 	and     r3,r9,r3
 c28:	4e 80 00 20 	blr
 c2c:	38 60 ff f2 	li      r3,-14
 c30:	4e 80 00 20 	blr

00000000 <.fixup>:
	...
  b8:	39 20 ff f2 	li      r9,-14
  bc:	48 00 00 00 	b       bc <.fixup+0xbc>
			bc: R_PPC_REL24	.text+0xbf0
  c0:	39 20 ff f2 	li      r9,-14
  c4:	48 00 00 00 	b       c4 <.fixup+0xc4>
			c4: R_PPC_REL24	.text+0xbfc
  c8:	39 20 ff f2 	li      r9,-14
  cc:	48 00 00 00 	b       cc <.fixup+0xcc>
			cc: R_PPC_REL24	.text+0xc08
  d0:	39 20 ff f2 	li      r9,-14
  d4:	48 00 00 00 	b       d4 <.fixup+0xd4>
			d4: R_PPC_REL24	.text+0xc18

00000000 <__ex_table>:
	...
			a0: R_PPC_REL32	.text+0xbec
			a4: R_PPC_REL32	.fixup+0xb8
			a8: R_PPC_REL32	.text+0xbf8
			ac: R_PPC_REL32	.fixup+0xc0
			b0: R_PPC_REL32	.text+0xc04
			b4: R_PPC_REL32	.fixup+0xc8
			b8: R_PPC_REL32	.text+0xc10
			bc: R_PPC_REL32	.fixup+0xd0
			c0: R_PPC_REL32	.text+0xc14
			c4: R_PPC_REL32	.fixup+0xd0

After the patch:

00000be8 <set_test_to_user>:
 be8:	90 83 00 00 	stw     r4,0(r3)
 bec:	b0 a3 00 04 	sth     r5,4(r3)
 bf0:	98 c3 00 06 	stb     r6,6(r3)
 bf4:	90 e3 00 08 	stw     r7,8(r3)
 bf8:	91 03 00 0c 	stw     r8,12(r3)
 bfc:	38 60 00 00 	li      r3,0
 c00:	4e 80 00 20 	blr
 c04:	38 60 ff f2 	li      r3,-14
 c08:	4e 80 00 20 	blr

00000000 <__ex_table>:
	...
			a0: R_PPC_REL32	.text+0xbe8
			a4: R_PPC_REL32	.text+0xc04
			a8: R_PPC_REL32	.text+0xbec
			ac: R_PPC_REL32	.text+0xc04
			b0: R_PPC_REL32	.text+0xbf0
			b4: R_PPC_REL32	.text+0xc04
			b8: R_PPC_REL32	.text+0xbf4
			bc: R_PPC_REL32	.text+0xc04
			c0: R_PPC_REL32	.text+0xbf8
			c4: R_PPC_REL32	.text+0xc04

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
---
v4:
- no change

v3:
- Added <> modifier to __put_user_asm_goto()
- Removed %U1 modifier to __put_user_asm2_goto()

v2:
- Grouped most __goto() macros together
- Removed stuff in .fixup section, referencing the error label
directly from the extable
- Using more flexible addressing in asm.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9cc9c106ae2a..9365b59495a2 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -93,12 +93,12 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
 #define __get_user(x, ptr) \
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
 #define __put_user(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user_goto(x, ptr, label) \
+	__put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
 #define __get_user_allowed(x, ptr) \
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
-#define __put_user_allowed(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false)
 
 #define __get_user_inatomic(x, ptr) \
 	__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
@@ -162,17 +162,14 @@ do {								\
 	prevent_write_to_user(ptr, size);			\
 } while (0)
 
-#define __put_user_nocheck(x, ptr, size, do_allow)			\
+#define __put_user_nocheck(x, ptr, size)			\
 ({								\
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
 	if (!is_kernel_addr((unsigned long)__pu_addr))		\
 		might_fault();					\
 	__chk_user_ptr(ptr);					\
-	if (do_allow)								\
-		__put_user_size((x), __pu_addr, (size), __pu_err);		\
-	else									\
-		__put_user_size_allowed((x), __pu_addr, (size), __pu_err);	\
+	__put_user_size((x), __pu_addr, (size), __pu_err);		\
 	__pu_err;						\
 })
 
@@ -196,6 +193,52 @@ do {								\
 })
 
 
+#define __put_user_asm_goto(x, addr, label, op)			\
+	asm volatile goto(					\
+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
+		EX_TABLE(1b, %l2)				\
+		:						\
+		: "r" (x), "m<>" (*addr)				\
+		:						\
+		: label)
+
+#ifdef __powerpc64__
+#define __put_user_asm2_goto(x, ptr, label)			\
+	__put_user_asm_goto(x, ptr, label, "std")
+#else /* __powerpc64__ */
+#define __put_user_asm2_goto(x, addr, label)			\
+	asm volatile goto(					\
+		"1:	stw%X1 %0, %1\n"			\
+		"2:	stw%X1 %L0, %L1\n"			\
+		EX_TABLE(1b, %l2)				\
+		EX_TABLE(2b, %l2)				\
+		:						\
+		: "r" (x), "m" (*addr)				\
+		:						\
+		: label)
+#endif /* __powerpc64__ */
+
+#define __put_user_size_goto(x, ptr, size, label)		\
+do {								\
+	switch (size) {						\
+	case 1: __put_user_asm_goto(x, ptr, label, "stb"); break;	\
+	case 2: __put_user_asm_goto(x, ptr, label, "sth"); break;	\
+	case 4: __put_user_asm_goto(x, ptr, label, "stw"); break;	\
+	case 8: __put_user_asm2_goto(x, ptr, label); break;	\
+	default: __put_user_bad();				\
+	}							\
+} while (0)
+
+#define __put_user_nocheck_goto(x, ptr, size, label)		\
+do {								\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
+	if (!is_kernel_addr((unsigned long)__pu_addr))		\
+		might_fault();					\
+	__chk_user_ptr(ptr);					\
+	__put_user_size_goto((x), __pu_addr, (size), label);	\
+} while (0)
+
+
 extern long __get_user_bad(void);
 
 /*
@@ -470,7 +513,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
-#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
+#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
 #define unsafe_copy_to_user(d, s, l, e) \
 	unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
 
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-04-17 17:08 ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-04-17 17:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, segher
  Cc: linuxppc-dev, linux-kernel

unsafe_put_user() is designed to take benefit of 'asm goto'.

Instead of using the standard __put_user() approach and branch
based on the returned error, use 'asm goto' and make the
exception code branch directly to the error label. There is
no code anymore in the fixup section.

This change significantly simplifies functions using
unsafe_put_user()

Small exemple of the benefit with the following code:

struct test {
	u32 item1;
	u16 item2;
	u8 item3;
	u64 item4;
};

int set_test_to_user(struct test __user *test, u32 item1, u16 item2, u8 item3, u64 item4)
{
	unsafe_put_user(item1, &test->item1, failed);
	unsafe_put_user(item2, &test->item2, failed);
	unsafe_put_user(item3, &test->item3, failed);
	unsafe_put_user(item4, &test->item4, failed);
	return 0;
failed:
	return -EFAULT;
}

Before the patch:

00000be8 <set_test_to_user>:
 be8:	39 20 00 00 	li      r9,0
 bec:	90 83 00 00 	stw     r4,0(r3)
 bf0:	2f 89 00 00 	cmpwi   cr7,r9,0
 bf4:	40 9e 00 38 	bne     cr7,c2c <set_test_to_user+0x44>
 bf8:	b0 a3 00 04 	sth     r5,4(r3)
 bfc:	2f 89 00 00 	cmpwi   cr7,r9,0
 c00:	40 9e 00 2c 	bne     cr7,c2c <set_test_to_user+0x44>
 c04:	98 c3 00 06 	stb     r6,6(r3)
 c08:	2f 89 00 00 	cmpwi   cr7,r9,0
 c0c:	40 9e 00 20 	bne     cr7,c2c <set_test_to_user+0x44>
 c10:	90 e3 00 08 	stw     r7,8(r3)
 c14:	91 03 00 0c 	stw     r8,12(r3)
 c18:	21 29 00 00 	subfic  r9,r9,0
 c1c:	7d 29 49 10 	subfe   r9,r9,r9
 c20:	38 60 ff f2 	li      r3,-14
 c24:	7d 23 18 38 	and     r3,r9,r3
 c28:	4e 80 00 20 	blr
 c2c:	38 60 ff f2 	li      r3,-14
 c30:	4e 80 00 20 	blr

00000000 <.fixup>:
	...
  b8:	39 20 ff f2 	li      r9,-14
  bc:	48 00 00 00 	b       bc <.fixup+0xbc>
			bc: R_PPC_REL24	.text+0xbf0
  c0:	39 20 ff f2 	li      r9,-14
  c4:	48 00 00 00 	b       c4 <.fixup+0xc4>
			c4: R_PPC_REL24	.text+0xbfc
  c8:	39 20 ff f2 	li      r9,-14
  cc:	48 00 00 00 	b       cc <.fixup+0xcc>
			cc: R_PPC_REL24	.text+0xc08
  d0:	39 20 ff f2 	li      r9,-14
  d4:	48 00 00 00 	b       d4 <.fixup+0xd4>
			d4: R_PPC_REL24	.text+0xc18

00000000 <__ex_table>:
	...
			a0: R_PPC_REL32	.text+0xbec
			a4: R_PPC_REL32	.fixup+0xb8
			a8: R_PPC_REL32	.text+0xbf8
			ac: R_PPC_REL32	.fixup+0xc0
			b0: R_PPC_REL32	.text+0xc04
			b4: R_PPC_REL32	.fixup+0xc8
			b8: R_PPC_REL32	.text+0xc10
			bc: R_PPC_REL32	.fixup+0xd0
			c0: R_PPC_REL32	.text+0xc14
			c4: R_PPC_REL32	.fixup+0xd0

After the patch:

00000be8 <set_test_to_user>:
 be8:	90 83 00 00 	stw     r4,0(r3)
 bec:	b0 a3 00 04 	sth     r5,4(r3)
 bf0:	98 c3 00 06 	stb     r6,6(r3)
 bf4:	90 e3 00 08 	stw     r7,8(r3)
 bf8:	91 03 00 0c 	stw     r8,12(r3)
 bfc:	38 60 00 00 	li      r3,0
 c00:	4e 80 00 20 	blr
 c04:	38 60 ff f2 	li      r3,-14
 c08:	4e 80 00 20 	blr

00000000 <__ex_table>:
	...
			a0: R_PPC_REL32	.text+0xbe8
			a4: R_PPC_REL32	.text+0xc04
			a8: R_PPC_REL32	.text+0xbec
			ac: R_PPC_REL32	.text+0xc04
			b0: R_PPC_REL32	.text+0xbf0
			b4: R_PPC_REL32	.text+0xc04
			b8: R_PPC_REL32	.text+0xbf4
			bc: R_PPC_REL32	.text+0xc04
			c0: R_PPC_REL32	.text+0xbf8
			c4: R_PPC_REL32	.text+0xc04

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
---
v4:
- no change

v3:
- Added <> modifier to __put_user_asm_goto()
- Removed %U1 modifier to __put_user_asm2_goto()

v2:
- Grouped most __goto() macros together
- Removed stuff in .fixup section, referencing the error label
directly from the extable
- Using more flexible addressing in asm.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9cc9c106ae2a..9365b59495a2 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -93,12 +93,12 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
 #define __get_user(x, ptr) \
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
 #define __put_user(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user_goto(x, ptr, label) \
+	__put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
 #define __get_user_allowed(x, ptr) \
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
-#define __put_user_allowed(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false)
 
 #define __get_user_inatomic(x, ptr) \
 	__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
@@ -162,17 +162,14 @@ do {								\
 	prevent_write_to_user(ptr, size);			\
 } while (0)
 
-#define __put_user_nocheck(x, ptr, size, do_allow)			\
+#define __put_user_nocheck(x, ptr, size)			\
 ({								\
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
 	if (!is_kernel_addr((unsigned long)__pu_addr))		\
 		might_fault();					\
 	__chk_user_ptr(ptr);					\
-	if (do_allow)								\
-		__put_user_size((x), __pu_addr, (size), __pu_err);		\
-	else									\
-		__put_user_size_allowed((x), __pu_addr, (size), __pu_err);	\
+	__put_user_size((x), __pu_addr, (size), __pu_err);		\
 	__pu_err;						\
 })
 
@@ -196,6 +193,52 @@ do {								\
 })
 
 
+#define __put_user_asm_goto(x, addr, label, op)			\
+	asm volatile goto(					\
+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
+		EX_TABLE(1b, %l2)				\
+		:						\
+		: "r" (x), "m<>" (*addr)				\
+		:						\
+		: label)
+
+#ifdef __powerpc64__
+#define __put_user_asm2_goto(x, ptr, label)			\
+	__put_user_asm_goto(x, ptr, label, "std")
+#else /* __powerpc64__ */
+#define __put_user_asm2_goto(x, addr, label)			\
+	asm volatile goto(					\
+		"1:	stw%X1 %0, %1\n"			\
+		"2:	stw%X1 %L0, %L1\n"			\
+		EX_TABLE(1b, %l2)				\
+		EX_TABLE(2b, %l2)				\
+		:						\
+		: "r" (x), "m" (*addr)				\
+		:						\
+		: label)
+#endif /* __powerpc64__ */
+
+#define __put_user_size_goto(x, ptr, size, label)		\
+do {								\
+	switch (size) {						\
+	case 1: __put_user_asm_goto(x, ptr, label, "stb"); break;	\
+	case 2: __put_user_asm_goto(x, ptr, label, "sth"); break;	\
+	case 4: __put_user_asm_goto(x, ptr, label, "stw"); break;	\
+	case 8: __put_user_asm2_goto(x, ptr, label); break;	\
+	default: __put_user_bad();				\
+	}							\
+} while (0)
+
+#define __put_user_nocheck_goto(x, ptr, size, label)		\
+do {								\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
+	if (!is_kernel_addr((unsigned long)__pu_addr))		\
+		might_fault();					\
+	__chk_user_ptr(ptr);					\
+	__put_user_size_goto((x), __pu_addr, (size), label);	\
+} while (0)
+
+
 extern long __get_user_bad(void);
 
 /*
@@ -470,7 +513,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
-#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
+#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
 #define unsafe_copy_to_user(d, s, l, e) \
 	unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
 
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v4 2/2] powerpc/uaccess: Implement unsafe_copy_to_user() as a simple loop
  2020-04-17 17:08 ` Christophe Leroy
@ 2020-04-17 17:08   ` Christophe Leroy
  -1 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-04-17 17:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, segher
  Cc: linux-kernel, linuxppc-dev

At the time being, unsafe_copy_to_user() is based on
raw_copy_to_user() which calls __copy_tofrom_user().

__copy_tofrom_user() is a big optimised function to copy big amount
of data. It aligns destinations to cache line in order to use
dcbz instruction.

Today unsafe_copy_to_user() is called only from filldir().
It is used to mainly copy small amount of data like filenames,
so __copy_tofrom_user() is not fit.

Also, unsafe_copy_to_user() is used within user_access_begin/end
sections. In those section, it is preferable to not call functions.

Rewrite unsafe_copy_to_user() as a macro that uses __put_user_goto().
We first perform a loop of long, then we finish with necessary
complements.

unsafe_copy_to_user() might be used in the near future to copy
fixed-size data, like pt_regs structs during signal processing.
Having it as a macro allows GCC to optimise it for instead when
it knows the size in advance, it can unloop loops, drop complements
when the size is a multiple of longs, etc ...

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v4: new
---
 arch/powerpc/include/asm/uaccess.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9365b59495a2..b24fbe75f9ce 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -514,7 +514,26 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
 #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
+
 #define unsafe_copy_to_user(d, s, l, e) \
-	unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
+do {									\
+	u8 __user *_dst = (u8 __user *)(d);				\
+	const u8 *_src = (const u8 *)(s);				\
+	size_t _len = (l);						\
+	int _i;								\
+									\
+	for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long))		\
+		__put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\
+	if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) {			\
+		__put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e);	\
+		_i += 4;						\
+	}								\
+	if (_len & 2) {							\
+		__put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e);	\
+		_i += 2;						\
+	}								\
+	if (_len & 1) \
+		__put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\
+} while (0)
 
 #endif	/* _ARCH_POWERPC_UACCESS_H */
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v4 2/2] powerpc/uaccess: Implement unsafe_copy_to_user() as a simple loop
@ 2020-04-17 17:08   ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-04-17 17:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	npiggin, segher
  Cc: linuxppc-dev, linux-kernel

At the time being, unsafe_copy_to_user() is based on
raw_copy_to_user() which calls __copy_tofrom_user().

__copy_tofrom_user() is a big optimised function to copy big amount
of data. It aligns destinations to cache line in order to use
dcbz instruction.

Today unsafe_copy_to_user() is called only from filldir().
It is used to mainly copy small amount of data like filenames,
so __copy_tofrom_user() is not fit.

Also, unsafe_copy_to_user() is used within user_access_begin/end
sections. In those section, it is preferable to not call functions.

Rewrite unsafe_copy_to_user() as a macro that uses __put_user_goto().
We first perform a loop of long, then we finish with necessary
complements.

unsafe_copy_to_user() might be used in the near future to copy
fixed-size data, like pt_regs structs during signal processing.
Having it as a macro allows GCC to optimise it for instead when
it knows the size in advance, it can unloop loops, drop complements
when the size is a multiple of longs, etc ...

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v4: new
---
 arch/powerpc/include/asm/uaccess.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9365b59495a2..b24fbe75f9ce 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -514,7 +514,26 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
 #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
+
 #define unsafe_copy_to_user(d, s, l, e) \
-	unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
+do {									\
+	u8 __user *_dst = (u8 __user *)(d);				\
+	const u8 *_src = (const u8 *)(s);				\
+	size_t _len = (l);						\
+	int _i;								\
+									\
+	for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long))		\
+		__put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\
+	if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) {			\
+		__put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e);	\
+		_i += 4;						\
+	}								\
+	if (_len & 2) {							\
+		__put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e);	\
+		_i += 2;						\
+	}								\
+	if (_len & 1) \
+		__put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\
+} while (0)
 
 #endif	/* _ARCH_POWERPC_UACCESS_H */
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-04-17 17:08 ` Christophe Leroy
@ 2020-05-05 14:27   ` Michael Ellerman
  -1 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-05 14:27 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, segher
  Cc: linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> unsafe_put_user() is designed to take benefit of 'asm goto'.
>
> Instead of using the standard __put_user() approach and branch
> based on the returned error, use 'asm goto' and make the
> exception code branch directly to the error label. There is
> no code anymore in the fixup section.
>
> This change significantly simplifies functions using
> unsafe_put_user()
>
...
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>  1 file changed, 52 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 9cc9c106ae2a..9365b59495a2 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -196,6 +193,52 @@ do {								\
>  })
>  
>  
> +#define __put_user_asm_goto(x, addr, label, op)			\
> +	asm volatile goto(					\
> +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> +		EX_TABLE(1b, %l2)				\
> +		:						\
> +		: "r" (x), "m<>" (*addr)				\

The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.

Plain "m" works, how much does the "<>" affect code gen in practice?

A quick diff here shows no difference from removing "<>".

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-05 14:27   ` Michael Ellerman
  0 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-05 14:27 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, segher
  Cc: linuxppc-dev, linux-kernel

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> unsafe_put_user() is designed to take benefit of 'asm goto'.
>
> Instead of using the standard __put_user() approach and branch
> based on the returned error, use 'asm goto' and make the
> exception code branch directly to the error label. There is
> no code anymore in the fixup section.
>
> This change significantly simplifies functions using
> unsafe_put_user()
>
...
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>  1 file changed, 52 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 9cc9c106ae2a..9365b59495a2 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -196,6 +193,52 @@ do {								\
>  })
>  
>  
> +#define __put_user_asm_goto(x, addr, label, op)			\
> +	asm volatile goto(					\
> +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> +		EX_TABLE(1b, %l2)				\
> +		:						\
> +		: "r" (x), "m<>" (*addr)				\

The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.

Plain "m" works, how much does the "<>" affect code gen in practice?

A quick diff here shows no difference from removing "<>".

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-05 14:27   ` Michael Ellerman
@ 2020-05-05 15:32     ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-05 15:32 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, linux-kernel, linuxppc-dev

Hi!

On Wed, May 06, 2020 at 12:27:58AM +1000, Michael Ellerman wrote:
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> > unsafe_put_user() is designed to take benefit of 'asm goto'.
> >
> > Instead of using the standard __put_user() approach and branch
> > based on the returned error, use 'asm goto' and make the
> > exception code branch directly to the error label. There is
> > no code anymore in the fixup section.
> >
> > This change significantly simplifies functions using
> > unsafe_put_user()
> >
> ...
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > ---
> >  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
> >  1 file changed, 52 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> > index 9cc9c106ae2a..9365b59495a2 100644
> > --- a/arch/powerpc/include/asm/uaccess.h
> > +++ b/arch/powerpc/include/asm/uaccess.h
> > @@ -196,6 +193,52 @@ do {								\
> >  })
> >  
> >  
> > +#define __put_user_asm_goto(x, addr, label, op)			\
> > +	asm volatile goto(					\
> > +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> > +		EX_TABLE(1b, %l2)				\
> > +		:						\
> > +		: "r" (x), "m<>" (*addr)				\
> 
> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.

[ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
  is nine years old now.  Most projects do not support < 4.8 anymore, on
  any architecture.  ]

> Plain "m" works, how much does the "<>" affect code gen in practice?
> 
> A quick diff here shows no difference from removing "<>".

It will make it impossible to use update-form instructions here.  That
probably does not matter much at all, in this case.

If you remove the "<>" constraints, also remove the "%Un" output modifier?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-05 15:32     ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-05 15:32 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

Hi!

On Wed, May 06, 2020 at 12:27:58AM +1000, Michael Ellerman wrote:
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> > unsafe_put_user() is designed to take benefit of 'asm goto'.
> >
> > Instead of using the standard __put_user() approach and branch
> > based on the returned error, use 'asm goto' and make the
> > exception code branch directly to the error label. There is
> > no code anymore in the fixup section.
> >
> > This change significantly simplifies functions using
> > unsafe_put_user()
> >
> ...
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > ---
> >  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
> >  1 file changed, 52 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> > index 9cc9c106ae2a..9365b59495a2 100644
> > --- a/arch/powerpc/include/asm/uaccess.h
> > +++ b/arch/powerpc/include/asm/uaccess.h
> > @@ -196,6 +193,52 @@ do {								\
> >  })
> >  
> >  
> > +#define __put_user_asm_goto(x, addr, label, op)			\
> > +	asm volatile goto(					\
> > +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> > +		EX_TABLE(1b, %l2)				\
> > +		:						\
> > +		: "r" (x), "m<>" (*addr)				\
> 
> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.

[ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
  is nine years old now.  Most projects do not support < 4.8 anymore, on
  any architecture.  ]

> Plain "m" works, how much does the "<>" affect code gen in practice?
> 
> A quick diff here shows no difference from removing "<>".

It will make it impossible to use update-form instructions here.  That
probably does not matter much at all, in this case.

If you remove the "<>" constraints, also remove the "%Un" output modifier?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-05 14:27   ` Michael Ellerman
@ 2020-05-05 15:40     ` Christophe Leroy
  -1 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-05-05 15:40 UTC (permalink / raw)
  To: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, npiggin, segher
  Cc: linux-kernel, linuxppc-dev

Hi,

Le 05/05/2020 à 16:27, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> unsafe_put_user() is designed to take benefit of 'asm goto'.
>>
>> Instead of using the standard __put_user() approach and branch
>> based on the returned error, use 'asm goto' and make the
>> exception code branch directly to the error label. There is
>> no code anymore in the fixup section.
>>
>> This change significantly simplifies functions using
>> unsafe_put_user()
>>
> ...
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>>   1 file changed, 52 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> index 9cc9c106ae2a..9365b59495a2 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -196,6 +193,52 @@ do {								\
>>   })
>>   
>>   
>> +#define __put_user_asm_goto(x, addr, label, op)			\
>> +	asm volatile goto(					\
>> +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> +		EX_TABLE(1b, %l2)				\
>> +		:						\
>> +		: "r" (x), "m<>" (*addr)				\
> 
> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> 
> Plain "m" works, how much does the "<>" affect code gen in practice?
> 
> A quick diff here shows no difference from removing "<>".

It was recommended by Segher, there has been some discussion about it on 
v1 of this patch, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/

As far as I understood that's mandatory on recent gcc to get the 
pre-update form of the instruction. With older versions "m" was doing 
the same, but not anymore. Should we ifdef the "m<>" or "m" based on GCC 
version ?

Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-05 15:40     ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-05-05 15:40 UTC (permalink / raw)
  To: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, npiggin, segher
  Cc: linuxppc-dev, linux-kernel

Hi,

Le 05/05/2020 à 16:27, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> unsafe_put_user() is designed to take benefit of 'asm goto'.
>>
>> Instead of using the standard __put_user() approach and branch
>> based on the returned error, use 'asm goto' and make the
>> exception code branch directly to the error label. There is
>> no code anymore in the fixup section.
>>
>> This change significantly simplifies functions using
>> unsafe_put_user()
>>
> ...
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>>   1 file changed, 52 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> index 9cc9c106ae2a..9365b59495a2 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -196,6 +193,52 @@ do {								\
>>   })
>>   
>>   
>> +#define __put_user_asm_goto(x, addr, label, op)			\
>> +	asm volatile goto(					\
>> +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> +		EX_TABLE(1b, %l2)				\
>> +		:						\
>> +		: "r" (x), "m<>" (*addr)				\
> 
> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> 
> Plain "m" works, how much does the "<>" affect code gen in practice?
> 
> A quick diff here shows no difference from removing "<>".

It was recommended by Segher, there has been some discussion about it on 
v1 of this patch, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/

As far as I understood that's mandatory on recent gcc to get the 
pre-update form of the instruction. With older versions "m" was doing 
the same, but not anymore. Should we ifdef the "m<>" or "m" based on GCC 
version ?

Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-05 15:40     ` Christophe Leroy
@ 2020-05-05 15:59       ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-05 15:59 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, npiggin, linux-kernel, linuxppc-dev

On Tue, May 05, 2020 at 05:40:21PM +0200, Christophe Leroy wrote:
> >>+#define __put_user_asm_goto(x, addr, label, op)			\
> >>+	asm volatile goto(					\
> >>+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> >>+		EX_TABLE(1b, %l2)				\
> >>+		:						\
> >>+		: "r" (x), "m<>" (*addr)				\
> >
> >The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> >
> >Plain "m" works, how much does the "<>" affect code gen in practice?
> >
> >A quick diff here shows no difference from removing "<>".
> 
> It was recommended by Segher, there has been some discussion about it on 
> v1 of this patch, see 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/
> 
> As far as I understood that's mandatory on recent gcc to get the 
> pre-update form of the instruction. With older versions "m" was doing 
> the same, but not anymore.

Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
say) the update form was just as fast as the non-update form.  On newer
or bigger CPUs it is usually executed just the same as an add followed
by the memory access, so it just saves a bit of code size.

> Should we ifdef the "m<>" or "m" based on GCC 
> version ?

That will be a lot of churn.  Just make 4.8 minimum?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-05 15:59       ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-05 15:59 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

On Tue, May 05, 2020 at 05:40:21PM +0200, Christophe Leroy wrote:
> >>+#define __put_user_asm_goto(x, addr, label, op)			\
> >>+	asm volatile goto(					\
> >>+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> >>+		EX_TABLE(1b, %l2)				\
> >>+		:						\
> >>+		: "r" (x), "m<>" (*addr)				\
> >
> >The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> >
> >Plain "m" works, how much does the "<>" affect code gen in practice?
> >
> >A quick diff here shows no difference from removing "<>".
> 
> It was recommended by Segher, there has been some discussion about it on 
> v1 of this patch, see 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/
> 
> As far as I understood that's mandatory on recent gcc to get the 
> pre-update form of the instruction. With older versions "m" was doing 
> the same, but not anymore.

Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
say) the update form was just as fast as the non-update form.  On newer
or bigger CPUs it is usually executed just the same as an add followed
by the memory access, so it just saves a bit of code size.

> Should we ifdef the "m<>" or "m" based on GCC 
> version ?

That will be a lot of churn.  Just make 4.8 minimum?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-05 15:32     ` Segher Boessenkool
@ 2020-05-06  0:58       ` Michael Ellerman
  -1 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-06  0:58 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, linux-kernel, linuxppc-dev

Segher Boessenkool <segher@kernel.crashing.org> writes:
> Hi!
>
> On Wed, May 06, 2020 at 12:27:58AM +1000, Michael Ellerman wrote:
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> > unsafe_put_user() is designed to take benefit of 'asm goto'.
>> >
>> > Instead of using the standard __put_user() approach and branch
>> > based on the returned error, use 'asm goto' and make the
>> > exception code branch directly to the error label. There is
>> > no code anymore in the fixup section.
>> >
>> > This change significantly simplifies functions using
>> > unsafe_put_user()
>> >
>> ...
>> >
>> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> > ---
>> >  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>> >  1 file changed, 52 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> > index 9cc9c106ae2a..9365b59495a2 100644
>> > --- a/arch/powerpc/include/asm/uaccess.h
>> > +++ b/arch/powerpc/include/asm/uaccess.h
>> > @@ -196,6 +193,52 @@ do {								\
>> >  })
>> >  
>> >  
>> > +#define __put_user_asm_goto(x, addr, label, op)			\
>> > +	asm volatile goto(					\
>> > +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> > +		EX_TABLE(1b, %l2)				\
>> > +		:						\
>> > +		: "r" (x), "m<>" (*addr)				\
>> 
>> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>
> [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
>   is nine years old now.  Most projects do not support < 4.8 anymore, on
>   any architecture.  ]

Moving up to 4.6.4 wouldn't actually help with this though would it?

Also I have 4.6.3 compilers already built, I don't really have time to
rebuild them for 4.6.4.

The kernel has a top-level minimum version, which I'm not in charge of, see:

https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc


There were discussions about making 4.8 the minimum, but I'm not sure
where they got to.

>> Plain "m" works, how much does the "<>" affect code gen in practice?
>> 
>> A quick diff here shows no difference from removing "<>".
>
> It will make it impossible to use update-form instructions here.  That
> probably does not matter much at all, in this case.
>
> If you remove the "<>" constraints, also remove the "%Un" output modifier?

So like this?

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 62cc8d7640ec..ca847aed8e45 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -207,10 +207,10 @@ do {								\
 
 #define __put_user_asm_goto(x, addr, label, op)			\
 	asm volatile goto(					\
-		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
+		"1:	" op "%X1 %0,%1	# put_user\n"		\
 		EX_TABLE(1b, %l2)				\
 		:						\
-		: "r" (x), "m<>" (*addr)				\
+		: "r" (x), "m" (*addr)				\
 		:						\
 		: label)
 


cheers

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06  0:58       ` Michael Ellerman
  0 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-06  0:58 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

Segher Boessenkool <segher@kernel.crashing.org> writes:
> Hi!
>
> On Wed, May 06, 2020 at 12:27:58AM +1000, Michael Ellerman wrote:
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> > unsafe_put_user() is designed to take benefit of 'asm goto'.
>> >
>> > Instead of using the standard __put_user() approach and branch
>> > based on the returned error, use 'asm goto' and make the
>> > exception code branch directly to the error label. There is
>> > no code anymore in the fixup section.
>> >
>> > This change significantly simplifies functions using
>> > unsafe_put_user()
>> >
>> ...
>> >
>> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> > ---
>> >  arch/powerpc/include/asm/uaccess.h | 61 +++++++++++++++++++++++++-----
>> >  1 file changed, 52 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> > index 9cc9c106ae2a..9365b59495a2 100644
>> > --- a/arch/powerpc/include/asm/uaccess.h
>> > +++ b/arch/powerpc/include/asm/uaccess.h
>> > @@ -196,6 +193,52 @@ do {								\
>> >  })
>> >  
>> >  
>> > +#define __put_user_asm_goto(x, addr, label, op)			\
>> > +	asm volatile goto(					\
>> > +		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> > +		EX_TABLE(1b, %l2)				\
>> > +		:						\
>> > +		: "r" (x), "m<>" (*addr)				\
>> 
>> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>
> [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
>   is nine years old now.  Most projects do not support < 4.8 anymore, on
>   any architecture.  ]

Moving up to 4.6.4 wouldn't actually help with this though would it?

Also I have 4.6.3 compilers already built, I don't really have time to
rebuild them for 4.6.4.

The kernel has a top-level minimum version, which I'm not in charge of, see:

https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc


There were discussions about making 4.8 the minimum, but I'm not sure
where they got to.

>> Plain "m" works, how much does the "<>" affect code gen in practice?
>> 
>> A quick diff here shows no difference from removing "<>".
>
> It will make it impossible to use update-form instructions here.  That
> probably does not matter much at all, in this case.
>
> If you remove the "<>" constraints, also remove the "%Un" output modifier?

So like this?

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 62cc8d7640ec..ca847aed8e45 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -207,10 +207,10 @@ do {								\
 
 #define __put_user_asm_goto(x, addr, label, op)			\
 	asm volatile goto(					\
-		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
+		"1:	" op "%X1 %0,%1	# put_user\n"		\
 		EX_TABLE(1b, %l2)				\
 		:						\
-		: "r" (x), "m<>" (*addr)				\
+		: "r" (x), "m" (*addr)				\
 		:						\
 		: label)
 


cheers

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-05 15:59       ` Segher Boessenkool
@ 2020-05-06  1:36         ` Michael Ellerman
  -1 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-06  1:36 UTC (permalink / raw)
  To: Segher Boessenkool, Christophe Leroy
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, linux-kernel, linuxppc-dev

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Tue, May 05, 2020 at 05:40:21PM +0200, Christophe Leroy wrote:
>> >>+#define __put_user_asm_goto(x, addr, label, op)			\
>> >>+	asm volatile goto(					\
>> >>+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> >>+		EX_TABLE(1b, %l2)				\
>> >>+		:						\
>> >>+		: "r" (x), "m<>" (*addr)				\
>> >
>> >The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>> >
>> >Plain "m" works, how much does the "<>" affect code gen in practice?
>> >
>> >A quick diff here shows no difference from removing "<>".
>> 
>> It was recommended by Segher, there has been some discussion about it on 
>> v1 of this patch, see 
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/
>> 
>> As far as I understood that's mandatory on recent gcc to get the 
>> pre-update form of the instruction. With older versions "m" was doing 
>> the same, but not anymore.
>
> Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
> say) the update form was just as fast as the non-update form.  On newer
> or bigger CPUs it is usually executed just the same as an add followed
> by the memory access, so it just saves a bit of code size.

The update-forms are stdux, sthux etc. right?

I don't see any change in the number of those with or without the
constraint. That's using GCC 9.3.0.

>> Should we ifdef the "m<>" or "m" based on GCC 
>> version ?
>
> That will be a lot of churn.  Just make 4.8 minimum?

As I said in my other mail that's not really up to us. We could mandate
a higher minimum for powerpc, but I'd rather not.

I think for now I'm inclined to just drop the "<>", and we can revisit
in a release or two when hopefully GCC 4.8 has become the minimum.

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06  1:36         ` Michael Ellerman
  0 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-06  1:36 UTC (permalink / raw)
  To: Segher Boessenkool, Christophe Leroy
  Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Tue, May 05, 2020 at 05:40:21PM +0200, Christophe Leroy wrote:
>> >>+#define __put_user_asm_goto(x, addr, label, op)			\
>> >>+	asm volatile goto(					\
>> >>+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> >>+		EX_TABLE(1b, %l2)				\
>> >>+		:						\
>> >>+		: "r" (x), "m<>" (*addr)				\
>> >
>> >The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>> >
>> >Plain "m" works, how much does the "<>" affect code gen in practice?
>> >
>> >A quick diff here shows no difference from removing "<>".
>> 
>> It was recommended by Segher, there has been some discussion about it on 
>> v1 of this patch, see 
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr/
>> 
>> As far as I understood that's mandatory on recent gcc to get the 
>> pre-update form of the instruction. With older versions "m" was doing 
>> the same, but not anymore.
>
> Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
> say) the update form was just as fast as the non-update form.  On newer
> or bigger CPUs it is usually executed just the same as an add followed
> by the memory access, so it just saves a bit of code size.

The update-forms are stdux, sthux etc. right?

I don't see any change in the number of those with or without the
constraint. That's using GCC 9.3.0.

>> Should we ifdef the "m<>" or "m" based on GCC 
>> version ?
>
> That will be a lot of churn.  Just make 4.8 minimum?

As I said in my other mail that's not really up to us. We could mandate
a higher minimum for powerpc, but I'd rather not.

I think for now I'm inclined to just drop the "<>", and we can revisit
in a release or two when hopefully GCC 4.8 has become the minimum.

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-06  0:58       ` Michael Ellerman
@ 2020-05-06 17:58         ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 17:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, linux-kernel, linuxppc-dev

On Wed, May 06, 2020 at 10:58:55AM +1000, Michael Ellerman wrote:
> >> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> >
> > [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
> >   is nine years old now.  Most projects do not support < 4.8 anymore, on
> >   any architecture.  ]
> 
> Moving up to 4.6.4 wouldn't actually help with this though would it?

Nope.  But 4.6.4 is a bug-fix release, 91 bugs fixed since 4.6.3, so you
should switch to it if you can :-)

> Also I have 4.6.3 compilers already built, I don't really have time to
> rebuild them for 4.6.4.
> 
> The kernel has a top-level minimum version, which I'm not in charge of, see:
> 
> https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc

Yes, I know.  And it is much preferred not to have stricter requirements
for Power, I know that too.  Something has to give though :-/

> There were discussions about making 4.8 the minimum, but I'm not sure
> where they got to.

Yeah, just petered out I think?

All significant distros come with a 4.8 as system compiler.

> >> Plain "m" works, how much does the "<>" affect code gen in practice?
> >> 
> >> A quick diff here shows no difference from removing "<>".
> >
> > It will make it impossible to use update-form instructions here.  That
> > probably does not matter much at all, in this case.
> >
> > If you remove the "<>" constraints, also remove the "%Un" output modifier?
> 
> So like this?
> 
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 62cc8d7640ec..ca847aed8e45 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -207,10 +207,10 @@ do {								\
>  
>  #define __put_user_asm_goto(x, addr, label, op)			\
>  	asm volatile goto(					\
> -		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> +		"1:	" op "%X1 %0,%1	# put_user\n"		\
>  		EX_TABLE(1b, %l2)				\
>  		:						\
> -		: "r" (x), "m<>" (*addr)				\
> +		: "r" (x), "m" (*addr)				\
>  		:						\
>  		: label)

Like that.  But you will have to do that to *all* places we use the "<>"
constraints, or wait for more stuff to fail?  And, there probably are
places we *do* want update form insns used (they do help in some loops,
for example)?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06 17:58         ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 17:58 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

On Wed, May 06, 2020 at 10:58:55AM +1000, Michael Ellerman wrote:
> >> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
> >
> > [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
> >   is nine years old now.  Most projects do not support < 4.8 anymore, on
> >   any architecture.  ]
> 
> Moving up to 4.6.4 wouldn't actually help with this though would it?

Nope.  But 4.6.4 is a bug-fix release, 91 bugs fixed since 4.6.3, so you
should switch to it if you can :-)

> Also I have 4.6.3 compilers already built, I don't really have time to
> rebuild them for 4.6.4.
> 
> The kernel has a top-level minimum version, which I'm not in charge of, see:
> 
> https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc

Yes, I know.  And it is much preferred not to have stricter requirements
for Power, I know that too.  Something has to give though :-/

> There were discussions about making 4.8 the minimum, but I'm not sure
> where they got to.

Yeah, just petered out I think?

All significant distros come with a 4.8 as system compiler.

> >> Plain "m" works, how much does the "<>" affect code gen in practice?
> >> 
> >> A quick diff here shows no difference from removing "<>".
> >
> > It will make it impossible to use update-form instructions here.  That
> > probably does not matter much at all, in this case.
> >
> > If you remove the "<>" constraints, also remove the "%Un" output modifier?
> 
> So like this?
> 
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 62cc8d7640ec..ca847aed8e45 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -207,10 +207,10 @@ do {								\
>  
>  #define __put_user_asm_goto(x, addr, label, op)			\
>  	asm volatile goto(					\
> -		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> +		"1:	" op "%X1 %0,%1	# put_user\n"		\
>  		EX_TABLE(1b, %l2)				\
>  		:						\
> -		: "r" (x), "m<>" (*addr)				\
> +		: "r" (x), "m" (*addr)				\
>  		:						\
>  		: label)

Like that.  But you will have to do that to *all* places we use the "<>"
constraints, or wait for more stuff to fail?  And, there probably are
places we *do* want update form insns used (they do help in some loops,
for example)?


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-06  1:36         ` Michael Ellerman
@ 2020-05-06 18:09           ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 18:09 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Christophe Leroy, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, npiggin, linux-kernel, linuxppc-dev

On Wed, May 06, 2020 at 11:36:00AM +1000, Michael Ellerman wrote:
> >> As far as I understood that's mandatory on recent gcc to get the 
> >> pre-update form of the instruction. With older versions "m" was doing 
> >> the same, but not anymore.
> >
> > Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
> > say) the update form was just as fast as the non-update form.  On newer
> > or bigger CPUs it is usually executed just the same as an add followed
> > by the memory access, so it just saves a bit of code size.
> 
> The update-forms are stdux, sthux etc. right?

And stdu, sthu, etc.  "x" is "indexed form" (reg+reg addressing).

> I don't see any change in the number of those with or without the
> constraint. That's using GCC 9.3.0.

It's most useful in loops (and happens more often there).  You probably
do not have many loops that allow update form insns.

> >> Should we ifdef the "m<>" or "m" based on GCC 
> >> version ?
> >
> > That will be a lot of churn.  Just make 4.8 minimum?
> 
> As I said in my other mail that's not really up to us. We could mandate
> a higher minimum for powerpc, but I'd rather not.

Yeah, I quite understand that.

> I think for now I'm inclined to just drop the "<>", and we can revisit
> in a release or two when hopefully GCC 4.8 has become the minimum.

An unhappy resolution, but it leaves a light on the horizon :-)

In that case, leave the "%Un", if you will but the "<>" back soonish?
Not much point removing it and putting it back later (it is harmless,
there are more instances of it in the kernel as well, since many years).

Thanks!


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06 18:09           ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 18:09 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linux-kernel, npiggin, Paul Mackerras, Christophe Leroy, linuxppc-dev

On Wed, May 06, 2020 at 11:36:00AM +1000, Michael Ellerman wrote:
> >> As far as I understood that's mandatory on recent gcc to get the 
> >> pre-update form of the instruction. With older versions "m" was doing 
> >> the same, but not anymore.
> >
> > Yes.  How much that matters depends on the asm.  On older CPUs (6xx/7xx,
> > say) the update form was just as fast as the non-update form.  On newer
> > or bigger CPUs it is usually executed just the same as an add followed
> > by the memory access, so it just saves a bit of code size.
> 
> The update-forms are stdux, sthux etc. right?

And stdu, sthu, etc.  "x" is "indexed form" (reg+reg addressing).

> I don't see any change in the number of those with or without the
> constraint. That's using GCC 9.3.0.

It's most useful in loops (and happens more often there).  You probably
do not have many loops that allow update form insns.

> >> Should we ifdef the "m<>" or "m" based on GCC 
> >> version ?
> >
> > That will be a lot of churn.  Just make 4.8 minimum?
> 
> As I said in my other mail that's not really up to us. We could mandate
> a higher minimum for powerpc, but I'd rather not.

Yeah, I quite understand that.

> I think for now I'm inclined to just drop the "<>", and we can revisit
> in a release or two when hopefully GCC 4.8 has become the minimum.

An unhappy resolution, but it leaves a light on the horizon :-)

In that case, leave the "%Un", if you will but the "<>" back soonish?
Not much point removing it and putting it back later (it is harmless,
there are more instances of it in the kernel as well, since many years).

Thanks!


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-06 17:58         ` Segher Boessenkool
@ 2020-05-06 18:10           ` Christophe Leroy
  -1 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-05-06 18:10 UTC (permalink / raw)
  To: Segher Boessenkool, Michael Ellerman
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, linux-kernel, linuxppc-dev



Le 06/05/2020 à 19:58, Segher Boessenkool a écrit :
> On Wed, May 06, 2020 at 10:58:55AM +1000, Michael Ellerman wrote:
>>>> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>>>
>>> [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
>>>    is nine years old now.  Most projects do not support < 4.8 anymore, on
>>>    any architecture.  ]
>>
>> Moving up to 4.6.4 wouldn't actually help with this though would it?
> 
> Nope.  But 4.6.4 is a bug-fix release, 91 bugs fixed since 4.6.3, so you
> should switch to it if you can :-)
> 
>> Also I have 4.6.3 compilers already built, I don't really have time to
>> rebuild them for 4.6.4.
>>
>> The kernel has a top-level minimum version, which I'm not in charge of, see:
>>
>> https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc
> 
> Yes, I know.  And it is much preferred not to have stricter requirements
> for Power, I know that too.  Something has to give though :-/
> 
>> There were discussions about making 4.8 the minimum, but I'm not sure
>> where they got to.
> 
> Yeah, just petered out I think?
> 
> All significant distros come with a 4.8 as system compiler.
> 
>>>> Plain "m" works, how much does the "<>" affect code gen in practice?
>>>>
>>>> A quick diff here shows no difference from removing "<>".
>>>
>>> It will make it impossible to use update-form instructions here.  That
>>> probably does not matter much at all, in this case.
>>>
>>> If you remove the "<>" constraints, also remove the "%Un" output modifier?
>>
>> So like this?
>>
>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> index 62cc8d7640ec..ca847aed8e45 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -207,10 +207,10 @@ do {								\
>>   
>>   #define __put_user_asm_goto(x, addr, label, op)			\
>>   	asm volatile goto(					\
>> -		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> +		"1:	" op "%X1 %0,%1	# put_user\n"		\
>>   		EX_TABLE(1b, %l2)				\
>>   		:						\
>> -		: "r" (x), "m<>" (*addr)				\
>> +		: "r" (x), "m" (*addr)				\
>>   		:						\
>>   		: label)
> 
> Like that.  But you will have to do that to *all* places we use the "<>"
> constraints, or wait for more stuff to fail?  And, there probably are
> places we *do* want update form insns used (they do help in some loops,
> for example)?
> 

AFAICT, git grep "m<>" provides no result.

However many places have %Ux:

arch/powerpc/boot/io.h:	__asm__ __volatile__("lbz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("stb%U0%X0 %1,%0; sync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("lhz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("sth%U0%X0 %1,%0; sync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("lwz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("stw%U0%X0 %1,%0; sync"
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("lwz%U1%X1 
%0,%1" : "=r"(t) : "m"(v->counter));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("stw%U0%X0 
%1,%0" : "=m"(v->counter) : "r"(i));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("ld%U1%X1 %0,%1" 
: "=r"(t) : "m"(v->counter));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("std%U0%X0 
%1,%0" : "=m"(v->counter) : "r"(i));
arch/powerpc/include/asm/book3s/32/pgtable.h:		stw%U0%X0 %2,%0\n\
arch/powerpc/include/asm/book3s/32/pgtable.h:		stw%U0%X0 %L2,%1"
arch/powerpc/include/asm/io.h:	__asm__ __volatile__("sync;"#insn"%U1%X1 
%0,%1;twi 0,%0,0;isync"\
arch/powerpc/include/asm/io.h:	__asm__ __volatile__("sync;"#insn"%U0%X0 
%1,%0"			\
arch/powerpc/include/asm/nohash/pgtable.h:			stw%U0%X0 %2,%0\n\
arch/powerpc/include/asm/nohash/pgtable.h:			stw%U0%X0 %L2,%1"
arch/powerpc/kvm/powerpc.c:	asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : 
"=m" (fprd) : "m" (fprs)
arch/powerpc/kvm/powerpc.c:	asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : 
"=m" (fprs) : "m" (fprd)


Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06 18:10           ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-05-06 18:10 UTC (permalink / raw)
  To: Segher Boessenkool, Michael Ellerman
  Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev



Le 06/05/2020 à 19:58, Segher Boessenkool a écrit :
> On Wed, May 06, 2020 at 10:58:55AM +1000, Michael Ellerman wrote:
>>>> The "m<>" here is breaking GCC 4.6.3, which we allegedly still support.
>>>
>>> [ You shouldn't use 4.6.3, there has been 4.6.4 since a while.  And 4.6
>>>    is nine years old now.  Most projects do not support < 4.8 anymore, on
>>>    any architecture.  ]
>>
>> Moving up to 4.6.4 wouldn't actually help with this though would it?
> 
> Nope.  But 4.6.4 is a bug-fix release, 91 bugs fixed since 4.6.3, so you
> should switch to it if you can :-)
> 
>> Also I have 4.6.3 compilers already built, I don't really have time to
>> rebuild them for 4.6.4.
>>
>> The kernel has a top-level minimum version, which I'm not in charge of, see:
>>
>> https://www.kernel.org/doc/html/latest/process/changes.html?highlight=gcc
> 
> Yes, I know.  And it is much preferred not to have stricter requirements
> for Power, I know that too.  Something has to give though :-/
> 
>> There were discussions about making 4.8 the minimum, but I'm not sure
>> where they got to.
> 
> Yeah, just petered out I think?
> 
> All significant distros come with a 4.8 as system compiler.
> 
>>>> Plain "m" works, how much does the "<>" affect code gen in practice?
>>>>
>>>> A quick diff here shows no difference from removing "<>".
>>>
>>> It will make it impossible to use update-form instructions here.  That
>>> probably does not matter much at all, in this case.
>>>
>>> If you remove the "<>" constraints, also remove the "%Un" output modifier?
>>
>> So like this?
>>
>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>> index 62cc8d7640ec..ca847aed8e45 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -207,10 +207,10 @@ do {								\
>>   
>>   #define __put_user_asm_goto(x, addr, label, op)			\
>>   	asm volatile goto(					\
>> -		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
>> +		"1:	" op "%X1 %0,%1	# put_user\n"		\
>>   		EX_TABLE(1b, %l2)				\
>>   		:						\
>> -		: "r" (x), "m<>" (*addr)				\
>> +		: "r" (x), "m" (*addr)				\
>>   		:						\
>>   		: label)
> 
> Like that.  But you will have to do that to *all* places we use the "<>"
> constraints, or wait for more stuff to fail?  And, there probably are
> places we *do* want update form insns used (they do help in some loops,
> for example)?
> 

AFAICT, git grep "m<>" provides no result.

However many places have %Ux:

arch/powerpc/boot/io.h:	__asm__ __volatile__("lbz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("stb%U0%X0 %1,%0; sync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("lhz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("sth%U0%X0 %1,%0; sync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("lwz%U1%X1 %0,%1; twi 
0,%0,0; isync"
arch/powerpc/boot/io.h:	__asm__ __volatile__("stw%U0%X0 %1,%0; sync"
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("lwz%U1%X1 
%0,%1" : "=r"(t) : "m"(v->counter));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("stw%U0%X0 
%1,%0" : "=m"(v->counter) : "r"(i));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("ld%U1%X1 %0,%1" 
: "=r"(t) : "m"(v->counter));
arch/powerpc/include/asm/atomic.h:	__asm__ __volatile__("std%U0%X0 
%1,%0" : "=m"(v->counter) : "r"(i));
arch/powerpc/include/asm/book3s/32/pgtable.h:		stw%U0%X0 %2,%0\n\
arch/powerpc/include/asm/book3s/32/pgtable.h:		stw%U0%X0 %L2,%1"
arch/powerpc/include/asm/io.h:	__asm__ __volatile__("sync;"#insn"%U1%X1 
%0,%1;twi 0,%0,0;isync"\
arch/powerpc/include/asm/io.h:	__asm__ __volatile__("sync;"#insn"%U0%X0 
%1,%0"			\
arch/powerpc/include/asm/nohash/pgtable.h:			stw%U0%X0 %2,%0\n\
arch/powerpc/include/asm/nohash/pgtable.h:			stw%U0%X0 %L2,%1"
arch/powerpc/kvm/powerpc.c:	asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : 
"=m" (fprd) : "m" (fprs)
arch/powerpc/kvm/powerpc.c:	asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : 
"=m" (fprs) : "m" (fprd)


Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-06 18:10           ` Christophe Leroy
@ 2020-05-06 22:18             ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 22:18 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, npiggin, linux-kernel, linuxppc-dev

On Wed, May 06, 2020 at 08:10:57PM +0200, Christophe Leroy wrote:
> Le 06/05/2020 à 19:58, Segher Boessenkool a écrit :
> >>  #define __put_user_asm_goto(x, addr, label, op)			\
> >>  	asm volatile goto(					\
> >>-		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> >>+		"1:	" op "%X1 %0,%1	# put_user\n"		\
> >>  		EX_TABLE(1b, %l2)				\
> >>  		:						\
> >>-		: "r" (x), "m<>" (*addr)				\
> >>+		: "r" (x), "m" (*addr)				\
> >>  		:						\
> >>  		: label)
> >
> >Like that.  But you will have to do that to *all* places we use the "<>"
> >constraints, or wait for more stuff to fail?  And, there probably are
> >places we *do* want update form insns used (they do help in some loops,
> >for example)?
> >
> 
> AFAICT, git grep "m<>" provides no result.

Ah, okay.

> However many places have %Ux:

<snip>

Yes, all of those are from when "m" still meant what "m<>" is now.  For
seeing how many update form insns can be generated (and how much that
matters), these all should be fixed, at a minimum.


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-05-06 22:18             ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-05-06 22:18 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linux-kernel, npiggin, Paul Mackerras, linuxppc-dev

On Wed, May 06, 2020 at 08:10:57PM +0200, Christophe Leroy wrote:
> Le 06/05/2020 à 19:58, Segher Boessenkool a écrit :
> >>  #define __put_user_asm_goto(x, addr, label, op)			\
> >>  	asm volatile goto(					\
> >>-		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
> >>+		"1:	" op "%X1 %0,%1	# put_user\n"		\
> >>  		EX_TABLE(1b, %l2)				\
> >>  		:						\
> >>-		: "r" (x), "m<>" (*addr)				\
> >>+		: "r" (x), "m" (*addr)				\
> >>  		:						\
> >>  		: label)
> >
> >Like that.  But you will have to do that to *all* places we use the "<>"
> >constraints, or wait for more stuff to fail?  And, there probably are
> >places we *do* want update form insns used (they do help in some loops,
> >for example)?
> >
> 
> AFAICT, git grep "m<>" provides no result.

Ah, okay.

> However many places have %Ux:

<snip>

Yes, all of those are from when "m" still meant what "m<>" is now.  For
seeing how many update form insns can be generated (and how much that
matters), these all should be fixed, at a minimum.


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-04-17 17:08 ` Christophe Leroy
                   ` (2 preceding siblings ...)
  (?)
@ 2020-05-29  4:24 ` Michael Ellerman
  2020-06-11 22:43     ` Nick Desaulniers
  -1 siblings, 1 reply; 38+ messages in thread
From: Michael Ellerman @ 2020-05-29  4:24 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, segher
  Cc: linuxppc-dev, linux-kernel

On Fri, 2020-04-17 at 17:08:51 UTC, Christophe Leroy wrote:
> unsafe_put_user() is designed to take benefit of 'asm goto'.
> 
> Instead of using the standard __put_user() approach and branch
> based on the returned error, use 'asm goto' and make the
> exception code branch directly to the error label. There is
> no code anymore in the fixup section.
> 
> This change significantly simplifies functions using
> unsafe_put_user()
...
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>

Applied to powerpc topic/uaccess-ppc, thanks.

https://git.kernel.org/powerpc/c/334710b1496af8a0960e70121f850e209c20958f

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 2/2] powerpc/uaccess: Implement unsafe_copy_to_user() as a simple loop
  2020-04-17 17:08   ` Christophe Leroy
  (?)
@ 2020-05-29  4:24   ` Michael Ellerman
  -1 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-05-29  4:24 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	npiggin, segher
  Cc: linuxppc-dev, linux-kernel

On Fri, 2020-04-17 at 17:08:52 UTC, Christophe Leroy wrote:
> At the time being, unsafe_copy_to_user() is based on
> raw_copy_to_user() which calls __copy_tofrom_user().
> 
> __copy_tofrom_user() is a big optimised function to copy big amount
> of data. It aligns destinations to cache line in order to use
> dcbz instruction.
> 
> Today unsafe_copy_to_user() is called only from filldir().
> It is used to mainly copy small amount of data like filenames,
> so __copy_tofrom_user() is not fit.
> 
> Also, unsafe_copy_to_user() is used within user_access_begin/end
> sections. In those section, it is preferable to not call functions.
> 
> Rewrite unsafe_copy_to_user() as a macro that uses __put_user_goto().
> We first perform a loop of long, then we finish with necessary
> complements.
> 
> unsafe_copy_to_user() might be used in the near future to copy
> fixed-size data, like pt_regs structs during signal processing.
> Having it as a macro allows GCC to optimise it for instead when
> it knows the size in advance, it can unloop loops, drop complements
> when the size is a multiple of longs, etc ...
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc topic/uaccess-ppc, thanks.

https://git.kernel.org/powerpc/c/17bc43367fc2a720400d21c745db641c654c1e6b

cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-05-29  4:24 ` Michael Ellerman
@ 2020-06-11 22:43     ` Nick Desaulniers
  0 siblings, 0 replies; 38+ messages in thread
From: Nick Desaulniers @ 2020-06-11 22:43 UTC (permalink / raw)
  To: patch-notifications, christophe.leroy, segher
  Cc: benh, linux-kernel, linuxppc-dev, npiggin, paulus, clang-built-linux

Hello! It seems this patch broke our ppc32 builds, and we had to
disable them [0]. :(

From what I can tell, though Michael mentioned this was merged on May
29, but our CI of -next was green for ppc32 until June 4, then mainline
went red June 6. So this patch only got 2 days of soak time before the
merge window opened.

A general issue with the -next workflow seems to be that patches get
different amounts of soak time. For higher risk patches like this one,
can I please ask that they be help back a release if close to the merge
window?

Segher, Cristophe, I suspect Clang is missing support for the %L and %U
output templates [1]. I've implemented support for some of these before
in Clang via the documentation at [2], but these seem to be machine
specific? Can you please point me to documentation/unit tests/source for
these so that I can figure out what they should be doing, and look into
implementing them in Clang?

[0] https://github.com/ClangBuiltLinux/continuous-integration/pull/279
[1] https://bugs.llvm.org/show_bug.cgi?id=46186
[2]
https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html#Output-Template

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-11 22:43     ` Nick Desaulniers
  0 siblings, 0 replies; 38+ messages in thread
From: Nick Desaulniers @ 2020-06-11 22:43 UTC (permalink / raw)
  To: patch-notifications, christophe.leroy, segher
  Cc: linux-kernel, npiggin, clang-built-linux, paulus, linuxppc-dev

Hello! It seems this patch broke our ppc32 builds, and we had to
disable them [0]. :(

From what I can tell, though Michael mentioned this was merged on May
29, but our CI of -next was green for ppc32 until June 4, then mainline
went red June 6. So this patch only got 2 days of soak time before the
merge window opened.

A general issue with the -next workflow seems to be that patches get
different amounts of soak time. For higher risk patches like this one,
can I please ask that they be help back a release if close to the merge
window?

Segher, Cristophe, I suspect Clang is missing support for the %L and %U
output templates [1]. I've implemented support for some of these before
in Clang via the documentation at [2], but these seem to be machine
specific? Can you please point me to documentation/unit tests/source for
these so that I can figure out what they should be doing, and look into
implementing them in Clang?

[0] https://github.com/ClangBuiltLinux/continuous-integration/pull/279
[1] https://bugs.llvm.org/show_bug.cgi?id=46186
[2]
https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html#Output-Template

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-06-11 22:43     ` Nick Desaulniers
@ 2020-06-11 23:52       ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-06-11 23:52 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: patch-notifications, christophe.leroy, benh, linux-kernel,
	linuxppc-dev, npiggin, paulus, clang-built-linux

On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
> Segher, Cristophe, I suspect Clang is missing support for the %L and %U
> output templates [1].

The arch/powerpc kernel first used the %U output modifier in 0c176fa80fdf
(from 2016), and %L in b8b572e1015f (2008).  include/asm-ppc (and ppc64)
have had %U since 2005 (1da177e4c3f4), and %L as well (0c541b4406a6).

> I've implemented support for some of these before
> in Clang via the documentation at [2], but these seem to be machine
> specific?

Yes, almost all output modifiers are.  Only %l, %a, %n, and part of %c
are generic (and %% and %= and on some targets, %{, %|, %}).

> Can you please point me to documentation/unit tests/source for
> these so that I can figure out what they should be doing, and look into
> implementing them in Clang?

The PowerPC part of
https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
(sorry, no anchor) documents %U.

Traditionally the source code is the documentation for this.  The code
here starts with the comment
      /* Write second word of DImode or DFmode reference.  Works on register
         or non-indexed memory only.  */
(which is very out-of-date itself, it works fine for e.g. TImode as well,
but alas).

Unit tests are completely unsuitable for most compiler things like this.

The source code is gcc/config/rs6000/rs6000.c, easiest is to search for
'L' (with those quotes).  Function print_operand.

HtH,


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-11 23:52       ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-06-11 23:52 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: christophe.leroy, patch-notifications, linux-kernel, npiggin,
	clang-built-linux, paulus, linuxppc-dev

On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
> Segher, Cristophe, I suspect Clang is missing support for the %L and %U
> output templates [1].

The arch/powerpc kernel first used the %U output modifier in 0c176fa80fdf
(from 2016), and %L in b8b572e1015f (2008).  include/asm-ppc (and ppc64)
have had %U since 2005 (1da177e4c3f4), and %L as well (0c541b4406a6).

> I've implemented support for some of these before
> in Clang via the documentation at [2], but these seem to be machine
> specific?

Yes, almost all output modifiers are.  Only %l, %a, %n, and part of %c
are generic (and %% and %= and on some targets, %{, %|, %}).

> Can you please point me to documentation/unit tests/source for
> these so that I can figure out what they should be doing, and look into
> implementing them in Clang?

The PowerPC part of
https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
(sorry, no anchor) documents %U.

Traditionally the source code is the documentation for this.  The code
here starts with the comment
      /* Write second word of DImode or DFmode reference.  Works on register
         or non-indexed memory only.  */
(which is very out-of-date itself, it works fine for e.g. TImode as well,
but alas).

Unit tests are completely unsuitable for most compiler things like this.

The source code is gcc/config/rs6000/rs6000.c, easiest is to search for
'L' (with those quotes).  Function print_operand.

HtH,


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-06-11 23:52       ` Segher Boessenkool
@ 2020-06-12 21:33         ` Nick Desaulniers
  -1 siblings, 0 replies; 38+ messages in thread
From: Nick Desaulniers @ 2020-06-12 21:33 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt, LKML,
	linuxppc-dev, Nicholas Piggin, Paul Mackerras, clang-built-linux

On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
> > Segher, Cristophe, I suspect Clang is missing support for the %L and %U
> > output templates [1].
>
> The arch/powerpc kernel first used the %U output modifier in 0c176fa80fdf
> (from 2016), and %L in b8b572e1015f (2008).  include/asm-ppc (and ppc64)
> have had %U since 2005 (1da177e4c3f4), and %L as well (0c541b4406a6).

Thanks for all the references.  So it looks like we should have failed
sooner, if we didn't support those. Hmm...

> > Can you please point me to documentation/unit tests/source for
> > these so that I can figure out what they should be doing, and look into
> > implementing them in Clang?
>
> The PowerPC part of
> https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> (sorry, no anchor) documents %U.

I thought those were constraints, not output templates?  Oh,
    The asm statement must also use %U<opno> as a placeholder for the
    “update” flag in the corresponding load or store instruction.
got it.

>
> Traditionally the source code is the documentation for this.  The code
> here starts with the comment
>       /* Write second word of DImode or DFmode reference.  Works on register
>          or non-indexed memory only.  */
> (which is very out-of-date itself, it works fine for e.g. TImode as well,
> but alas).
>
> Unit tests are completely unsuitable for most compiler things like this.

What? No, surely one may write tests for output operands.  Grepping
for `%L` in gcc/ was less fun than I was hoping.

>
> The source code is gcc/config/rs6000/rs6000.c, easiest is to search for
> 'L' (with those quotes).  Function print_operand.
>
> HtH,

Yes, perfect, thank you so much!  So it looks like LLVM does not yet
handle %L properly for memory operands.
https://bugs.llvm.org/show_bug.cgi?id=46186#c4
It's neat to see how this is implemented in GCC (and how many aren't
implemented in LLVM, yikes :( ).  For reference, this is implemented
in PPCAsmPrinter::PrintAsmOperand() and
PPCAsmPrinter::PrintAsmMemoryOperand() in
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp.  GCC switches first on the
modifier characters, then the operand type. LLVM dispatches on operand
type, then modifier.  When I was looking into LLVM's AsmPrinter class,
I was surprised to see it's basically an assembler that just has
complex logic to just do a bunch of prints, so it makes sense to see
that pattern in GCC literally calling printf.  Not drastically
different than my first toy compiler
https://nickdesaulniers.github.io/blog/2015/05/25/interpreter-compiler-jit/
(looking back at that post now knowing what relocations are, I feel I
should probably add a note that that's a problem that's being solved
there.  Didn't know it at the time).

Some things I don't understand from PPC parlance is the "mode"
(preinc, predec, premodify) and small data operands?

IIUC the bug report correctly, it looks like LLVM is failing for the
__put_user_asm2_goto case for -m32.  A simple reproducer:
https://godbolt.org/z/jBBF9b

void foo(long long in, long long* out) {
asm volatile(
  "stw%X1 %0, %1\n\t"
  "stw%X1 %L0, %L1"
  ::"r"(in), "m"(*out));
}
prints (in GCC):
foo:
  stw 3, 0(5)
  stw 4, 4(5)
  blr
(first time looking at ppc assembler, seems constants and registers
are not as easy to distinguish,
https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
used to it." LOL, ok).
so that's "store word from register 3 into dereference of register 5
plus 0, then store word from register 4 into dereference of register 5
plus 4?"  Guessing the ppc32 abi is ILP32 putting long long's into two
separate registers?
Seems easy to implement in LLVM (short of those modes/small data operands).
https://reviews.llvm.org/D81767
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-12 21:33         ` Nick Desaulniers
  0 siblings, 0 replies; 38+ messages in thread
From: Nick Desaulniers @ 2020-06-12 21:33 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Christophe Leroy, Michael Ellerman, LKML, Nicholas Piggin,
	clang-built-linux, Paul Mackerras, linuxppc-dev

On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
> > Segher, Cristophe, I suspect Clang is missing support for the %L and %U
> > output templates [1].
>
> The arch/powerpc kernel first used the %U output modifier in 0c176fa80fdf
> (from 2016), and %L in b8b572e1015f (2008).  include/asm-ppc (and ppc64)
> have had %U since 2005 (1da177e4c3f4), and %L as well (0c541b4406a6).

Thanks for all the references.  So it looks like we should have failed
sooner, if we didn't support those. Hmm...

> > Can you please point me to documentation/unit tests/source for
> > these so that I can figure out what they should be doing, and look into
> > implementing them in Clang?
>
> The PowerPC part of
> https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> (sorry, no anchor) documents %U.

I thought those were constraints, not output templates?  Oh,
    The asm statement must also use %U<opno> as a placeholder for the
    “update” flag in the corresponding load or store instruction.
got it.

>
> Traditionally the source code is the documentation for this.  The code
> here starts with the comment
>       /* Write second word of DImode or DFmode reference.  Works on register
>          or non-indexed memory only.  */
> (which is very out-of-date itself, it works fine for e.g. TImode as well,
> but alas).
>
> Unit tests are completely unsuitable for most compiler things like this.

What? No, surely one may write tests for output operands.  Grepping
for `%L` in gcc/ was less fun than I was hoping.

>
> The source code is gcc/config/rs6000/rs6000.c, easiest is to search for
> 'L' (with those quotes).  Function print_operand.
>
> HtH,

Yes, perfect, thank you so much!  So it looks like LLVM does not yet
handle %L properly for memory operands.
https://bugs.llvm.org/show_bug.cgi?id=46186#c4
It's neat to see how this is implemented in GCC (and how many aren't
implemented in LLVM, yikes :( ).  For reference, this is implemented
in PPCAsmPrinter::PrintAsmOperand() and
PPCAsmPrinter::PrintAsmMemoryOperand() in
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp.  GCC switches first on the
modifier characters, then the operand type. LLVM dispatches on operand
type, then modifier.  When I was looking into LLVM's AsmPrinter class,
I was surprised to see it's basically an assembler that just has
complex logic to just do a bunch of prints, so it makes sense to see
that pattern in GCC literally calling printf.  Not drastically
different than my first toy compiler
https://nickdesaulniers.github.io/blog/2015/05/25/interpreter-compiler-jit/
(looking back at that post now knowing what relocations are, I feel I
should probably add a note that that's a problem that's being solved
there.  Didn't know it at the time).

Some things I don't understand from PPC parlance is the "mode"
(preinc, predec, premodify) and small data operands?

IIUC the bug report correctly, it looks like LLVM is failing for the
__put_user_asm2_goto case for -m32.  A simple reproducer:
https://godbolt.org/z/jBBF9b

void foo(long long in, long long* out) {
asm volatile(
  "stw%X1 %0, %1\n\t"
  "stw%X1 %L0, %L1"
  ::"r"(in), "m"(*out));
}
prints (in GCC):
foo:
  stw 3, 0(5)
  stw 4, 4(5)
  blr
(first time looking at ppc assembler, seems constants and registers
are not as easy to distinguish,
https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
used to it." LOL, ok).
so that's "store word from register 3 into dereference of register 5
plus 0, then store word from register 4 into dereference of register 5
plus 4?"  Guessing the ppc32 abi is ILP32 putting long long's into two
separate registers?
Seems easy to implement in LLVM (short of those modes/small data operands).
https://reviews.llvm.org/D81767
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-06-12 21:33         ` Nick Desaulniers
@ 2020-06-13  1:08           ` Segher Boessenkool
  -1 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-06-13  1:08 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt, LKML,
	linuxppc-dev, Nicholas Piggin, Paul Mackerras, clang-built-linux

Hi!

On Fri, Jun 12, 2020 at 02:33:09PM -0700, Nick Desaulniers wrote:
> On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
> > The PowerPC part of
> > https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> > (sorry, no anchor) documents %U.
> 
> I thought those were constraints, not output templates?  Oh,
>     The asm statement must also use %U<opno> as a placeholder for the
>     “update” flag in the corresponding load or store instruction.
> got it.

Traditionally, *all* constraints were documented here, including the
ones that are only meant for GCC's internal use.  And the output
modifiers were largely not documented at all.

For GCC 10, for Power, I changed it to only document the constraints
that should be public in gcc.info (and everything in gccint.info).  The
output modifiers can neatly be documented here as well, since it such a
short section now.  We're not quite there yet, but getting there.

> > Traditionally the source code is the documentation for this.  The code
> > here starts with the comment
> >       /* Write second word of DImode or DFmode reference.  Works on register
> >          or non-indexed memory only.  */
> > (which is very out-of-date itself, it works fine for e.g. TImode as well,
> > but alas).
> >
> > Unit tests are completely unsuitable for most compiler things like this.
> 
> What? No, surely one may write tests for output operands.  Grepping
> for `%L` in gcc/ was less fun than I was hoping.

You should look for 'L' instead (incl. those quotes) ;-)

Unit tests are 100x as much work, and gets <5% of the problems, compared
to regression tests.  Unit tests only test the stuff you should have
written *anyway*.  It is much more useful to test that much higher level
things work, IMNSHO.

> > HtH,
> 
> Yes, perfect, thank you so much!  So it looks like LLVM does not yet
> handle %L properly for memory operands.
> https://bugs.llvm.org/show_bug.cgi?id=46186#c4
> It's neat to see how this is implemented in GCC (and how many aren't
> implemented in LLVM, yikes :( ).  For reference, this is implemented
> in PPCAsmPrinter::PrintAsmOperand() and
> PPCAsmPrinter::PrintAsmMemoryOperand() in
> llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp.  GCC switches first on the
> modifier characters, then the operand type.

That is what the rs6000 backend currently does, yeah.  The print_operand
function just gets passed the modifier character (as "int code", or 0 if
there is no modifier).  Since there are so many modifiers there aren't
really any better options than just doing a "switch (code)" around
everything else (well, things can be factored, some helper functions,
etc., but this is mostly very old code, and it has grown organically).

> LLVM dispatches on operand type, then modifier.

That is neater, certainly for REG operands.

> When I was looking into LLVM's AsmPrinter class,
> I was surprised to see it's basically an assembler that just has
> complex logic to just do a bunch of prints, so it makes sense to see
> that pattern in GCC literally calling printf.

GCC always outputs assembler code.  This is usually a big advantage, for
things like output_operand.

> Some things I don't understand from PPC parlance is the "mode"
> (preinc, predec, premodify) and small data operands?

"mode" is "machine mode" -- SImode and the like.  PRE_DEC etc. are
*codes* (rtx codes), like,  (mem:DF (pre_dec:SI (reg:SI 39)))  (straight
from the manual).

> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b
> 
> void foo(long long in, long long* out) {
> asm volatile(
>   "stw%X1 %0, %1\n\t"
>   "stw%X1 %L0, %L1"
>   ::"r"(in), "m"(*out));
> }

This is wrong if operands[0] is a register, btw.  So it should use 'o'
as constraint (not 'm'), and then the 'X' output modifier has become
useless.

> prints (in GCC):
> foo:
>   stw 3, 0(5)
>   stw 4, 4(5)
>   blr
> (first time looking at ppc assembler, seems constants and registers
> are not as easy to distinguish,

The instruction mnemonic always tells you what types all arguments are.
Traditionally we don't write spaces after commas, either.  That is
actually easier to read -- well, if you are used to it, anyway! :-)

> https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
> used to it." LOL, ok).

Since quite a while you can write your assembler using register names as
well.  Not using the dangerous macros the Linux kernel had/has(with
which you can write "rN" in place of any "N", and it doesn't force you
to use the register name either, so you could write "li r3,r4" and
"mr r3,0" and even "addi r3,r0,1234", all very misleading).

> so that's "store word from register 3 into dereference of register 5
> plus 0, then store word from register 4 into dereference of register 5
> plus 4?"

Yup.

> Guessing the ppc32 abi is ILP32 putting long long's into two
> separate registers?

Yes, and the order is the same as it would be in memory (on BE, high
half goes into the lower-numbered register; on LE, the wr^Wother way
around).

> Seems easy to implement in LLVM (short of those modes/small data operands).

I don't know what SDATA variants LLVM does support?

> https://reviews.llvm.org/D81767

Output modifiers are not just for use by the calling convention (as your
examples already show :-) )

%Ln is the second word of a multi-word reference, not the "upper word"
(%Yn is third, %Zn is fourth, and for BE it isn't the high half even
for 2-word things).

The code looks like it will work (I don't know most LLVM specifics of
course).

Cheers,


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-13  1:08           ` Segher Boessenkool
  0 siblings, 0 replies; 38+ messages in thread
From: Segher Boessenkool @ 2020-06-13  1:08 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Christophe Leroy, Michael Ellerman, LKML, Nicholas Piggin,
	clang-built-linux, Paul Mackerras, linuxppc-dev

Hi!

On Fri, Jun 12, 2020 at 02:33:09PM -0700, Nick Desaulniers wrote:
> On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
> > The PowerPC part of
> > https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> > (sorry, no anchor) documents %U.
> 
> I thought those were constraints, not output templates?  Oh,
>     The asm statement must also use %U<opno> as a placeholder for the
>     “update” flag in the corresponding load or store instruction.
> got it.

Traditionally, *all* constraints were documented here, including the
ones that are only meant for GCC's internal use.  And the output
modifiers were largely not documented at all.

For GCC 10, for Power, I changed it to only document the constraints
that should be public in gcc.info (and everything in gccint.info).  The
output modifiers can neatly be documented here as well, since it such a
short section now.  We're not quite there yet, but getting there.

> > Traditionally the source code is the documentation for this.  The code
> > here starts with the comment
> >       /* Write second word of DImode or DFmode reference.  Works on register
> >          or non-indexed memory only.  */
> > (which is very out-of-date itself, it works fine for e.g. TImode as well,
> > but alas).
> >
> > Unit tests are completely unsuitable for most compiler things like this.
> 
> What? No, surely one may write tests for output operands.  Grepping
> for `%L` in gcc/ was less fun than I was hoping.

You should look for 'L' instead (incl. those quotes) ;-)

Unit tests are 100x as much work, and gets <5% of the problems, compared
to regression tests.  Unit tests only test the stuff you should have
written *anyway*.  It is much more useful to test that much higher level
things work, IMNSHO.

> > HtH,
> 
> Yes, perfect, thank you so much!  So it looks like LLVM does not yet
> handle %L properly for memory operands.
> https://bugs.llvm.org/show_bug.cgi?id=46186#c4
> It's neat to see how this is implemented in GCC (and how many aren't
> implemented in LLVM, yikes :( ).  For reference, this is implemented
> in PPCAsmPrinter::PrintAsmOperand() and
> PPCAsmPrinter::PrintAsmMemoryOperand() in
> llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp.  GCC switches first on the
> modifier characters, then the operand type.

That is what the rs6000 backend currently does, yeah.  The print_operand
function just gets passed the modifier character (as "int code", or 0 if
there is no modifier).  Since there are so many modifiers there aren't
really any better options than just doing a "switch (code)" around
everything else (well, things can be factored, some helper functions,
etc., but this is mostly very old code, and it has grown organically).

> LLVM dispatches on operand type, then modifier.

That is neater, certainly for REG operands.

> When I was looking into LLVM's AsmPrinter class,
> I was surprised to see it's basically an assembler that just has
> complex logic to just do a bunch of prints, so it makes sense to see
> that pattern in GCC literally calling printf.

GCC always outputs assembler code.  This is usually a big advantage, for
things like output_operand.

> Some things I don't understand from PPC parlance is the "mode"
> (preinc, predec, premodify) and small data operands?

"mode" is "machine mode" -- SImode and the like.  PRE_DEC etc. are
*codes* (rtx codes), like,  (mem:DF (pre_dec:SI (reg:SI 39)))  (straight
from the manual).

> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b
> 
> void foo(long long in, long long* out) {
> asm volatile(
>   "stw%X1 %0, %1\n\t"
>   "stw%X1 %L0, %L1"
>   ::"r"(in), "m"(*out));
> }

This is wrong if operands[0] is a register, btw.  So it should use 'o'
as constraint (not 'm'), and then the 'X' output modifier has become
useless.

> prints (in GCC):
> foo:
>   stw 3, 0(5)
>   stw 4, 4(5)
>   blr
> (first time looking at ppc assembler, seems constants and registers
> are not as easy to distinguish,

The instruction mnemonic always tells you what types all arguments are.
Traditionally we don't write spaces after commas, either.  That is
actually easier to read -- well, if you are used to it, anyway! :-)

> https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
> used to it." LOL, ok).

Since quite a while you can write your assembler using register names as
well.  Not using the dangerous macros the Linux kernel had/has(with
which you can write "rN" in place of any "N", and it doesn't force you
to use the register name either, so you could write "li r3,r4" and
"mr r3,0" and even "addi r3,r0,1234", all very misleading).

> so that's "store word from register 3 into dereference of register 5
> plus 0, then store word from register 4 into dereference of register 5
> plus 4?"

Yup.

> Guessing the ppc32 abi is ILP32 putting long long's into two
> separate registers?

Yes, and the order is the same as it would be in memory (on BE, high
half goes into the lower-numbered register; on LE, the wr^Wother way
around).

> Seems easy to implement in LLVM (short of those modes/small data operands).

I don't know what SDATA variants LLVM does support?

> https://reviews.llvm.org/D81767

Output modifiers are not just for use by the calling convention (as your
examples already show :-) )

%Ln is the second word of a multi-word reference, not the "upper word"
(%Yn is third, %Zn is fourth, and for BE it isn't the high half even
for 2-word things).

The code looks like it will work (I don't know most LLVM specifics of
course).

Cheers,


Segher

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-06-12 21:33         ` Nick Desaulniers
@ 2020-06-13  6:46           ` Christophe Leroy
  -1 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-06-13  6:46 UTC (permalink / raw)
  To: Nick Desaulniers, Segher Boessenkool
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt, LKML,
	linuxppc-dev, Nicholas Piggin, Paul Mackerras, clang-built-linux



On 06/12/2020 09:33 PM, Nick Desaulniers wrote:
> 
> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b
> 
> void foo(long long in, long long* out) {
> asm volatile(
>    "stw%X1 %0, %1\n\t"
>    "stw%X1 %L0, %L1"
>    ::"r"(in), "m"(*out));
> }
> prints (in GCC):
> foo:
>    stw 3, 0(5)
>    stw 4, 4(5)
>    blr
> (first time looking at ppc assembler, seems constants and registers
> are not as easy to distinguish,
> https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
> used to it." LOL, ok).

When I do ppc-linux-objdump -d vmlinux, registers and constants are 
easily distinguished, see below.

c0002284 <start_here>:
c0002284:	3c 40 c0 3c 	lis     r2,-16324
c0002288:	60 42 45 00 	ori     r2,r2,17664
c000228c:	3c 82 40 00 	addis   r4,r2,16384
c0002290:	38 84 04 30 	addi    r4,r4,1072
c0002294:	7c 93 43 a6 	mtsprg  3,r4
c0002298:	3c 20 c0 3e 	lis     r1,-16322
c000229c:	38 21 e0 00 	addi    r1,r1,-8192
c00022a0:	38 00 00 00 	li      r0,0
c00022a4:	94 01 1f f0 	stwu    r0,8176(r1)
c00022a8:	48 35 e7 41 	bl      c03609e8 <early_init>
c00022ac:	38 60 00 00 	li      r3,0
c00022b0:	7f e4 fb 78 	mr      r4,r31
c00022b4:	48 35 e7 8d 	bl      c0360a40 <machine_init>
c00022b8:	48 35 eb e1 	bl      c0360e98 <MMU_init>
c00022bc:	3c c0 c0 3c 	lis     r6,-16324
c00022c0:	3c c6 40 00 	addis   r6,r6,16384
c00022c4:	7c df c3 a6 	mtspr   799,r6
c00022c8:	3c 80 c0 00 	lis     r4,-16384
c00022cc:	60 84 22 e4 	ori     r4,r4,8932
c00022d0:	3c 84 40 00 	addis   r4,r4,16384
c00022d4:	38 60 10 02 	li      r3,4098
c00022d8:	7c 9a 03 a6 	mtsrr0  r4
c00022dc:	7c 7b 03 a6 	mtsrr1  r3
c00022e0:	4c 00 00 64 	rfi
c00022e4:	7c 00 02 e4 	tlbia
c00022e8:	7c 00 04 ac 	hwsync
c00022ec:	3c c6 c0 00 	addis   r6,r6,-16384
c00022f0:	3c a0 c0 3c 	lis     r5,-16324
c00022f4:	60 a5 40 00 	ori     r5,r5,16384
c00022f8:	90 a0 00 f0 	stw     r5,240(0)
c00022fc:	3c a5 40 00 	addis   r5,r5,16384
c0002300:	90 c5 00 00 	stw     r6,0(r5)
c0002304:	38 80 10 32 	li      r4,4146
c0002308:	3c 60 c0 35 	lis     r3,-16331
c000230c:	60 63 d6 a8 	ori     r3,r3,54952
c0002310:	7c 7a 03 a6 	mtsrr0  r3
c0002314:	7c 9b 03 a6 	mtsrr1  r4
c0002318:	4c 00 00 64 	rfi

For GCC, I think you call tell you want register names with -mregnames

Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-13  6:46           ` Christophe Leroy
  0 siblings, 0 replies; 38+ messages in thread
From: Christophe Leroy @ 2020-06-13  6:46 UTC (permalink / raw)
  To: Nick Desaulniers, Segher Boessenkool
  Cc: Christophe Leroy, Michael Ellerman, LKML, Nicholas Piggin,
	clang-built-linux, Paul Mackerras, linuxppc-dev



On 06/12/2020 09:33 PM, Nick Desaulniers wrote:
> 
> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b
> 
> void foo(long long in, long long* out) {
> asm volatile(
>    "stw%X1 %0, %1\n\t"
>    "stw%X1 %L0, %L1"
>    ::"r"(in), "m"(*out));
> }
> prints (in GCC):
> foo:
>    stw 3, 0(5)
>    stw 4, 4(5)
>    blr
> (first time looking at ppc assembler, seems constants and registers
> are not as easy to distinguish,
> https://developer.ibm.com/technologies/linux/articles/l-ppc/ say "Get
> used to it." LOL, ok).

When I do ppc-linux-objdump -d vmlinux, registers and constants are 
easily distinguished, see below.

c0002284 <start_here>:
c0002284:	3c 40 c0 3c 	lis     r2,-16324
c0002288:	60 42 45 00 	ori     r2,r2,17664
c000228c:	3c 82 40 00 	addis   r4,r2,16384
c0002290:	38 84 04 30 	addi    r4,r4,1072
c0002294:	7c 93 43 a6 	mtsprg  3,r4
c0002298:	3c 20 c0 3e 	lis     r1,-16322
c000229c:	38 21 e0 00 	addi    r1,r1,-8192
c00022a0:	38 00 00 00 	li      r0,0
c00022a4:	94 01 1f f0 	stwu    r0,8176(r1)
c00022a8:	48 35 e7 41 	bl      c03609e8 <early_init>
c00022ac:	38 60 00 00 	li      r3,0
c00022b0:	7f e4 fb 78 	mr      r4,r31
c00022b4:	48 35 e7 8d 	bl      c0360a40 <machine_init>
c00022b8:	48 35 eb e1 	bl      c0360e98 <MMU_init>
c00022bc:	3c c0 c0 3c 	lis     r6,-16324
c00022c0:	3c c6 40 00 	addis   r6,r6,16384
c00022c4:	7c df c3 a6 	mtspr   799,r6
c00022c8:	3c 80 c0 00 	lis     r4,-16384
c00022cc:	60 84 22 e4 	ori     r4,r4,8932
c00022d0:	3c 84 40 00 	addis   r4,r4,16384
c00022d4:	38 60 10 02 	li      r3,4098
c00022d8:	7c 9a 03 a6 	mtsrr0  r4
c00022dc:	7c 7b 03 a6 	mtsrr1  r3
c00022e0:	4c 00 00 64 	rfi
c00022e4:	7c 00 02 e4 	tlbia
c00022e8:	7c 00 04 ac 	hwsync
c00022ec:	3c c6 c0 00 	addis   r6,r6,-16384
c00022f0:	3c a0 c0 3c 	lis     r5,-16324
c00022f4:	60 a5 40 00 	ori     r5,r5,16384
c00022f8:	90 a0 00 f0 	stw     r5,240(0)
c00022fc:	3c a5 40 00 	addis   r5,r5,16384
c0002300:	90 c5 00 00 	stw     r6,0(r5)
c0002304:	38 80 10 32 	li      r4,4146
c0002308:	3c 60 c0 35 	lis     r3,-16331
c000230c:	60 63 d6 a8 	ori     r3,r3,54952
c0002310:	7c 7a 03 a6 	mtsrr0  r3
c0002314:	7c 9b 03 a6 	mtsrr1  r4
c0002318:	4c 00 00 64 	rfi

For GCC, I think you call tell you want register names with -mregnames

Christophe

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
  2020-06-12 21:33         ` Nick Desaulniers
@ 2020-06-13 10:47           ` Michael Ellerman
  -1 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-06-13 10:47 UTC (permalink / raw)
  To: Nick Desaulniers, Segher Boessenkool
  Cc: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt, LKML,
	linuxppc-dev, Nicholas Piggin, Paul Mackerras, clang-built-linux

Nick Desaulniers <ndesaulniers@google.com> writes:

> On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
>>
>> On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
>> > Segher, Cristophe, I suspect Clang is missing support for the %L and %U
>> > output templates [1].
...
>
> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b

If you add `-mregnames` you get register names:

https://godbolt.org/z/MxLjhF

foo:
        stw %r3, 0(%r5)
        stw %r4, 4(%r5)
        blr


cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
@ 2020-06-13 10:47           ` Michael Ellerman
  0 siblings, 0 replies; 38+ messages in thread
From: Michael Ellerman @ 2020-06-13 10:47 UTC (permalink / raw)
  To: Nick Desaulniers, Segher Boessenkool
  Cc: Christophe Leroy, Michael Ellerman, LKML, Nicholas Piggin,
	clang-built-linux, Paul Mackerras, linuxppc-dev

Nick Desaulniers <ndesaulniers@google.com> writes:

> On Thu, Jun 11, 2020 at 4:53 PM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
>>
>> On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
>> > Segher, Cristophe, I suspect Clang is missing support for the %L and %U
>> > output templates [1].
...
>
> IIUC the bug report correctly, it looks like LLVM is failing for the
> __put_user_asm2_goto case for -m32.  A simple reproducer:
> https://godbolt.org/z/jBBF9b

If you add `-mregnames` you get register names:

https://godbolt.org/z/MxLjhF

foo:
        stw %r3, 0(%r5)
        stw %r4, 4(%r5)
        blr


cheers

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2020-06-13 10:48 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-17 17:08 [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto' Christophe Leroy
2020-04-17 17:08 ` Christophe Leroy
2020-04-17 17:08 ` [PATCH v4 2/2] powerpc/uaccess: Implement unsafe_copy_to_user() as a simple loop Christophe Leroy
2020-04-17 17:08   ` Christophe Leroy
2020-05-29  4:24   ` Michael Ellerman
2020-05-05 14:27 ` [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto' Michael Ellerman
2020-05-05 14:27   ` Michael Ellerman
2020-05-05 15:32   ` Segher Boessenkool
2020-05-05 15:32     ` Segher Boessenkool
2020-05-06  0:58     ` Michael Ellerman
2020-05-06  0:58       ` Michael Ellerman
2020-05-06 17:58       ` Segher Boessenkool
2020-05-06 17:58         ` Segher Boessenkool
2020-05-06 18:10         ` Christophe Leroy
2020-05-06 18:10           ` Christophe Leroy
2020-05-06 22:18           ` Segher Boessenkool
2020-05-06 22:18             ` Segher Boessenkool
2020-05-05 15:40   ` Christophe Leroy
2020-05-05 15:40     ` Christophe Leroy
2020-05-05 15:59     ` Segher Boessenkool
2020-05-05 15:59       ` Segher Boessenkool
2020-05-06  1:36       ` Michael Ellerman
2020-05-06  1:36         ` Michael Ellerman
2020-05-06 18:09         ` Segher Boessenkool
2020-05-06 18:09           ` Segher Boessenkool
2020-05-29  4:24 ` Michael Ellerman
2020-06-11 22:43   ` Nick Desaulniers
2020-06-11 22:43     ` Nick Desaulniers
2020-06-11 23:52     ` Segher Boessenkool
2020-06-11 23:52       ` Segher Boessenkool
2020-06-12 21:33       ` Nick Desaulniers
2020-06-12 21:33         ` Nick Desaulniers
2020-06-13  1:08         ` Segher Boessenkool
2020-06-13  1:08           ` Segher Boessenkool
2020-06-13  6:46         ` Christophe Leroy
2020-06-13  6:46           ` Christophe Leroy
2020-06-13 10:47         ` Michael Ellerman
2020-06-13 10:47           ` Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.