All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Michael Ellerman <mpe@ellerman.id.au>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>,
	linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	Segher Boessenkool <segher@kernel.crashing.org>
Subject: [PATCH v4 2/3] powerpc/atomics: Use immediate operand when possible
Date: Mon, 20 Sep 2021 10:31:18 +0200	[thread overview]
Message-ID: <e59509e9bc688244bfc465b4ee6f48a15599028a.1632126669.git.christophe.leroy@csgroup.eu> (raw)
In-Reply-To: <db9d01d5c543c5add4b2beadb03d39e99c7ada2c.1632126669.git.christophe.leroy@csgroup.eu>

Today we get the following code generation for atomic operations:

	c001bb2c:	39 20 00 01 	li      r9,1
	c001bb30:	7d 40 18 28 	lwarx   r10,0,r3
	c001bb34:	7d 09 50 50 	subf    r8,r9,r10
	c001bb38:	7d 00 19 2d 	stwcx.  r8,0,r3

	c001c7a8:	39 40 00 01 	li      r10,1
	c001c7ac:	7d 00 18 28 	lwarx   r8,0,r3
	c001c7b0:	7c ea 42 14 	add     r7,r10,r8
	c001c7b4:	7c e0 19 2d 	stwcx.  r7,0,r3

By allowing GCC to choose between immediate or regular operation,
we get:

	c001bb2c:	7d 20 18 28 	lwarx   r9,0,r3
	c001bb30:	39 49 ff ff 	addi    r10,r9,-1
	c001bb34:	7d 40 19 2d 	stwcx.  r10,0,r3
	--
	c001c7a4:	7d 40 18 28 	lwarx   r10,0,r3
	c001c7a8:	39 0a 00 01 	addi    r8,r10,1
	c001c7ac:	7d 00 19 2d 	stwcx.  r8,0,r3

For "and", the dot form has to be used because "andi" doesn't exist.

For logical operations we use unsigned 16 bits immediate.
For arithmetic operations we use signed 16 bits immediate.

On pmac32_defconfig, it reduces the text by approx another 8 kbytes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
---
v4: Rebased

v2: Use "addc/addic"
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/atomic.h | 56 +++++++++++++++----------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 6a53ef178bfd..ce0d5a013c58 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i)
 	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
 }
 
-#define ATOMIC_OP(op, asm_op)						\
+#define ATOMIC_OP(op, asm_op, suffix, sign, ...)			\
 static __inline__ void arch_atomic_##op(int a, atomic_t *v)		\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
 "	stwcx.	%0,0,%3 \n"						\
 "	bne-	1b\n"							\
 	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 }									\
 
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...)		\
 static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "_return_relaxed\n"	\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
 "	stwcx.	%0,0,%3\n"						\
 "	bne-	1b\n"							\
 	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 									\
 	return t;							\
 }
 
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...)		\
 static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
 {									\
 	int res, t;							\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
-	#asm_op " %1,%3,%0\n"						\
+	#asm_op "%I3" suffix " %1,%0,%3\n"				\
 "	stwcx.	%1,0,%4\n"						\
 "	bne-	1b\n"							\
 	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 									\
 	return res;							\
 }
 
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign, ...)			\
+	ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__)		\
+	ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, subf)
+ATOMIC_OPS(add, add, "c", I, "xer")
+ATOMIC_OPS(sub, sub, "c", I, "xer")
 
 #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
 #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
@@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf)
 #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign)				\
+	ATOMIC_OP(op, asm_op, suffix, sign)				\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
 
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(or, or)
-ATOMIC_OPS(xor, xor)
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
 
 #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
 #define arch_atomic_fetch_or_relaxed  arch_atomic_fetch_or_relaxed
@@ -241,15 +241,15 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 "1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
-	add	%0,%2,%0 \n"
+	add%I2c	%0,%0,%2 \n"
 "	stwcx.	%0,0,%1 \n\
 	bne-	1b \n"
 	PPC_ATOMIC_EXIT_BARRIER
-"	subf	%0,%2,%0 \n\
+"	sub%I2c	%0,%0,%2 \n\
 2:"
 	: "=&r" (t)
-	: "r" (&v->counter), "r" (a), "r" (u)
-	: "cc", "memory");
+	: "r" (&v->counter), "rI" (a), "r" (u)
+	: "cc", "memory", "xer");
 
 	return t;
 }
-- 
2.31.1


WARNING: multiple messages have this Message-ID
From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org
Subject: [PATCH v4 2/3] powerpc/atomics: Use immediate operand when possible
Date: Mon, 20 Sep 2021 10:31:18 +0200	[thread overview]
Message-ID: <e59509e9bc688244bfc465b4ee6f48a15599028a.1632126669.git.christophe.leroy@csgroup.eu> (raw)
In-Reply-To: <db9d01d5c543c5add4b2beadb03d39e99c7ada2c.1632126669.git.christophe.leroy@csgroup.eu>

Today we get the following code generation for atomic operations:

	c001bb2c:	39 20 00 01 	li      r9,1
	c001bb30:	7d 40 18 28 	lwarx   r10,0,r3
	c001bb34:	7d 09 50 50 	subf    r8,r9,r10
	c001bb38:	7d 00 19 2d 	stwcx.  r8,0,r3

	c001c7a8:	39 40 00 01 	li      r10,1
	c001c7ac:	7d 00 18 28 	lwarx   r8,0,r3
	c001c7b0:	7c ea 42 14 	add     r7,r10,r8
	c001c7b4:	7c e0 19 2d 	stwcx.  r7,0,r3

By allowing GCC to choose between immediate or regular operation,
we get:

	c001bb2c:	7d 20 18 28 	lwarx   r9,0,r3
	c001bb30:	39 49 ff ff 	addi    r10,r9,-1
	c001bb34:	7d 40 19 2d 	stwcx.  r10,0,r3
	--
	c001c7a4:	7d 40 18 28 	lwarx   r10,0,r3
	c001c7a8:	39 0a 00 01 	addi    r8,r10,1
	c001c7ac:	7d 00 19 2d 	stwcx.  r8,0,r3

For "and", the dot form has to be used because "andi" doesn't exist.

For logical operations we use unsigned 16 bits immediate.
For arithmetic operations we use signed 16 bits immediate.

On pmac32_defconfig, it reduces the text by approx another 8 kbytes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
---
v4: Rebased

v2: Use "addc/addic"
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/atomic.h | 56 +++++++++++++++----------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 6a53ef178bfd..ce0d5a013c58 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i)
 	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
 }
 
-#define ATOMIC_OP(op, asm_op)						\
+#define ATOMIC_OP(op, asm_op, suffix, sign, ...)			\
 static __inline__ void arch_atomic_##op(int a, atomic_t *v)		\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
 "	stwcx.	%0,0,%3 \n"						\
 "	bne-	1b\n"							\
 	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 }									\
 
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...)		\
 static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "_return_relaxed\n"	\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
 "	stwcx.	%0,0,%3\n"						\
 "	bne-	1b\n"							\
 	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 									\
 	return t;							\
 }
 
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...)		\
 static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
 {									\
 	int res, t;							\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
-	#asm_op " %1,%3,%0\n"						\
+	#asm_op "%I3" suffix " %1,%0,%3\n"				\
 "	stwcx.	%1,0,%4\n"						\
 "	bne-	1b\n"							\
 	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
 									\
 	return res;							\
 }
 
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign, ...)			\
+	ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__)		\
+	ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, subf)
+ATOMIC_OPS(add, add, "c", I, "xer")
+ATOMIC_OPS(sub, sub, "c", I, "xer")
 
 #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
 #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
@@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf)
 #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, suffix, sign)				\
+	ATOMIC_OP(op, asm_op, suffix, sign)				\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
 
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(or, or)
-ATOMIC_OPS(xor, xor)
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
 
 #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
 #define arch_atomic_fetch_or_relaxed  arch_atomic_fetch_or_relaxed
@@ -241,15 +241,15 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 "1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
-	add	%0,%2,%0 \n"
+	add%I2c	%0,%0,%2 \n"
 "	stwcx.	%0,0,%1 \n\
 	bne-	1b \n"
 	PPC_ATOMIC_EXIT_BARRIER
-"	subf	%0,%2,%0 \n\
+"	sub%I2c	%0,%0,%2 \n\
 2:"
 	: "=&r" (t)
-	: "r" (&v->counter), "r" (a), "r" (u)
-	: "cc", "memory");
+	: "r" (&v->counter), "rI" (a), "r" (u)
+	: "cc", "memory", "xer");
 
 	return t;
 }
-- 
2.31.1


  reply	other threads:[~2021-09-20  8:31 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-20  8:31 [PATCH v4 1/3] powerpc/bitops: " Christophe Leroy
2021-09-20  8:31 ` Christophe Leroy
2021-09-20  8:31 ` Christophe Leroy [this message]
2021-09-20  8:31   ` [PATCH v4 2/3] powerpc/atomics: " Christophe Leroy
2021-09-20  8:31 ` [PATCH v4 3/3] powerpc/atomics: Remove atomic_inc()/atomic_dec() and friends Christophe Leroy
2021-09-20  8:31   ` Christophe Leroy
2021-09-20 21:23 ` [PATCH v4 1/3] powerpc/bitops: Use immediate operand when possible Segher Boessenkool
2021-09-20 21:23   ` Segher Boessenkool
2021-09-21 15:15   ` Christophe Leroy
2021-09-21 15:15     ` Christophe Leroy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e59509e9bc688244bfc465b4ee6f48a15599028a.1632126669.git.christophe.leroy@csgroup.eu \
    --to=christophe.leroy@csgroup.eu \
    --cc=benh@kernel.crashing.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    --cc=segher@kernel.crashing.org \
    --subject='Re: [PATCH v4 2/3] powerpc/atomics: Use immediate operand when possible' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.