* [PATCH] ia64: Rewrite atomic_add and atomic_sub
@ 2018-01-18 18:39 Matthew Wilcox
  2018-01-18 19:02 ` Luck, Tony
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Matthew Wilcox @ 2018-01-18 18:39 UTC (permalink / raw)
  To: linux-ia64

From: Matthew Wilcox <mawilcox@microsoft.com>

Force __builtin_constant_p to evaluate whether the argument to atomic_add
& atomic_sub is constant in the front-end before optimisations which
can lead GCC to output a call to __bad_increment_for_ia64_fetch_and_add().

See GCC bugzilla 83653.

Signed-off-by: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 arch/ia64/include/asm/atomic.h | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 28e02c99be6d..762eeb0fcc1d 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)				\
 ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
-#define atomic_add_return(i,v)						\
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i)	__builtin_constant_p(i) ?		\
+		((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||	\
+		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+
+#define atomic_add_return(i, v)						\
 ({									\
-	int __ia64_aar_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
-		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
-		: ia64_atomic_add(__ia64_aar_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
+				ia64_atomic_add(__i, v);		\
 })
 
-#define atomic_sub_return(i,v)						\
+#define atomic_sub_return(i, v)						\
 ({									\
-	int __ia64_asr_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
-		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
-		: ia64_atomic_sub(__ia64_asr_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :	\
+				ia64_atomic_sub(__i, v);		\
 })
+#else
+#define atomic_add_return(i, v)	ia64_atomic_add(i, v)
+#define atomic_sub_return(i, v)	ia64_atomic_sub(i, v)
+#endif
 
 #define atomic_fetch_add(i,v)						\
 ({									\
-- 
2.15.1



* Re: [PATCH] ia64: Rewrite atomic_add and atomic_sub
  2018-01-18 18:39 [PATCH] ia64: Rewrite atomic_add and atomic_sub Matthew Wilcox
@ 2018-01-18 19:02 ` Luck, Tony
  2018-01-18 19:19 ` Matthew Wilcox
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Luck, Tony @ 2018-01-18 19:02 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 18, 2018 at 10:39:53AM -0800, Matthew Wilcox wrote:
> +	int __i = (i);							\
> +	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
> +	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
> +				ia64_atomic_add(__i, v);		\

"static"?  Is that safe? What if we are executing
atomic_add on multiple cpus at the same time?

Do all those "static" declarations resolve to separate
memory locations per call site?

-Tony


* Re: [PATCH] ia64: Rewrite atomic_add and atomic_sub
  2018-01-18 18:39 [PATCH] ia64: Rewrite atomic_add and atomic_sub Matthew Wilcox
  2018-01-18 19:02 ` Luck, Tony
@ 2018-01-18 19:19 ` Matthew Wilcox
  2018-01-18 19:19 ` Jakub Jelinek
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Matthew Wilcox @ 2018-01-18 19:19 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 18, 2018 at 11:02:43AM -0800, Luck, Tony wrote:
> On Thu, Jan 18, 2018 at 10:39:53AM -0800, Matthew Wilcox wrote:
> > +	int __i = (i);							\
> > +	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
> > +	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
> > +				ia64_atomic_add(__i, v);		\
> 
> "static"?  Is that safe? What if we are executing
> atomic_add on multiple cpus at the same time?
> 
> Do all those "static" declarations resolve to separate
> memory locations per call site?

My understanding is that they get optimised away again by the front end
and so they never resolve into an actual memory location.  Here's my test
program, which demonstrates two things:

1. Calling it with a complex "i" does indeed result in i only being
evaluated once.
2. It manages to successfully turn each call into the right version.

I compiled it on my laptop with gcc -W -Wall -O2 -c test.c -o test.o
and examined the results of objdump -Sr.

typedef struct { unsigned long counter; } atomic_t;
long ia64_fetch_and_add(long i, unsigned long *v);
long ia64_atomic_add(long i, atomic_t *a);

#define __ia64_atomic_const(i)  __builtin_constant_p(i) ?               \
                ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||       \
                 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0

#define atomic_add_return(i, v)                                         \
({                                                                      \
        int __i = (i);                                                  \
        static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
        __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :      \
                                ia64_atomic_add(__i, v);                \
})

int main(int argc, char **argv)
{
	atomic_t a;
	atomic_add_return(1, &a);
	atomic_add_return(3, &a);
	atomic_add_return(argc++, &a);
	atomic_add_return(argc++, &a);
	atomic_add_return(4, &a);
}
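
For reference, the branch each of those calls should end up in at -O2,
going by the macro logic above:

/*
 * atomic_add_return(1, &a)      -> ia64_fetch_and_add  (constant, in the set)
 * atomic_add_return(3, &a)      -> ia64_atomic_add     (constant, but not +/-1, 4, 8 or 16)
 * atomic_add_return(argc++, &a) -> ia64_atomic_add     (not constant; argc++ evaluated once via __i)
 * atomic_add_return(argc++, &a) -> ia64_atomic_add     (likewise)
 * atomic_add_return(4, &a)      -> ia64_fetch_and_add  (constant, in the set)
 */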



* Re: [PATCH] ia64: Rewrite atomic_add and atomic_sub
  2018-01-18 18:39 [PATCH] ia64: Rewrite atomic_add and atomic_sub Matthew Wilcox
  2018-01-18 19:02 ` Luck, Tony
  2018-01-18 19:19 ` Matthew Wilcox
@ 2018-01-18 19:19 ` Jakub Jelinek
  2018-01-18 21:52 ` Tony Luck
  2018-01-21  3:21 ` Matthew Wilcox
  4 siblings, 0 replies; 6+ messages in thread
From: Jakub Jelinek @ 2018-01-18 19:19 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 18, 2018 at 11:02:43AM -0800, Luck, Tony wrote:
> On Thu, Jan 18, 2018 at 10:39:53AM -0800, Matthew Wilcox wrote:
> > +	int __i = (i);							\
> > +	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
> > +	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
> > +				ia64_atomic_add(__i, v);		\
> 
> "static"?  Is that safe? What if we are executing
> atomic_add on multiple cpus at the same time?

In C, static variables must be initialized at compile time, and each
expansion of the macro has its own scope and thus its own static variable.
If they weren't optimized away, each of them would waste 4 bytes in the
.rodata section, but the point of the patch is to do this only when
optimizing and to let the compiler optimize those variables away.
Because static variables need their initializers evaluated early in the
compiler FE, __builtin_constant_p is resolved right after folding of the
argument, whereas for automatic variables, when optimizing,
__builtin_constant_p folding is deferred until inlining and lots of other
optimizations have been performed.

What this means is that if you do:
atomic_add_return (4, something);
it will be optimized into ia64_fetch_and_add, but if you e.g. do:
int x = 4;
atomic_add_return (x, something);
it will not.  Similarly, with:
static inline void
foo (int x, ...)
{
  atomic_add_return (x, something);
}
and a call
foo (4, ...);
it will again be just ia64_atomic_add.
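
Put together as one compilable sketch (illustrative only; the stub
declarations mirror the ones in Willy's test program, and foo(),
example() and "a" are just made-up names for the example):

typedef struct { unsigned long counter; } atomic_t;
long ia64_fetch_and_add(long i, unsigned long *v);
long ia64_atomic_add(long i, atomic_t *a);

#define __ia64_atomic_const(i)  __builtin_constant_p(i) ?               \
                ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||       \
                 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0

#define atomic_add_return(i, v)                                         \
({                                                                      \
        int __i = (i);                                                  \
        static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
        __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :      \
                                ia64_atomic_add(__i, v);                \
})

static atomic_t a;

static inline void foo(int x)
{
	/* x is a parameter, not a literal: the static initializer is
	 * folded before inlining, so this always calls ia64_atomic_add,
	 * even for foo(4). */
	atomic_add_return(x, &a);
}

void example(void)
{
	int x = 4;

	atomic_add_return(4, &a);	/* literal in the set: ia64_fetch_and_add */
	atomic_add_return(x, &a);	/* local variable: ia64_atomic_add */
	foo(4);				/* constant only after inlining: ia64_atomic_add */
}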

	Jakub


* [PATCH] ia64: Rewrite atomic_add and atomic_sub
  2018-01-18 18:39 [PATCH] ia64: Rewrite atomic_add and atomic_sub Matthew Wilcox
                   ` (2 preceding siblings ...)
  2018-01-18 19:19 ` Jakub Jelinek
@ 2018-01-18 21:52 ` Tony Luck
  2018-01-21  3:21 ` Matthew Wilcox
  4 siblings, 0 replies; 6+ messages in thread
From: Tony Luck @ 2018-01-18 21:52 UTC (permalink / raw)
  To: linux-ia64

From: Matthew Wilcox <mawilcox@microsoft.com>

Force __builtin_constant_p to evaluate whether the argument to atomic_add
& atomic_sub is constant in the front-end before optimisations which
can lead GCC to output a call to __bad_increment_for_ia64_fetch_and_add().

See GCC bugzilla 83653.

Signed-off-by: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---

Linus: Willy isn't pining for the old days working on ia64. He
has some patches to generic code that he can't push because this
compiler issue breaks the ia64 build with his new stuff applied.

 arch/ia64/include/asm/atomic.h | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 28e02c99be6d..762eeb0fcc1d 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)				\
 ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
-#define atomic_add_return(i,v)						\
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i)	__builtin_constant_p(i) ?		\
+		((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||	\
+		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+
+#define atomic_add_return(i, v)						\
 ({									\
-	int __ia64_aar_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
-		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
-		: ia64_atomic_add(__ia64_aar_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
+				ia64_atomic_add(__i, v);		\
 })
 
-#define atomic_sub_return(i,v)						\
+#define atomic_sub_return(i, v)						\
 ({									\
-	int __ia64_asr_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
-		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
-		: ia64_atomic_sub(__ia64_asr_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :	\
+				ia64_atomic_sub(__i, v);		\
 })
+#else
+#define atomic_add_return(i, v)	ia64_atomic_add(i, v)
+#define atomic_sub_return(i, v)	ia64_atomic_sub(i, v)
+#endif
 
 #define atomic_fetch_add(i,v)						\
 ({									\
-- 
2.14.1



* Re: [PATCH] ia64: Rewrite atomic_add and atomic_sub
  2018-01-18 18:39 [PATCH] ia64: Rewrite atomic_add and atomic_sub Matthew Wilcox
                   ` (3 preceding siblings ...)
  2018-01-18 21:52 ` Tony Luck
@ 2018-01-21  3:21 ` Matthew Wilcox
  4 siblings, 0 replies; 6+ messages in thread
From: Matthew Wilcox @ 2018-01-21  3:21 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 18, 2018 at 11:02:43AM -0800, Luck, Tony wrote:
> On Thu, Jan 18, 2018 at 10:39:53AM -0800, Matthew Wilcox wrote:
> > +	int __i = (i);							\
> > +	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
> > +	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
> > +				ia64_atomic_add(__i, v);		\

I felt bad about only covering atomic_add_return and atomic_sub_return
and leaving all the other sites exposed to this latent bug.  So here's
a patch which fixes all of them.  As a bonus it's also a nice clean-up.

From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 12 Jan 2018 14:14:37 -0500
Subject: [PATCH] ia64: Rewrite atomic_add and atomic_sub

Force __builtin_constant_p to evaluate whether the argument to atomic_add
& atomic_sub is constant in the front-end before optimisations which
can lead GCC to output a call to __bad_increment_for_ia64_fetch_and_add().

See GCC bugzilla 83653.

Signed-off-by: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 arch/ia64/include/asm/atomic.h | 58 +++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 28e02c99be6d..2524fb60fbc2 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -65,14 +65,20 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)				\
 ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i)						\
+	static const int __ia64_atomic_p = __builtin_constant_p(i) ?	\
+		((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||	\
+		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\
+	__ia64_atomic_p
+#else
+#define __ia64_atomic_const(i)	0
+#endif
+
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+	__ia64_atomic_const(i)						\
 		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
 		: ia64_atomic_add(__ia64_aar_i, v);			\
 })
@@ -80,11 +86,7 @@ ATOMIC_OPS(sub, -)
 #define atomic_sub_return(i,v)						\
 ({									\
 	int __ia64_asr_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+	__ia64_atomic_const(i)						\
 		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
@@ -92,11 +94,7 @@ ATOMIC_OPS(sub, -)
 #define atomic_fetch_add(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+	__ia64_atomic_const(i)						\
 		? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)	\
 		: ia64_atomic_fetch_add(__ia64_aar_i, v);		\
 })
@@ -104,11 +102,7 @@ ATOMIC_OPS(sub, -)
 #define atomic_fetch_sub(i,v)						\
 ({									\
 	int __ia64_asr_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+	__ia64_atomic_const(i)						\
 		? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)	\
 		: ia64_atomic_fetch_sub(__ia64_asr_i, v);		\
 })
@@ -169,11 +163,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_add_return(i,v)					\
 ({									\
 	long __ia64_aar_i = (i);					\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+	__ia64_atomic_const(i)						\
 		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
 		: ia64_atomic64_add(__ia64_aar_i, v);			\
 })
@@ -181,11 +171,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_sub_return(i,v)					\
 ({									\
 	long __ia64_asr_i = (i);					\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+	__ia64_atomic_const(i)						\
 		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
 		: ia64_atomic64_sub(__ia64_asr_i, v);			\
 })
@@ -193,11 +179,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_fetch_add(i,v)						\
 ({									\
 	long __ia64_aar_i = (i);					\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+	__ia64_atomic_const(i)						\
 		? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)	\
 		: ia64_atomic64_fetch_add(__ia64_aar_i, v);		\
 })
@@ -205,11 +187,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_fetch_sub(i,v)						\
 ({									\
 	long __ia64_asr_i = (i);					\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+	__ia64_atomic_const(i)						\
 		? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)	\
 		: ia64_atomic64_fetch_sub(__ia64_asr_i, v);		\
 })
-- 
2.15.1


