linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 1/2] atomic_add_unless sadness
@ 2006-01-18  6:36 Nick Piggin
  2006-01-18  6:39 ` [patch 2/2] powerpc: native atomic_add_unless Nick Piggin
  2006-01-18 16:48 ` [patch 1/2] atomic_add_unless sadness Linus Torvalds
  0 siblings, 2 replies; 8+ messages in thread
From: Nick Piggin @ 2006-01-18  6:36 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: Linux Kernel Mailing List

For some reason gcc 4 on at least i386 and ppc64 (that I have tested with)
emits two cmpxchges for atomic_add_unless unless we put branch hints in.
(it is unlikely for the "unless" case to trigger, and it is unlikely for
cmpxchg to fail).

So put these hints in for architectures which have a native cas, and
make the loop a bit more readable in the process.

So this patch isn't for inclusion just yet (incomplete, not widely tested),
however hopefully we can get some discussion about the best way to implement
this.

Index: linux-2.6/include/asm-i386/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-i386/atomic.h
+++ linux-2.6/include/asm-i386/atomic.h
@@ -231,9 +231,15 @@ static __inline__ int atomic_sub_return(
 ({								\
 	int c, old;						\
 	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
 		c = old;					\
-	c != (u);						\
+	}							\
+	likely(c != (u));					\
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
Index: linux-2.6/include/asm-ia64/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/atomic.h
+++ linux-2.6/include/asm-ia64/atomic.h
@@ -95,9 +95,15 @@ ia64_atomic64_sub (__s64 i, atomic64_t *
 ({								\
 	int c, old;						\
 	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
 		c = old;					\
-	c != (u);						\
+	}							\
+	likely(c != (u));					\
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
Index: linux-2.6/include/asm-x86_64/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/atomic.h
+++ linux-2.6/include/asm-x86_64/atomic.h
@@ -405,9 +405,15 @@ static __inline__ long atomic64_sub_retu
 ({								\
 	int c, old;						\
 	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
 		c = old;					\
-	c != (u);						\
+	}							\
+	likely(c != (u));					\
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
Index: linux-2.6/include/asm-s390/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-s390/atomic.h
+++ linux-2.6/include/asm-s390/atomic.h
@@ -89,11 +89,16 @@ static __inline__ int atomic_cmpxchg(ato
 static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
-
 	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
+	for (;;) {
+		if (unlikely(c == u))
+			break;
+		old = atomic_cmpxchg(v, c, c + a);
+		if (likely(old == c))
+			break;
 		c = old;
-	return c != u;
+	}
+	return likely(c != u);
 }
 
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
Index: linux-2.6/include/asm-sparc64/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-sparc64/atomic.h
+++ linux-2.6/include/asm-sparc64/atomic.h
@@ -78,9 +78,15 @@ extern int atomic64_sub_ret(int, atomic6
 ({								\
 	int c, old;						\
 	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
 		c = old;					\
-	c != (u);						\
+	}							\
+	likely(c != (u));					\
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
Index: linux-2.6/include/asm-m68k/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-m68k/atomic.h
+++ linux-2.6/include/asm-m68k/atomic.h
@@ -146,9 +146,15 @@ static inline void atomic_set_mask(unsig
 ({								\
 	int c, old;						\
 	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
 		c = old;					\
-	c != (u);						\
+	}							\
+	likely(c != (u));					\
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 2/2] powerpc: native atomic_add_unless
  2006-01-18  6:36 [patch 1/2] atomic_add_unless sadness Nick Piggin
@ 2006-01-18  6:39 ` Nick Piggin
  2006-01-18 17:11   ` Joel Schopp
  2006-01-18 16:48 ` [patch 1/2] atomic_add_unless sadness Linus Torvalds
  1 sibling, 1 reply; 8+ messages in thread
From: Nick Piggin @ 2006-01-18  6:39 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton; +Cc: Linux Kernel Mailing List

I didn't convert LL/SC architectures so as to "encourage" them
to do atomic_add_unless natively. Here is my probably-wrong attempt
for powerpc.

Should I bring this up on the arch list? (IIRC cross posting 
between there and lkml is discouraged)

Index: linux-2.6/include/asm-powerpc/atomic.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/atomic.h
+++ linux-2.6/include/asm-powerpc/atomic.h
@@ -8,6 +8,7 @@
 typedef struct { volatile int counter; } atomic_t;
 
 #ifdef __KERNEL__
+#include <linux/compiler.h>
 #include <asm/synch.h>
 #include <asm/asm-compat.h>
 
@@ -176,20 +177,30 @@ static __inline__ int atomic_dec_return(
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns non-zero if @v was not @u, and zero otherwise.
  */
-#define atomic_add_unless(v, a, u)			\
-({							\
-	int c, old;					\
-	c = atomic_read(v);				\
-	for (;;) {					\
-		if (unlikely(c == (u)))			\
-			break;				\
-		old = atomic_cmpxchg((v), c, c + (a));	\
-		if (likely(old == c))			\
-			break;				\
-		c = old;				\
-	}						\
-	c != (u);					\
-})
+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int t;
+	int dummy;
+
+	__asm__ __volatile__ (
+	LWSYNC_ON_SMP
+"1:	lwarx	%0,0,%2		# atomic_add_unless\n\
+	cmpw	0,%0,%4 \n\
+	beq-	2f \n\
+	add	%1,%3,%0 \n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%1,0,%2 \n\
+	bne-	1b \n"
+	ISYNC_ON_SMP
+	"\n\
+2:"
+	: "=&r" (t), "=&r" (dummy)
+	: "r" (&v->counter), "r" (a), "r" (u)
+	: "cc", "memory");
+
+	return likely(t != u);
+}
+
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
 #define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 1/2] atomic_add_unless sadness
  2006-01-18  6:36 [patch 1/2] atomic_add_unless sadness Nick Piggin
  2006-01-18  6:39 ` [patch 2/2] powerpc: native atomic_add_unless Nick Piggin
@ 2006-01-18 16:48 ` Linus Torvalds
  2006-01-18 17:10   ` Nick Piggin
  1 sibling, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2006-01-18 16:48 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Andrew Morton, Linux Kernel Mailing List



On Wed, 18 Jan 2006, Nick Piggin wrote:
>
> For some reason gcc 4 on at least i386 and ppc64 (that I have tested with)
> emit two cmpxchges for atomic_add_unless unless we put branch hints in.
> (it is unlikely for the "unless" case to trigger, and it is unlikely for
> cmpxchg to fail).

Irrelevant. If "atomic_add_unless()" is in a hot path and inlined, we're 
doing something else wrong anyway. It's not a good op to use. Just think 
of it as being very expensive.

The _only_ user of "atomic_add_unless()" is "dec_and_lock()", which isn't 
even inlined. The fact that gcc ends up "unrolling" the loop once is just 
fine.

Please keep it that way. 

		Linus

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 1/2] atomic_add_unless sadness
  2006-01-18 16:48 ` [patch 1/2] atomic_add_unless sadness Linus Torvalds
@ 2006-01-18 17:10   ` Nick Piggin
  0 siblings, 0 replies; 8+ messages in thread
From: Nick Piggin @ 2006-01-18 17:10 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Nick Piggin, Andrew Morton, Linux Kernel Mailing List

On Wed, Jan 18, 2006 at 08:48:01AM -0800, Linus Torvalds wrote:
> 
> 
> On Wed, 18 Jan 2006, Nick Piggin wrote:
> >
> > For some reason gcc 4 on at least i386 and ppc64 (that I have tested with)
> > emit two cmpxchges for atomic_add_unless unless we put branch hints in.
> > (it is unlikely for the "unless" case to trigger, and it is unlikely for
> > cmpxchg to fail).
> 
> Irrelevant. If "atomic_add_unless()" is in a hot path and inlined, we're 
> doing something else wrong anyway. It's not a good op to use. Just think 
> of it as being very expensive.
> 

I don't think it is quite irrelevant. Regardless of where it is used, it
doesn't hurt to make something smaller and more efficient.

> The _only_ user of "atomic_add_unless()" is "dec_and_lock()", which isn't 
> even inlined. The fact that gcc ends up "unrolling" the loop once is just 
> fine.
> 

dec_and_lock is not exactly a slow path. Maybe unrolling doesn't slow it
down in the traditional sense, but you're the one (rightly, I gather)
always talking about icache. In fact it unrolls an exceedingly rare second
iteration into the main code path.

> Please keep it that way. 
> 

fs/file_table.c uses it as well (inc_not_zero).

Nick


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] powerpc: native atomic_add_unless
  2006-01-18  6:39 ` [patch 2/2] powerpc: native atomic_add_unless Nick Piggin
@ 2006-01-18 17:11   ` Joel Schopp
  2006-01-18 17:28     ` Nick Piggin
  0 siblings, 1 reply; 8+ messages in thread
From: Joel Schopp @ 2006-01-18 17:11 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linus Torvalds, Andrew Morton, Linux Kernel Mailing List

> I didn't convert LL/SC architectures so as to "encourage" them
> to do atomic_add_unless natively. Here is my probably-wrong attempt
> for powerpc.
> 
> Should I bring this up on the arch list? (IIRC cross posting 
> between there and lkml is discouraged)
> 

You should bring this up on the arch list or it is likely to be missed by people 
who would give you useful feedback.

> +static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
> +{
> +	int t;
> +	int dummy;
> +
> +	__asm__ __volatile__ (
> +	LWSYNC_ON_SMP

I realize that, to preserve the behavior you currently get with the cmpxchg 
used to implement atomic_add_unless, you feel the need to put in an lwsync & 
isync.  But I would point out that neither is necessary to actually do an 
atomic add-unless.  They are simply there so cmpxchg can be overloaded to be 
used as both a lock and unlock primitive.  If atomic_add_unless isn't being 
used as a locking primitive somewhere I would love for these to be dropped.

> +"1:	lwarx	%0,0,%2		# atomic_add_unless\n\
> +	cmpw	0,%0,%4 \n\

This is just personal preference, but I find it more readable to use the 
simplified mnemonic:
cmpw %0, %4

> +	beq-	2f \n\
> +	add	%1,%3,%0 \n"

Why use a separate register here? Why not reuse %0 instead of using %1? 
Registers are valuable.

> +	PPC405_ERR77(0,%2)
> +"	stwcx.	%1,0,%2 \n\
> +	bne-	1b \n"
> +	ISYNC_ON_SMP
> +	"\n\
> +2:"
> +	: "=&r" (t), "=&r" (dummy)
> +	: "r" (&v->counter), "r" (a), "r" (u)
> +	: "cc", "memory");
> +
> +	return likely(t != u);
> +}
> +
>  #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] powerpc: native atomic_add_unless
  2006-01-18 17:11   ` Joel Schopp
@ 2006-01-18 17:28     ` Nick Piggin
  2006-01-18 21:05       ` Joel Schopp
  0 siblings, 1 reply; 8+ messages in thread
From: Nick Piggin @ 2006-01-18 17:28 UTC (permalink / raw)
  To: Joel Schopp
  Cc: Nick Piggin, Linus Torvalds, Andrew Morton, Linux Kernel Mailing List

On Wed, Jan 18, 2006 at 11:11:20AM -0600, Joel Schopp wrote:
> >I didn't convert LL/SC architectures so as to "encourage" them
> >to do atomic_add_unless natively. Here is my probably-wrong attempt
> >for powerpc.
> >
> >Should I bring this up on the arch list? (IIRC cross posting 
> >between there and lkml is discouraged)
> >
> 
> You should bring this up on the arch list or it is likely to be missed by 
> people who would give you useful feedback.
> 

OK I will if we don't get much discussion going here.

> >+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
> >+{
> >+	int t;
> >+	int dummy;
> >+
> >+	__asm__ __volatile__ (
> >+	LWSYNC_ON_SMP
> 
> I realize to preserve the behavior you currently get with the cmpxchg 
> currently used to implement atomic_add_unless that you feel the need to put 
> in an lwsync & isync.  But I would point out that neither is necessary to 
> actually atomic add unless.  They are simply so cmpxchg can be overloaded 
> to be used as both a lock and unlock primitive.  If atomic_add_unless isn't 
> being used as a locking primitive somewhere I would love for these to be 
> dropped.
> 

Well my aim is more to fit in with Documentation/atomic_ops.txt which
basically defines every atomic_xxx function which both modifies the
value and returns something as having a memory barrier either side of it.

I do agree that this is probably overkill (though simple). For example
the dec_and_test common to most refcounting tends not to be a barrier.

> >+"1:	lwarx	%0,0,%2		# atomic_add_unless\n\
> >+	cmpw	0,%0,%4 \n\
> 
> This is just personal preference, but I find it more readable to use the 
> simplified mnemonic:
> cmpw %0, %4
> 

OK. The former appears to be common in atomic.h for some reason... but
I'd be happy to defer that to the maintainers.

> >+	beq-	2f \n\
> >+	add	%1,%3,%0 \n"
> 
> Why use a separate register here? Why not reuse %0 instead of using %1? 
> Registers are valuable.
> 

You still need to get the output (t) because you need to return
whether the operation met with the "unless" case or not. If there is
a better way to do this then I'd like to know.

I couldn't even get it to assign a general register without the dummy
variable - my gcc inline asm is pretty rusty.

Thanks for the feedback,
Nick


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] powerpc: native atomic_add_unless
  2006-01-18 17:28     ` Nick Piggin
@ 2006-01-18 21:05       ` Joel Schopp
  2006-01-19 14:04         ` Nick Piggin
  0 siblings, 1 reply; 8+ messages in thread
From: Joel Schopp @ 2006-01-18 21:05 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linus Torvalds, Andrew Morton, Linux Kernel Mailing List

>>Why use a separate register here? Why not reuse %0 instead of using %1? 
>>Registers are valuable.
>>
> You still need to get the output (t) because you need to return
> whether the operation met with the "unless" case or not. If there is
> a better way to do this then I'd like to know.

I was thinking something like this would do:

static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
{
	int t;

	__asm__ __volatile__ (
	LWSYNC_ON_SMP
"1:	lwarx	%0,0,%1		# atomic_add_unless\n\
	cmpw	%0,%3 \n\
	beq-	2f \n\
	add	%0,%2,%0 \n"
	PPC405_ERR77(0,%1)
"	stwcx.	%0,0,%1 \n\
	bne-	1b \n"
	ISYNC_ON_SMP
	"subf	%0,%2,%0 \n\
2:"
	: "=&r" (t)
	: "r" (&v->counter), "r" (a), "r" (u)
	: "cc", "memory");

	return likely(t != u);
}

Though if I could figure out how to get gcc to do it I'd much rather do 
something like this (which won't compile but I think illustrates the concept):

static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
{
	int t;

	__asm__ __volatile__ (
	LWSYNC_ON_SMP
"1:	lwarx	%0,0,%1		# atomic_add_unless\n\
	cmpw	%0,%3 \n\
	beq-	3f \n\
	add	%0,%2,%0 \n"
	PPC405_ERR77(0,%1)
"	stwcx.	%0,0,%1 \n\
	bne-	1b \n"
	ISYNC_ON_SMP
2:"
	: "=&r" (t)
	: "r" (&v->counter), "r" (a), "r" (u)
	: "cc", "memory");

	return 1;
3:
	return 0;
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] powerpc: native atomic_add_unless
  2006-01-18 21:05       ` Joel Schopp
@ 2006-01-19 14:04         ` Nick Piggin
  0 siblings, 0 replies; 8+ messages in thread
From: Nick Piggin @ 2006-01-19 14:04 UTC (permalink / raw)
  To: Joel Schopp
  Cc: Nick Piggin, Linus Torvalds, Andrew Morton, Linux Kernel Mailing List

On Wed, Jan 18, 2006 at 03:05:25PM -0600, Joel Schopp wrote:
> >>Why use a separate register here? Why not reuse %0 instead of using %1? 
> >>Registers are valuable.
> >>
> >You still need to get the output (t) because you need to return
> >whether the operation met with the "unless" case or not. If there is
> >a better way to do this then I'd like to know.
> 
> I was thinking something like this would do:
> 
> static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
> {
> 	int t;
> 
> 	__asm__ __volatile__ (
> 	LWSYNC_ON_SMP
> "1:	lwarx	%0,0,%1		# atomic_add_unless\n\
> 	cmpw	%0,%3 \n\
> 	beq-	2f \n\
> 	add	%0,%2,%0 \n"
> 	PPC405_ERR77(0,%1)
> "	stwcx.	%0,0,%1 \n\
> 	bne-	1b \n"
> 	ISYNC_ON_SMP
> 	"subf	%0,%2,%0 \n\
> 2:"
> 	: "=&r" (t)
> 	: "r" (&v->counter), "r" (a), "r" (u)
> 	: "cc", "memory");
> 
> 	return likely(t != u);
> }
> 

Oh yes, for some reason I thought that wouldn't work in all cases,
but I think that's nice. Thanks!

> Though if I could figure out how to get gcc to do it I'd much rather do 
> something like this (which won't compile but I think illustrates the 
> concept):
> 
> static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
> {
> 	int t;
> 
> 	__asm__ __volatile__ (
> 	LWSYNC_ON_SMP
> "1:	lwarx	%0,0,%1		# atomic_add_unless\n\
> 	cmpw	%0,%3 \n\
> 	beq-	3f \n\
> 	add	%0,%2,%0 \n"
> 	PPC405_ERR77(0,%1)
> "	stwcx.	%0,0,%1 \n\
> 	bne-	1b \n"
> 	ISYNC_ON_SMP
> 2:"
> 	: "=&r" (t)
> 	: "r" (&v->counter), "r" (a), "r" (u)
> 	: "cc", "memory");
> 
> 	return 1;
> 3:
> 	return 0;
> }

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-01-19 14:04 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-18  6:36 [patch 1/2] atomic_add_unless sadness Nick Piggin
2006-01-18  6:39 ` [patch 2/2] powerpc: native atomic_add_unless Nick Piggin
2006-01-18 17:11   ` Joel Schopp
2006-01-18 17:28     ` Nick Piggin
2006-01-18 21:05       ` Joel Schopp
2006-01-19 14:04         ` Nick Piggin
2006-01-18 16:48 ` [patch 1/2] atomic_add_unless sadness Linus Torvalds
2006-01-18 17:10   ` Nick Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).