All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13  7:10 ` Paul Mackerras
  0 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-13  7:10 UTC (permalink / raw)
  To: benh, torvalds, akpm, linuxppc-dev, linux-kernel

Many processor architectures have no 64-bit atomic instructions, but
we need atomic64_t in order to support the perf_counter subsystem.

This adds an implementation of 64-bit atomic operations using hashed
spinlocks to provide atomicity.  For each atomic operation, the address
of the atomic64_t variable is hashed to an index into an array of 16
spinlocks.  That spinlock is taken (with interrupts disabled) around the
operation, which can then be coded non-atomically within the lock.

On UP, all the spinlock manipulation goes away and we simply disable
interrupts around each operation.  In fact gcc eliminates the whole
atomic64_lock variable as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
Linus, Andrew: OK if this goes in via the powerpc tree?

 include/asm-generic/atomic64.h |   42 ++++++++++
 lib/Kconfig                    |    6 ++
 lib/Makefile                   |    2 +
 lib/atomic64.c                 |  175 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 225 insertions(+), 0 deletions(-)
 create mode 100644 include/asm-generic/atomic64.h
 create mode 100644 lib/atomic64.c

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
new file mode 100644
index 0000000..b18ce4f
--- /dev/null
+++ b/include/asm-generic/atomic64.h
@@ -0,0 +1,42 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_GENERIC_ATOMIC64_H
+#define _ASM_GENERIC_ATOMIC64_H
+
+typedef struct {
+	long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+extern long long atomic64_read(const atomic64_t *v);
+extern void	 atomic64_set(atomic64_t *v, long long i);
+extern void	 atomic64_add(long long a, atomic64_t *v);
+extern long long atomic64_add_return(long long a, atomic64_t *v);
+extern void	 atomic64_sub(long long a, atomic64_t *v);
+extern long long atomic64_sub_return(long long a, atomic64_t *v);
+extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
+extern long long atomic64_xchg(atomic64_t *v, long long new);
+extern int	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) 	atomic64_add_unless((v), 1LL, 0LL)
+
+#endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/lib/Kconfig b/lib/Kconfig
index 9960be0..bb1326d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -194,4 +194,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 config NLATTR
 	bool
 
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+       bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 34c5c0e..8e9bcf9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -95,6 +95,8 @@ obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
+obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 0000000..c5e7255
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,175 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to be used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS	16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+	spinlock_t lock;
+	char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)	/* map v to its hashed lock */
+{
+	unsigned long addr = (unsigned long) v;
+
+	addr >>= L1_CACHE_SHIFT;	/* drop the low L1_CACHE_SHIFT address bits */
+	addr ^= (addr >> 8) ^ (addr >> 16);	/* fold higher bits into the low bits */
+	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;	/* mask works as NR_LOCKS (16) is a power of two */
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter = i;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_dec_if_positive(atomic64_t *v)	/* decrement *v unless that would make it negative */
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter - 1;	/* candidate new value */
+	if (val >= 0)
+		v->counter = val;	/* stored only when the result is not negative */
+	spin_unlock_irqrestore(lock, flags);
+	return val;	/* old value minus one, even if nothing was stored; negative means no decrement */
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	if (val == o)
+		v->counter = n;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{	/* Add a to *v unless *v equals u; returns non-zero iff the add was performed. */
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	int ret = 0;	/* 0: counter equalled u, nothing was added */
+
+	spin_lock_irqsave(lock, flags);	/* compare and add happen under the same lock */
+	if (v->counter != u) {
+		v->counter += a;
+		ret = 1;	/* non-zero: the add was performed */
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return ret;	/* atomic64_inc_not_zero() expands to this call and needs non-zero on success */
+}
+
+static int init_atomic64_lock(void)
+{
+	int i;
+
+	for (i = 0; i < NR_LOCKS; ++i)
+		spin_lock_init(&atomic64_lock[i].lock);
+	return 0;
+}
+
+pure_initcall(init_atomic64_lock);
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13  7:10 ` Paul Mackerras
  0 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-13  7:10 UTC (permalink / raw)
  To: benh, torvalds, akpm, linuxppc-dev, linux-kernel

Many processor architectures have no 64-bit atomic instructions, but
we need atomic64_t in order to support the perf_counter subsystem.

This adds an implementation of 64-bit atomic operations using hashed
spinlocks to provide atomicity.  For each atomic operation, the address=

of the atomic64_t variable is hashed to an index into an array of 16
spinlocks.  That spinlock is taken (with interrupts disabled) around th=
e
operation, which can then be coded non-atomically within the lock.

On UP, all the spinlock manipulation goes away and we simply disable
interrupts around each operation.  In fact gcc eliminates the whole
atomic64_lock variable as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
Linus, Andrew: OK if this goes in via the powerpc tree?

 include/asm-generic/atomic64.h |   42 ++++++++++
 lib/Kconfig                    |    6 ++
 lib/Makefile                   |    2 +
 lib/atomic64.c                 |  175 ++++++++++++++++++++++++++++++++=
++++++++
 4 files changed, 225 insertions(+), 0 deletions(-)
 create mode 100644 include/asm-generic/atomic64.h
 create mode 100644 lib/atomic64.c

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomi=
c64.h
new file mode 100644
index 0000000..b18ce4f
--- /dev/null
+++ b/include/asm-generic/atomic64.h
@@ -0,0 +1,42 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright =A9 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_GENERIC_ATOMIC64_H
+#define _ASM_GENERIC_ATOMIC64_H
+
+typedef struct {
+=09long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i)=09{ (i) }
+
+extern long long atomic64_read(const atomic64_t *v);
+extern void=09 atomic64_set(atomic64_t *v, long long i);
+extern void=09 atomic64_add(long long a, atomic64_t *v);
+extern long long atomic64_add_return(long long a, atomic64_t *v);
+extern void=09 atomic64_sub(long long a, atomic64_t *v);
+extern long long atomic64_sub_return(long long a, atomic64_t *v);
+extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long lon=
g n);
+extern long long atomic64_xchg(atomic64_t *v, long long new);
+extern int=09 atomic64_add_unless(atomic64_t *v, long long a, long lon=
g u);
+
+#define atomic64_add_negative(a, v)=09(atomic64_add_return((a), (v)) <=
 0)
+#define atomic64_inc(v)=09=09=09atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)=09=09atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) =09(atomic64_inc_return(v) =3D=3D 0)
+#define atomic64_sub_and_test(a, v)=09(atomic64_sub_return((a), (v)) =3D=
=3D 0)
+#define atomic64_dec(v)=09=09=09atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)=09=09atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)=09(atomic64_dec_return((v)) =3D=3D 0)=

+#define atomic64_inc_not_zero(v) =09atomic64_add_unless((v), 1LL, 0LL)=

+
+#endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/lib/Kconfig b/lib/Kconfig
index 9960be0..bb1326d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -194,4 +194,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 config NLATTR
 =09bool
=20
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+       bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 34c5c0e..8e9bcf9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -95,6 +95,8 @@ obj-$(CONFIG_DMA_API_DEBUG) +=3D dma-debug.o
=20
 obj-$(CONFIG_GENERIC_CSUM) +=3D checksum.o
=20
+obj-$(CONFIG_GENERIC_ATOMIC64) +=3D atomic64.o
+
 hostprogs-y=09:=3D gen_crc32table
 clean-files=09:=3D crc32table.h
=20
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 0000000..c5e7255
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,175 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright =A9 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to be used on
+ * systems with small numbers of CPUs (<=3D 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS=0916
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+=09spinlock_t lock;
+=09char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+=09unsigned long addr =3D (unsigned long) v;
+
+=09addr >>=3D L1_CACHE_SHIFT;
+=09addr ^=3D (addr >> 8) ^ (addr >> 16);
+=09return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+
+=09spin_lock_irqsave(lock, flags);
+=09v->counter =3D i;
+=09spin_unlock_irqrestore(lock, flags);
+}
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+
+=09spin_lock_irqsave(lock, flags);
+=09v->counter +=3D a;
+=09spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter +=3D a;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+
+=09spin_lock_irqsave(lock, flags);
+=09v->counter -=3D a;
+=09spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter -=3D a;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter - 1;
+=09if (val >=3D 0)
+=09=09v->counter =3D val;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter;
+=09if (val =3D=3D o)
+=09=09v->counter =3D n;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09long long val;
+
+=09spin_lock_irqsave(lock, flags);
+=09val =3D v->counter;
+=09v->counter =3D new;
+=09spin_unlock_irqrestore(lock, flags);
+=09return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{=09/* Add a to *v unless *v equals u; non-zero if added. */
+=09unsigned long flags;
+=09spinlock_t *lock =3D lock_addr(v);
+=09int ret =3D 0;=09/* 0: counter equalled u, nothing added */
+
+=09spin_lock_irqsave(lock, flags);
+=09if (v->counter !=3D u) {
+=09=09v->counter +=3D a;
+=09=09ret =3D 1;=09/* non-zero: the add was performed */
+=09}
+=09spin_unlock_irqrestore(lock, flags);
+=09return ret;
+}
+
+static int init_atomic64_lock(void)
+{
+=09int i;
+
+=09for (i =3D 0; i < NR_LOCKS; ++i)
+=09=09spin_lock_init(&atomic64_lock[i].lock);
+=09return 0;
+}
+
+pure_initcall(init_atomic64_lock);
--=20
1.6.0.4

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13  7:10 ` Paul Mackerras
@ 2009-06-13 20:13   ` Linus Torvalds
  -1 siblings, 0 replies; 30+ messages in thread
From: Linus Torvalds @ 2009-06-13 20:13 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, akpm, linuxppc-dev, linux-kernel



On Sat, 13 Jun 2009, Paul Mackerras wrote:
>
> Linus, Andrew: OK if this goes in via the powerpc tree?

Ok by me.

		Linus

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13 20:13   ` Linus Torvalds
  0 siblings, 0 replies; 30+ messages in thread
From: Linus Torvalds @ 2009-06-13 20:13 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, linux-kernel, linuxppc-dev



On Sat, 13 Jun 2009, Paul Mackerras wrote:
>
> Linus, Andrew: OK if this goes in via the powerpc tree?

Ok by me.

		Linus

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13 20:13   ` Linus Torvalds
@ 2009-06-13 20:25     ` Linus Torvalds
  -1 siblings, 0 replies; 30+ messages in thread
From: Linus Torvalds @ 2009-06-13 20:25 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, akpm, linuxppc-dev, linux-kernel



On Sat, 13 Jun 2009, Linus Torvalds wrote:
> 
> On Sat, 13 Jun 2009, Paul Mackerras wrote:
> >
> > Linus, Andrew: OK if this goes in via the powerpc tree?
> 
> Ok by me.

Btw, do 32-bit architectures really necessarily want 64-bit performance 
counters? 

I realize that 32-bit counters will overflow pretty easily, but I do 
wonder about the performance impact of doing things like hashed spinlocks 
for 64-bit counters. Maybe the downsides of 64-bit perf counters on such 
architectures might outweigh the upsides?

		Linus

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13 20:25     ` Linus Torvalds
  0 siblings, 0 replies; 30+ messages in thread
From: Linus Torvalds @ 2009-06-13 20:25 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, linux-kernel, linuxppc-dev



On Sat, 13 Jun 2009, Linus Torvalds wrote:
> 
> On Sat, 13 Jun 2009, Paul Mackerras wrote:
> >
> > Linus, Andrew: OK if this goes in via the powerpc tree?
> 
> Ok by me.

Btw, do 32-bit architectures really necessarily want 64-bit performance 
counters? 

I realize that 32-bit counters will overflow pretty easily, but I do 
wonder about the performance impact of doing things like hashed spinlocks 
for 64-bit counters. Maybe the downsides of 64-bit perf counters on such 
architectures might outweigh the upsides?

		Linus

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13 20:25     ` Linus Torvalds
@ 2009-06-13 20:56       ` Ingo Molnar
  -1 siblings, 0 replies; 30+ messages in thread
From: Ingo Molnar @ 2009-06-13 20:56 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Paul Mackerras, benh, akpm, linuxppc-dev, linux-kernel


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sat, 13 Jun 2009, Linus Torvalds wrote:
> > 
> > On Sat, 13 Jun 2009, Paul Mackerras wrote:
> > >
> > > Linus, Andrew: OK if this goes in via the powerpc tree?
> > 
> > Ok by me.
> 
> Btw, do 32-bit architectures really necessarily want 64-bit 
> performance counters?
> 
> I realize that 32-bit counters will overflow pretty easily, but I 
> do wonder about the performance impact of doing things like hashed 
> spinlocks for 64-bit counters. Maybe the downsides of 64-bit perf 
> counters on such architectures might outweight the upsides?

We account all sorts of non-hw bits via atomic64_t as well - for 
example time related counters in nanoseconds - which wrap 32 bits at 
4 seconds.

There's also security/stability relevant bits:

        counter->id             = atomic64_inc_return(&perf_counter_id);

We don't really want that ID to wrap ever - it could create a leaking 
of one PMU context into another. (We could rewrite it by putting a 
global lock around it, but still - this is a convenient primitive.)

In select places we might be able to reduce the use of atomic64_t 
(that might make performance sense anyway) - but to get rid of all 
of them would be quite painful. We initially started with a 32-bit 
implementation and it was quite painful with fast-paced units.

So since Paul has already coded the wrappers up ... i'd really 
prefer that, unless there's really compelling reasons not to do it.

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13 20:56       ` Ingo Molnar
  0 siblings, 0 replies; 30+ messages in thread
From: Ingo Molnar @ 2009-06-13 20:56 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: akpm, Paul Mackerras, linux-kernel, linuxppc-dev


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sat, 13 Jun 2009, Linus Torvalds wrote:
> > 
> > On Sat, 13 Jun 2009, Paul Mackerras wrote:
> > >
> > > Linus, Andrew: OK if this goes in via the powerpc tree?
> > 
> > Ok by me.
> 
> Btw, do 32-bit architectures really necessarily want 64-bit 
> performance counters?
> 
> I realize that 32-bit counters will overflow pretty easily, but I 
> do wonder about the performance impact of doing things like hashed 
> spinlocks for 64-bit counters. Maybe the downsides of 64-bit perf 
> counters on such architectures might outweight the upsides?

We account all sorts of non-hw bits via atomic64_t as well - for 
example time related counters in nanoseconds - which wrap 32 bits at 
4 seconds.

There's also security/stability relevant bits:

        counter->id             = atomic64_inc_return(&perf_counter_id);

We don't really want that ID to wrap ever - it could create a leaking 
of one PMU context into another. (We could rewrite it by putting a 
global lock around it, but still - this is a convenient primitive.)

In select places we might be able to reduce the use of atomic64_t 
(that might make performance sense anyway) - but to get rid of all 
of them would be quite painful. We initially started with a 32-bit 
implementation and it was quite painful with fast-paced units.

So since Paul has already coded the wrappers up ... i'd really 
prefer that, unless there's really compelling reasons not to do it.

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13  7:10 ` Paul Mackerras
@ 2009-06-13 21:53   ` Arnd Bergmann
  -1 siblings, 0 replies; 30+ messages in thread
From: Arnd Bergmann @ 2009-06-13 21:53 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, torvalds, akpm, linuxppc-dev, linux-kernel

On Saturday 13 June 2009, Paul Mackerras wrote:
> +extern long long atomic64_read(const atomic64_t *v);
> +extern void     atomic64_set(atomic64_t *v, long long i);
> +extern void     atomic64_add(long long a, atomic64_t *v);
> +extern long long atomic64_add_return(long long a, atomic64_t *v);
> +extern void     atomic64_sub(long long a, atomic64_t *v);
> +extern long long atomic64_sub_return(long long a, atomic64_t *v);
> +extern long long atomic64_dec_if_positive(atomic64_t *v);
> +extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
> +extern long long atomic64_xchg(atomic64_t *v, long long new);
> +extern int      atomic64_add_unless(atomic64_t *v, long long a, long long u);
> +
> +#define atomic64_add_negative(a, v)    (atomic64_add_return((a), (v)) < 0)
> +#define atomic64_inc(v)                        atomic64_add(1LL, (v))
> +#define atomic64_inc_return(v)         atomic64_add_return(1LL, (v))
> +#define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
> +#define atomic64_sub_and_test(a, v)    (atomic64_sub_return((a), (v)) == 0)
> +#define atomic64_dec(v)                        atomic64_sub(1LL, (v))
> +#define atomic64_dec_return(v)         atomic64_sub_return(1LL, (v))
> +#define atomic64_dec_and_test(v)       (atomic64_dec_return((v)) == 0)
> +#define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1LL, 0LL)
> +

How about also doing these:?

/* Arguments are parenthesized so that e.g. atomic64_sub(x - y, v) negates the whole expression. */
#define atomic64_sub(a, v)		atomic64_add(-(a), (v))
#define atomic64_sub_return(a, v)	atomic64_add_return(-(a), (v))
#define atomic64_add(a, v)		((void)atomic64_add_return((a), (v)))

The cost to the caller (one or two instructions per call site)
seems to be about the same as for the other wrapper macros.

	Arnd <><

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-13 21:53   ` Arnd Bergmann
  0 siblings, 0 replies; 30+ messages in thread
From: Arnd Bergmann @ 2009-06-13 21:53 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, torvalds, linux-kernel, linuxppc-dev

On Saturday 13 June 2009, Paul Mackerras wrote:
> +extern long long atomic64_read(const atomic64_t *v);
> +extern void     atomic64_set(atomic64_t *v, long long i);
> +extern void     atomic64_add(long long a, atomic64_t *v);
> +extern long long atomic64_add_return(long long a, atomic64_t *v);
> +extern void     atomic64_sub(long long a, atomic64_t *v);
> +extern long long atomic64_sub_return(long long a, atomic64_t *v);
> +extern long long atomic64_dec_if_positive(atomic64_t *v);
> +extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
> +extern long long atomic64_xchg(atomic64_t *v, long long new);
> +extern int      atomic64_add_unless(atomic64_t *v, long long a, long long u);
> +
> +#define atomic64_add_negative(a, v)    (atomic64_add_return((a), (v)) < 0)
> +#define atomic64_inc(v)                        atomic64_add(1LL, (v))
> +#define atomic64_inc_return(v)         atomic64_add_return(1LL, (v))
> +#define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
> +#define atomic64_sub_and_test(a, v)    (atomic64_sub_return((a), (v)) == 0)
> +#define atomic64_dec(v)                        atomic64_sub(1LL, (v))
> +#define atomic64_dec_return(v)         atomic64_sub_return(1LL, (v))
> +#define atomic64_dec_and_test(v)       (atomic64_dec_return((v)) == 0)
> +#define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1LL, 0LL)
> +

How about also doing these:?

/* Arguments are parenthesized so that e.g. atomic64_sub(x - y, v) negates the whole expression. */
#define atomic64_sub(a, v)		atomic64_add(-(a), (v))
#define atomic64_sub_return(a, v)	atomic64_add_return(-(a), (v))
#define atomic64_add(a, v)		((void)atomic64_add_return((a), (v)))

The cost to the caller (one or two instructions per call site)
seems to be about the same as for the other wrapper macros.

	Arnd <><

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13 20:25     ` Linus Torvalds
@ 2009-06-14 11:53       ` Avi Kivity
  -1 siblings, 0 replies; 30+ messages in thread
From: Avi Kivity @ 2009-06-14 11:53 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Paul Mackerras, benh, akpm, linuxppc-dev, linux-kernel

Linus Torvalds wrote:
> On Sat, 13 Jun 2009, Linus Torvalds wrote:
>   
>> On Sat, 13 Jun 2009, Paul Mackerras wrote:
>>     
>>> Linus, Andrew: OK if this goes in via the powerpc tree?
>>>       
>> Ok by me.
>>     
>
> Btw, do 32-bit architectures really necessarily want 64-bit performance 
> counters? 
>
> I realize that 32-bit counters will overflow pretty easily, but I do 
> wonder about the performance impact of doing things like hashed spinlocks 
> for 64-bit counters. Maybe the downsides of 64-bit perf counters on such 
> architectures might outweight the upsides?
>   

An alternative implementation using 64-bit cmpxchg will recover most of 
the costs of hashed spinlocks.  I assume most serious 32-bit 
architectures have them?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-14 11:53       ` Avi Kivity
  0 siblings, 0 replies; 30+ messages in thread
From: Avi Kivity @ 2009-06-14 11:53 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: akpm, Paul Mackerras, linux-kernel, linuxppc-dev

Linus Torvalds wrote:
> On Sat, 13 Jun 2009, Linus Torvalds wrote:
>   
>> On Sat, 13 Jun 2009, Paul Mackerras wrote:
>>     
>>> Linus, Andrew: OK if this goes in via the powerpc tree?
>>>       
>> Ok by me.
>>     
>
> Btw, do 32-bit architectures really necessarily want 64-bit performance 
> counters? 
>
> I realize that 32-bit counters will overflow pretty easily, but I do 
> wonder about the performance impact of doing things like hashed spinlocks 
> for 64-bit counters. Maybe the downsides of 64-bit perf counters on such 
> architectures might outweight the upsides?
>   

An alternative implementation using 64-bit cmpxchg will recover most of 
the costs of hashed spinlocks.  I assume most serious 32-bit 
architectures have them?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-14 11:53       ` Avi Kivity
@ 2009-06-14 12:21         ` Paul Mackerras
  -1 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-14 12:21 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Linus Torvalds, benh, akpm, linuxppc-dev, linux-kernel

Avi Kivity writes:

> An alternative implementation using 64-bit cmpxchg will recover most of 
> the costs of hashed spinlocks.  I assume most serious 32-bit 
> architectures have them?

Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
it already has an atomic64_t implementation using cmpxchg8b (or
whatever it's called).

My thinking is that the 32-bit non-x86 architectures will be mostly
UP, so the overhead is just an interrupt enable/restore.  Those that
are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
a few 4-way systems.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-14 12:21         ` Paul Mackerras
  0 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-14 12:21 UTC (permalink / raw)
  To: Avi Kivity; +Cc: akpm, Linus Torvalds, linux-kernel, linuxppc-dev

Avi Kivity writes:

> An alternative implementation using 64-bit cmpxchg will recover most of 
> the costs of hashed spinlocks.  I assume most serious 32-bit 
> architectures have them?

Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
it already has an atomic64_t implementation using cmpxchg8b (or
whatever it's called).

My thinking is that the 32-bit non-x86 architectures will be mostly
UP, so the overhead is just an interrupt enable/restore.  Those that
are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
a few 4-way systems.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-14 12:21         ` Paul Mackerras
@ 2009-06-14 13:04           ` Avi Kivity
  -1 siblings, 0 replies; 30+ messages in thread
From: Avi Kivity @ 2009-06-14 13:04 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: Linus Torvalds, benh, akpm, linuxppc-dev, linux-kernel

Paul Mackerras wrote:
> Avi Kivity writes:
>
>   
>> An alternative implementation using 64-bit cmpxchg will recover most of 
>> the costs of hashed spinlocks.  I assume most serious 32-bit 
>> architectures have them?
>>     
>
> Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
> it already has an atomic64_t implementation using cmpxchg8b (or
> whatever it's called).
>   

Yes (and it is cmpxchg8b).  I'm surprised powerpc doesn't have DCAS support.

> My thinking is that the 32-bit non-x86 architectures will be mostly
> UP, so the overhead is just an interrupt enable/restore.  Those that
> are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
> a few 4-way systems.
>   

The new Nehalems provide 8 logical threads in a single socket.  All 
those threads share a cache, and they have cmpxchg8b anyway, so this 
won't matter.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-14 13:04           ` Avi Kivity
  0 siblings, 0 replies; 30+ messages in thread
From: Avi Kivity @ 2009-06-14 13:04 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, Linus Torvalds, linux-kernel, linuxppc-dev

Paul Mackerras wrote:
> Avi Kivity writes:
>
>   
>> An alternative implementation using 64-bit cmpxchg will recover most of 
>> the costs of hashed spinlocks.  I assume most serious 32-bit 
>> architectures have them?
>>     
>
> Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
> it already has an atomic64_t implementation using cmpxchg8b (or
> whatever it's called).
>   

Yes (and it is cmpxchg8b).  I'm surprised powerpc doesn't have DCAS support.

> My thinking is that the 32-bit non-x86 architectures will be mostly
> UP, so the overhead is just an interrupt enable/restore.  Those that
> are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
> a few 4-way systems.
>   

The new Nehalems provide 8 logical threads in a single socket.  All 
those threads share a cache, and they have cmpxchg8b anyway, so this 
won't matter.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-14 13:04           ` Avi Kivity
@ 2009-06-15  2:44             ` Roland Dreier
  -1 siblings, 0 replies; 30+ messages in thread
From: Roland Dreier @ 2009-06-15  2:44 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Paul Mackerras, Linus Torvalds, benh, akpm, linuxppc-dev, linux-kernel


 > The new Nehalems provide 8 logical threads in a single socket.  All
 > those threads share a cache, and they have cmpxchg8b anyway, so this
 > won't matter.

FWIW, Nehalem EX actually goes to 8 cores/16 threads per socket.  But
worrying about 32-bit performance on Nehalem is a little silly -- the
simplest solution is simply to run a 64-bit kernel.

 - R.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-15  2:44             ` Roland Dreier
  0 siblings, 0 replies; 30+ messages in thread
From: Roland Dreier @ 2009-06-15  2:44 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel, linuxppc-dev, Paul Mackerras, akpm, Linus Torvalds


 > The new Nehalems provide 8 logical threads in a single socket.  All
 > those threads share a cache, and they have cmpxchg8b anyway, so this
 > won't matter.

FWIW, Nehalem EX actually goes to 8 cores/16 threads per socket.  But
worrying about 32-bit performance on Nehalem is a little silly -- the
simplest solution is simply to run a 64-bit kernel.

 - R.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-15  2:44             ` Roland Dreier
@ 2009-06-15  4:30               ` Paul Mackerras
  -1 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-15  4:30 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Avi Kivity, Linus Torvalds, benh, akpm, linuxppc-dev, linux-kernel

Roland Dreier writes:

> FWIW, Nehalem EX actually goes to 8 cores/16 threads per socket.  But
> worrying about 32-bit performance on Nehalem is a little silly -- this
> simplest solution is simply to run a 64-bit kernel.

I'm not worried about ANY x86 processor, 32-bit or 64-bit, in fact,
since x86 already has an atomic64_t implementation for both 32-bit and
64-bit.

It is interesting, though, that arch/x86/include/asm/atomic_32.h
unconditionally uses cmpxchg8b to implement atomic64_t, but I thought
that cmpxchg8b didn't exist in processors prior to the Pentium.
Presumably you can't use perf_counters on a 386 or 486.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-15  4:30               ` Paul Mackerras
  0 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-15  4:30 UTC (permalink / raw)
  To: Roland Dreier
  Cc: linux-kernel, linuxppc-dev, Avi Kivity, akpm, Linus Torvalds

Roland Dreier writes:

> FWIW, Nehalem EX actually goes to 8 cores/16 threads per socket.  But
> worrying about 32-bit performance on Nehalem is a little silly -- this
> simplest solution is simply to run a 64-bit kernel.

I'm not worried about ANY x86 processor, 32-bit or 64-bit, in fact,
since x86 already has an atomic64_t implementation for both 32-bit and
64-bit.

It is interesting, though, that arch/x86/include/asm/atomic_32.h
unconditionally uses cmpxchg8b to implement atomic64_t, but I thought
that cmpxchg8b didn't exist in processors prior to the Pentium.
Presumably you can't use perf_counters on a 386 or 486.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-14 13:04           ` Avi Kivity
@ 2009-06-16 22:27             ` Gabriel Paubert
  -1 siblings, 0 replies; 30+ messages in thread
From: Gabriel Paubert @ 2009-06-16 22:27 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Paul Mackerras, akpm, Linus Torvalds, linux-kernel, linuxppc-dev

On Sun, Jun 14, 2009 at 04:04:36PM +0300, Avi Kivity wrote:
> Paul Mackerras wrote:
>> Avi Kivity writes:
>>
>>   
>>> An alternative implementation using 64-bit cmpxchg will recover most 
>>> of the costs of hashed spinlocks.  I assume most serious 32-bit  
>>> architectures have them?
>>>     
>>
>> Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
>> it already has an atomic64_t implementation using cmpxchg8b (or
>> whatever it's called).
>>   
>
> Yes (and it is cmpxchg8b).  I'm surprised powerpc doesn't have DCAS support.

Well, s390 and m68k have the equivalent (although I don't think Linux
supports SMP m68k, although some dual 68040/68060 boards have existed).

But 32 bit PPC will never have it. It just does not fit in the architecture
since integer loads and stores are limited to 32 bit (or split into 32 bit
chunks). Besides that there is no instruction that performs a read-modify-write
of memory. This would make the LSU much more complex for a corner case.

Hey, Intel also botched the first implementation of cmpxchg8b on the Pentium:
the (in)famous f00f bug is actually "lock cmpxchg8b" with a register operand.

Now for these counters, other solutions could be considered, like using
the most significant bit as a lock and having "only" 63 usable bits (when 
counting ns, this overflows at 292 years). 

>
>> My thinking is that the 32-bit non-x86 architectures will be mostly
>> UP, so the overhead is just an interrupt enable/restore.  Those that
>> are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
>> a few 4-way systems.
>>   
>
> The new Nehalems provide 8 logical threads in a single socket.  All  
> those threads share a cache, and they have cmpxchg8b anyway, so this  
> won't matter.
>

The problem is not Nehalem (who wants to run 32 bit kernels on a Nehalem
anyway) or x86.

The problem is that the assumption that the largest PPC32 SMP are 4 way
may be outdated:

http://www.freescale.com/webapp/sps/site/prod_summary.jsp?fastpreview=1&code=P4080

and some products including that processor have been announced (I don't know
whether they are shipping or not) and (apparently) run Linux.

	Gabriel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-16 22:27             ` Gabriel Paubert
  0 siblings, 0 replies; 30+ messages in thread
From: Gabriel Paubert @ 2009-06-16 22:27 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Linus Torvalds, akpm, Paul Mackerras, linux-kernel, linuxppc-dev

On Sun, Jun 14, 2009 at 04:04:36PM +0300, Avi Kivity wrote:
> Paul Mackerras wrote:
>> Avi Kivity writes:
>>
>>   
>>> An alternative implementation using 64-bit cmpxchg will recover most 
>>> of the costs of hashed spinlocks.  I assume most serious 32-bit  
>>> architectures have them?
>>>     
>>
>> Have a 64-bit cmpxchg, you mean?  x86 is the only one I know of, and
>> it already has an atomic64_t implementation using cmpxchg8b (or
>> whatever it's called).
>>   
>
> Yes (and it is cmpxchg8b).  I'm surprised powerpc doesn't have DCAS support.

Well, s390 and m68k have the equivalent (although I don't think Linux
supports SMP m68k, although some dual 68040/68060 boards have existed).

But 32 bit PPC will never have it. It just does not fit in the architecture
since integer loads and stores are limited to 32 bit (or split into 32 bit
chunks). Besides that there is no instruction that performs a read-modify-write
of memory. This would make the LSU much more complex for a corner case.

Hey, Intel also botched the first implementation of cmpxchg8b on the Pentium:
the (in)famous f00f bug is actually "lock cmpxchg8b" with a register operand.

Now for these counters, other solutions could be considered, like using
the most significant bit as a lock and having "only" 63 usable bits (when 
counting ns, this overflows at 292 years). 

>
>> My thinking is that the 32-bit non-x86 architectures will be mostly
>> UP, so the overhead is just an interrupt enable/restore.  Those that
>> are SMP I would expect to be small SMP -- mostly just 2 cpus and maybe
>> a few 4-way systems.
>>   
>
> The new Nehalems provide 8 logical threads in a single socket.  All  
> those threads share a cache, and they have cmpxchg8b anyway, so this  
> won't matter.
>

The problem is not Nehalem (who wants to run 32 bit kernels on a Nehalem
anyway) or x86.

The problem is that the assumption that the largest PPC32 SMP are 4 way
may be outdated:

http://www.freescale.com/webapp/sps/site/prod_summary.jsp?fastpreview=1&code=P4080

and some products including that processor have been announced (I don't know
whether they are shipping or not) and (apparently) run Linux.

	Gabriel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-13  7:10 ` Paul Mackerras
@ 2009-06-18 23:55   ` Mike Frysinger
  -1 siblings, 0 replies; 30+ messages in thread
From: Mike Frysinger @ 2009-06-18 23:55 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, torvalds, akpm, linuxppc-dev, linux-kernel

On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> +typedef struct {
> +       long long counter;
> +} atomic64_t;

lack of volatile seems odd compared to:
include/linux/types.h:
typedef struct {
    volatile int counter;
} atomic_t;
-mike

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-18 23:55   ` Mike Frysinger
  0 siblings, 0 replies; 30+ messages in thread
From: Mike Frysinger @ 2009-06-18 23:55 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, torvalds, linux-kernel, linuxppc-dev

On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> +typedef struct {
> +       long long counter;
> +} atomic64_t;

lack of volatile seems odd compared to:
include/linux/types.h:
typedef struct {
    volatile int counter;
} atomic_t;
-mike

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-18 23:55   ` Mike Frysinger
@ 2009-06-19  0:46     ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 30+ messages in thread
From: Benjamin Herrenschmidt @ 2009-06-19  0:46 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: Paul Mackerras, torvalds, akpm, linuxppc-dev, linux-kernel

On Thu, 2009-06-18 at 19:55 -0400, Mike Frysinger wrote:
> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> > +typedef struct {
> > +       long long counter;
> > +} atomic64_t;
> 
> lack of volatile seems odd compared to:
> include/linux/types.h:
> typedef struct {
>     volatile int counter;
> } atomic_t;

Since the counter is only accessed within a spinlock, the volatile
wouldn't be very useful here.

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-19  0:46     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 30+ messages in thread
From: Benjamin Herrenschmidt @ 2009-06-19  0:46 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: torvalds, akpm, Paul Mackerras, linux-kernel, linuxppc-dev

On Thu, 2009-06-18 at 19:55 -0400, Mike Frysinger wrote:
> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> > +typedef struct {
> > +       long long counter;
> > +} atomic64_t;
> 
> lack of volatile seems odd compared to:
> include/linux/types.h:
> typedef struct {
>     volatile int counter;
> } atomic_t;

Since the counter is only accessed within a spinlock, the volatile
wouldn't be very useful here.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-18 23:55   ` Mike Frysinger
@ 2009-06-19  0:47     ` Paul Mackerras
  -1 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-19  0:47 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: benh, torvalds, akpm, linuxppc-dev, linux-kernel

Mike Frysinger writes:

> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> > +typedef struct {
> > +       long long counter;
> > +} atomic64_t;
> 
> lack of volatile seems odd compared to:
> include/linux/types.h:
> typedef struct {
>     volatile int counter;
> } atomic_t;
> -mike

It's only accessed under a spinlock, so I don't think it needs to be
volatile.  On UP it's accessed within local_irq_save/restore which
should also be compiler barriers and prevent memory access reordering,
so again volatile isn't needed.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-19  0:47     ` Paul Mackerras
  0 siblings, 0 replies; 30+ messages in thread
From: Paul Mackerras @ 2009-06-19  0:47 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: akpm, torvalds, linux-kernel, linuxppc-dev

Mike Frysinger writes:

> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
> > +typedef struct {
> > +       long long counter;
> > +} atomic64_t;
> 
> lack of volatile seems odd compared to:
> include/linux/types.h:
> typedef struct {
>     volatile int counter;
> } atomic_t;
> -mike

It's only accessed under a spinlock, so I don't think it needs to be
volatile.  On UP it's accessed within local_irq_save/restore which
should also be compiler barriers and prevent memory access reordering,
so again volatile isn't needed.

Paul.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
  2009-06-19  0:47     ` Paul Mackerras
@ 2009-06-19  0:49       ` Mike Frysinger
  -1 siblings, 0 replies; 30+ messages in thread
From: Mike Frysinger @ 2009-06-19  0:49 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, torvalds, akpm, linuxppc-dev, linux-kernel

On Thu, Jun 18, 2009 at 20:47, Paul Mackerras wrote:
> Mike Frysinger writes:
>> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
>> > +typedef struct {
>> > +       long long counter;
>> > +} atomic64_t;
>>
>> lack of volatile seems odd compared to:
>> include/linux/types.h:
>> typedef struct {
>>     volatile int counter;
>> } atomic_t;
>
> It's only accessed under a spinlock, so I don't think it needs to be
> volatile.  On UP it's accessed within local_irq_save/restore which
> should also be compiler barriers and prevent memory access reordering,
> so again volatile isn't needed.

i'm not suggesting it is needed, i'm saying it's a bit confusing.  a
simple comment above the atomic64_t type with your simple explanation
here would go a long way.
-mike

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] lib: Provide generic atomic64_t implementation
@ 2009-06-19  0:49       ` Mike Frysinger
  0 siblings, 0 replies; 30+ messages in thread
From: Mike Frysinger @ 2009-06-19  0:49 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: akpm, torvalds, linux-kernel, linuxppc-dev

On Thu, Jun 18, 2009 at 20:47, Paul Mackerras wrote:
> Mike Frysinger writes:
>> On Sat, Jun 13, 2009 at 03:10, Paul Mackerras wrote:
>> > +typedef struct {
>> > +       long long counter;
>> > +} atomic64_t;
>>
>> lack of volatile seems odd compared to:
>> include/linux/types.h:
>> typedef struct {
>>     volatile int counter;
>> } atomic_t;
>
> It's only accessed under a spinlock, so I don't think it needs to be
> volatile.  On UP it's accessed within local_irq_save/restore which
> should also be compiler barriers and prevent memory access reordering,
> so again volatile isn't needed.

i'm not suggesting it is needed, i'm saying it's a bit confusing.  a
simple comment above the atomic64_t type with your simple explanation
here would go a long way.
-mike

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2009-06-19  0:49 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-13  7:10 [PATCH 1/2] lib: Provide generic atomic64_t implementation Paul Mackerras
2009-06-13  7:10 ` Paul Mackerras
2009-06-13 20:13 ` Linus Torvalds
2009-06-13 20:13   ` Linus Torvalds
2009-06-13 20:25   ` Linus Torvalds
2009-06-13 20:25     ` Linus Torvalds
2009-06-13 20:56     ` Ingo Molnar
2009-06-13 20:56       ` Ingo Molnar
2009-06-14 11:53     ` Avi Kivity
2009-06-14 11:53       ` Avi Kivity
2009-06-14 12:21       ` Paul Mackerras
2009-06-14 12:21         ` Paul Mackerras
2009-06-14 13:04         ` Avi Kivity
2009-06-14 13:04           ` Avi Kivity
2009-06-15  2:44           ` Roland Dreier
2009-06-15  2:44             ` Roland Dreier
2009-06-15  4:30             ` Paul Mackerras
2009-06-15  4:30               ` Paul Mackerras
2009-06-16 22:27           ` Gabriel Paubert
2009-06-16 22:27             ` Gabriel Paubert
2009-06-13 21:53 ` Arnd Bergmann
2009-06-13 21:53   ` Arnd Bergmann
2009-06-18 23:55 ` Mike Frysinger
2009-06-18 23:55   ` Mike Frysinger
2009-06-19  0:46   ` Benjamin Herrenschmidt
2009-06-19  0:46     ` Benjamin Herrenschmidt
2009-06-19  0:47   ` Paul Mackerras
2009-06-19  0:47     ` Paul Mackerras
2009-06-19  0:49     ` Mike Frysinger
2009-06-19  0:49       ` Mike Frysinger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.