linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/10] percpu: Per cpu code simplification V4
@ 2008-01-08 21:10 travis
  2008-01-08 21:10 ` [PATCH 01/10] percpu: Use a kconfig variable to signal arch specific percpu setup travis
                   ` (9 more replies)
  0 siblings, 10 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel


This patchset simplifies the code that arches need to maintain to support
per cpu functionality. Most of the code is moved into arch independent
code. Only a minimal set of definitions is kept for each arch.

The patch also unifies the x86 arch so that there is only a single
asm-x86/percpu.h

Based on: 2.6.24-rc6-mm1
---

V1->V2:
- Add support for specifying attributes for per cpu declarations (preserves
  IA64 model(small) attribute).
  - Drop first patch that removes the model(small) attribute for IA64
  - Missing #endif in powerpc generic config /  Wrong Kconfig
  - Follow Randy's suggestions on how to do the Kconfig settings

V2->V3:
  - fix x86_64 non-SMP case
  - change SHIFT_PTR to SHIFT_PERCPU_PTR
  - fix various percpu_modcopy()'s to reference correct per_cpu_offset()
  - s390 has a special way to determine the pointer to a per cpu area

V3->V4:
  - rebased patchset on 2.6.24-rc6-mm1
    (removes the percpu_modcopy changes that are already in.)
  - change config ARCH_SETS_UP_PER_CPU_AREA to a global var
    and use select HAVE_SETUP_PER_CPU_AREA to specify.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 01/10] percpu: Use a kconfig variable to signal arch specific percpu setup
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 02/10] percpu: Move arch XX_PER_CPU_XX definitions into linux/percpu.h travis
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Rusty Russell, Sam Ravnborg

[-- Attachment #1: arch_sets_up_per_cpu_areas --]
[-- Type: text/plain, Size: 3465 bytes --]

The use of the __GENERIC_PERCPU is a bit problematic since arches
may want to run their own percpu setup while using the generic
percpu definitions. Replace it through a kconfig variable.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
V1->V2:
- Use def_bool as suggested by Randy.

V3->v4:
  - change config ARCH_SETS_UP_PER_CPU_AREA to a global var
    and use select HAVE_SETUP_PER_CPU_AREA to specify.
---
 arch/Kconfig                 |    3 +++
 arch/ia64/Kconfig            |    1 +
 arch/powerpc/Kconfig         |    4 ++++
 arch/sparc64/Kconfig         |    1 +
 arch/x86/Kconfig             |    1 +
 include/asm-generic/percpu.h |    1 -
 include/asm-s390/percpu.h    |    2 --
 include/asm-x86/percpu_32.h  |    2 --
 init/main.c                  |    4 ++--
 9 files changed, 12 insertions(+), 7 deletions(-)

--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -29,3 +29,6 @@ config KPROBES
 
 config HAVE_KPROBES
 	def_bool n
+
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool n
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -17,6 +17,7 @@ config IA64
 	select ARCH_SUPPORTS_MSI
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_SETUP_PER_CPU_AREA
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -42,6 +42,10 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
+config PPC64
+	select HAVE_SETUP_PER_CPU_AREA
+	default y
+
 config IRQ_PER_CPU
 	bool
 	default y
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -10,6 +10,7 @@ config SPARC
 	default y
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_SETUP_PER_CPU_AREA
 
 config SPARC64
 	bool
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -20,6 +20,7 @@ config X86
 	def_bool y
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_SETUP_PER_CPU_AREA if ARCH = "x86_64"
 
 config GENERIC_LOCKBREAK
 	def_bool n
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -3,7 +3,6 @@
 #include <linux/compiler.h>
 #include <linux/threads.h>
 
-#define __GENERIC_PER_CPU
 #ifdef CONFIG_SMP
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/lowcore.h>
 
-#define __GENERIC_PER_CPU
-
 /*
  * s390 uses its own implementation for per cpu data, the offset of
  * the cpu local data area is cached in the cpu's lowcore memory.
--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -41,8 +41,6 @@
  *    PER_CPU(cpu_gdt_descr, %ebx)
  */
 #ifdef CONFIG_SMP
-/* Same as generic implementation except for optimized local access. */
-#define __GENERIC_PER_CPU
 
 /* This is used for other cpus to find our section. */
 extern unsigned long __per_cpu_offset[];
--- a/init/main.c
+++ b/init/main.c
@@ -363,7 +363,7 @@ static inline void smp_prepare_cpus(unsi
 
 #else
 
-#ifdef __GENERIC_PER_CPU
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -384,7 +384,7 @@ static void __init setup_per_cpu_areas(v
 		ptr += size;
 	}
 }
-#endif /* !__GENERIC_PER_CPU */
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
 
 /* Called by boot processor to activate the rest. */
 static void __init smp_init(void)

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 02/10] percpu: Move arch XX_PER_CPU_XX definitions into linux/percpu.h
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
  2008-01-08 21:10 ` [PATCH 01/10] percpu: Use a kconfig variable to signal arch specific percpu setup travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 03/10] percpu: Make the asm-generic/percpu.h more "generic" travis
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Rusty Russell

[-- Attachment #1: move_percpu_declarations --]
[-- Type: text/plain, Size: 11989 bytes --]

The arch definitions are all the same. So move them into linux/percpu.h.

We cannot move DECLARE_PER_CPU since some include files just include
asm/percpu.h to avoid include recursion problems.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---

V1->V2:
- Special consideration for IA64: Add the ability to specify
  arch specific per cpu flags

V2->V3:
- remove .data.percpu attribute from DEFINE_PER_CPU for non-smp case.
---
 include/asm-generic/percpu.h |   18 ------------------
 include/asm-ia64/percpu.h    |   24 ++----------------------
 include/asm-powerpc/percpu.h |   17 -----------------
 include/asm-s390/percpu.h    |   18 ------------------
 include/asm-sparc64/percpu.h |   16 ----------------
 include/asm-x86/percpu_32.h  |   12 ------------
 include/asm-x86/percpu_64.h  |   17 -----------------
 include/linux/percpu.h       |   24 ++++++++++++++++++++++++
 8 files changed, 26 insertions(+), 120 deletions(-)

--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -9,15 +9,6 @@ extern unsigned long __per_cpu_offset[NR
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*({				\
 	extern int simple_identifier_##var(void);	\
@@ -27,12 +18,6 @@ extern unsigned long __per_cpu_offset[NR
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -41,7 +26,4 @@ extern unsigned long __per_cpu_offset[NR
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* _ASM_GENERIC_PERCPU_H_ */
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -16,28 +16,11 @@
 #include <linux/threads.h>
 
 #ifdef HAVE_MODEL_SMALL_ATTRIBUTE
-# define __SMALL_ADDR_AREA	__attribute__((__model__ (__small__)))
-#else
-# define __SMALL_ADDR_AREA
+# define PER_CPU_ATTRIBUTES	__attribute__((__model__ (__small__)))
 #endif
 
 #define DECLARE_PER_CPU(type, name)				\
-	extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name)				\
-	__attribute__((__section__(".data.percpu")))		\
-	__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-#ifdef CONFIG_SMP
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
-	__attribute__((__section__(".data.percpu.shared_aligned")))	\
-	__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name		\
-	____cacheline_aligned_in_smp
-#else
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-	DEFINE_PER_CPU(type, name)
-#endif
+	extern PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
 
 #ifdef CONFIG_SMP
 
@@ -63,9 +46,6 @@ extern void *per_cpu_init(void);
 
 #endif	/* SMP */
 
-#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
  * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -16,15 +16,6 @@
 #define __my_cpu_offset() get_paca()->data_offset
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
@@ -34,11 +25,6 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -47,9 +33,6 @@ extern void setup_per_cpu_areas(void);
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #else
 #include <asm-generic/percpu.h>
 #endif
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -34,16 +34,6 @@
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) \
-    __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
@@ -51,11 +41,6 @@ extern unsigned long __per_cpu_offset[NR
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define __get_cpu_var(var) __reloc_hide(var,0)
 #define __raw_get_cpu_var(var) __reloc_hide(var,0)
 #define per_cpu(var,cpu) __reloc_hide(var,0)
@@ -64,7 +49,4 @@ extern unsigned long __per_cpu_offset[NR
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* __ARCH_S390_PERCPU__ */
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -16,15 +16,6 @@ extern unsigned long __per_cpu_shift;
 	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
@@ -33,10 +24,6 @@ extern unsigned long __per_cpu_shift;
 #else /* ! SMP */
 
 #define real_setup_per_cpu_areas()		do { } while (0)
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
 
 #define per_cpu(var, cpu)			(*((void)cpu, &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
@@ -46,7 +33,4 @@ extern unsigned long __per_cpu_shift;
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* __ARCH_SPARC64_PERCPU__ */
--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -47,16 +47,7 @@ extern unsigned long __per_cpu_offset[];
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
 
-/* Separate out the type, so (int[3], foo) works. */
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
@@ -72,9 +63,6 @@ DECLARE_PER_CPU(unsigned long, this_cpu_
 
 #define __get_cpu_var(var) __raw_get_cpu_var(var)
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
 #else  /* !SMP */
--- a/include/asm-x86/percpu_64.h
+++ b/include/asm-x86/percpu_64.h
@@ -16,15 +16,6 @@
 
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_internodealigned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*({				\
 	extern int simple_identifier_##var(void);	\
@@ -40,11 +31,6 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -53,7 +39,4 @@ extern void setup_per_cpu_areas(void);
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* _ASM_X8664_PERCPU_H_ */
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,6 +9,30 @@
 
 #include <asm/percpu.h>
 
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
+#ifdef CONFIG_SMP
+#define DEFINE_PER_CPU(type, name)					\
+	__attribute__((__section__(".data.percpu")))			\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
+	__attribute__((__section__(".data.percpu.shared_aligned")))	\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name		\
+	____cacheline_aligned_in_smp
+#else
+#define DEFINE_PER_CPU(type, name)					\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		      \
+	DEFINE_PER_CPU(type, name)
+#endif
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
 #ifndef PERCPU_ENOUGH_ROOM
 #ifdef CONFIG_MODULES

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 03/10] percpu: Make the asm-generic/percpu.h more "generic"
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
  2008-01-08 21:10 ` [PATCH 01/10] percpu: Use a kconfig variable to signal arch specific percpu setup travis
  2008-01-08 21:10 ` [PATCH 02/10] percpu: Move arch XX_PER_CPU_XX definitions into linux/percpu.h travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Rusty Russell

[-- Attachment #1: genericize-percpu.h --]
[-- Type: text/plain, Size: 4171 bytes --]

Add the ability to use generic/percpu even if the arch needs to override
several aspects of its operations. This will enable the use of generic
percpu.h for all arches.

An arch may define:

__per_cpu_offset	Do not use the generic pointer array. Arch must
			define per_cpu_offset(cpu) (used by x86_64, s390).

__my_cpu_offset		Can be defined to provide an optimized way to determine
			the offset for variables of the currently executing
			processor. Used by ia64, x86_64, x86_32, sparc64, s/390.

SHIFT_PERCPU_PTR(ptr, offset)	If an arch defines it then special handling
			of pointer arithmentic may be implemented. Used
			by s/390.


(Some of these special percpu arch implementations may be later consolidated
so that there are less cases to deal with.)

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---

V1->V2:
- add support for PER_CPU_ATTRIBUTES

V2->V3:
- fix generic smp percpu_modcopy to use per_cpu_offset() macro.

V3->V4:
- change to use CONFIG_HAVE_SETUP_PER_CPU_AREA
---
 include/asm-generic/percpu.h |   72 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 10 deletions(-)

--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -3,27 +3,79 @@
 #include <linux/compiler.h>
 #include <linux/threads.h>
 
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
 #ifdef CONFIG_SMP
 
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ *
+ * Most arches use the __per_cpu_offset array for those offsets but
+ * some arches have their own ways of determining the offset (x86_64, s390).
+ */
+#ifndef __per_cpu_offset
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
+#endif
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
-#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
+/*
+ * Determine the offset for the currently active processor.
+ * An arch may define __my_cpu_offset to provide a more effective
+ * means of obtaining the offset to the per cpu variables of the
+ * current processor.
+ */
+#ifndef __my_cpu_offset
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+/*
+ * Add a offset to a pointer but keep the pointer as is.
+ *
+ * Only S390 provides its own means of moving the pointer.
+ */
+#ifndef SHIFT_PERCPU_PTR
+#define SHIFT_PERCPU_PTR(__p, __offset)	RELOC_HIDE((__p), (__offset))
+#endif
+
+/*
+ * A percpu variable may point to a discarded reghions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
+
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+extern void setup_per_cpu_areas(void);
+#endif
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
+#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var)			per_cpu_var(var)
+#define __raw_get_cpu_var(var)			per_cpu_var(var)
 
 #endif	/* SMP */
 
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
+#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
+					__typeof__(type) per_cpu_var(name)
 
 #endif /* _ASM_GENERIC_PERCPU_H_ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 04/10] x86_32: Use generic percpu.h
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (2 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 03/10] percpu: Make the asm-generic/percpu.h more "generic" travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 05/10] x86_64: Use generic percpu travis
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, tglx, mingo

[-- Attachment #1: x86_32_use_generic_percpu --]
[-- Type: text/plain, Size: 1847 bytes --]

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -42,34 +42,22 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 05/10] x86_64: Use generic percpu
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (3 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 06/10] s390: " travis
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Rusty Russell, tglx, mingo

[-- Attachment #1: x86_64_use_generic_percpu --]
[-- Type: text/plain, Size: 1676 bytes --]

x86_64 provides an optimized way to determine the local per cpu area
offset through the pda and determines the base by accessing a remote
pda.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>


---
 include/asm-x86/percpu_64.h |   23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

--- a/include/asm-x86/percpu_64.h
+++ b/include/asm-x86/percpu_64.h
@@ -12,31 +12,12 @@
 #include <asm/pda.h>
 
 #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset() read_pda(data_offset)
+#define __my_cpu_offset read_pda(data_offset)
 
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
-#define __get_cpu_var(var) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-#define __raw_get_cpu_var(var) (*({			\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-
-extern void setup_per_cpu_areas(void);
-
-#else /* ! SMP */
-
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
-
 #endif	/* SMP */
 
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#include <asm-generic/percpu.h>
 
 #endif /* _ASM_X8664_PERCPU_H_ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 06/10] s390: Use generic percpu
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (4 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 05/10] x86_64: Use generic percpu travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 07/10] Powerpc: Use generic per cpu travis
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, schwidefsky

[-- Attachment #1: s390_generic_percpu --]
[-- Type: text/plain, Size: 2594 bytes --]

Change s390 percpu.h to use asm-generic/percpu.h

Cc: schwidefsky@de.ibm.com
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---

V2->V3:

On Thu, 29 Nov 2007, Martin Schwidefsky wrote:

> On Wed, 2007-11-28 at 13:09 -0800, Christoph Lameter wrote:
> > s390 has a special way to determine the pointer to a per cpu area
> > plus there is a way to access the base of the per cpu area of the
> > currently executing processor.
> > 
> > Note: I had to do a minor change to ASM code. Please check that
> > this was done right.
> 
> Hi Christoph,
> 
> after fixing the trainwreck with Gregs kset changes I've got rc3-mm2
> compiled with your percpu patches. The new s390 percpu code works fine:
> 
> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

---
 include/asm-s390/percpu.h |   33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -13,40 +13,25 @@
  */
 #if defined(__s390x__) && defined(MODULE)
 
-#define __reloc_hide(var,offset) (*({			\
+#define SHIFT_PERCPU_PTR(ptr,offset) (({			\
 	extern int simple_identifier_##var(void);	\
 	unsigned long *__ptr;				\
-	asm ( "larl %0,per_cpu__"#var"@GOTENT"		\
-	    : "=a" (__ptr) : "X" (per_cpu__##var) );	\
-	(typeof(&per_cpu__##var))((*__ptr) + (offset));	}))
+	asm ( "larl %0, %1@GOTENT"		\
+	    : "=a" (__ptr) : "X" (ptr) );		\
+	(typeof(ptr))((*__ptr) + (offset));	}))
 
 #else
 
-#define __reloc_hide(var, offset) (*({				\
+#define SHIFT_PERCPU_PTR(ptr, offset) (({				\
 	extern int simple_identifier_##var(void);		\
 	unsigned long __ptr;					\
-	asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) );	\
-	(typeof(&per_cpu__##var)) (__ptr + (offset)); }))
+	asm ( "" : "=a" (__ptr) : "0" (ptr) );			\
+	(typeof(ptr)) (__ptr + (offset)); }))
 
 #endif
 
-#ifdef CONFIG_SMP
+#define __my_cpu_offset S390_lowcore.percpu_offset
 
-extern unsigned long __per_cpu_offset[NR_CPUS];
-
-#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
-#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
-#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#else /* ! SMP */
-
-#define __get_cpu_var(var) __reloc_hide(var,0)
-#define __raw_get_cpu_var(var) __reloc_hide(var,0)
-#define per_cpu(var,cpu) __reloc_hide(var,0)
-
-#endif /* SMP */
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#include <asm-generic/percpu.h>
 
 #endif /* __ARCH_S390_PERCPU__ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 07/10] Powerpc: Use generic per cpu
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (5 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 06/10] s390: " travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 08/10] Sparc64: Use generic percpu travis
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Paul Mackerras

[-- Attachment #1: power_generic_percpu --]
[-- Type: text/plain, Size: 1527 bytes --]

Powerpc has a way to determine the address of the per cpu area of the
currently executing processor via the paca and the array of per cpu
offsets is avoided by looking up the per cpu area from the remote
paca's (copying x86_64).

Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---

V1->V2:
- add missing #endif

V2->V3:
- use generic percpy_modcopy()

---
 include/asm-powerpc/percpu.h |   20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -16,25 +16,9 @@
 #define __my_cpu_offset() get_paca()->data_offset
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
-#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
-#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, local_paca->data_offset))
+#endif /* CONFIG_SMP */
+#endif /* __powerpc64__ */
 
-extern void setup_per_cpu_areas(void);
-
-#else /* ! SMP */
-
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
-
-#endif	/* SMP */
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-
-#else
 #include <asm-generic/percpu.h>
-#endif
 
 #endif /* _ASM_POWERPC_PERCPU_H_ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 08/10] Sparc64: Use generic percpu
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (6 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 07/10] Powerpc: Use generic per cpu travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 09/10] ia64: " travis
  2008-01-08 21:10 ` [PATCH 10/10] x86: Unify percpu.h travis
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, David Miller

[-- Attachment #1: sparc64_generic_percpu --]
[-- Type: text/plain, Size: 2172 bytes --]

Sparc64 has a way of providing the base address for the per cpu area of the
currently executing processor in a global register.

Sparc64 also provides a way to calculate the address of a per cpu area
from a base address instead of performing an array lookup.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---

V2->V3:
- use generic percpy_modcopy()
---
 arch/sparc64/mm/init.c       |    5 +++++
 include/asm-sparc64/percpu.h |   12 ++----------
 2 files changed, 7 insertions(+), 10 deletions(-)

--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1328,6 +1328,11 @@ pgd_t swapper_pg_dir[2048];
 static void sun4u_pgprot_init(void);
 static void sun4v_pgprot_init(void);
 
+/* Dummy function */
+void __init setup_per_cpu_areas(void)
+{
+}
+
 void __init paging_init(void)
 {
 	unsigned long end_pfn, pages_avail, shift, phys_base;
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -7,7 +7,6 @@ register unsigned long __local_per_cpu_o
 
 #ifdef CONFIG_SMP
 
-#define setup_per_cpu_areas()			do { } while (0)
 extern void real_setup_per_cpu_areas(void);
 
 extern unsigned long __per_cpu_base;
@@ -16,21 +15,14 @@ extern unsigned long __per_cpu_shift;
 	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
-#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
-#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
+#define __my_cpu_offset __local_per_cpu_offset
 
 #else /* ! SMP */
 
 #define real_setup_per_cpu_areas()		do { } while (0)
 
-#define per_cpu(var, cpu)			(*((void)cpu, &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
-
 #endif	/* SMP */
 
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#include <asm-generic/percpu.h>
 
 #endif /* __ARCH_SPARC64_PERCPU__ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 09/10] ia64: Use generic percpu
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (7 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 08/10] Sparc64: Use generic percpu travis
@ 2008-01-08 21:10 ` travis
  2008-01-08 21:10 ` [PATCH 10/10] x86: Unify percpu.h travis
  9 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, linux-ia64, tony.luck

[-- Attachment #1: ia64_generic_percpu --]
[-- Type: text/plain, Size: 2515 bytes --]

ia64 has a special processor specific mapping that can be used to locate the
offset for the current per cpu area.

Cc: linux-ia64@vger.kernel.org
Cc: tony.luck@intel.com
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---

V1->V2:
- Merge fixes
- Remove transitional check for PER_CPU_ATTRIBUTES from linux/percpu.h

V2-.V3:
- use generic percpy_modcopy()

---
 include/asm-ia64/percpu.h |   24 +++++++-----------------
 include/linux/percpu.h    |    4 ----
 2 files changed, 7 insertions(+), 21 deletions(-)

--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -19,29 +19,14 @@
 # define PER_CPU_ATTRIBUTES	__attribute__((__model__ (__small__)))
 #endif
 
-#define DECLARE_PER_CPU(type, name)				\
-	extern PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-
 #ifdef CONFIG_SMP
 
-extern unsigned long __per_cpu_offset[NR_CPUS];
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
-DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
-
-#define per_cpu(var, cpu)  (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
-#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
-#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
+#define __my_cpu_offset	__ia64_per_cpu_var(local_per_cpu_offset)
 
-extern void setup_per_cpu_areas (void);
 extern void *per_cpu_init(void);
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
 #define per_cpu_init()				(__phys_per_cpu_start)
 
 #endif	/* SMP */
@@ -52,7 +37,12 @@ extern void *per_cpu_init(void);
  * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
  * more efficient.
  */
-#define __ia64_per_cpu_var(var)	(per_cpu__##var)
+#define __ia64_per_cpu_var(var)	per_cpu__##var
+
+#include <asm-generic/percpu.h>
+
+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
 
 #endif /* !__ASSEMBLY__ */
 
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,10 +9,6 @@
 
 #include <asm/percpu.h>
 
-#ifndef PER_CPU_ATTRIBUTES
-#define PER_CPU_ATTRIBUTES
-#endif
-
 #ifdef CONFIG_SMP
 #define DEFINE_PER_CPU(type, name)					\
 	__attribute__((__section__(".data.percpu")))			\

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 10/10] x86: Unify percpu.h
  2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
                   ` (8 preceding siblings ...)
  2008-01-08 21:10 ` [PATCH 09/10] ia64: " travis
@ 2008-01-08 21:10 ` travis
  2008-01-09 19:28   ` Dave Hansen
  9 siblings, 1 reply; 21+ messages in thread
From: travis @ 2008-01-08 21:10 UTC (permalink / raw)
  To: Andrew Morton, mingo, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, Rusty Russell, tglx, mingo

[-- Attachment #1: unification --]
[-- Type: text/plain, Size: 8232 bytes --]

Form a single percpu.h from percpu_32.h and percpu_64.h. Both are now pretty
small so this is simply adding them together.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
 include/asm-x86/percpu.h    |  145 ++++++++++++++++++++++++++++++++++++++++++--
 include/asm-x86/percpu_32.h |  119 ------------------------------------
 include/asm-x86/percpu_64.h |   23 ------
 3 files changed, 141 insertions(+), 146 deletions(-)

--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -1,5 +1,142 @@
-#ifdef CONFIG_X86_32
-# include "percpu_32.h"
-#else
-# include "percpu_64.h"
+#ifndef _ASM_X86_PERCPU_H_
+#define _ASM_X86_PERCPU_H_
+
+#ifdef CONFIG_X86_64
+#include <linux/compiler.h>
+
+/* Same as asm-generic/percpu.h, except that we store the per cpu offset
+   in the PDA. Longer term the PDA and every per cpu variable
+   should be just put into a single section and referenced directly
+   from %gs */
+
+#ifdef CONFIG_SMP
+#include <asm/pda.h>
+
+#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+#define __my_cpu_offset read_pda(data_offset)
+
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
 #endif
+#include <asm-generic/percpu.h>
+
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#else /* CONFIG_X86_64 */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    reg - 32bit register
+ *
+ * The resulting address is stored in the "reg" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+#define PER_CPU(var, reg)				\
+	movl %fs:per_cpu__##this_cpu_off, reg;		\
+	lea per_cpu__##var(reg), reg
+#define PER_CPU_VAR(var)	%fs:per_cpu__##var
+#else /* ! SMP */
+#define PER_CPU(var, reg)			\
+	movl $per_cpu__##var, reg
+#define PER_CPU_VAR(var)	per_cpu__##var
+#endif	/* SMP */
+
+#else /* ...!ASSEMBLY */
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    cpu - 32bit register containing the current CPU number
+ *
+ * The resulting address is stored in the "cpu" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
+
+/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
+#define __percpu_seg "%%fs:"
+
+#else  /* !SMP */
+
+#define __percpu_seg ""
+
+#endif	/* SMP */
+
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+/* For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though). */
+extern void __bad_percpu_size(void);
+
+#define percpu_to_op(op,var,val)				\
+	do {							\
+		typedef typeof(var) T__;			\
+		if (0) { T__ tmp__; tmp__ = (val); }		\
+		switch (sizeof(var)) {				\
+		case 1:						\
+			asm(op "b %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		case 2:						\
+			asm(op "w %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		case 4:						\
+			asm(op "l %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		default: __bad_percpu_size();			\
+		}						\
+	} while (0)
+
+#define percpu_from_op(op,var)					\
+	({							\
+		typeof(var) ret__;				\
+		switch (sizeof(var)) {				\
+		case 1:						\
+			asm(op "b "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		case 2:						\
+			asm(op "w "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		case 4:						\
+			asm(op "l "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		default: __bad_percpu_size();			\
+		}						\
+		ret__; })
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
+#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
+#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
+#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#endif /* !__ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+#endif /* _ASM_X86_PERCPU_H_ */
--- a/include/asm-x86/percpu_32.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef __ARCH_I386_PERCPU__
-#define __ARCH_I386_PERCPU__
-
-#ifdef __ASSEMBLY__
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-#define PER_CPU(var, reg)				\
-	movl %fs:per_cpu__##this_cpu_off, reg;		\
-	lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var)	%fs:per_cpu__##var
-#else /* ! SMP */
-#define PER_CPU(var, reg)			\
-	movl $per_cpu__##var, reg
-#define PER_CPU_VAR(var)	per_cpu__##var
-#endif	/* SMP */
-
-#else /* ...!ASSEMBLY */
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-
-#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
-
-#else  /* !SMP */
-
-#define __percpu_seg ""
-
-#endif	/* SMP */
-
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op,var,val)				\
-	do {							\
-		typedef typeof(var) T__;			\
-		if (0) { T__ tmp__; tmp__ = (val); }		\
-		switch (sizeof(var)) {				\
-		case 1:						\
-			asm(op "b %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		case 2:						\
-			asm(op "w %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		case 4:						\
-			asm(op "l %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		default: __bad_percpu_size();			\
-		}						\
-	} while (0)
-
-#define percpu_from_op(op,var)					\
-	({							\
-		typeof(var) ret__;				\
-		switch (sizeof(var)) {				\
-		case 1:						\
-			asm(op "b "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		case 2:						\
-			asm(op "w "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		case 4:						\
-			asm(op "l "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		default: __bad_percpu_size();			\
-		}						\
-		ret__; })
-
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ARCH_I386_PERCPU__ */
--- a/include/asm-x86/percpu_64.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _ASM_X8664_PERCPU_H_
-#define _ASM_X8664_PERCPU_H_
-#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
-   in the PDA. Longer term the PDA and every per cpu variable
-   should be just put into a single section and referenced directly
-   from %gs */
-
-#ifdef CONFIG_SMP
-
-#include <asm/pda.h>
-
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
-#endif	/* SMP */
-
-#include <asm-generic/percpu.h>
-
-#endif /* _ASM_X8664_PERCPU_H_ */

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/10] x86: Unify percpu.h
  2008-01-08 21:10 ` [PATCH 10/10] x86: Unify percpu.h travis
@ 2008-01-09 19:28   ` Dave Hansen
  2008-01-09 19:31     ` Christoph Lameter
  0 siblings, 1 reply; 21+ messages in thread
From: Dave Hansen @ 2008-01-09 19:28 UTC (permalink / raw)
  To: travis
  Cc: Andrew Morton, mingo, Andi Kleen, Christoph Lameter,
	Jack Steiner, linux-mm, linux-kernel, Rusty Russell, tglx, mingo

On Tue, 2008-01-08 at 13:10 -0800, travis@sgi.com wrote:
> Form a single percpu.h from percpu_32.h and percpu_64.h. Both are now pretty
> small so this is simply adding them together. 

I guess I just don't really see the point of moving the code around like
this.  Before, it would have been easier to tell at a glance before
whether you were looking at 32 or 64-bit code because of which file you
are in.  But, now, you need to look for #ifdef context.  I'm not sure
that's a win.

This only saves 5 net lines of code, and those are probably from:

-#ifndef __ARCH_I386_PERCPU__
-#define __ARCH_I386_PERCPU__

right?

The rest of the set looks brilliant, though.  

-- Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/10] x86: Unify percpu.h
  2008-01-09 19:28   ` Dave Hansen
@ 2008-01-09 19:31     ` Christoph Lameter
  2008-01-09 19:53       ` Dave Hansen
  0 siblings, 1 reply; 21+ messages in thread
From: Christoph Lameter @ 2008-01-09 19:31 UTC (permalink / raw)
  To: Dave Hansen
  Cc: travis, Andrew Morton, mingo, Andi Kleen, Jack Steiner, linux-mm,
	linux-kernel, Rusty Russell, tglx, mingo

On Wed, 9 Jan 2008, Dave Hansen wrote:

> On Tue, 2008-01-08 at 13:10 -0800, travis@sgi.com wrote:
> > Form a single percpu.h from percpu_32.h and percpu_64.h. Both are now pretty
> > small so this is simply adding them together. 
> 
> I guess I just don't really see the point of moving the code around like
> this.  Before, it would have been easier to tell at a glance before
> whether you were looking at 32 or 64-bit code because of which file you
> are in.  But, now, you need to look for #ifdef context.  I'm not sure
> that's a win.
> 
> This only saves 5 net lines of code, and those are probably from:
> 
> -#ifndef __ARCH_I386_PERCPU__
> -#define __ARCH_I386_PERCPU__
> 
> right?
> 
> The rest of the set looks brilliant, though.  

Well this is only the first patchset. The next one will unify this even 
more (and make percpu functions work consistent between the two arches) 
but it requires changes to the way the %gs register is used in 
x86_64. So we only do the simplest thing here to have one file to patch 
against later.



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/10] x86: Unify percpu.h
  2008-01-09 19:31     ` Christoph Lameter
@ 2008-01-09 19:53       ` Dave Hansen
  2008-01-09 20:11         ` Christoph Lameter
  0 siblings, 1 reply; 21+ messages in thread
From: Dave Hansen @ 2008-01-09 19:53 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: travis, Andrew Morton, mingo, Andi Kleen, Jack Steiner, linux-mm,
	linux-kernel, Rusty Russell, tglx, mingo

On Wed, 2008-01-09 at 11:31 -0800, Christoph Lameter wrote:
> Well this is only the first patchset. The next one will unify this even 
> more (and make percpu functions work consistent between the two arches) 
> but it requires changes to the way the %gs register is used in 
> x86_64. So we only do the simplest thing here to have one file to patch 
> against later.

Then I really think this particular patch belongs in that other patch
set.  Here, it makes very little sense, and it's on the end anyway.

-- Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/10] x86: Unify percpu.h
  2008-01-09 19:53       ` Dave Hansen
@ 2008-01-09 20:11         ` Christoph Lameter
  2008-01-10 13:48           ` Ingo Molnar
  0 siblings, 1 reply; 21+ messages in thread
From: Christoph Lameter @ 2008-01-09 20:11 UTC (permalink / raw)
  To: Dave Hansen
  Cc: travis, Andrew Morton, mingo, Andi Kleen, Jack Steiner, linux-mm,
	linux-kernel, Rusty Russell, tglx, mingo

On Wed, 9 Jan 2008, Dave Hansen wrote:

> Then I really think this particular patch belongs in that other patch
> set.  Here, it makes very little sense, and it's on the end anyway.

It makes sense in that both percpu_32/64 are very small as a result of 
earlier patches and so its justifiable to put them together to simplify 
the next patchset.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/10] x86: Unify percpu.h
  2008-01-09 20:11         ` Christoph Lameter
@ 2008-01-10 13:48           ` Ingo Molnar
  0 siblings, 0 replies; 21+ messages in thread
From: Ingo Molnar @ 2008-01-10 13:48 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Dave Hansen, travis, Andrew Morton, Andi Kleen, Jack Steiner,
	linux-mm, linux-kernel, Rusty Russell, tglx, mingo


* Christoph Lameter <clameter@sgi.com> wrote:

> On Wed, 9 Jan 2008, Dave Hansen wrote:
> 
> > Then I really think this particular patch belongs in that other 
> > patch set.  Here, it makes very little sense, and it's on the end 
> > anyway.
> 
> It makes sense in that both percpu_32/64 are very small as a result of 
> earlier patches and so its justifiable to put them together to 
> simplify the next patchset.

i'd agree with this - lets just keep the existing flow of patches 
intact. It's not like the percpu code is in any danger of becoming 
unclean or quirky - it's one of the best-maintained pieces of kernel 
code :)

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 04/10] x86_32: Use generic percpu.h
  2008-01-08  2:11 [PATCH 00/10] percpu: Per cpu code simplification V3 travis
@ 2008-01-08  2:11 ` travis
  0 siblings, 0 replies; 21+ messages in thread
From: travis @ 2008-01-08  2:11 UTC (permalink / raw)
  To: mingo, Andrew Morton, Andi Kleen, Christoph Lameter
  Cc: Jack Steiner, linux-mm, linux-kernel, tglx, mingo

[-- Attachment #1: x86_32_use_generic_percpu --]
[-- Type: text/plain, Size: 1971 bytes --]

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -42,26 +42,7 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
@@ -74,11 +55,18 @@ do {								\
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 04/10] x86_32: Use generic percpu.h
  2007-12-28  0:16 [PATCH 00/10] percpu: Per cpu code simplification V2 travis
@ 2007-12-28  0:16 ` travis
  0 siblings, 0 replies; 21+ messages in thread
From: travis @ 2007-12-28  0:16 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Lameter, linux-mm, linux-kernel, tglx, mingo, ak

[-- Attachment #1: x86_32_use_generic_percpu --]
[-- Type: text/plain, Size: 1847 bytes --]

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -42,34 +42,22 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 04/10] x86_32: Use generic percpu.h
  2007-12-28  0:10 [PATCH 00/10] percpu: Per cpu code simplification V2 travis
@ 2007-12-28  0:10 ` travis
  0 siblings, 0 replies; 21+ messages in thread
From: travis @ 2007-12-28  0:10 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Lameter, linux-mm, linux-kernel, tglx, mingo, ak

[-- Attachment #1: x86_32_use_generic_percpu --]
[-- Type: text/plain, Size: 1847 bytes --]

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>

---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -42,34 +42,22 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 04/10] x86_32: Use generic percpu.h
  2007-11-28 21:09 ` [patch 04/10] x86_32: Use generic percpu.h Christoph Lameter
@ 2007-11-30 11:11   ` Ingo Molnar
  0 siblings, 0 replies; 21+ messages in thread
From: Ingo Molnar @ 2007-11-30 11:11 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, linux-kernel, tglx, mingo, ak


merge to x86.git below. Please check.

	Ingo

---->
Subject: x86_32: Use generic percpu.h
From: Christoph Lameter <clameter@sgi.com>

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

Index: linux-x86.q/include/asm-x86/percpu_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/percpu_32.h
+++ linux-x86.q/include/asm-x86/percpu_32.h
@@ -42,26 +42,7 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
@@ -74,11 +55,18 @@ do {								\
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [patch 04/10] x86_32: Use generic percpu.h
  2007-11-28 21:09 [patch 00/10] Per cpu code simplification V2 Christoph Lameter
@ 2007-11-28 21:09 ` Christoph Lameter
  2007-11-30 11:11   ` Ingo Molnar
  0 siblings, 1 reply; 21+ messages in thread
From: Christoph Lameter @ 2007-11-28 21:09 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, tglx, mingo, ak

[-- Attachment #1: x86_32_use_generic_percpu --]
[-- Type: text/plain, Size: 2042 bytes --]

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 include/asm-x86/percpu_32.h |   30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

Index: linux-2.6.24-rc3-mm2/include/asm-x86/percpu_32.h
===================================================================
--- linux-2.6.24-rc3-mm2.orig/include/asm-x86/percpu_32.h	2007-11-28 12:51:42.448213150 -0800
+++ linux-2.6.24-rc3-mm2/include/asm-x86/percpu_32.h	2007-11-28 12:51:48.084703616 -0800
@@ -42,34 +42,22 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

-- 

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2008-01-10 13:49 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-08 21:10 [PATCH 00/10] percpu: Per cpu code simplification V4 travis
2008-01-08 21:10 ` [PATCH 01/10] percpu: Use a kconfig variable to signal arch specific percpu setup travis
2008-01-08 21:10 ` [PATCH 02/10] percpu: Move arch XX_PER_CPU_XX definitions into linux/percpu.h travis
2008-01-08 21:10 ` [PATCH 03/10] percpu: Make the asm-generic/percpu.h more "generic" travis
2008-01-08 21:10 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
2008-01-08 21:10 ` [PATCH 05/10] x86_64: Use generic percpu travis
2008-01-08 21:10 ` [PATCH 06/10] s390: " travis
2008-01-08 21:10 ` [PATCH 07/10] Powerpc: Use generic per cpu travis
2008-01-08 21:10 ` [PATCH 08/10] Sparc64: Use generic percpu travis
2008-01-08 21:10 ` [PATCH 09/10] ia64: " travis
2008-01-08 21:10 ` [PATCH 10/10] x86: Unify percpu.h travis
2008-01-09 19:28   ` Dave Hansen
2008-01-09 19:31     ` Christoph Lameter
2008-01-09 19:53       ` Dave Hansen
2008-01-09 20:11         ` Christoph Lameter
2008-01-10 13:48           ` Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2008-01-08  2:11 [PATCH 00/10] percpu: Per cpu code simplification V3 travis
2008-01-08  2:11 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
2007-12-28  0:16 [PATCH 00/10] percpu: Per cpu code simplification V2 travis
2007-12-28  0:16 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
2007-12-28  0:10 [PATCH 00/10] percpu: Per cpu code simplification V2 travis
2007-12-28  0:10 ` [PATCH 04/10] x86_32: Use generic percpu.h travis
2007-11-28 21:09 [patch 00/10] Per cpu code simplification V2 Christoph Lameter
2007-11-28 21:09 ` [patch 04/10] x86_32: Use generic percpu.h Christoph Lameter
2007-11-30 11:11   ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).