* [PATCH] 2.5.1-pre5: per-cpu areas
@ 2001-12-05 22:09 Rusty Russell
2001-12-06 7:21 ` Keith Owens
` (2 more replies)
0 siblings, 3 replies; 30+ messages in thread
From: Rusty Russell @ 2001-12-05 22:09 UTC (permalink / raw)
To: linux-kernel; +Cc: torvalds
The following patch implements convenient per-cpu areas:
DECLARE_PER_CPU(int myvar);
...
this_cpu(myvar) = 1;
for (i = 0; i < NR_CPUS; i++)
per_cpu(myvar, i) = 0;
Good: Simply referring to "myvar" won't work (even on Uniprocessor), so
that mistake is prevented. Unreferenced variables are warned like
normal (almost).
Bad: Initialization isn't possible in the declaration. Only
implemented for PPC and x86, but fix is trivial for other
architectures. Implementation is icky, but getting the linker to
duplicate per-cpu section itself without symbols is beyond my skill.
Feedback appreciated,
Rusty.
--
Anyone who quotes me is an idiot. -- Rusty Russell.
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.1-pre5/include/linux/smp.h working-2.5.1-pre5-percpu/include/linux/smp.h
--- linux-2.5.1-pre5/include/linux/smp.h Wed Dec 5 16:49:14 2001
+++ working-2.5.1-pre5-percpu/include/linux/smp.h Wed Dec 5 18:21:06 2001
@@ -71,7 +71,36 @@
#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/
#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
+#define __PER_CPU(decl,num) decl##__##num \
+ __attribute__((section(".data.cpu" #num)))
+
+#if NR_CPUS == 32
+#define PER_CPU(decl) __PER_CPU(decl, 0); __PER_CPU(decl, 1); \
+ __PER_CPU(decl, 2); __PER_CPU(decl, 3); \
+ __PER_CPU(decl, 4); __PER_CPU(decl, 5); \
+ __PER_CPU(decl, 6); __PER_CPU(decl, 7); \
+ __PER_CPU(decl, 8); __PER_CPU(decl, 9); \
+ __PER_CPU(decl, 10); __PER_CPU(decl, 11); \
+ __PER_CPU(decl, 12); __PER_CPU(decl, 13); \
+ __PER_CPU(decl, 14); __PER_CPU(decl, 15); \
+ __PER_CPU(decl, 16); __PER_CPU(decl, 17); \
+ __PER_CPU(decl, 18); __PER_CPU(decl, 19); \
+ __PER_CPU(decl, 20); __PER_CPU(decl, 21); \
+ __PER_CPU(decl, 22); __PER_CPU(decl, 23); \
+ __PER_CPU(decl, 24); __PER_CPU(decl, 25); \
+ __PER_CPU(decl, 26); __PER_CPU(decl, 27); \
+ __PER_CPU(decl, 28); __PER_CPU(decl, 29); \
+ __PER_CPU(decl, 30); __PER_CPU(decl, 31)
#else
+#error NR_CPUS not 32: fix linux/smp.h.
+#endif /* NR_CPUS */
+
+extern void *per_cpu_sections[NR_CPUS];
+
+#define per_cpu(var, cpu) \
+*(__typeof__(&var)(&var##__0 - per_cpu_sections[0]) + per_cpu_sections[cpu])
+
+#else /* !SMP */
/*
* These macros fold the SMP functionality into a single CPU system
@@ -86,6 +115,10 @@
#define cpu_number_map(cpu) 0
#define smp_call_function(func,info,retry,wait) ({ 0; })
#define cpu_online_map 1
-
+#define PER_CPU(decl) decl
+#define per_cpu(var, cpu) var
#endif
+
+#define this_cpu(var) per_cpu(var,smp_processor_id())
+
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.1-pre5/init/main.c working-2.5.1-pre5-percpu/init/main.c
--- linux-2.5.1-pre5/init/main.c Tue Dec 4 17:17:28 2001
+++ working-2.5.1-pre5-percpu/init/main.c Wed Dec 5 17:41:56 2001
@@ -499,6 +499,19 @@
#else
+#if NR_CPUS != 32
+#error NR_CPUS not 32: fix init/main.c.
+#endif
+/* Created by linker magic */
+extern void *__cpu0, *__cpu1, *__cpu2, *__cpu3,
+ *__cpu4, *__cpu5, *__cpu6, *__cpu7,
+ *__cpu8, *__cpu9, *__cpu10, *__cpu11,
+ *__cpu12, *__cpu13, *__cpu14, *__cpu15,
+ *__cpu16, *__cpu17, *__cpu18, *__cpu19,
+ *__cpu20, *__cpu21, *__cpu22, *__cpu23,
+ *__cpu24, *__cpu25, *__cpu26, *__cpu27,
+ *__cpu28, *__cpu29, *__cpu30, *__cpu31;
+void *per_cpu_sections[NR_CPUS];
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
@@ -518,6 +531,24 @@
barrier();
}
printk("All processors have done init_idle\n");
+
+ /* Set up per-CPU section pointers */
+ per_cpu_sections[0] = &__cpu0; per_cpu_sections[1] = &__cpu1;
+ per_cpu_sections[2] = &__cpu2; per_cpu_sections[3] = &__cpu3;
+ per_cpu_sections[4] = &__cpu4; per_cpu_sections[5] = &__cpu5;
+ per_cpu_sections[6] = &__cpu6; per_cpu_sections[7] = &__cpu7;
+ per_cpu_sections[8] = &__cpu8; per_cpu_sections[9] = &__cpu9;
+ per_cpu_sections[10] = &__cpu10; per_cpu_sections[11] = &__cpu11;
+ per_cpu_sections[12] = &__cpu12; per_cpu_sections[13] = &__cpu13;
+ per_cpu_sections[14] = &__cpu14; per_cpu_sections[15] = &__cpu15;
+ per_cpu_sections[16] = &__cpu16; per_cpu_sections[17] = &__cpu17;
+ per_cpu_sections[18] = &__cpu18; per_cpu_sections[19] = &__cpu19;
+ per_cpu_sections[20] = &__cpu20; per_cpu_sections[21] = &__cpu21;
+ per_cpu_sections[22] = &__cpu22; per_cpu_sections[23] = &__cpu23;
+ per_cpu_sections[24] = &__cpu24; per_cpu_sections[25] = &__cpu25;
+ per_cpu_sections[26] = &__cpu26; per_cpu_sections[27] = &__cpu27;
+ per_cpu_sections[28] = &__cpu28; per_cpu_sections[29] = &__cpu29;
+ per_cpu_sections[30] = &__cpu30; per_cpu_sections[31] = &__cpu31;
}
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.1-pre5/arch/i386/vmlinux.lds working-2.5.1-pre5-percpu/arch/i386/vmlinux.lds
--- linux-2.5.1-pre5/arch/i386/vmlinux.lds Tue Jul 3 07:40:14 2001
+++ working-2.5.1-pre5-percpu/arch/i386/vmlinux.lds Wed Dec 5 18:03:50 2001
@@ -34,6 +34,40 @@
CONSTRUCTORS
}
+ /* Per-cpu sections: cache-line aligned */
+ . = ALIGN(32); __cpu0 = .; .data.cpu0 : { *(.data.cpu0) }
+ . = ALIGN(32); __cpu1 = .; .data.cpu1 : { *(.data.cpu1) }
+ . = ALIGN(32); __cpu2 = .; .data.cpu2 : { *(.data.cpu2) }
+ . = ALIGN(32); __cpu3 = .; .data.cpu3 : { *(.data.cpu3) }
+ . = ALIGN(32); __cpu4 = .; .data.cpu4 : { *(.data.cpu4) }
+ . = ALIGN(32); __cpu5 = .; .data.cpu5 : { *(.data.cpu5) }
+ . = ALIGN(32); __cpu6 = .; .data.cpu6 : { *(.data.cpu6) }
+ . = ALIGN(32); __cpu7 = .; .data.cpu7 : { *(.data.cpu7) }
+ . = ALIGN(32); __cpu8 = .; .data.cpu8 : { *(.data.cpu8) }
+ . = ALIGN(32); __cpu9 = .; .data.cpu9 : { *(.data.cpu9) }
+ . = ALIGN(32); __cpu10 = .; .data.cpu10 : { *(.data.cpu10) }
+ . = ALIGN(32); __cpu11 = .; .data.cpu11 : { *(.data.cpu11) }
+ . = ALIGN(32); __cpu12 = .; .data.cpu12 : { *(.data.cpu12) }
+ . = ALIGN(32); __cpu13 = .; .data.cpu13 : { *(.data.cpu13) }
+ . = ALIGN(32); __cpu14 = .; .data.cpu14 : { *(.data.cpu14) }
+ . = ALIGN(32); __cpu15 = .; .data.cpu15 : { *(.data.cpu15) }
+ . = ALIGN(32); __cpu16 = .; .data.cpu16 : { *(.data.cpu16) }
+ . = ALIGN(32); __cpu17 = .; .data.cpu17 : { *(.data.cpu17) }
+ . = ALIGN(32); __cpu18 = .; .data.cpu18 : { *(.data.cpu18) }
+ . = ALIGN(32); __cpu19 = .; .data.cpu19 : { *(.data.cpu19) }
+ . = ALIGN(32); __cpu20 = .; .data.cpu20 : { *(.data.cpu20) }
+ . = ALIGN(32); __cpu21 = .; .data.cpu21 : { *(.data.cpu21) }
+ . = ALIGN(32); __cpu22 = .; .data.cpu22 : { *(.data.cpu22) }
+ . = ALIGN(32); __cpu23 = .; .data.cpu23 : { *(.data.cpu23) }
+ . = ALIGN(32); __cpu24 = .; .data.cpu24 : { *(.data.cpu24) }
+ . = ALIGN(32); __cpu25 = .; .data.cpu25 : { *(.data.cpu25) }
+ . = ALIGN(32); __cpu26 = .; .data.cpu26 : { *(.data.cpu26) }
+ . = ALIGN(32); __cpu27 = .; .data.cpu27 : { *(.data.cpu27) }
+ . = ALIGN(32); __cpu28 = .; .data.cpu28 : { *(.data.cpu28) }
+ . = ALIGN(32); __cpu29 = .; .data.cpu29 : { *(.data.cpu29) }
+ . = ALIGN(32); __cpu30 = .; .data.cpu30 : { *(.data.cpu30) }
+ . = ALIGN(32); __cpu31 = .; .data.cpu31 : { *(.data.cpu31) }
+
_edata = .; /* End of data section */
. = ALIGN(8192); /* init_task */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.1-pre5/arch/ppc/vmlinux.lds working-2.5.1-pre5-percpu/arch/ppc/vmlinux.lds
--- linux-2.5.1-pre5/arch/ppc/vmlinux.lds Tue Aug 28 23:58:33 2001
+++ working-2.5.1-pre5-percpu/arch/ppc/vmlinux.lds Wed Dec 5 18:03:57 2001
@@ -58,6 +58,41 @@
*(.dynamic)
CONSTRUCTORS
}
+
+ /* Per-cpu sections: cache-line aligned */
+ . = ALIGN(32); __cpu0 = .; .data.cpu0 : { *(.data.cpu0) }
+ . = ALIGN(32); __cpu1 = .; .data.cpu1 : { *(.data.cpu1) }
+ . = ALIGN(32); __cpu2 = .; .data.cpu2 : { *(.data.cpu2) }
+ . = ALIGN(32); __cpu3 = .; .data.cpu3 : { *(.data.cpu3) }
+ . = ALIGN(32); __cpu4 = .; .data.cpu4 : { *(.data.cpu4) }
+ . = ALIGN(32); __cpu5 = .; .data.cpu5 : { *(.data.cpu5) }
+ . = ALIGN(32); __cpu6 = .; .data.cpu6 : { *(.data.cpu6) }
+ . = ALIGN(32); __cpu7 = .; .data.cpu7 : { *(.data.cpu7) }
+ . = ALIGN(32); __cpu8 = .; .data.cpu8 : { *(.data.cpu8) }
+ . = ALIGN(32); __cpu9 = .; .data.cpu9 : { *(.data.cpu9) }
+ . = ALIGN(32); __cpu10 = .; .data.cpu10 : { *(.data.cpu10) }
+ . = ALIGN(32); __cpu11 = .; .data.cpu11 : { *(.data.cpu11) }
+ . = ALIGN(32); __cpu12 = .; .data.cpu12 : { *(.data.cpu12) }
+ . = ALIGN(32); __cpu13 = .; .data.cpu13 : { *(.data.cpu13) }
+ . = ALIGN(32); __cpu14 = .; .data.cpu14 : { *(.data.cpu14) }
+ . = ALIGN(32); __cpu15 = .; .data.cpu15 : { *(.data.cpu15) }
+ . = ALIGN(32); __cpu16 = .; .data.cpu16 : { *(.data.cpu16) }
+ . = ALIGN(32); __cpu17 = .; .data.cpu17 : { *(.data.cpu17) }
+ . = ALIGN(32); __cpu18 = .; .data.cpu18 : { *(.data.cpu18) }
+ . = ALIGN(32); __cpu19 = .; .data.cpu19 : { *(.data.cpu19) }
+ . = ALIGN(32); __cpu20 = .; .data.cpu20 : { *(.data.cpu20) }
+ . = ALIGN(32); __cpu21 = .; .data.cpu21 : { *(.data.cpu21) }
+ . = ALIGN(32); __cpu22 = .; .data.cpu22 : { *(.data.cpu22) }
+ . = ALIGN(32); __cpu23 = .; .data.cpu23 : { *(.data.cpu23) }
+ . = ALIGN(32); __cpu24 = .; .data.cpu24 : { *(.data.cpu24) }
+ . = ALIGN(32); __cpu25 = .; .data.cpu25 : { *(.data.cpu25) }
+ . = ALIGN(32); __cpu26 = .; .data.cpu26 : { *(.data.cpu26) }
+ . = ALIGN(32); __cpu27 = .; .data.cpu27 : { *(.data.cpu27) }
+ . = ALIGN(32); __cpu28 = .; .data.cpu28 : { *(.data.cpu28) }
+ . = ALIGN(32); __cpu29 = .; .data.cpu29 : { *(.data.cpu29) }
+ . = ALIGN(32); __cpu30 = .; .data.cpu30 : { *(.data.cpu30) }
+ . = ALIGN(32); __cpu31 = .; .data.cpu31 : { *(.data.cpu31) }
+
_edata = .;
PROVIDE (edata = .);
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2001-12-05 22:09 [PATCH] 2.5.1-pre5: per-cpu areas Rusty Russell
@ 2001-12-06 7:21 ` Keith Owens
2001-12-06 8:07 ` David S. Miller
2001-12-06 9:18 ` Chris Wedgwood
2 siblings, 0 replies; 30+ messages in thread
From: Keith Owens @ 2001-12-06 7:21 UTC (permalink / raw)
To: Rusty Russell; +Cc: linux-kernel, torvalds
On Thu, 06 Dec 2001 09:09:35 +1100,
Rusty Russell <rusty@rustcorp.com.au> wrote:
>The following patch implements convenient per-cpu areas:
Did you look at PERCPU_ADDR in ia64? Much (all?) of the per cpu data
is in struct cpuinfo_ia64 which is at the same virtual address on all
cpus but with different physical addresses on each cpu. Let the mmu do
the work. S390 does a similar trick, using the Prefixed Save Area
(PSA) which is virtual 0 but different physical addresses on each cpu.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2001-12-05 22:09 [PATCH] 2.5.1-pre5: per-cpu areas Rusty Russell
2001-12-06 7:21 ` Keith Owens
@ 2001-12-06 8:07 ` David S. Miller
2001-12-06 9:18 ` Chris Wedgwood
2 siblings, 0 replies; 30+ messages in thread
From: David S. Miller @ 2001-12-06 8:07 UTC (permalink / raw)
To: kaos; +Cc: rusty, linux-kernel, torvalds
From: Keith Owens <kaos@ocs.com.au>
Date: Thu, 06 Dec 2001 18:21:25 +1100
Did you look at PERCPU_ADDR in ia64? Much (all?) of the per cpu data
is in struct cpuinfo_ia64 which is at the same virtual address on all
cpus but with different physical addresses on each cpu. Let the mmu do
the work.
What an absolutely awful waste of a TLB entry.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2001-12-05 22:09 [PATCH] 2.5.1-pre5: per-cpu areas Rusty Russell
2001-12-06 7:21 ` Keith Owens
2001-12-06 8:07 ` David S. Miller
@ 2001-12-06 9:18 ` Chris Wedgwood
2001-12-07 15:03 ` Pavel Machek
2 siblings, 1 reply; 30+ messages in thread
From: Chris Wedgwood @ 2001-12-06 9:18 UTC (permalink / raw)
To: Rusty Russell; +Cc: linux-kernel, torvalds
On Thu, Dec 06, 2001 at 09:09:35AM +1100, Rusty Russell wrote:
The following patch implements convenient per-cpu areas:
DECLARE_PER_CPU(int myvar);
Where or why do we need this?
--cw
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2001-12-06 9:18 ` Chris Wedgwood
@ 2001-12-07 15:03 ` Pavel Machek
0 siblings, 0 replies; 30+ messages in thread
From: Pavel Machek @ 2001-12-07 15:03 UTC (permalink / raw)
To: Chris Wedgwood; +Cc: Rusty Russell, linux-kernel, torvalds
Hi!
> On Thu, Dec 06, 2001 at 09:09:35AM +1100, Rusty Russell wrote:
>
> The following patch implements convenient per-cpu areas:
> DECLARE_PER_CPU(int myvar);
>
> Where or why do we need this?
DECLARE_PER_CPU(struct task *current). Same for slab quick-alloc
lists, performance counters, ...
Pavel
--
"I do not steal MS software. It is not worth it."
-- Pavel Kankovsky
^ permalink raw reply [flat|nested] 30+ messages in thread
[parent not found: <15504.7958.677592.908691@napali.hpl.hp.com>]
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
[not found] <15504.7958.677592.908691@napali.hpl.hp.com>
@ 2002-03-14 4:37 ` Rusty Russell
2002-03-14 5:05 ` Jeff Garzik
` (2 more replies)
0 siblings, 3 replies; 30+ messages in thread
From: Rusty Russell @ 2002-03-14 4:37 UTC (permalink / raw)
To: davidm; +Cc: linux-kernel, torvalds, rth
In message <15504.7958.677592.908691@napali.hpl.hp.com> you write:
> OK, I see this. Unfortunately, HIDE_RELOC() causes me problems
> because it prevents me from taking the address of a per-CPU variable.
> This is useful when you have a per-CPU structure (e.g., cpu_info).
> Perhaps there should/could be a version of HIDE_RELOC() that doesn't
> dereference the resulting address?
Yep, valid point. Patch below: please play.
> I am also a bit concerned however about aliasing that the compiler
> might not detect. For example, with this code:
>
> this_cpu(foo) = 13;
> per_cpu(foo, 0) = 15;
> printf("foo=%d\n", this_cpu(foo));
>
> might print the wrong value if gcc thinks that the first and second
> assignment never alias each other. Does HIDE_RELOC() take care of
> this also?
I'd be pretty sure the compiler can't assume that. Richard would
know...
> On a side-note, would you mind moving __per_cpu_data from smp.h into
> compiler.h? I'd like to use it in processor.h and from that file, I
> can't include smp.h due to a recursive dependency.
I had to fight a similar #include battle with sched.h recently.
I came out with the conclusion that it's better to create a new
include than shuffle inappropriate stuff into other includes to solve
these kinds of issues.
Cheers!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/compiler.h working-2.5.7-pre1-nfarp/include/linux/compiler.h
--- linux-2.5.7-pre1/include/linux/compiler.h Fri Mar 8 14:49:29 2002
+++ working-2.5.7-pre1-nfarp/include/linux/compiler.h Thu Mar 14 15:32:38 2002
@@ -13,10 +13,4 @@
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
-/* This macro obfuscates arithmetic on a variable address so that gcc
- shouldn't recognize the original var, and make assumptions about it */
-#define RELOC_HIDE(var, off) \
- ({ __typeof__(&(var)) __ptr; \
- __asm__ ("" : "=g"(__ptr) : "0"((void *)&(var) + (off))); \
- *__ptr; })
#endif /* __LINUX_COMPILER_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/percpu.h working-2.5.7-pre1-nfarp/include/linux/percpu.h
--- linux-2.5.7-pre1/include/linux/percpu.h Thu Jan 1 10:00:00 1970
+++ working-2.5.7-pre1-nfarp/include/linux/percpu.h Thu Mar 14 15:32:44 2002
@@ -0,0 +1,28 @@
+#ifndef __LINUX_PERCPU_H
+#define __LINUX_PERCPU_H
+
+/* This macro obfuscates arithmetic on a variable address so that gcc
+ shouldn't recognize the original var, and make assumptions about it */
+#define RELOC_HIDE(ptr, off) \
+ ({ __typeof__(ptr) __ptr; \
+ __asm__ ("" : "=g"(__ptr) : "0"((void *)(ptr) + (off))); \
+ __ptr; })
+
+#ifdef CONFIG_SMP
+#define __per_cpu_data __attribute__((section(".data.percpu")))
+
+#ifndef __HAVE_ARCH_PER_CPU
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&var, per_cpu_offset(cpu)))
+
+#define this_cpu(var) per_cpu(var, smp_processor_id())
+#endif /* !__HAVE_ARCH_PER_CPU */
+#else
+#define __per_cpu_data
+#define per_cpu(var, cpu) var
+#define this_cpu(var) var
+#endif /* CONFIG_SMP */
+
+#endif /* __LINUX_PERCPU_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/smp.h working-2.5.7-pre1-nfarp/include/linux/smp.h
--- linux-2.5.7-pre1/include/linux/smp.h Mon Mar 11 14:33:14 2002
+++ working-2.5.7-pre1-nfarp/include/linux/smp.h Thu Mar 14 15:32:23 2002
@@ -72,16 +72,6 @@
#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/
#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
-#define __per_cpu_data __attribute__((section(".data.percpu")))
-
-#ifndef __HAVE_ARCH_PER_CPU
-extern unsigned long __per_cpu_offset[NR_CPUS];
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) RELOC_HIDE(var, per_cpu_offset(cpu))
-
-#define this_cpu(var) per_cpu(var, smp_processor_id())
-#endif /* !__HAVE_ARCH_PER_CPU */
#else /* !SMP */
/*
@@ -101,9 +91,5 @@
#define cpu_online_map 1
static inline void smp_send_reschedule(int cpu) { }
static inline void smp_send_reschedule_all(void) { }
-#define __per_cpu_data
-#define per_cpu(var, cpu) var
-#define this_cpu(var) var
-
#endif
#endif
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 4:37 ` Rusty Russell
@ 2002-03-14 5:05 ` Jeff Garzik
2002-03-14 11:14 ` Rusty Russell
2002-03-14 9:37 ` Richard Henderson
2002-03-14 18:06 ` David Mosberger
2 siblings, 1 reply; 30+ messages in thread
From: Jeff Garzik @ 2002-03-14 5:05 UTC (permalink / raw)
To: Rusty Russell; +Cc: davidm, linux-kernel, torvalds, rth
Rusty Russell wrote:
>In message <15504.7958.677592.908691@napali.hpl.hp.com> you write:
>
>>OK, I see this. Unfortunately, HIDE_RELOC() causes me problems
>>because it prevents me from taking the address of a per-CPU variable.
>>This is useful when you have a per-CPU structure (e.g., cpu_info).
>>Perhaps there should/could be a version of HIDE_RELOC() that doesn't
>>dereference the resulting address?
>>
>
>Yep, valid point. Patch below: please play.
>
>diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/compiler.h working-2.5.7-pre1-nfarp/include/linux/compiler.h
>--- linux-2.5.7-pre1/include/linux/compiler.h Fri Mar 8 14:49:29 2002
>+++ working-2.5.7-pre1-nfarp/include/linux/compiler.h Thu Mar 14 15:32:38 2002
>@@ -13,10 +13,4 @@
> #define likely(x) __builtin_expect((x),1)
> #define unlikely(x) __builtin_expect((x),0)
>
>-/* This macro obfuscates arithmetic on a variable address so that gcc
>- shouldn't recognize the original var, and make assumptions about it */
>-#define RELOC_HIDE(var, off) \
>- ({ __typeof__(&(var)) __ptr; \
>- __asm__ ("" : "=g"(__ptr) : "0"((void *)&(var) + (off))); \
>- *__ptr; })
> #endif /* __LINUX_COMPILER_H */
>diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/percpu.h working-2.5.7-pre1-nfarp/include/linux/percpu.h
>--- linux-2.5.7-pre1/include/linux/percpu.h Thu Jan 1 10:00:00 1970
>+++ working-2.5.7-pre1-nfarp/include/linux/percpu.h Thu Mar 14 15:32:44 2002
>@@ -0,0 +1,28 @@
>+#ifndef __LINUX_PERCPU_H
>+#define __LINUX_PERCPU_H
>+
>+/* This macro obfuscates arithmetic on a variable address so that gcc
>+ shouldn't recognize the original var, and make assumptions about it */
>+#define RELOC_HIDE(ptr, off) \
>+ ({ __typeof__(ptr) __ptr; \
>+ __asm__ ("" : "=g"(__ptr) : "0"((void *)(ptr) + (off))); \
>+ __ptr; })
>
Your other changes look good, but RELOC_HIDE really does belong in
compiler.h... and percpu.h is a particularly poor choice of destination.
Why not satisfy DavidM's objection by creating (or stating you allow
the creation of) __RELOC_HIDE or similar. Or perhaps call your version
__RELOC_HIDE, if yours is not the normal case?
It really shouldn't be moved from where it belongs, linux/compiler.h...
Jeff
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 5:05 ` Jeff Garzik
@ 2002-03-14 11:14 ` Rusty Russell
2002-03-14 11:26 ` Jeff Garzik
0 siblings, 1 reply; 30+ messages in thread
From: Rusty Russell @ 2002-03-14 11:14 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-kernel, torvalds, rth
In message <3C902FA5.5010208@mandrakesoft.com> you write:
> Your other changes look good, but RELOC_HIDE really does belong in
> compiler.h... and percpu.h is a particularly poor choice of destination.
How? compiler.h is for things which vary based on compiler versions.
It was an arbitrary and relatively crappy place to put it: I only put
it there so PPC could use it...
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 11:14 ` Rusty Russell
@ 2002-03-14 11:26 ` Jeff Garzik
2002-03-14 12:16 ` Rusty Russell
2002-03-15 1:00 ` Richard Henderson
0 siblings, 2 replies; 30+ messages in thread
From: Jeff Garzik @ 2002-03-14 11:26 UTC (permalink / raw)
To: Rusty Russell; +Cc: linux-kernel, torvalds, rth
Rusty Russell wrote:
>In message <3C902FA5.5010208@mandrakesoft.com> you write:
>
>>Your other changes look good, but RELOC_HIDE really does belong in
>>compiler.h... and percpu.h is a particularly poor choice of destination.
>>
>
>How? compiler.h is for things which vary based on compiler versions.
>
The name "linux/compiler.h" does not imply that to me, nor do the
comments in the file, which are related specifically to __builtin_expect.
RELOC_HIDE is a potentially general facility (with the caveat below),
that does not seem to directly relate to the name "linux/percpu.h" at
all, except by happenstance due to its origins. Subjectively it seemed
to me that compiler.h was the most appropriate. Maybe kernel.h is a
better choice, in others' eyes. But I think percpu.h is probably the
wrong home.
>
>It was an arbitrary and relatively crappy place to put it: I only put
>it there so PPC could use it...
>
Will other arches -ever- use the macro? If not, include/asm-ppc is a
better place...
Jeff "mountain out of a molehill" Garzik
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 11:26 ` Jeff Garzik
@ 2002-03-14 12:16 ` Rusty Russell
2002-03-14 12:25 ` Jeff Garzik
2002-03-15 1:00 ` Richard Henderson
1 sibling, 1 reply; 30+ messages in thread
From: Rusty Russell @ 2002-03-14 12:16 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-kernel, torvalds, rth
In message <3C9088F0.8090602@mandrakesoft.com> you write:
> >It was an arbitrary and relatively crappy place to put it: I only put
> >it there so PPC could use it...
> >
> Will other arches -ever- use the macro? If not, include/asm-ppc is a
> better place...
>
> Jeff "mountain out of a molehill" Garzik
Yes, clearly this is a decision which should not be rushed into. I
suggest long and vigorous debate on linux-kernel, with mentions of
devfs, source control systems...
Honestly, I think "compiler.h" is a really bad grouping if it's for
compiler-specific things: surely things should be grouped by
function. Hence all of compiler.h should be moved to kernel.h anyway.
Here's my latest attempt (untested: for reading only), which leaves it
in compiler.h: possession is 9/10 of the law...
Cheers!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/asm-generic/percpu.h working-2.5.7-pre1-percpu/include/asm-generic/percpu.h
--- linux-2.5.7-pre1/include/asm-generic/percpu.h Thu Jan 1 10:00:00 1970
+++ working-2.5.7-pre1-percpu/include/asm-generic/percpu.h Thu Mar 14 23:14:17 2002
@@ -0,0 +1,16 @@
+#ifndef _ASM_GENERIC_PERCPU_H_
+#define _ASM_GENERIC_PERCPU_H_
+
+#define __GENERIC_PER_CPU
+#include <linux/compiler.h>
+
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* var is in discarded region: offset to particular copy we want */
+/* You can't take address. Use per_cpu_ptr for that */
+#define per_cpu(var, cpu) ({ *RELOC_HIDE(&var, per_cpu_offset(cpu)); })
+#define per_cpu_ptr(var, cpu) RELOC_HIDE(&var, per_cpu_offset(cpu))
+
+#define this_cpu(var) per_cpu(var, smp_processor_id())
+
+#endif /* _ASM_GENERIC_PERCPU_H_ */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/asm-i386/percpu.h working-2.5.7-pre1-percpu/include/asm-i386/percpu.h
--- linux-2.5.7-pre1/include/asm-i386/percpu.h Thu Jan 1 10:00:00 1970
+++ working-2.5.7-pre1-percpu/include/asm-i386/percpu.h Thu Mar 14 22:59:52 2002
@@ -0,0 +1,8 @@
+#ifndef __ARCH_I386_PERCPU__
+#define __ARCH_I386_PERCPU__
+
+#ifdef CONFIG_SMP
+#include <asm-generic/percpu.h>
+#endif
+
+#endif /* __ARCH_I386_PERCPU__ */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/compiler.h working-2.5.7-pre1-percpu/include/linux/compiler.h
--- linux-2.5.7-pre1/include/linux/compiler.h Fri Mar 8 14:49:29 2002
+++ working-2.5.7-pre1-percpu/include/linux/compiler.h Thu Mar 14 22:54:53 2002
@@ -15,8 +15,8 @@
/* This macro obfuscates arithmetic on a variable address so that gcc
shouldn't recognize the original var, and make assumptions about it */
-#define RELOC_HIDE(var, off) \
- ({ __typeof__(&(var)) __ptr; \
- __asm__ ("" : "=g"(__ptr) : "0"((void *)&(var) + (off))); \
- *__ptr; })
+#define RELOC_HIDE(ptr, off) \
+ ({ __typeof__(ptr) __ptr; \
+ __asm__ ("" : "=g"(__ptr) : "0"((void *)(ptr) + (off))); \
+ __ptr; })
#endif /* __LINUX_COMPILER_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/percpu.h working-2.5.7-pre1-percpu/include/linux/percpu.h
--- linux-2.5.7-pre1/include/linux/percpu.h Thu Jan 1 10:00:00 1970
+++ working-2.5.7-pre1-percpu/include/linux/percpu.h Thu Mar 14 22:52:25 2002
@@ -0,0 +1,22 @@
+#ifndef __LINUX_PERCPU_H
+#define __LINUX_PERCPU_H
+#include <linux/compiler.h>
+
+#ifdef CONFIG_SMP
+#define __per_cpu_data __attribute__((section(".data.percpu")))
+
+#ifndef __HAVE_ARCH_PER_CPU
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&var, per_cpu_offset(cpu)))
+
+#define this_cpu(var) per_cpu(var, smp_processor_id())
+#endif /* !__HAVE_ARCH_PER_CPU */
+#else
+#define __per_cpu_data
+#define per_cpu(var, cpu) var
+#define this_cpu(var) var
+#endif /* CONFIG_SMP */
+
+#endif /* __LINUX_PERCPU_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/include/linux/smp.h working-2.5.7-pre1-percpu/include/linux/smp.h
--- linux-2.5.7-pre1/include/linux/smp.h Mon Mar 11 14:33:14 2002
+++ working-2.5.7-pre1-percpu/include/linux/smp.h Thu Mar 14 23:15:00 2002
@@ -74,14 +74,6 @@
#define __per_cpu_data __attribute__((section(".data.percpu")))
-#ifndef __HAVE_ARCH_PER_CPU
-extern unsigned long __per_cpu_offset[NR_CPUS];
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) RELOC_HIDE(var, per_cpu_offset(cpu))
-
-#define this_cpu(var) per_cpu(var, smp_processor_id())
-#endif /* !__HAVE_ARCH_PER_CPU */
#else /* !SMP */
/*
@@ -102,7 +94,8 @@
static inline void smp_send_reschedule(int cpu) { }
static inline void smp_send_reschedule_all(void) { }
#define __per_cpu_data
-#define per_cpu(var, cpu) var
+#define per_cpu(var, cpu) ({ var; })
+#define per_cpu_ptr(var, cpu) (&(var))
#define this_cpu(var) var
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.7-pre1/init/main.c working-2.5.7-pre1-percpu/init/main.c
--- linux-2.5.7-pre1/init/main.c Fri Mar 8 14:49:30 2002
+++ working-2.5.7-pre1-percpu/init/main.c Thu Mar 14 22:58:06 2002
@@ -275,7 +275,7 @@
}
#else
-#ifndef __HAVE_ARCH_PER_CPU
+#ifdef __GENERIC_PER_CPU
unsigned long __per_cpu_offset[NR_CPUS];
static void __init setup_per_cpu_areas(void)
@@ -297,7 +297,7 @@
memcpy(ptr, __per_cpu_start, size);
}
}
-#endif /* !__HAVE_ARCH_PER_CPU */
+#endif /* __GENERIC_PER_CPU */
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 12:16 ` Rusty Russell
@ 2002-03-14 12:25 ` Jeff Garzik
0 siblings, 0 replies; 30+ messages in thread
From: Jeff Garzik @ 2002-03-14 12:25 UTC (permalink / raw)
To: Rusty Russell; +Cc: linux-kernel, torvalds, rth
Rusty Russell wrote:
>In message <3C9088F0.8090602@mandrakesoft.com> you write:
>
>>>It was an arbitrary and relatively crappy place to put it: I only put
>>>it there so PPC could use it...
>>>
>>Will other arches -ever- use the macro? If not, include/asm-ppc is a
>>better place...
>>
>> Jeff "mountain out of a molehill" Garzik
>>
>
>Yes, clearly this is a decision which should not be rushed into. I
>suggest long and vigorous debate on linux-kernel, with mentions of
>devfs, source control systems...
>
:)
Anyway, the patch looks good to me :)
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 11:26 ` Jeff Garzik
2002-03-14 12:16 ` Rusty Russell
@ 2002-03-15 1:00 ` Richard Henderson
1 sibling, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2002-03-15 1:00 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Rusty Russell, linux-kernel, torvalds
On Thu, Mar 14, 2002 at 06:26:40AM -0500, Jeff Garzik wrote:
> Will other arches -ever- use the macro? If not, include/asm-ppc is a
> better place...
Probably most of them should use it.
r~
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 4:37 ` Rusty Russell
2002-03-14 5:05 ` Jeff Garzik
@ 2002-03-14 9:37 ` Richard Henderson
2002-03-14 18:06 ` David Mosberger
2 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2002-03-14 9:37 UTC (permalink / raw)
To: Rusty Russell; +Cc: davidm, linux-kernel, torvalds
On Thu, Mar 14, 2002 at 03:37:38PM +1100, Rusty Russell wrote:
> > I am also a bit concerned however about aliasing that the compiler
> > might not detect. For example, with this code:
> >
> > this_cpu(foo) = 13;
> > per_cpu(foo, 0) = 15;
> > printf("foo=%d\n", this_cpu(foo));
> >
> > might print the wrong value if gcc thinks that the first and second
> > assignment never alias each other. Does HIDE_RELOC() take care of
> > this also?
>
> I'd be pretty sure the compiler can't assume that. Richard would
> know...
I can't think of a way your current code is invalid. It's all
hidden behind an asm. The compiler could guess the two addresses
are the same iff smp_processor_id() is the constant 0, aka UP.
> > On a side-note, would you mind moving __per_cpu_data from smp.h into
> > compiler.h? I'd like to use it in processor.h and from that file, I
> > can't include smp.h due to a recursive dependency.
This definitely needs to be per-architecture. On Alpha, I think I
can use the Thread Local Storage model to be added to binutils 2.13
(and potentially compiler support to gcc 3.[23]). IA-64 may be able
to do the same. It's certain that x86 can't, since the userland
model requires %gs:0 point to the thread base, and the kernel folk
would never cotton to the segment swapping that would be needed.
r~
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] 2.5.1-pre5: per-cpu areas
2002-03-14 4:37 ` Rusty Russell
2002-03-14 5:05 ` Jeff Garzik
2002-03-14 9:37 ` Richard Henderson
@ 2002-03-14 18:06 ` David Mosberger
2 siblings, 0 replies; 30+ messages in thread
From: David Mosberger @ 2002-03-14 18:06 UTC (permalink / raw)
To: Richard Henderson; +Cc: Rusty Russell, davidm, linux-kernel, torvalds
>>>>> On Thu, 14 Mar 2002 01:37:21 -0800, Richard Henderson <rth@twiddle.net> said:
Richard> This definitely needs to be per-architecture. On Alpha, I
Richard> think I can use the Thread Local Storage model to be added
Richard> to binutils 2.13 (and potentially compiler support to gcc
Richard> 3.[23]). IA-64 may be able to do the same. It's certain
Richard> that x86 can't, since the userland model requires %gs:0
Richard> point to the thread base, and the kernel folk would never
Richard> cotton to the segment swapping that would be needed.
Actually, on ia64 I want to use a pinned TLB entry to map the per-CPU
area. This has the advantage that accessing the local version of a
per-CPU variable has zero extra overhead. We have been doing this for
the cpu_info structure for a while now.
--david
^ permalink raw reply [flat|nested] 30+ messages in thread
[parent not found: <15504.7958.677592.908691@napali.hpl.hp.com.suse.lists.linux.kernel>]
end of thread, other threads:[~2002-03-19 17:06 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-12-05 22:09 [PATCH] 2.5.1-pre5: per-cpu areas Rusty Russell
2001-12-06 7:21 ` Keith Owens
2001-12-06 8:07 ` David S. Miller
2001-12-06 9:18 ` Chris Wedgwood
2001-12-07 15:03 ` Pavel Machek
[not found] <15504.7958.677592.908691@napali.hpl.hp.com>
2002-03-14 4:37 ` Rusty Russell
2002-03-14 5:05 ` Jeff Garzik
2002-03-14 11:14 ` Rusty Russell
2002-03-14 11:26 ` Jeff Garzik
2002-03-14 12:16 ` Rusty Russell
2002-03-14 12:25 ` Jeff Garzik
2002-03-15 1:00 ` Richard Henderson
2002-03-14 9:37 ` Richard Henderson
2002-03-14 18:06 ` David Mosberger
[not found] <15504.7958.677592.908691@napali.hpl.hp.com.suse.lists.linux.kernel>
[not found] ` <E16lMzi-0002bb-00@wagner.rustcorp.com.au.suse.lists.linux.kernel>
2002-03-14 8:39 ` Andi Kleen
2002-03-14 11:09 ` Rusty Russell
2002-03-14 11:14 ` Andi Kleen
2002-03-14 19:48 ` H. Peter Anvin
2002-03-14 18:04 ` David Mosberger
2002-03-14 18:51 ` Andi Kleen
2002-03-15 4:07 ` Rusty Russell
2002-03-15 9:13 ` Andi Kleen
2002-03-17 7:17 ` Rusty Russell
2002-03-18 7:35 ` Andi Kleen
2002-03-19 0:02 ` Rusty Russell
2002-03-19 0:08 ` J.A. Magallon
2002-03-19 0:15 ` Andi Kleen
2002-03-19 17:05 ` Richard Henderson
2002-03-15 4:19 ` David Mosberger
2002-03-15 5:52 ` Rusty Russell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).