All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] uninline init_waitqueue_*() functions
@ 2006-07-05  8:49 Ingo Molnar
  2006-07-05  9:31 ` Andrew Morton
  2006-07-05  9:54 ` [patch] uninline init_waitqueue_*() functions, fix Ingo Molnar
  0 siblings, 2 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05  8:49 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds; +Cc: linux-kernel, Arjan van de Ven

Subject: uninline init_waitqueue_*() functions
From: Ingo Molnar <mingo@elte.hu>

some wait.h inlines are way too large: init_waitqueue_entry() and
init_waitqueue_func_entry() generate 20-30 bytes of inlined code
per call site, and init_waitqueue_head() is 30-40 bytes (on x86).

allyesconfig vmlinux size delta:

  text            data    bss     dec          filename
  21459355        6286210 4520408 32265973     vmlinux.before
  21424564        6281246 4516912 32222722     vmlinux.after

So 34K (0.16%) of kernel text saved. Not too bad.

(as an added bonus this also removes a lockdep annotation.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/wait.h |   29 +++--------------------------
 kernel/wait.c        |   26 ++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 28 deletions(-)

Index: linux/include/linux/wait.h
===================================================================
--- linux.orig/include/linux/wait.h
+++ linux/include/linux/wait.h
@@ -77,32 +77,9 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
 
-/*
- * lockdep: we want one lock-class for all waitqueue locks.
- */
-extern struct lock_class_key waitqueue_lock_key;
-
-static inline void init_waitqueue_head(wait_queue_head_t *q)
-{
-	spin_lock_init(&q->lock);
-	lockdep_set_class(&q->lock, &waitqueue_lock_key);
-	INIT_LIST_HEAD(&q->task_list);
-}
-
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
-{
-	q->flags = 0;
-	q->private = p;
-	q->func = default_wake_function;
-}
-
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
-					wait_queue_func_t func)
-{
-	q->flags = 0;
-	q->private = NULL;
-	q->func = func;
-}
+extern void init_waitqueue_head(wait_queue_head_t *q);
+extern void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p);
+extern void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func);
 
 static inline int waitqueue_active(wait_queue_head_t *q)
 {
Index: linux/kernel/wait.c
===================================================================
--- linux.orig/kernel/wait.c
+++ linux/kernel/wait.c
@@ -10,9 +10,31 @@
 #include <linux/wait.h>
 #include <linux/hash.h>
 
-struct lock_class_key waitqueue_lock_key;
+void init_waitqueue_head(wait_queue_head_t *q)
+{
+	spin_lock_init(&q->lock);
+	INIT_LIST_HEAD(&q->task_list);
+}
+
+EXPORT_SYMBOL(init_waitqueue_head);
+
+void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+{
+	q->flags = 0;
+	q->private = p;
+	q->func = default_wake_function;
+}
+
+EXPORT_SYMBOL(init_waitqueue_entry);
+
+void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+{
+	q->flags = 0;
+	q->private = NULL;
+	q->func = func;
+}
 
-EXPORT_SYMBOL(waitqueue_lock_key);
+EXPORT_SYMBOL(init_waitqueue_func_entry);
 
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 {

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05  8:49 [patch] uninline init_waitqueue_*() functions Ingo Molnar
@ 2006-07-05  9:31 ` Andrew Morton
  2006-07-05  9:32   ` Ingo Molnar
  2006-07-05  9:54 ` [patch] uninline init_waitqueue_*() functions, fix Ingo Molnar
  1 sibling, 1 reply; 133+ messages in thread
From: Andrew Morton @ 2006-07-05  9:31 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, arjan

On Wed, 5 Jul 2006 10:49:14 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> Subject: uninline init_waitqueue_*() functions
> From: Ingo Molnar <mingo@elte.hu>
> 
> some wait.h inlines are way too large: init_waitqueue_entry() and
> init_waitqueue_func_entry() generate 20-30 bytes of inlined code
> per call site, and init_waitqueue_head() is 30-40 bytes (on x86).

20-30 bytes for

> -static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
> -{
> -	q->flags = 0;
> -	q->private = p;
> -	q->func = default_wake_function;
> -}

seem too much.

This:

--- a/fs/select.c~a
+++ a/fs/select.c
@@ -129,6 +129,7 @@ static void __pollwait(struct file *filp
 	entry->filp = filp;
 	entry->wait_address = wait_address;
 	init_waitqueue_entry(&entry->wait, current);
+	init_waitqueue_entry(&entry->wait, current);
 	add_wait_queue(wait_address,&entry->wait);
 }
 

Increases fs/select.o by only seven bytes.


And this:

diff -puN fs/select.c~a fs/select.c
--- a/fs/select.c~a
+++ a/fs/select.c
@@ -128,7 +128,7 @@ static void __pollwait(struct file *filp
 	get_file(filp);
 	entry->filp = filp;
 	entry->wait_address = wait_address;
-	init_waitqueue_entry(&entry->wait, current);
+	xxinit_waitqueue_entry(&entry->wait, current);
 	add_wait_queue(wait_address,&entry->wait);
 }
 
_

shrinks fs/select.o by eight bytes.  (More than I expected).  So it does
appear to be a space win, but a pretty slim one.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05  9:31 ` Andrew Morton
@ 2006-07-05  9:32   ` Ingo Molnar
  2006-07-05  9:53     ` Andrew Morton
  2006-07-05 10:37     ` Christoph Hellwig
  0 siblings, 2 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05  9:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> shrinks fs/select.o by eight bytes.  (More than I expected).  So it 
> does appear to be a space win, but a pretty slim one.

there are 855 calls to these functions in the allyesconfig vmlinux i 
did, and i measured a combined size reduction of 34791 bytes. That 
averages to a 40 bytes win per call site. (on i386.)

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05  9:32   ` Ingo Molnar
@ 2006-07-05  9:53     ` Andrew Morton
  2006-07-05 10:26       ` Ingo Molnar
  2006-07-05 10:37     ` Christoph Hellwig
  1 sibling, 1 reply; 133+ messages in thread
From: Andrew Morton @ 2006-07-05  9:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, arjan

On Wed, 5 Jul 2006 11:32:59 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Andrew Morton <akpm@osdl.org> wrote:
> 
> > shrinks fs/select.o by eight bytes.  (More than I expected).  So it 
> > does appear to be a space win, but a pretty slim one.
> 
> there are 855 calls to these functions in the allyesconfig vmlinux i 
> did, and i measured a combined size reduction of 34791 bytes. That 
> averages to a 40 bytes win per call site. (on i386.)
> 

Yes, but that lumps all three together.  init_waitqueue_head() is obviously
the porky one.  And it's porkier with CONFIG_DEBUG_SPINLOCK and
CONFIG_LOCKDEP, which isn't the case to optimise for.

With the debug options turned off, even init_waitqueue_head() becomes just
three assignments, similar to init_waitqueue_entry() and
init_waitqueue_func_entry().  All pretty marginal.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* [patch] uninline init_waitqueue_*() functions, fix
  2006-07-05  8:49 [patch] uninline init_waitqueue_*() functions Ingo Molnar
  2006-07-05  9:31 ` Andrew Morton
@ 2006-07-05  9:54 ` Ingo Molnar
  1 sibling, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05  9:54 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds; +Cc: linux-kernel, Arjan van de Ven

Subject: uninline init_waitqueue_*() functions, fix
From: Ingo Molnar <mingo@elte.hu>

fix lockdep on-stack-completion initializer, now that waitqueue_lock_key 
is private to kernel/wait.c.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86_64/kernel/smpboot.c |    4 +---
 include/linux/completion.h   |    5 ++++-
 2 files changed, 5 insertions(+), 4 deletions(-)

Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -771,12 +771,10 @@ static int __cpuinit do_boot_cpu(int cpu
 	unsigned long start_rip;
 	struct create_idle c_idle = {
 		.cpu = cpu,
-		.done = COMPLETION_INITIALIZER(c_idle.done),
+		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
 	DECLARE_WORK(work, do_fork_idle, &c_idle);
 
-	lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
-
 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
 	if (!cpu_gdt_descr[cpu].address &&
 		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
Index: linux/include/linux/completion.h
===================================================================
--- linux.orig/include/linux/completion.h
+++ linux/include/linux/completion.h
@@ -18,6 +18,9 @@ struct completion {
 #define COMPLETION_INITIALIZER(work) \
 	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
 
+#define COMPLETION_INITIALIZER_ONSTACK(work) \
+	({ init_completion(&work); work; })
+
 #define DECLARE_COMPLETION(work) \
 	struct completion work = COMPLETION_INITIALIZER(work)
 
@@ -28,7 +31,7 @@ struct completion {
  */
 #ifdef CONFIG_LOCKDEP
 # define DECLARE_COMPLETION_ONSTACK(work) \
-	struct completion work = ({ init_completion(&work); work; })
+	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
 #else
 # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
 #endif

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05  9:53     ` Andrew Morton
@ 2006-07-05 10:26       ` Ingo Molnar
  2006-07-05 11:30         ` Ingo Molnar
  0 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 10:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> > > shrinks fs/select.o by eight bytes.  (More than I expected).  So 
> > > it does appear to be a space win, but a pretty slim one.
> > 
> > there are 855 calls to these functions in the allyesconfig vmlinux i 
> > did, and i measured a combined size reduction of 34791 bytes. That 
> > averages to a 40 bytes win per call site. (on i386.)
> > 
> 
> Yes, but that lumps all three together.  init_waitqueue_head() is 
> obviously the porky one.  And it's porkier with CONFIG_DEBUG_SPINLOCK 
> and CONFIG_LOCKDEP, which isn't the case to optimise for.

true. I redid my tests with both lockdep and debug-spinlocks turned off:

  text            data    bss     dec             filename
  21172153        6077270 3081864 30331287        vmlinux.x32.after
  21198222        6077106 3081864 30357192        vmlinux.x32.before

with 851 callsites that's a 30.6 bytes win per call site (total 26K) - 
still not bad at all.

it's a win even on 64-bit:

  text            data    bss     dec             filename
  21237025        6997266 3327600 31561891        vmlinux.x64.after
  21252773        6997090 3327600 31577463        vmlinux.x64.before

with 755 callsites that's still a 20.8 bytes win per call site (total 
15K).

> With the debug options turned off, even init_waitqueue_head() becomes 
> just three assignments, similar to init_waitqueue_entry() and 
> init_waitqueue_func_entry().  All pretty marginal.

but three assignments could mean 3 offsets embedded in the instructions. 
(for init_waitqueue_entry we also embedd the address of 
default_wake_function) Even 3 assignments can add up to a footprint that 
is far from marginal.

The rough rule of thumb for inlining is that anything that is larger 
than one C statement is probably too large for inlining. (but even 
1-line statements might be too fat at times)

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05  9:32   ` Ingo Molnar
  2006-07-05  9:53     ` Andrew Morton
@ 2006-07-05 10:37     ` Christoph Hellwig
  2006-07-05 10:44       ` Andrew Morton
  1 sibling, 1 reply; 133+ messages in thread
From: Christoph Hellwig @ 2006-07-05 10:37 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, torvalds, linux-kernel, arjan

On Wed, Jul 05, 2006 at 11:32:59AM +0200, Ingo Molnar wrote:
> 
> * Andrew Morton <akpm@osdl.org> wrote:
> 
> > shrinks fs/select.o by eight bytes.  (More than I expected).  So it 
> > does appear to be a space win, but a pretty slim one.
> 
> there are 855 calls to these functions in the allyesconfig vmlinux i 
> did, and i measured a combined size reduction of 34791 bytes. That 
> averages to a 40 bytes win per call site. (on i386.)

And more importantly it's a function that's called in slowpathes per
definition.  So saving text sounds like a good idea, how minimal it
may be.

> 
> 	Ingo
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
---end quoted text---

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 10:37     ` Christoph Hellwig
@ 2006-07-05 10:44       ` Andrew Morton
  2006-07-05 10:46         ` Andrew Morton
  2006-07-05 10:47         ` Ingo Molnar
  0 siblings, 2 replies; 133+ messages in thread
From: Andrew Morton @ 2006-07-05 10:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: mingo, torvalds, linux-kernel, arjan

On Wed, 5 Jul 2006 11:37:56 +0100
Christoph Hellwig <hch@infradead.org> wrote:

> On Wed, Jul 05, 2006 at 11:32:59AM +0200, Ingo Molnar wrote:
> > 
> > * Andrew Morton <akpm@osdl.org> wrote:
> > 
> > > shrinks fs/select.o by eight bytes.  (More than I expected).  So it 
> > > does appear to be a space win, but a pretty slim one.
> > 
> > there are 855 calls to these functions in the allyesconfig vmlinux i 
> > did, and i measured a combined size reduction of 34791 bytes. That 
> > averages to a 40 bytes win per call site. (on i386.)
> 
> And more importantly it's a function that's called in slowpathes per
> definition.  So saving text sounds like a good idea, how minimal it
> may be.
> 

Well yes - as I said, it's a net win.  But 31 bytes per callsite seems
weird and makes one wonder what's going on.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 10:44       ` Andrew Morton
@ 2006-07-05 10:46         ` Andrew Morton
  2006-07-05 10:47         ` Ingo Molnar
  1 sibling, 0 replies; 133+ messages in thread
From: Andrew Morton @ 2006-07-05 10:46 UTC (permalink / raw)
  To: hch, mingo, torvalds, linux-kernel, arjan

On Wed, 5 Jul 2006 03:44:41 -0700
Andrew Morton <akpm@osdl.org> wrote:

> On Wed, 5 Jul 2006 11:37:56 +0100
> Christoph Hellwig <hch@infradead.org> wrote:
> 
> > On Wed, Jul 05, 2006 at 11:32:59AM +0200, Ingo Molnar wrote:
> > > 
> > > * Andrew Morton <akpm@osdl.org> wrote:
> > > 
> > > > shrinks fs/select.o by eight bytes.  (More than I expected).  So it 
> > > > does appear to be a space win, but a pretty slim one.
> > > 
> > > there are 855 calls to these functions in the allyesconfig vmlinux i 
> > > did, and i measured a combined size reduction of 34791 bytes. That 
> > > averages to a 40 bytes win per call site. (on i386.)
> > 
> > And more importantly it's a function that's called in slowpathes per
> > definition.  So saving text sounds like a good idea, how minimal it
> > may be.
> > 
> 
> Well yes - as I said, it's a net win.  But 31 bytes per callsite seems
> weird and makes one wonder what's going on.

That's 31 bytes *savings*.  After taking the hit of setting up for and
performing the function call.  Which implies more than 40 bytes per callsite
with the inlined functions.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 10:44       ` Andrew Morton
  2006-07-05 10:46         ` Andrew Morton
@ 2006-07-05 10:47         ` Ingo Molnar
  1 sibling, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 10:47 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Hellwig, torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> > > > shrinks fs/select.o by eight bytes.  (More than I expected).  So 
> > > > it does appear to be a space win, but a pretty slim one.
> > > 
> > > there are 855 calls to these functions in the allyesconfig vmlinux 
> > > i did, and i measured a combined size reduction of 34791 bytes. 
> > > That averages to a 40 bytes win per call site. (on i386.)
> > 
> > And more importantly it's a function that's called in slowpathes per 
> > definition.  So saving text sounds like a good idea, how minimal it 
> > may be.
> 
> Well yes - as I said, it's a net win.  But 31 bytes per callsite seems 
> weird and makes one wonder what's going on.

that's how big those instructions are roughly, while with the function 
call we likely have those parameters in registers already, mitigating 
some of the function-call overhead. (these tests were done with REGPARM 
turned on)

some of the assignments are also indirect, necessiating further loads, 
such as:

    INIT_LIST_HEAD(&q->task_list);

i roughly expected a net uninlining win of roughly this magnitude.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 10:26       ` Ingo Molnar
@ 2006-07-05 11:30         ` Ingo Molnar
  2006-07-05 11:46           ` Ingo Molnar
  0 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 11:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Ingo Molnar <mingo@elte.hu> wrote:

> * Andrew Morton <akpm@osdl.org> wrote:
> 
> > > > shrinks fs/select.o by eight bytes.  (More than I expected).  So 
> > > > it does appear to be a space win, but a pretty slim one.
> > > 
> > > there are 855 calls to these functions in the allyesconfig vmlinux i 
> > > did, and i measured a combined size reduction of 34791 bytes. That 
> > > averages to a 40 bytes win per call site. (on i386.)
> > > 
> > 
> > Yes, but that lumps all three together.  init_waitqueue_head() is 
> > obviously the porky one.  And it's porkier with CONFIG_DEBUG_SPINLOCK 
> > and CONFIG_LOCKDEP, which isn't the case to optimise for.
> 
> true. I redid my tests with both lockdep and debug-spinlocks turned off:
> 
>   text            data    bss     dec             filename
>   21172153        6077270 3081864 30331287        vmlinux.x32.after
>   21198222        6077106 3081864 30357192        vmlinux.x32.before
> 
> with 851 callsites that's a 30.6 bytes win per call site (total 26K) - 
> still not bad at all.

and that was with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled. With 
optimize-for-size disabled the win goes up to 32.6 bytes (total 28K).

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 11:30         ` Ingo Molnar
@ 2006-07-05 11:46           ` Ingo Molnar
  2006-07-05 17:10             ` Andrew Morton
  0 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 11:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Ingo Molnar <mingo@elte.hu> wrote:

> > true. I redid my tests with both lockdep and debug-spinlocks turned off:
> > 
> >   text            data    bss     dec             filename
> >   21172153        6077270 3081864 30331287        vmlinux.x32.after
> >   21198222        6077106 3081864 30357192        vmlinux.x32.before
> > 
> > with 851 callsites that's a 30.6 bytes win per call site (total 26K) - 
> > still not bad at all.
> 
> and that was with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled. With 
> optimize-for-size disabled the win goes up to 32.6 bytes (total 28K).

i also tried a config with the best size settings (disabling 
FRAME_POINTER, enabling CC_OPTIMIZE_FOR_SIZE), and this gives:

  text            data    bss     dec         filename
  20777768        6076042 3081864 29935674    vmlinux.x32.size.before
  20748140        6076178 3081864 29906182    vmlinux.x32.size.after

or a 34.8 bytes win per callsite (29K total).

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 11:46           ` Ingo Molnar
@ 2006-07-05 17:10             ` Andrew Morton
  2006-07-05 19:35               ` Ingo Molnar
  0 siblings, 1 reply; 133+ messages in thread
From: Andrew Morton @ 2006-07-05 17:10 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, arjan

On Wed, 5 Jul 2006 13:46:30 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > true. I redid my tests with both lockdep and debug-spinlocks turned off:
> > > 
> > >   text            data    bss     dec             filename
> > >   21172153        6077270 3081864 30331287        vmlinux.x32.after
> > >   21198222        6077106 3081864 30357192        vmlinux.x32.before
> > > 
> > > with 851 callsites that's a 30.6 bytes win per call site (total 26K) - 
> > > still not bad at all.
> > 
> > and that was with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled. With 
> > optimize-for-size disabled the win goes up to 32.6 bytes (total 28K).
> 
> i also tried a config with the best size settings (disabling 
> FRAME_POINTER, enabling CC_OPTIMIZE_FOR_SIZE), and this gives:
> 
>   text            data    bss     dec         filename
>   20777768        6076042 3081864 29935674    vmlinux.x32.size.before
>   20748140        6076178 3081864 29906182    vmlinux.x32.size.after
> 
> or a 34.8 bytes win per callsite (29K total).
> 

With gcc-4.1.0 on i686, uninlining those three functions as per the below
patch _increases_ the allnoconfig vmlinux's .text from 833456 bytes to
833728.

Which maybe means it's still worth doing, because a lot of people run with
some debug options nowadays.

But it's a more believable result, and an inexplicable one given your
numbers.  Something's up.



 include/linux/wait.h |   24 +++---------------------
 kernel/fork.c        |   21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 21 deletions(-)

diff -puN include/linux/wait.h~a include/linux/wait.h
--- a/include/linux/wait.h~a
+++ a/include/linux/wait.h
@@ -82,27 +82,9 @@ struct task_struct;
  */
 extern struct lock_class_key waitqueue_lock_key;
 
-static inline void init_waitqueue_head(wait_queue_head_t *q)
-{
-	spin_lock_init(&q->lock);
-	lockdep_set_class(&q->lock, &waitqueue_lock_key);
-	INIT_LIST_HEAD(&q->task_list);
-}
-
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
-{
-	q->flags = 0;
-	q->private = p;
-	q->func = default_wake_function;
-}
-
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
-					wait_queue_func_t func)
-{
-	q->flags = 0;
-	q->private = NULL;
-	q->func = func;
-}
+void init_waitqueue_head(wait_queue_head_t *q);
+void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p);
+void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func);
 
 static inline int waitqueue_active(wait_queue_head_t *q)
 {
diff -puN fs/aio.c~a fs/aio.c
diff -puN kernel/fork.c~a kernel/fork.c
--- a/kernel/fork.c~a
+++ a/kernel/fork.c
@@ -153,6 +153,27 @@ void __init fork_init(unsigned long memp
 		init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+void init_waitqueue_head(wait_queue_head_t *q)
+{
+	spin_lock_init(&q->lock);
+	lockdep_set_class(&q->lock, &waitqueue_lock_key);
+	INIT_LIST_HEAD(&q->task_list);
+}
+
+void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+{
+	q->flags = 0;
+	q->private = p;
+	q->func = default_wake_function;
+}
+
+void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+{
+	q->flags = 0;
+	q->private = NULL;
+	q->func = func;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
_


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 17:10             ` Andrew Morton
@ 2006-07-05 19:35               ` Ingo Molnar
  2006-07-05 20:18                 ` Andrew Morton
  0 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 19:35 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> > > >   text            data    bss     dec             filename
> > > >   21172153        6077270 3081864 30331287        vmlinux.x32.after
> > > >   21198222        6077106 3081864 30357192        vmlinux.x32.before
> > > > 
> > > > with 851 callsites that's a 30.6 bytes win per call site (total 26K) - 
> > > > still not bad at all.
> > > 
> > > and that was with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled. With 
> > > optimize-for-size disabled the win goes up to 32.6 bytes (total 28K).
> > 
> > i also tried a config with the best size settings (disabling 
> > FRAME_POINTER, enabling CC_OPTIMIZE_FOR_SIZE), and this gives:
> > 
> >   text            data    bss     dec         filename
> >   20777768        6076042 3081864 29935674    vmlinux.x32.size.before
> >   20748140        6076178 3081864 29906182    vmlinux.x32.size.after
> > 
> > or a 34.8 bytes win per callsite (29K total).
> > 
> 
> With gcc-4.1.0 on i686, uninlining those three functions as per the 
> below patch _increases_ the allnoconfig vmlinux's .text from 833456 
> bytes to 833728.

that's just the effect of CONFIG_REGPARM and CONFIG_CC_OPTIMIZE_FOR_SIZE 
not being set in an allnoconfig. Once i set them the text size evens 
out:

 431348   60666   27276  519290   7ec7a vmlinux.x32.mini.before
 431359   60666   27276  519301   7ec85 vmlinux.x32.mini.after

compiling without CONFIG_REGPARM on i686 (if you care about text size) 
makes little sense. It penalizes function calls artificially.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 19:35               ` Ingo Molnar
@ 2006-07-05 20:18                 ` Andrew Morton
  2006-07-05 20:36                   ` Linus Torvalds
  2006-07-05 20:45                   ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
  0 siblings, 2 replies; 133+ messages in thread
From: Andrew Morton @ 2006-07-05 20:18 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, arjan

Ingo Molnar <mingo@elte.hu> wrote:
>
> > > i also tried a config with the best size settings (disabling 
> > > FRAME_POINTER, enabling CC_OPTIMIZE_FOR_SIZE), and this gives:
> > > 
> > >   text            data    bss     dec         filename
> > >   20777768        6076042 3081864 29935674    vmlinux.x32.size.before
> > >   20748140        6076178 3081864 29906182    vmlinux.x32.size.after
> > > 
> > > or a 34.8 bytes win per callsite (29K total).
> > > 
> > 
> > With gcc-4.1.0 on i686, uninlining those three functions as per the 
> > below patch _increases_ the allnoconfig vmlinux's .text from 833456 
> > bytes to 833728.
> 
> that's just the effect of CONFIG_REGPARM and CONFIG_CC_OPTIMIZE_FOR_SIZE 
> not being set in an allnoconfig. Once i set them the text size evens 
> out:
> 
>  431348   60666   27276  519290   7ec7a vmlinux.x32.mini.before
>  431359   60666   27276  519301   7ec85 vmlinux.x32.mini.after
> 
> compiling without CONFIG_REGPARM on i686 (if you care about text size) 
> makes little sense. It penalizes function calls artificially.

OK, but what happened to the 35-bytes-per-callsite saving?

Sorry to keep going on about this, but your numbers seem just too big to
me, and the above confirms that, and I don't know what's happening.

It doesn't seem right that uninlining something like this:

static inline void init_waitqueue_func_entry(wait_queue_t *q,
					wait_queue_func_t func)
{
	q->flags = 0;
	q->private = NULL;
	q->func = func;
}

is going to gain us much code size benefit at all.  Let alone
runtime/codegen benefit.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 20:18                 ` Andrew Morton
@ 2006-07-05 20:36                   ` Linus Torvalds
  2006-07-05 20:47                     ` Ingo Molnar
  2006-07-05 20:45                   ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
  1 sibling, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-05 20:36 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Ingo Molnar, linux-kernel, arjan



On Wed, 5 Jul 2006, Andrew Morton wrote:
> 
> OK, but what happened to the 35-bytes-per-callsite saving?

I really don't think it existed.

Maybe there's something else going on. In particular, I wonder if sections 
like the "debug_loc" fection end up being counted towards text-size? They 
never actually get _loaded_, but they can be absolutely enormous if 
CONFIG_DEBUG_INFO is enabled.

Doing "make allnoconfig" would have turned off not only modules, but also 
indirectly turned off gratuitous debug info bloat like that..

				Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 20:18                 ` Andrew Morton
  2006-07-05 20:36                   ` Linus Torvalds
@ 2006-07-05 20:45                   ` Ingo Molnar
  1 sibling, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 20:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> Ingo Molnar <mingo@elte.hu> wrote:
> >
> > > > i also tried a config with the best size settings (disabling 
> > > > FRAME_POINTER, enabling CC_OPTIMIZE_FOR_SIZE), and this gives:
> > > > 
> > > >   text            data    bss     dec         filename
> > > >   20777768        6076042 3081864 29935674    vmlinux.x32.size.before
> > > >   20748140        6076178 3081864 29906182    vmlinux.x32.size.after
> > > > 
> > > > or a 34.8 bytes win per callsite (29K total).
> > > > 
> > > 
> > > With gcc-4.1.0 on i686, uninlining those three functions as per the 
> > > below patch _increases_ the allnoconfig vmlinux's .text from 833456 
> > > bytes to 833728.
> > 
> > that's just the effect of CONFIG_REGPARM and CONFIG_CC_OPTIMIZE_FOR_SIZE 
> > not being set in an allnoconfig. Once i set them the text size evens 
> > out:
> > 
> >  431348   60666   27276  519290   7ec7a vmlinux.x32.mini.before
> >  431359   60666   27276  519301   7ec85 vmlinux.x32.mini.after
> > 
> > compiling without CONFIG_REGPARM on i686 (if you care about text size) 
> > makes little sense. It penalizes function calls artificially.
> 
> OK, but what happened to the 35-bytes-per-callsite saving?

there are 3 effects i can see:

firstly, allnoconfig implies SMP off, so the spinlock init in 
init_waitqueue_head() is not done and it becomes a 2-instruction thing.

secondly, the savings depend on the function size. Uninlining from a 
small function (that makes use of the inlined function) can be a loss if 
the function call causes more register clobbering. For large functions 
we clobber all registers anyway so there's no extra stack saving, etc. 

the allnoconfig kernel makes use of these waitqueue ops in smaller 
kernel-core functions as well, where the uninlining is a loss. 
(sleep_on(), etc.) Furthermore, UP kernel tends to decrease function 
sizes too.

larger kernel configs include more non-core subsystems/drivers as well, 
which tend to have larger function sizes. There the uninlining win is 
larger.

there's a third, smaller effect too: gcc manages to do tail-merging of 
the init_waitqueue_head calls, in a handful of cases, further reducing 
the cost. I found no such tail-merges done for the 53 callsites in the 
allnoconfig kernel.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 20:36                   ` Linus Torvalds
@ 2006-07-05 20:47                     ` Ingo Molnar
  2006-07-05 21:15                       ` Ingo Molnar
  2006-07-05 21:21                       ` Linus Torvalds
  0 siblings, 2 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 20:47 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Linus Torvalds <torvalds@osdl.org> wrote:

> 
> 
> On Wed, 5 Jul 2006, Andrew Morton wrote:
> > 
> > OK, but what happened to the 35-bytes-per-callsite saving?
> 
> I really don't think it existed.
> 
> Maybe there's something else going on. In particular, I wonder if 
> sections like the "debug_loc" fection end up being counted towards 
> text-size? They never actually get _loaded_, but they can be 
> absolutely enormous if CONFIG_DEBUG_INFO is enabled.

i had CONFIG_DEBUG_INFO (and UNWIND_INFO) disabled in all these build 
tests.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 20:47                     ` Ingo Molnar
@ 2006-07-05 21:15                       ` Ingo Molnar
  2006-07-05 21:21                       ` Linus Torvalds
  1 sibling, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 21:15 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Linus Torvalds <torvalds@osdl.org> wrote:
> 
> > 
> > 
> > On Wed, 5 Jul 2006, Andrew Morton wrote:
> > > 
> > > OK, but what happened to the 35-bytes-per-callsite saving?
> > 
> > I really don't think it existed.
> > 
> > Maybe there's something else going on. In particular, I wonder if 
> > sections like the "debug_loc" fection end up being counted towards 
> > text-size? They never actually get _loaded_, but they can be 
> > absolutely enormous if CONFIG_DEBUG_INFO is enabled.
> 
> i had CONFIG_DEBUG_INFO (and UNWIND_INFO) disabled in all these build 
> tests.

hm, maybe KALLSYMS? I had that enabled - disabling it now.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 20:47                     ` Ingo Molnar
  2006-07-05 21:15                       ` Ingo Molnar
@ 2006-07-05 21:21                       ` Linus Torvalds
  2006-07-05 21:45                         ` Ingo Molnar
  1 sibling, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-05 21:21 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, arjan



On Wed, 5 Jul 2006, Ingo Molnar wrote:
> 
> i had CONFIG_DEBUG_INFO (and UNWIND_INFO) disabled in all these build 
> tests.

Good, because I just verified: those two together will on their own 
increase "text size" by about 17% for me.

I still think Andrew is right: I don't see how an initializer that should 
basically be three instructions can possibly be 35 bytes larger than a 
function call that should be a minimum of two instructions (argument setup 
in %eax and the actual call - and that's totally ignoring the 
deleterious effects of a function call on register liveness). 

The fact that with allnoconfig the kernel is _smaller_ (but, quite 
franlkly, within the noise) with the inlined version would seem to back up 
Andrews position that it really shouldn't matter.

So I'm left wondering why it matters for you, and what triggers it. Maybe 
there is some secondary issue that could show us an even more interesting 
optimization (or some compiler behaviour that we should try to encourage).

It is, for example, entirely possible that the size reduction is REAL, but 
that it comes from some other interesting source like gcc deciding to not 
inline a function that isn't a leaf function - and that turning those 
init_waitqueue_*() functions into real function calls thus has a secondary 
effect that is the _real_ advantage.

In other words, I think the patch is fine per se, and I wouldn't mind 
applying it at all, but I think it would be good to understand _what_ 
exactly makes for the reduction in size.

In particular, if it is some secondary effect, maybe we'd be better off 
trying to aim for that secondary effect _directly_.

See?

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 21:21                       ` Linus Torvalds
@ 2006-07-05 21:45                         ` Ingo Molnar
  2006-07-05 21:58                           ` Randy.Dunlap
  2006-07-05 22:09                           ` Linus Torvalds
  0 siblings, 2 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 21:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Linus Torvalds <torvalds@osdl.org> wrote:

> On Wed, 5 Jul 2006, Ingo Molnar wrote:
> > 
> > i had CONFIG_DEBUG_INFO (and UNWIND_INFO) disabled in all these build 
> > tests.
> 
> Good, because I just verified: those two together will on their own 
> increase "text size" by about 17% for me.
> 
> I still think Andrew is right: I don't see how an initializer that 
> should basically be three instructions can possibly be 35 bytes larger 
> than a function call that should be a minimum of two instructions 
> (argument setup in %eax and the actual call - and that's totally 
> ignoring the deleterious effects of a function call on register 
> liveness).
> 
> The fact that with allnoconfig the kernel is _smaller_ (but, quite 
> franlkly, within the noise) with the inlined version would seem to 
> back up Andrews position that it really shouldn't matter.

well, the allnoconfig thing is artificial (and the uninteresting) for a 
number of reasons:

- it has REGPARM disabled which penalizes function calls

- it's UP and hence the inlining cost of init_wait_queue_head() is 
  significantly smaller.

- allnoconfig has smaller average function size - increasing the cost of 
  uninlining

> So I'm left wondering why it matters for you, and what triggers it. 
> Maybe there is some secondary issue that could show us an even more 
> interesting optimization (or some compiler behaviour that we should 
> try to encourage).

yeah, i'd not want to skip over some interesting and still unexplained 
effect either, but 35 bytes isnt all that outlandish and from everything 
i've seen it's a real win. Here is an actual example:

 c0fb6137:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
 c0fb613e:       00
 c0fb613f:       c7 44 24 08 01 00 00    movl   $0x1,0x8(%esp)
 c0fb6146:       00
 c0fb6147:       c7 43 60 00 00 00 00    movl   $0x0,0x60(%ebx)
 c0fb614e:       8b 44 24 08             mov    0x8(%esp),%eax
 c0fb6152:       89 43 5c                mov    %eax,0x5c(%ebx)
 c0fb6155:       8d 43 64                lea    0x64(%ebx),%eax
 c0fb6158:       89 40 04                mov    %eax,0x4(%eax)
 c0fb615b:       89 43 64                mov    %eax,0x64(%ebx)

versus:

 c0fb070e:       8d 43 5c                lea    0x5c(%ebx),%eax
 c0fb0711:       e8 94 98 18 ff          call   c0139faa <init_waitqueue_head>

so 39 bytes versus 8 bytes - 31 bytes saved. It's a similar win in other 
cases i checked too. (the only exception is for smaller functions that i 
mentioned before: where the parameters are not pre-calculated yet so 
there's no good integration for the function call. In that case it's 
break even, or in some cases a 3-4 bytes loss.)

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 21:45                         ` Ingo Molnar
@ 2006-07-05 21:58                           ` Randy.Dunlap
  2006-07-05 22:00                             ` Ingo Molnar
  2006-07-05 22:09                           ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Randy.Dunlap @ 2006-07-05 21:58 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, akpm, linux-kernel, arjan

On Wed, 5 Jul 2006 23:45:02 +0200 Ingo Molnar wrote:

> 
> * Linus Torvalds <torvalds@osdl.org> wrote:
> 
> > On Wed, 5 Jul 2006, Ingo Molnar wrote:
> > > 
> > > i had CONFIG_DEBUG_INFO (and UNWIND_INFO) disabled in all these build 
> > > tests.
> > 
> > Good, because I just verified: those two together will on their own 
> > increase "text size" by about 17% for me.
> > 
> > I still think Andrew is right: I don't see how an initializer that 
> > should basically be three instructions can possibly be 35 bytes larger 
> > than a function call that should be a minimum of two instructions 
> > (argument setup in %eax and the actual call - and that's totally 
> > ignoring the deleterious effects of a function call on register 
> > liveness).
> > 
> > The fact that with allnoconfig the kernel is _smaller_ (but, quite 
> > franlkly, within the noise) with the inlined version would seem to 
> > back up Andrews position that it really shouldn't matter.
> 
> well, the allnoconfig thing is artificial (and the uninteresting) for a 
> number of reasons:

hm, I'd have to say that allyesconfig is also artificial and the
savings numbers are somewhat uninteresting in that case too.


> - it has REGPARM disabled which penalizes function calls
> 
> - it's UP and hence the inlining cost of init_wait_queue_head() is 
>   significantly smaller.
> 
> - allnoconfig has smaller average function size - increasing the cost of 
>   uninlining
> 
> > So I'm left wondering why it matters for you, and what triggers it. 
> > Maybe there is some secondary issue that could show us an even more 
> > interesting optimization (or some compiler behaviour that we should 
> > try to encourage).
> 
> yeah, i'd not want to skip over some interesting and still unexplained 
> effect either, but 35 bytes isnt all that outlandish and from everything 
> i've seen it's a real win. Here is an actual example:
> 
>  c0fb6137:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
>  c0fb613e:       00
>  c0fb613f:       c7 44 24 08 01 00 00    movl   $0x1,0x8(%esp)
>  c0fb6146:       00
>  c0fb6147:       c7 43 60 00 00 00 00    movl   $0x0,0x60(%ebx)
>  c0fb614e:       8b 44 24 08             mov    0x8(%esp),%eax
>  c0fb6152:       89 43 5c                mov    %eax,0x5c(%ebx)
>  c0fb6155:       8d 43 64                lea    0x64(%ebx),%eax
>  c0fb6158:       89 40 04                mov    %eax,0x4(%eax)
>  c0fb615b:       89 43 64                mov    %eax,0x64(%ebx)
> 
> versus:
> 
>  c0fb070e:       8d 43 5c                lea    0x5c(%ebx),%eax
>  c0fb0711:       e8 94 98 18 ff          call   c0139faa <init_waitqueue_head>
> 
> so 39 bytes versus 8 bytes - 31 bytes saved. It's a similar win in other 
> cases i checked too. (the only exception is for smaller functions that i 
> mentioned before: where the parameters are not pre-calculated yet so 
> there's no good integration for the function call. In that case it's 
> break even, or in some cases a 3-4 bytes loss.)

---
~Randy

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 21:58                           ` Randy.Dunlap
@ 2006-07-05 22:00                             ` Ingo Molnar
  2006-07-05 22:10                               ` Randy.Dunlap
  2006-07-05 23:15                               ` Adrian Bunk
  0 siblings, 2 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 22:00 UTC (permalink / raw)
  To: Randy.Dunlap; +Cc: torvalds, akpm, linux-kernel, arjan


* Randy.Dunlap <rdunlap@xenotime.net> wrote:

> > well, the allnoconfig thing is artificial (and the uninteresting) for a 
> > number of reasons:
> 
> hm, I'd have to say that allyesconfig is also artificial and the 
> savings numbers are somewhat uninteresting in that case too.

well the 'allyesconfig' isnt the true allyesconfig but one with most 
debugging options disabled. It is quite close to a typical distro config 
- hence very much relevant. (I wanted to use something that is easy to 
reproduce.) Believe me, for large configs the savings are real.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 21:45                         ` Ingo Molnar
  2006-07-05 21:58                           ` Randy.Dunlap
@ 2006-07-05 22:09                           ` Linus Torvalds
  2006-07-05 22:30                             ` Ingo Molnar
  2006-07-05 23:11                             ` Linus Torvalds
  1 sibling, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-05 22:09 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, arjan



On Wed, 5 Jul 2006, Ingo Molnar wrote:
> 
> yeah, i'd not want to skip over some interesting and still unexplained 
> effect either, but 35 bytes isnt all that outlandish and from everything 
> i've seen it's a real win. Here is an actual example:
> 
>  c0fb6137:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
>  c0fb613e:       00
>  c0fb613f:       c7 44 24 08 01 00 00    movl   $0x1,0x8(%esp)
>  c0fb6146:       00
>  c0fb6147:       c7 43 60 00 00 00 00    movl   $0x0,0x60(%ebx)
>  c0fb614e:       8b 44 24 08             mov    0x8(%esp),%eax
>  c0fb6152:       89 43 5c                mov    %eax,0x5c(%ebx)
>  c0fb6155:       8d 43 64                lea    0x64(%ebx),%eax
>  c0fb6158:       89 40 04                mov    %eax,0x4(%eax)
>  c0fb615b:       89 43 64                mov    %eax,0x64(%ebx)

Ahh, it's _that_ old gcc problem. 

That's actually a different thing. 

Gcc is HORRIBLY BAD at doing the simple

	some_structure = (struct somestruct) { INITIAL };

assignments. It is so ludicrously bad that it's sad. It tends to do that 
as a local "struct somestruct" on the stack that gets initialized, 
followed by a memcpy().

In this case, the problem appears to be the spinlock initialization code.

In other words, I suspect 90% of your improvement was because you got that 
braindamage out of line.

It would be _much_  better to just fix "spin_lock_init()" instead. That 
would help a lot of _other_ users too, not just the waitqueue 
initializations.

Making that a real function (and inline only for the non-debug case, at 
which point it's just a simple and small store) would be much better.

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 22:00                             ` Ingo Molnar
@ 2006-07-05 22:10                               ` Randy.Dunlap
  2006-07-05 22:27                                 ` Ingo Molnar
  2006-07-05 23:15                               ` Adrian Bunk
  1 sibling, 1 reply; 133+ messages in thread
From: Randy.Dunlap @ 2006-07-05 22:10 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, akpm, linux-kernel, arjan

On Thu, 6 Jul 2006 00:00:09 +0200 Ingo Molnar wrote:

> 
> * Randy.Dunlap <rdunlap@xenotime.net> wrote:
> 
> > > well, the allnoconfig thing is artificial (and the uninteresting) for a 
> > > number of reasons:
> > 
> > hm, I'd have to say that allyesconfig is also artificial and the 
> > savings numbers are somewhat uninteresting in that case too.
> 
> well the 'allyesconfig' isnt the true allyesconfig but one with most 
> debugging options disabled. It is quite close to a typical distro config 
> - hence very much relevant. (I wanted to use something that is easy to 
> reproduce.) Believe me, for large configs the savings are real.

I would expect distros to use something close to all*mod*config
(with some debug options disabled).

---
~Randy

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 22:10                               ` Randy.Dunlap
@ 2006-07-05 22:27                                 ` Ingo Molnar
  0 siblings, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 22:27 UTC (permalink / raw)
  To: Randy.Dunlap; +Cc: torvalds, akpm, linux-kernel, arjan


* Randy.Dunlap <rdunlap@xenotime.net> wrote:

> > well the 'allyesconfig' isnt the true allyesconfig but one with most 
> > debugging options disabled. It is quite close to a typical distro config 
> > - hence very much relevant. (I wanted to use something that is easy to 
> > reproduce.) Believe me, for large configs the savings are real.
> 
> I would expect distros to use something close to all*mod*config (with 
> some debug options disabled).

that's the same as allyesconfig from an inlining cost POV.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 22:09                           ` Linus Torvalds
@ 2006-07-05 22:30                             ` Ingo Molnar
  2006-07-05 23:11                             ` Linus Torvalds
  1 sibling, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-05 22:30 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Linus Torvalds <torvalds@osdl.org> wrote:

> On Wed, 5 Jul 2006, Ingo Molnar wrote:
> > 
> > yeah, i'd not want to skip over some interesting and still unexplained 
> > effect either, but 35 bytes isnt all that outlandish and from everything 
> > i've seen it's a real win. Here is an actual example:
> > 
> >  c0fb6137:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
> >  c0fb613e:       00
> >  c0fb613f:       c7 44 24 08 01 00 00    movl   $0x1,0x8(%esp)
> >  c0fb6146:       00
> >  c0fb6147:       c7 43 60 00 00 00 00    movl   $0x0,0x60(%ebx)
> >  c0fb614e:       8b 44 24 08             mov    0x8(%esp),%eax
> >  c0fb6152:       89 43 5c                mov    %eax,0x5c(%ebx)
> >  c0fb6155:       8d 43 64                lea    0x64(%ebx),%eax
> >  c0fb6158:       89 40 04                mov    %eax,0x4(%eax)
> >  c0fb615b:       89 43 64                mov    %eax,0x64(%ebx)
> 
> Ahh, it's _that_ old gcc problem. 
> 
> That's actually a different thing. 
> 
> Gcc is HORRIBLY BAD at doing the simple
> 
> 	some_structure = (struct somestruct) { INITIAL };
> 
> assignments. It is so ludicrously bad that it's sad. It tends to do 
> that as a local "struct somestruct" on the stack that gets 
> initialized, followed by a memcpy().
> 
> In this case, the problem appears to be the spinlock initialization 
> code.
> 
> In other words, I suspect 90% of your improvement was because you got 
> that braindamage out of line.
> 
> It would be _much_ better to just fix "spin_lock_init()" instead. That 
> would help a lot of _other_ users too, not just the waitqueue 
> initializations.
> 
> Making that a real function (and inline only for the non-debug case, 
> at which point it's just a simple and small store) would be much 
> better.

in the debug case it's already a function. (by virtue of lockdep) But 
what happens here is CONFIG_PREEMPT and break_lock and thus we get two 
fields which get initialized in that stupid way. I'll try a better 
initialization sequence. This was with gcc 4.0.1.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 22:09                           ` Linus Torvalds
  2006-07-05 22:30                             ` Ingo Molnar
@ 2006-07-05 23:11                             ` Linus Torvalds
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
                                                 ` (2 more replies)
  1 sibling, 3 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-05 23:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, arjan



On Wed, 5 Jul 2006, Linus Torvalds wrote:
> > 
> >  c0fb6137:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
> >  c0fb613e:       00
> >  c0fb613f:       c7 44 24 08 01 00 00    movl   $0x1,0x8(%esp)
> >  c0fb6146:       00
> >  c0fb6147:       c7 43 60 00 00 00 00    movl   $0x0,0x60(%ebx)
> >  c0fb614e:       8b 44 24 08             mov    0x8(%esp),%eax
> >  c0fb6152:       89 43 5c                mov    %eax,0x5c(%ebx)

Btw, this is even worse than usual.

I've seen the "create struct on the stack and copy it" before, but looking 
closer, this is doing something I haven't noticed before: that double 
assignment to the stack is _really_ strange.

I wonder why it first stores a zero to the stack location, and then 
overwrites it with the proper spinlock initialized (one).

As far as I can tell, the spinlock initializer should really end up being 
a perfectly normal "(struct spinlock_t) { 1 }", and I don't see where the 
zero comes from.

It may actually be that we're double penalized because for the lock 
"counter", we use a "volatile unsigned int", and that "0" is from some 
internal gcc "initialize all base structures to zero" to make sure that 
any padding gets zeroed, and then the "volatile" means that gcc ends up 
not optimizing it away, even though it was a totally bogus store that 
didn't even exist in the sources.

I wonder if we should remove the "volatile". There really isn't anything 
_good_ that gcc can do with it, but we've seen gcc code generation do 
stupid things before just because "volatile" seems to just disable even 
proper normal working.

[ Test-case built as time passes ]

Try compiling this example file with "-fomit-frame-pointer -O2 -S" to see 
the effect:

	horrid:
	        subl    $16, %esp
	        movl    $1, 12(%esp)
	        movl    12(%esp), %eax
	        movl    %eax, a1
	        movl    $1, b1
	        addl    $16, %esp
	        ret

	notbad:
	        movl    $1, a2
	        movl    $1, b2
	        ret


I really think that "volatile" in the kernel sources is _always_ a kernel 
bug. It certainly seems to be so in this case.

(But at least with the compiler version I'm using, I'm not seeing that 
extra unnecessary "movl $0" - I have gcc version 4.1.1 here)

Does removing just the "volatile" on the "slock" member shrink the size of 
the kernel noticeably? And do you see an extra "movl $0" in this case with 
your compiler version?

Maybe removing the volatile allows us to keep the initializer the way it 
is (although going by past behaviour, I've seen gcc generate horrible code 
in more complicated settings, so maybe the reason it works well on this 
case without the "volatile" is just that it was simple enough that gcc 
wouldn't mess up?)

		Linus
---
struct duh {
	volatile int i;
};

void horrid(void)
{
	extern struct duh a1;
	extern struct duh b1;

	a1 = (struct duh) { 1 };
	b1.i = 1;
}

struct gaah {
	int i;
};

void notbad(void)
{
	extern struct gaah a2;
	extern struct gaah b2;

	a2 = (struct gaah) { 1 };
	b2.i = 1;
}

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-05 22:00                             ` Ingo Molnar
  2006-07-05 22:10                               ` Randy.Dunlap
@ 2006-07-05 23:15                               ` Adrian Bunk
  1 sibling, 0 replies; 133+ messages in thread
From: Adrian Bunk @ 2006-07-05 23:15 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Randy.Dunlap, torvalds, akpm, linux-kernel, arjan

On Thu, Jul 06, 2006 at 12:00:09AM +0200, Ingo Molnar wrote:
> 
> * Randy.Dunlap <rdunlap@xenotime.net> wrote:
> 
> > > well, the allnoconfig thing is artificial (and the uninteresting) for a 
> > > number of reasons:
> > 
> > hm, I'd have to say that allyesconfig is also artificial and the 
> > savings numbers are somewhat uninteresting in that case too.
> 
> well the 'allyesconfig' isnt the true allyesconfig but one with most 
> debugging options disabled. It is quite close to a typical distro config 
> - hence very much relevant. (I wanted to use something that is easy to 
> reproduce.) Believe me, for large configs the savings are real.

What is the difference between "most debugging options disabled" and
"all debugging options disabled"?

> 	Ingo

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 133+ messages in thread

* [patch] spinlocks: remove 'volatile'
  2006-07-05 23:11                             ` Linus Torvalds
@ 2006-07-06  8:16                               ` Ingo Molnar
  2006-07-06  8:23                                 ` Ingo Molnar
                                                   ` (3 more replies)
  2006-07-06  8:18                               ` [patch] lockdep: clean up completion initializer in smpboot.c Ingo Molnar
  2006-07-06  8:23                               ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
  2 siblings, 4 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-06  8:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Linus Torvalds <torvalds@osdl.org> wrote:

> I wonder if we should remove the "volatile". There really isn't 
> anything _good_ that gcc can do with it, but we've seen gcc code 
> generation do stupid things before just because "volatile" seems to 
> just disable even proper normal working.

yeah. I tried this and it indeed slashed 42K off text size (0.2%):

 text            data    bss     dec             filename
 20779489        6073834 3075176 29928499        vmlinux.volatile
 20736884        6073834 3075176 29885894        vmlinux.non-volatile

i booted the resulting allyesconfig bzImage and everything seems to be 
working fine. Find patch below.

	Ingo

------------------>
Subject: spinlocks: remove 'volatile'
From: Ingo Molnar <mingo@elte.hu>

remove 'volatile' from the spinlock types, it causes gcc to
generate really bad code. (and it's pointless anyway)

this reduces the non-debug SMP kernel's size by 0.2% (!).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-i386/spinlock_types.h   |    4 ++--
 include/asm-x86_64/spinlock_types.h |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

Index: linux/include/asm-i386/spinlock_types.h
===================================================================
--- linux.orig/include/asm-i386/spinlock_types.h
+++ linux/include/asm-i386/spinlock_types.h
@@ -6,13 +6,13 @@
 #endif
 
 typedef struct {
-	volatile unsigned int slock;
+	unsigned int slock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
 
 typedef struct {
-	volatile unsigned int lock;
+	unsigned int lock;
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
Index: linux/include/asm-x86_64/spinlock_types.h
===================================================================
--- linux.orig/include/asm-x86_64/spinlock_types.h
+++ linux/include/asm-x86_64/spinlock_types.h
@@ -6,13 +6,13 @@
 #endif
 
 typedef struct {
-	volatile unsigned int slock;
+	unsigned int slock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
 
 typedef struct {
-	volatile unsigned int lock;
+	unsigned int lock;
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }

^ permalink raw reply	[flat|nested] 133+ messages in thread

* [patch] lockdep: clean up completion initializer in smpboot.c
  2006-07-05 23:11                             ` Linus Torvalds
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
@ 2006-07-06  8:18                               ` Ingo Molnar
  2006-07-06  8:23                               ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
  2 siblings, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-06  8:18 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan

Subject: lockdep: clean up completion initializer in smpboot.c
From: Ingo Molnar <mingo@elte.hu>

clean up lockdep on-stack-completion initializer. (This also removes
the dependency on waitqueue_lock_key.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86_64/kernel/smpboot.c |    4 +---
 include/linux/completion.h   |    5 ++++-
 2 files changed, 5 insertions(+), 4 deletions(-)

Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -771,12 +771,10 @@ static int __cpuinit do_boot_cpu(int cpu
 	unsigned long start_rip;
 	struct create_idle c_idle = {
 		.cpu = cpu,
-		.done = COMPLETION_INITIALIZER(c_idle.done),
+		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
 	DECLARE_WORK(work, do_fork_idle, &c_idle);
 
-	lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
-
 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
 	if (!cpu_gdt_descr[cpu].address &&
 		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
Index: linux/include/linux/completion.h
===================================================================
--- linux.orig/include/linux/completion.h
+++ linux/include/linux/completion.h
@@ -18,6 +18,9 @@ struct completion {
 #define COMPLETION_INITIALIZER(work) \
 	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
 
+#define COMPLETION_INITIALIZER_ONSTACK(work) \
+	({ init_completion(&work); work; })
+
 #define DECLARE_COMPLETION(work) \
 	struct completion work = COMPLETION_INITIALIZER(work)
 
@@ -28,7 +31,7 @@ struct completion {
  */
 #ifdef CONFIG_LOCKDEP
 # define DECLARE_COMPLETION_ONSTACK(work) \
-	struct completion work = ({ init_completion(&work); work; })
+	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
 #else
 # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
 #endif

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
@ 2006-07-06  8:23                                 ` Ingo Molnar
  2006-07-06  9:27                                 ` Heiko Carstens
                                                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-06  8:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan


* Ingo Molnar <mingo@elte.hu> wrote:

> * Linus Torvalds <torvalds@osdl.org> wrote:
> 
> > I wonder if we should remove the "volatile". There really isn't 
> > anything _good_ that gcc can do with it, but we've seen gcc code 
> > generation do stupid things before just because "volatile" seems to 
> > just disable even proper normal working.
> 
> yeah. I tried this and it indeed slashed 42K off text size (0.2%):
> 
>  text            data    bss     dec             filename
>  20779489        6073834 3075176 29928499        vmlinux.volatile
>  20736884        6073834 3075176 29885894        vmlinux.non-volatile
> 
> i booted the resulting allyesconfig bzImage and everything seems to be 
> working fine. Find patch below.

btw., this effect accounted for roughly half of the per-callsite win of 
the wait.h uninlining patch. That still leaves 18 bytes of per-callsite 
win - i'll send that patch next.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* [patch] uninline init_waitqueue_*() functions
  2006-07-05 23:11                             ` Linus Torvalds
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
  2006-07-06  8:18                               ` [patch] lockdep: clean up completion initializer in smpboot.c Ingo Molnar
@ 2006-07-06  8:23                               ` Ingo Molnar
  2006-07-06  9:02                                 ` Andrew Morton
  2 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-06  8:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel, arjan

Subject: uninline init_waitqueue_*() functions
From: Ingo Molnar <mingo@elte.hu>

uninline more wait.h inline functions.

allyesconfig vmlinux size delta:

  text            data    bss     dec          filename
  20736884        6073834 3075176 29885894     vmlinux.before
  20721009        6073966 3075176 29870151     vmlinux.after

~18 bytes per callsite, 15K of text size (~0.1%) saved.

(as an added bonus this also removes a lockdep annotation.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/wait.h |   29 +++--------------------------
 kernel/wait.c        |   26 ++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 28 deletions(-)

Index: linux/include/linux/wait.h
===================================================================
--- linux.orig/include/linux/wait.h
+++ linux/include/linux/wait.h
@@ -77,32 +77,9 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
 
-/*
- * lockdep: we want one lock-class for all waitqueue locks.
- */
-extern struct lock_class_key waitqueue_lock_key;
-
-static inline void init_waitqueue_head(wait_queue_head_t *q)
-{
-	spin_lock_init(&q->lock);
-	lockdep_set_class(&q->lock, &waitqueue_lock_key);
-	INIT_LIST_HEAD(&q->task_list);
-}
-
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
-{
-	q->flags = 0;
-	q->private = p;
-	q->func = default_wake_function;
-}
-
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
-					wait_queue_func_t func)
-{
-	q->flags = 0;
-	q->private = NULL;
-	q->func = func;
-}
+extern void init_waitqueue_head(wait_queue_head_t *q);
+extern void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p);
+extern void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func);
 
 static inline int waitqueue_active(wait_queue_head_t *q)
 {
Index: linux/kernel/wait.c
===================================================================
--- linux.orig/kernel/wait.c
+++ linux/kernel/wait.c
@@ -10,9 +10,31 @@
 #include <linux/wait.h>
 #include <linux/hash.h>
 
-struct lock_class_key waitqueue_lock_key;
+void init_waitqueue_head(wait_queue_head_t *q)
+{
+	spin_lock_init(&q->lock);
+	INIT_LIST_HEAD(&q->task_list);
+}
+
+EXPORT_SYMBOL(init_waitqueue_head);
+
+void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+{
+	q->flags = 0;
+	q->private = p;
+	q->func = default_wake_function;
+}
+
+EXPORT_SYMBOL(init_waitqueue_entry);
+
+void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+{
+	q->flags = 0;
+	q->private = NULL;
+	q->func = func;
+}
 
-EXPORT_SYMBOL(waitqueue_lock_key);
+EXPORT_SYMBOL(init_waitqueue_func_entry);
 
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 {

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] uninline init_waitqueue_*() functions
  2006-07-06  8:23                               ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
@ 2006-07-06  9:02                                 ` Andrew Morton
  2006-07-06  9:12                                   ` [patch] uninline init_waitqueue_head() Ingo Molnar
  0 siblings, 1 reply; 133+ messages in thread
From: Andrew Morton @ 2006-07-06  9:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, arjan

On Thu, 6 Jul 2006 10:23:41 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> uninline more wait.h inline functions.
> 
> allyesconfig vmlinux size delta:
> 
>   text            data    bss     dec          filename
>   20736884        6073834 3075176 29885894     vmlinux.before
>   20721009        6073966 3075176 29870151     vmlinux.after

On my x86_64 typicalconfig
(http://www.zip.com.au/~akpm/linux/patches/stuff/config-x)


everything inlined:

   text    data     bss     dec     hex filename
4079169  702440  280184 5061793  4d3ca1 vmlinux

uninline init_waitqueue_head:

4076921  702456  280184 5059561  4d33e9 vmlinux

uninline init_waitqueue_head+init_waitqueue_entry

box:/usr/src/25> size vmlinux
   text    data     bss     dec     hex filename
4077017  702472  280184 5059673  4d3459 vmlinux

uninline init_waitqueue_head+init_waitqueue_entry+init_waitqueue_func_entry

box:/usr/src/25> size vmlinux
   text    data     bss     dec     hex filename
4077128  702496  280184 5059808  4d34e0 vmlinux


So we only want to uninline init_waitqueue_head().

^ permalink raw reply	[flat|nested] 133+ messages in thread

* [patch] uninline init_waitqueue_head()
  2006-07-06  9:02                                 ` Andrew Morton
@ 2006-07-06  9:12                                   ` Ingo Molnar
  0 siblings, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-06  9:12 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel, arjan


* Andrew Morton <akpm@osdl.org> wrote:

> On my x86_64 typicalconfig
> (http://www.zip.com.au/~akpm/linux/patches/stuff/config-x)
> 
> everything inlined:
> 
>    text    data     bss     dec     hex filename
> 4079169  702440  280184 5061793  4d3ca1 vmlinux
> 
> uninline init_waitqueue_head:
> 
> 4076921  702456  280184 5059561  4d33e9 vmlinux
> 
> uninline init_waitqueue_head+init_waitqueue_entry
> 
> box:/usr/src/25> size vmlinux
>    text    data     bss     dec     hex filename
> 4077017  702472  280184 5059673  4d3459 vmlinux
> 
> uninline init_waitqueue_head+init_waitqueue_entry+init_waitqueue_func_entry
> 
> box:/usr/src/25> size vmlinux
>    text    data     bss     dec     hex filename
> 4077128  702496  280184 5059808  4d34e0 vmlinux
> 
> So we only want to uninline init_waitqueue_head().

yeah, i played with that too and concluded that it's a small win on i386
:-) Anyway, updated patch below - i agree that the biggest item is
init_waitqueue_head().

	Ingo

---------------->
Subject: uninline init_waitqueue_head()
From: Ingo Molnar <mingo@elte.hu>

uninline more wait.h inline functions.

allyesconfig vmlinux size delta:

  text            data    bss     dec          filename
  20736884        6073834 3075176 29885894     vmlinux.before
  20721009        6073966 3075176 29870151     vmlinux.after

~18 bytes per callsite, 15K of text size (~0.1%) saved.

(as an added bonus this also removes a lockdep annotation.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/wait.h |   12 +-----------
 kernel/wait.c        |    8 ++++++--
 2 files changed, 7 insertions(+), 13 deletions(-)

Index: linux/include/linux/wait.h
===================================================================
--- linux.orig/include/linux/wait.h
+++ linux/include/linux/wait.h
@@ -77,17 +77,7 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
 
-/*
- * lockdep: we want one lock-class for all waitqueue locks.
- */
-extern struct lock_class_key waitqueue_lock_key;
-
-static inline void init_waitqueue_head(wait_queue_head_t *q)
-{
-	spin_lock_init(&q->lock);
-	lockdep_set_class(&q->lock, &waitqueue_lock_key);
-	INIT_LIST_HEAD(&q->task_list);
-}
+extern void init_waitqueue_head(wait_queue_head_t *q);
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
Index: linux/kernel/wait.c
===================================================================
--- linux.orig/kernel/wait.c
+++ linux/kernel/wait.c
@@ -10,9 +10,13 @@
 #include <linux/wait.h>
 #include <linux/hash.h>
 
-struct lock_class_key waitqueue_lock_key;
+void init_waitqueue_head(wait_queue_head_t *q)
+{
+	spin_lock_init(&q->lock);
+	INIT_LIST_HEAD(&q->task_list);
+}
 
-EXPORT_SYMBOL(waitqueue_lock_key);
+EXPORT_SYMBOL(init_waitqueue_head);
 
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 {

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
  2006-07-06  8:23                                 ` Ingo Molnar
@ 2006-07-06  9:27                                 ` Heiko Carstens
  2006-07-06  9:34                                   ` Arjan van de Ven
  2006-07-06 11:59                                 ` linux-os (Dick Johnson)
  2006-07-06 19:56                                 ` Linus Torvalds
  3 siblings, 1 reply; 133+ messages in thread
From: Heiko Carstens @ 2006-07-06  9:27 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Andrew Morton, linux-kernel, arjan

> Subject: spinlocks: remove 'volatile'
> From: Ingo Molnar <mingo@elte.hu>
> 
> remove 'volatile' from the spinlock types, it causes gcc to
> generate really bad code. (and it's pointless anyway)
> 
> this reduces the non-debug SMP kernel's size by 0.2% (!).
> 
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
> ---
>  include/asm-i386/spinlock_types.h   |    4 ++--
>  include/asm-x86_64/spinlock_types.h |    4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> Index: linux/include/asm-i386/spinlock_types.h
> ===================================================================
> --- linux.orig/include/asm-i386/spinlock_types.h
> +++ linux/include/asm-i386/spinlock_types.h
> @@ -6,13 +6,13 @@
>  #endif
>  
>  typedef struct {
> -	volatile unsigned int slock;
> +	unsigned int slock;
>  } raw_spinlock_t;
>  
>  #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
>  
>  typedef struct {
> -	volatile unsigned int lock;
> +	unsigned int lock;
>  } raw_rwlock_t;

Shouldn't the __raw_read_can_lock and __raw_write_can_lock macros be changed too, just
to make sure the value gets read every single time if it's used in a loop?
Just like the __raw_spin_is_locked already has a (volatile signed char * cast)?

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06  9:27                                 ` Heiko Carstens
@ 2006-07-06  9:34                                   ` Arjan van de Ven
  0 siblings, 0 replies; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-06  9:34 UTC (permalink / raw)
  To: Heiko Carstens; +Cc: Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel


> 
> Shouldn't the __raw_read_can_lock and __raw_write_can_lock macros be changed too, just
> to make sure the value gets read every single time if it's used in a loop?
> Just like the __raw_spin_is_locked already has a (volatile signed char * cast)?
> -

it shouldn't get a volatile case imo, just a barrier().
A barrier() at least has well defined semantics, unlike volatile...



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
  2006-07-06  8:23                                 ` Ingo Molnar
  2006-07-06  9:27                                 ` Heiko Carstens
@ 2006-07-06 11:59                                 ` linux-os (Dick Johnson)
  2006-07-06 12:01                                   ` Arjan van de Ven
  2006-07-06 16:07                                   ` [patch] spinlocks: remove 'volatile' Linus Torvalds
  2006-07-06 19:56                                 ` Linus Torvalds
  3 siblings, 2 replies; 133+ messages in thread
From: linux-os (Dick Johnson) @ 2006-07-06 11:59 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Andrew Morton, linux-kernel, arjan


On Thu, 6 Jul 2006, Ingo Molnar wrote:

>
> * Linus Torvalds <torvalds@osdl.org> wrote:
>
>> I wonder if we should remove the "volatile". There really isn't
>> anything _good_ that gcc can do with it, but we've seen gcc code
>> generation do stupid things before just because "volatile" seems to
>> just disable even proper normal working.

Then GCC must be fixed. The keyword volatile is correct. It should
force the compiler to read the variable every time it's used.

>
> yeah. I tried this and it indeed slashed 42K off text size (0.2%):
>
> text            data    bss     dec             filename
> 20779489        6073834 3075176 29928499        vmlinux.volatile
> 20736884        6073834 3075176 29885894        vmlinux.non-volatile
>
> i booted the resulting allyesconfig bzImage and everything seems to be
> working fine. Find patch below.
>
> 	Ingo
>
> ------------------>
> Subject: spinlocks: remove 'volatile'
> From: Ingo Molnar <mingo@elte.hu>
>
> remove 'volatile' from the spinlock types, it causes gcc to
> generate really bad code. (and it's pointless anyway)
>

This is not pointless. If GCC generates bad code, tell the
GCC people. The volatile keyword is essential.


> this reduces the non-debug SMP kernel's size by 0.2% (!).
>
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
> ---
> include/asm-i386/spinlock_types.h   |    4 ++--
> include/asm-x86_64/spinlock_types.h |    4 ++--
> 2 files changed, 4 insertions(+), 4 deletions(-)
>
> Index: linux/include/asm-i386/spinlock_types.h
> ===================================================================
> --- linux.orig/include/asm-i386/spinlock_types.h
> +++ linux/include/asm-i386/spinlock_types.h
> @@ -6,13 +6,13 @@
> #endif
>
> typedef struct {
> -	volatile unsigned int slock;
> +	unsigned int slock;
> } raw_spinlock_t;
>
> #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
>
> typedef struct {
> -	volatile unsigned int lock;
> +	unsigned int lock;
> } raw_rwlock_t;
>
> #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
> Index: linux/include/asm-x86_64/spinlock_types.h
> ===================================================================
> --- linux.orig/include/asm-x86_64/spinlock_types.h
> +++ linux/include/asm-x86_64/spinlock_types.h
> @@ -6,13 +6,13 @@
> #endif
>
> typedef struct {
> -	volatile unsigned int slock;
> +	unsigned int slock;
> } raw_spinlock_t;
>
> #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
>
> typedef struct {
> -	volatile unsigned int lock;
> +	unsigned int lock;
> } raw_rwlock_t;
>
> #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

Cheers,
Dick Johnson
Penguin : Linux version 2.6.16.4 on an i686 machine (5592.88 BogoMips).
New book: http://www.AbominableFirebug.com/
_
\x1a\x04

****************************************************************
The information transmitted in this message is confidential and may be privileged.  Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited.  If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to DeliveryErrors@analogic.com - and destroy all copies of this information, including any attachments, without reading or disclosing them.

Thank you.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 11:59                                 ` linux-os (Dick Johnson)
@ 2006-07-06 12:01                                   ` Arjan van de Ven
  2006-07-06 12:29                                     ` linux-os (Dick Johnson)
  2006-07-06 18:15                                     ` Mark Lord
  2006-07-06 16:07                                   ` [patch] spinlocks: remove 'volatile' Linus Torvalds
  1 sibling, 2 replies; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-06 12:01 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

On Thu, 2006-07-06 at 07:59 -0400, linux-os (Dick Johnson) wrote:
> On Thu, 6 Jul 2006, Ingo Molnar wrote:
> 
> >
> > * Linus Torvalds <torvalds@osdl.org> wrote:
> >
> >> I wonder if we should remove the "volatile". There really isn't
> >> anything _good_ that gcc can do with it, but we've seen gcc code
> >> generation do stupid things before just because "volatile" seems to
> >> just disable even proper normal working.
> 
> Then GCC must be fixed. The keyword volatile is correct. It should
> force the compiler to read the variable every time it's used.

this is not really what the C standard says.



> This is not pointless. If GCC generates bad code, tell the
> GCC people. The volatile keyword is essential.

no the "volatile" semantics are vague, trecherous and evil. It's a LOT
better to insert the well defined "barrier()" in the right places.



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:01                                   ` Arjan van de Ven
@ 2006-07-06 12:29                                     ` linux-os (Dick Johnson)
  2006-07-06 12:39                                       ` Arjan van de Ven
                                                         ` (2 more replies)
  2006-07-06 18:15                                     ` Mark Lord
  1 sibling, 3 replies; 133+ messages in thread
From: linux-os (Dick Johnson) @ 2006-07-06 12:29 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel


On Thu, 6 Jul 2006, Arjan van de Ven wrote:

> On Thu, 2006-07-06 at 07:59 -0400, linux-os (Dick Johnson) wrote:
>> On Thu, 6 Jul 2006, Ingo Molnar wrote:
>>
>>>
>>> * Linus Torvalds <torvalds@osdl.org> wrote:
>>>
>>>> I wonder if we should remove the "volatile". There really isn't
>>>> anything _good_ that gcc can do with it, but we've seen gcc code
>>>> generation do stupid things before just because "volatile" seems to
>>>> just disable even proper normal working.
>>
>> Then GCC must be fixed. The keyword volatile is correct. It should
>> force the compiler to read the variable every time it's used.
>
> this is not really what the C standard says.
>
>
>
>> This is not pointless. If GCC generates bad code, tell the
>> GCC people. The volatile keyword is essential.
>
> no the "volatile" semantics are vague, trecherous and evil. It's a LOT
> better to insert the well defined "barrier()" in the right places.

Look at:

 	http://en.wikipedia.org/wiki/Volatile_variable

This is just what is needed to prevent the compiler from making non working
code during optimization.

Also look at:

 	http://en.wikipedia.org/wiki/Memory_barrier

This is used to prevent out-of-order execution, not at all what is
necessary.

Also, just because it 'works' means nothing. When SMP processors
came about, we had many drivers with no spin-locks at all. Sometimes
some files had a byte or two changed. That was the only obvious
problem. If this had continued, the entire file system would be
trashed, but fortunately we learned about spin-locks and the
volatile keyword. You must not destroy the spin locks by removing
the volatile keyword.


Cheers,
Dick Johnson
Penguin : Linux version 2.6.16.4 on an i686 machine (5592.88 BogoMips).
New book: http://www.AbominableFirebug.com/
_
\x1a\x04

****************************************************************
The information transmitted in this message is confidential and may be privileged.  Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited.  If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to DeliveryErrors@analogic.com - and destroy all copies of this information, including any attachments, without reading or disclosing them.

Thank you.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:29                                     ` linux-os (Dick Johnson)
@ 2006-07-06 12:39                                       ` Arjan van de Ven
  2006-07-06 13:39                                         ` J.A. Magallón
  2006-07-06 16:40                                       ` Nick Piggin
  2006-07-06 23:19                                       ` David Schwartz
  2 siblings, 1 reply; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-06 12:39 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

On Thu, 2006-07-06 at 08:29 -0400, linux-os (Dick Johnson) wrote:
> On Thu, 6 Jul 2006, Arjan van de Ven wrote:
> 
> > On Thu, 2006-07-06 at 07:59 -0400, linux-os (Dick Johnson) wrote:
> >> On Thu, 6 Jul 2006, Ingo Molnar wrote:
> >>
> >>>
> >>> * Linus Torvalds <torvalds@osdl.org> wrote:
> >>>
> >>>> I wonder if we should remove the "volatile". There really isn't
> >>>> anything _good_ that gcc can do with it, but we've seen gcc code
> >>>> generation do stupid things before just because "volatile" seems to
> >>>> just disable even proper normal working.
> >>
> >> Then GCC must be fixed. The keyword volatile is correct. It should
> >> force the compiler to read the variable every time it's used.
> >
> > this is not really what the C standard says.
> >
> >
> >
> >> This is not pointless. If GCC generates bad code, tell the
> >> GCC people. The volatile keyword is essential.
> >
> > no the "volatile" semantics are vague, trecherous and evil. It's a LOT
> > better to insert the well defined "barrier()" in the right places.
> 
> Look at:
> 
>  	http://en.wikipedia.org/wiki/Volatile_variable
> 
> This is just what is needed to prevent the compiler from making non working
> code during optimization.

and an entry level document at wikipedia is more important than the C
standard ;)

> 
> Also look at:
> 
>  	http://en.wikipedia.org/wiki/Memory_barrier
> 
> This is used to prevent out-of-order execution, not at all what is
> necessary.

I did not talk about memory barriers. In fact, barrier() is NOT a memory
barrier. It's a compiler optimization barrier!



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:39                                       ` Arjan van de Ven
@ 2006-07-06 13:39                                         ` J.A. Magallón
  2006-07-06 13:43                                           ` Arjan van de Ven
                                                             ` (2 more replies)
  0 siblings, 3 replies; 133+ messages in thread
From: J.A. Magallón @ 2006-07-06 13:39 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: linux-os (Dick Johnson),
	Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

On Thu, 06 Jul 2006 14:39:43 +0200, Arjan van de Ven <arjan@infradead.org> wrote:

> On Thu, 2006-07-06 at 08:29 -0400, linux-os (Dick Johnson) wrote:
> > On Thu, 6 Jul 2006, Arjan van de Ven wrote:
> > 
> > > On Thu, 2006-07-06 at 07:59 -0400, linux-os (Dick Johnson) wrote:
> > >> On Thu, 6 Jul 2006, Ingo Molnar wrote:
> > >>
> > >>>
> > >>> * Linus Torvalds <torvalds@osdl.org> wrote:
> > >>>
> > >>>> I wonder if we should remove the "volatile". There really isn't
> > >>>> anything _good_ that gcc can do with it, but we've seen gcc code
> > >>>> generation do stupid things before just because "volatile" seems to
> > >>>> just disable even proper normal working.
> > >>
> > >> Then GCC must be fixed. The keyword volatile is correct. It should
> > >> force the compiler to read the variable every time it's used.
> > >
> > > this is not really what the C standard says.
> > >
> > >
> > >
> > >> This is not pointless. If GCC generates bad code, tell the
> > >> GCC people. The volatile keyword is essential.
> > >
> > > no the "volatile" semantics are vague, trecherous and evil. It's a LOT
> > > better to insert the well defined "barrier()" in the right places.
> > 
> > Look at:
> > 
> >  	http://en.wikipedia.org/wiki/Volatile_variable
> > 
> > This is just what is needed to prevent the compiler from making non working
> > code during optimization.
> 
> and an entry level document at wikipedia is more important than the C
> standard ;)
> 
> > 
> > Also look at:
> > 
> >  	http://en.wikipedia.org/wiki/Memory_barrier
> > 
> > This is used to prevent out-of-order execution, not at all what is
> > necessary.
> 
> I did not talk about memory barriers. In fact, barrier() is NOT a memory
> barrier. It's a compiler optimization barrier!
> 

// Read 10 samples from 2 A/D converters.

int*	ina;
int	a[10];
int*	inb;
int	b[10];

for (int i=0; i<10; i++)
{
	a[i] = *ina;
	barrier();
	b[i] = *inb;
}

The barrier prevents the compiler of translating this to:

for (int i=0; i<10; i++)
{
	b[i] = *inb;
	a[i] = *ina;
}

or even to:

for (int i=0; i<10; i++)
	a[i] = *ina;
for (int i=0; i<10; i++)
	b[i] = *inb;

but does not prevent it to do this:

register int tmp_a = *ina;
register int tmp_b = *inb;

for (int i=0; i<10; i++)
{
	a[i] = tmp_a;
	b[i] = tmp_b;
}

because nor 'ina' nor 'inb' change under what the compiler sees inside
the loop. 'volatile' prevents the compiler of do a high level cache of
*ina or *inb.

--
J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
                                         \         It's better when it's free
Mandriva Linux release 2007.0 (Cooker) for i586
Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 13:39                                         ` J.A. Magallón
@ 2006-07-06 13:43                                           ` Arjan van de Ven
  2006-07-06 14:05                                           ` Chase Venters
  2006-07-06 14:26                                           ` Andreas Schwab
  2 siblings, 0 replies; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-06 13:43 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: linux-os (Dick Johnson),
	Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel


> > I did not talk about memory barriers. In fact, barrier() is NOT a memory
> > barrier. It's a compiler optimization barrier!
> > 
> 
> // Read 10 samples from 2 A/D converters.
> 
> int*	ina;
> int	a[10];
> int*	inb;
> int	b[10];
> 
> for (int i=0; i<10; i++)
> {
> 	a[i] = *ina;
> 	barrier();
> 	b[i] = *inb;
> }
> 
> The barrier prevents the compiler of translating this to:
> 
> for (int i=0; i<10; i++)
> {
> 	b[i] = *inb;
> 	a[i] = *ina;
> }
> 
> or even to:
> 
> for (int i=0; i<10; i++)
> 	a[i] = *ina;
> for (int i=0; i<10; i++)
> 	b[i] = *inb;
> 
> but does not prevent it to do this:

yes it does. It's a full optimization barrier; the compiler assumes all
register and memory content has changed from before the barrier(), and
it will start "fresh".



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 13:39                                         ` J.A. Magallón
  2006-07-06 13:43                                           ` Arjan van de Ven
@ 2006-07-06 14:05                                           ` Chase Venters
  2006-07-06 14:26                                           ` Andreas Schwab
  2 siblings, 0 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-06 14:05 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

On Thursday 06 July 2006 08:39, J.A. Magallón wrote:
>
> // Read 10 samples from 2 A/D converters.
>
> int*	ina;
> int	a[10];
> int*	inb;
> int	b[10];
>
> for (int i=0; i<10; i++)
> {
> 	a[i] = *ina;
> 	barrier();
> 	b[i] = *inb;
> }
>
> The barrier prevents the compiler of translating this to:
>
> for (int i=0; i<10; i++)
> {
> 	b[i] = *inb;
> 	a[i] = *ina;
> }
>
> or even to:
>
> for (int i=0; i<10; i++)
> 	a[i] = *ina;
> for (int i=0; i<10; i++)
> 	b[i] = *inb;
>
> but does not prevent it to do this:
>
> register int tmp_a = *ina;
> register int tmp_b = *inb;
>
> for (int i=0; i<10; i++)
> {
> 	a[i] = tmp_a;
> 	b[i] = tmp_b;
> }
>
> because nor 'ina' nor 'inb' change under what the compiler sees inside
> the loop. 'volatile' prevents the compiler of do a high level cache of
> *ina or *inb.
>

Check the GCC documentation:

http://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Extended-Asm.html
 
> If your assembler instructions access memory in an unpredictable fashion,
> add `memory' to the list of clobbered registers. This will cause GCC to not
> keep memory values cached in registers across the assembler instruction and
> not optimize stores or loads to that memory. You will also want to add the
> volatile keyword if the memory affected is not listed in the inputs or
> outputs of the asm, as the `memory' clobber does not count as a side-effect
> of the asm. If you know how large the accessed memory is, you can add it as
> input or output but if this is not known, you should add `memory'. As an
> example, if you access ten bytes of a string, you can use a memory input
> like:         

The reference to the volatile keyword here is of course talking about asm 
volatile usage to keep the compiler from optimizing out the seemingly 
pointless assembly (not usage on a variable).

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 13:39                                         ` J.A. Magallón
  2006-07-06 13:43                                           ` Arjan van de Ven
  2006-07-06 14:05                                           ` Chase Venters
@ 2006-07-06 14:26                                           ` Andreas Schwab
  2 siblings, 0 replies; 133+ messages in thread
From: Andreas Schwab @ 2006-07-06 14:26 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

"J.A. Magallón" <jamagallon@ono.com> writes:

> The barrier prevents the compiler of translating this to:
>
> for (int i=0; i<10; i++)
> {
> 	b[i] = *inb;
> 	a[i] = *ina;
> }
>
> or even to:
>
> for (int i=0; i<10; i++)
> 	a[i] = *ina;
> for (int i=0; i<10; i++)
> 	b[i] = *inb;
>
> but does not prevent it to do this:
>
> register int tmp_a = *ina;
> register int tmp_b = *inb;
>
> for (int i=0; i<10; i++)
> {
> 	a[i] = tmp_a;
> 	b[i] = tmp_b;
> }
>
> because nor 'ina' nor 'inb' change under what the compiler sees inside
> the loop. 'volatile' prevents the compiler of do a high level cache of
> *ina or *inb.

Actually the compiler may not do any of these transformations because *ina
or *inb may alias any of a[i] or b[i].

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 11:59                                 ` linux-os (Dick Johnson)
  2006-07-06 12:01                                   ` Arjan van de Ven
@ 2006-07-06 16:07                                   ` Linus Torvalds
  2006-07-06 16:13                                     ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 16:07 UTC (permalink / raw)
  To: linux-os (Dick Johnson); +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, linux-os (Dick Johnson) wrote:
> 
> Then GCC must be fixed. The keyword volatile is correct. It should
> force the compiler to read the variable every time it's used.

No.

"volatile" really _is_ misdesigned. The semantics of it are so unclear as 
to be totally useless. The only thing "volatile" can ever do is generate 
worse code, WITH NO UPSIDES.

Historically (and from the standpoint of the C standard), the definition 
of "volatile" is that any access is "visible" in the machine, and it 
really kind of makes sense for hardware accesses, except these days 
hardware accesses have other rules that are _not_ covered by "volatile", 
so you can't actually use them for that.

And for accesses that have some software rules (ie not IO devices etc), 
the rules for "volatile" are too vague to be useful. 

So if you actually have rules about how to access a particular piece of 
memory, just make those rules _explicit_. Use the real rules. Not 
volatile, because volatile will always do the wrong thing.

Also, more importantly, "volatile" is on the wrong _part_ of the whole 
system. In C, it's "data" that is volatile, but that is insane. Data 
isn't volatile - _accesses_ are volatile. So it may make sense to say 
"make this particular _access_ be careful", but not "make all accesses to 
this data use some random strategy".

So the only thing "volatile" is potentially useful for is:

 - actual accessor functions can use it in a _cast_ to make one particular 
   access follow the rules of "don't cache this one dereference". That is 
   useful as part of a _bigger_ set of rules about that access (ie it 
   might be the internal implementation of a "readb()", for example).

 - for "random number generation" data locations, where you literally 
   don't _have_ any rules except "it's a random number". The only really 
   valid example of this is the "jiffy" timer tick.

Any other use of "volatile" is almost certainly a bug, or just useless. 

It's a bug if the volatile means that you don't follow the proper protocol 
for accessing the data, and it's useless (and generally generates worse 
code) if you already do.

So just say NO! to volatile except under the above circumstances.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 16:07                                   ` [patch] spinlocks: remove 'volatile' Linus Torvalds
@ 2006-07-06 16:13                                     ` Linus Torvalds
  2006-07-06 17:04                                       ` Jeff Garzik
                                                         ` (2 more replies)
  0 siblings, 3 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 16:13 UTC (permalink / raw)
  To: linux-os (Dick Johnson); +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, Linus Torvalds wrote:
> 
> Any other use of "volatile" is almost certainly a bug, or just useless. 

Side note: it's also totally possible that a volatiles _hides_ a bug, ie 
removing the volatile ends up having bad effects, but that's because the 
software itself isn't actually following the rules (or, more commonly, the 
rules are broken, and somebody added "volatile" to hide the problem).

That's not just a theoretical notion, btw. We had _tons_ of these kinds of 
"volatile"s in the original old networking code. They were _all_ wrong. 
Every single one.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:29                                     ` linux-os (Dick Johnson)
  2006-07-06 12:39                                       ` Arjan van de Ven
@ 2006-07-06 16:40                                       ` Nick Piggin
  2006-07-06 23:19                                       ` David Schwartz
  2 siblings, 0 replies; 133+ messages in thread
From: Nick Piggin @ 2006-07-06 16:40 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Arjan van de Ven, Ingo Molnar, Linus Torvalds, Andrew Morton,
	linux-kernel

linux-os (Dick Johnson) wrote:

>  	http://en.wikipedia.org/wiki/Memory_barrier
> 
> This is used to prevent out-of-order execution, not at all what is
> necessary.

I don't think memory barriers prevent out of order execution,
just out of order loads and stores (which could happen on CPUs
that do in-order execution).

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 16:13                                     ` Linus Torvalds
@ 2006-07-06 17:04                                       ` Jeff Garzik
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
  2006-07-06 19:32                                       ` Jan Engelhardt
  2 siblings, 0 replies; 133+ messages in thread
From: Jeff Garzik @ 2006-07-06 17:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-os (Dick Johnson), Ingo Molnar, Andrew Morton, linux-kernel, arjan

Linus Torvalds wrote:
> 
> On Thu, 6 Jul 2006, Linus Torvalds wrote:
>> Any other use of "volatile" is almost certainly a bug, or just useless. 
> 
> Side note: it's also totally possible that a volatiles _hides_ a bug, ie 
> removing the volatile ends up having bad effects, but that's because the 
> software itself isn't actually following the rules (or, more commonly, the 
> rules are broken, and somebody added "volatile" to hide the problem).
> 
> That's not just a theoretical notion, btw. We had _tons_ of these kinds of 
> "volatile"s in the original old networking code. They were _all_ wrong. 
> Every single one.

I see precisely what you describe in newly submitted network _drivers_, 
too.  People use volatile to cover up missing barriers; to attempt to 
cover up missing flushes (needing readl after a writel); to hide the 
fact that the driver sometimes uses writel() and sometimes just does a 
direct de-ref into MMIO space.

To my view, seeing "volatile" in code is often a "I was too lazy to 
debug the code" or "I was too lazy to make my code portable" situation.

	Jeff




^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 16:13                                     ` Linus Torvalds
  2006-07-06 17:04                                       ` Jeff Garzik
@ 2006-07-06 17:52                                       ` linux-os (Dick Johnson)
  2006-07-06 18:00                                         ` Linus Torvalds
                                                           ` (3 more replies)
  2006-07-06 19:32                                       ` Jan Engelhardt
  2 siblings, 4 replies; 133+ messages in thread
From: linux-os (Dick Johnson) @ 2006-07-06 17:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan


On Thu, 6 Jul 2006, Linus Torvalds wrote:

>
>
> On Thu, 6 Jul 2006, Linus Torvalds wrote:
>>
>> Any other use of "volatile" is almost certainly a bug, or just useless.
>
> Side note: it's also totally possible that a volatiles _hides_ a bug, ie
> removing the volatile ends up having bad effects, but that's because the
> software itself isn't actually following the rules (or, more commonly, the
> rules are broken, and somebody added "volatile" to hide the problem).
>
> That's not just a theoretical notion, btw. We had _tons_ of these kinds of
> "volatile"s in the original old networking code. They were _all_ wrong.
> Every single one.
>
> 			Linus
>

Linus, you may have been reading too many novels.

If I have some code that does:

extern int spinner;

funct(){
     while(spinner)
         ;

The 'C' compiler has no choice but to actually make that memory access
and read the variable because the variable is in another module (a.k.a.
file).

However, if I have the same code, but the variable is visible during
compile time, i.e.,

int spinner=0;

funct(){
     while(spinner)
         ;

... the compiler may eliminate that code altogether because it
'knows' that spinner is FALSE, having initialized it to zero
itself.

Since spinner is global in scope, somebody surely could have
changed it before funct() was even called, but the current gcc
'C' compiler doesn't care and may optimize it away entirely. To
prevent this, you must declare the variable volatile. To do
otherwise is a bug.

Reading between the lines of your text, you are trying to say
that object 'spinner' should remain an integer, but any access
should be cast, like:

funct() {
     while((volatile)spinner)
         ;

This is just a matter of style. It substitutes a number of casts
for a simple declaration. That said, I think that the current
implementation of 'volatile' is broken because the compiler
seems to believe that the variable has moved! It recalculates
the address of the variable as well as accessing its value.
This is what makes the code generation problematical.


Cheers,
Dick Johnson
Penguin : Linux version 2.6.16.4 on an i686 machine (5592.88 BogoMips).
New book: http://www.AbominableFirebug.com/
_
\x1a\x04

****************************************************************
The information transmitted in this message is confidential and may be privileged.  Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited.  If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to DeliveryErrors@analogic.com - and destroy all copies of this information, including any attachments, without reading or disclosing them.

Thank you.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
@ 2006-07-06 18:00                                         ` Linus Torvalds
  2006-07-06 21:02                                           ` J.A. Magallón
  2006-07-06 18:10                                         ` Michael Buesch
                                                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 18:00 UTC (permalink / raw)
  To: linux-os (Dick Johnson); +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, linux-os (Dick Johnson) wrote:
> 
> Linus, you may have been reading too many novels.
> 
> If I have some code that does:
> 
> extern int spinner;
> 
> funct(){
>      while(spinner)
>          ;
> 
> The 'C' compiler has no choice but to actually make that memory access
> and read the variable because the variable is in another module (a.k.a.
> file).

You don't know how C works, do you?

You also have no idea of what out-of-order memory accesses do to OS code, 
right?

THE FACT IS, "volatile" IS USELESS, BADLY DEFINED, AND AN ALMOST 
COMPLETELY SURE SIGN OF BUGS.

Go on, do your own OS, and try to use "volatile" in it as the 
serialization abstraction. I personally will guarantee that you will fail. 
But hey, you can prove me wrong.

In the meantime, in Linux, "volatile" is considered a bug in any but the 
two special cases I already mentioned.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
  2006-07-06 18:00                                         ` Linus Torvalds
@ 2006-07-06 18:10                                         ` Michael Buesch
  2006-07-06 18:16                                         ` Chase Venters
  2006-07-07 18:16                                         ` Krzysztof Halasa
  3 siblings, 0 replies; 133+ messages in thread
From: Michael Buesch @ 2006-07-06 18:10 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan, Linus Torvalds

On Thursday 06 July 2006 19:52, linux-os (Dick Johnson) wrote:
> int spinner=0;
> 
> funct(){
>      while(spinner)
		barrier();

-- 
Greetings Michael.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:01                                   ` Arjan van de Ven
  2006-07-06 12:29                                     ` linux-os (Dick Johnson)
@ 2006-07-06 18:15                                     ` Mark Lord
  2006-07-06 19:15                                       ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Mark Lord @ 2006-07-06 18:15 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: linux-os (Dick Johnson),
	Ingo Molnar, Linus Torvalds, Andrew Morton, linux-kernel

Arjan van de Ven wrote:
>
> this is not really what the C standard says.

..and now half of LKML scurries to find a copy of the ANSI C specs..

I'm still browsing a copy here, but so far have only really found this:

  A volatile declaration may be used to describe an object corresponding
  to a memory-mapped input/output port or an object accessed by an
  aysnchronously interrupting function.  Actions on objects so declared
  shall not be "optimized out" by an implementation or reordered except
  as permitted by the rules for evaluating expressions.

Still lots of document left to go through, so I suppose it might
contradict itself later on.

And of course this is only a language spec, not Real Life (tm).

Cheers


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
  2006-07-06 18:00                                         ` Linus Torvalds
  2006-07-06 18:10                                         ` Michael Buesch
@ 2006-07-06 18:16                                         ` Chase Venters
  2006-07-07 18:16                                         ` Krzysztof Halasa
  3 siblings, 0 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-06 18:16 UTC (permalink / raw)
  To: linux-os \\(Dick Johnson\\)
  Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, linux-kernel, arjan

On Thu, 6 Jul 2006, linux-os \(Dick Johnson\) wrote:

>
> On Thu, 6 Jul 2006, Linus Torvalds wrote:
>
>>
>>
>> On Thu, 6 Jul 2006, Linus Torvalds wrote:
>>>
>>> Any other use of "volatile" is almost certainly a bug, or just useless.
>>
>> Side note: it's also totally possible that a volatiles _hides_ a bug, ie
>> removing the volatile ends up having bad effects, but that's because the
>> software itself isn't actually following the rules (or, more commonly, the
>> rules are broken, and somebody added "volatile" to hide the problem).
>>
>> That's not just a theoretical notion, btw. We had _tons_ of these kinds of
>> "volatile"s in the original old networking code. They were _all_ wrong.
>> Every single one.
>>
>> 			Linus
>>
>
> Linus, you may have been reading too many novels.
>
> If I have some code that does:
>
> extern int spinner;
>
> funct(){
>     while(spinner)
>         ;
>
> The 'C' compiler has no choice but to actually make that memory access
> and read the variable because the variable is in another module (a.k.a.
> file).
>
> However, if I have the same code, but the variable is visible during
> compile time, i.e.,
>
> int spinner=0;
>
> funct(){
>     while(spinner)
>         ;
>
> ... the compiler may eliminate that code altogether because it
> 'knows' that spinner is FALSE, having initialized it to zero
> itself.
>
> Since spinner is global in scope, somebody surely could have
> changed it before funct() was even called, but the current gcc
> 'C' compiler doesn't care and may optimize it away entirely. To
> prevent this, you must declare the variable volatile. To do
> otherwise is a bug.

No. The compiler can only optimize away that loop if spinner is provably 
const. Otherwise, _all_ non-const global variables would need to be 
declared 'volatile' because (as you imply) the compiler could forever assume their 
initial state has not changed.

Try it. Examine the -s. The loop is always present for me, even with -O3. 
Now, you'll notice that without a barrier, the value first loaded into the register
is never reloaded, making an infinite loop. But we know the compiler makes 
such optimizations, which is why we tell the compiler that the value 
should not be cached across the magic (barrier()) line.

> Reading between the lines of your text, you are trying to say
> that object 'spinner' should remain an integer, but any access
> should be cast, like:
>
> funct() {
>     while((volatile)spinner)
>         ;
>
> This is just a matter of style. It substitutes a number of casts
> for a simple declaration. That said, I think that the current
> implementation of 'volatile' is broken because the compiler
> seems to believe that the variable has moved! It recalculates
> the address of the variable as well as accessing its value.
> This is what makes the code generation problematical.
>

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 18:15                                     ` Mark Lord
@ 2006-07-06 19:15                                       ` Linus Torvalds
  2006-07-06 19:33                                         ` Chris Friesen
  2006-07-08  8:40                                         ` Avi Kivity
  0 siblings, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 19:15 UTC (permalink / raw)
  To: Mark Lord
  Cc: Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel



On Thu, 6 Jul 2006, Mark Lord wrote:
>
> I'm still browsing a copy here, but so far have only really found this:
> 
>  A volatile declaration may be used to describe an object corresponding
>  to a memory-mapped input/output port or an object accessed by an
>  aysnchronously interrupting function.  Actions on objects so declared
>  shall not be "optimized out" by an implementation or reordered except
>  as permitted by the rules for evaluating expressions.

Note that the "reordered" is totally pointless.

The _hardware_ will re-order accesses. Which is the whole point. 
"volatile" is basically never sufficient in itself.

So the definition of "volatile" literally made sense three or four decades 
ago. It's not sensible any more.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 16:13                                     ` Linus Torvalds
  2006-07-06 17:04                                       ` Jeff Garzik
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
@ 2006-07-06 19:32                                       ` Jan Engelhardt
  2006-07-06 20:26                                         ` Jeremy Fitzhardinge
  2 siblings, 1 reply; 133+ messages in thread
From: Jan Engelhardt @ 2006-07-06 19:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-os (Dick Johnson), Ingo Molnar, Andrew Morton, linux-kernel, arjan

>> Any other use of "volatile" is almost certainly a bug, or just useless. 
>
>That's not just a theoretical notion, btw. We had _tons_ of these kinds of 
>"volatile"s in the original old networking code. They were _all_ wrong. 
>Every single one.
>

$ find linux-2.6.17 -type f -iname '*.[ch]' -print0 | xargs -0 grep 
volatile | wc -l
13948

Tough job.


Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:15                                       ` Linus Torvalds
@ 2006-07-06 19:33                                         ` Chris Friesen
  2006-07-06 19:37                                           ` Mark Lord
                                                             ` (4 more replies)
  2006-07-08  8:40                                         ` Avi Kivity
  1 sibling, 5 replies; 133+ messages in thread
From: Chris Friesen @ 2006-07-06 19:33 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Linus Torvalds wrote:

> On Thu, 6 Jul 2006, Mark Lord wrote:

>> A volatile declaration may be used to describe an object corresponding
>> to a memory-mapped input/output port or an object accessed by an
>> aysnchronously interrupting function.  Actions on objects so declared
>> shall not be "optimized out" by an implementation or reordered except
>> as permitted by the rules for evaluating expressions.
> 
> 
> Note that the "reordered" is totally pointless.
> 
> The _hardware_ will re-order accesses. Which is the whole point. 
> "volatile" is basically never sufficient in itself.

The "reordered" thing really only matters on SMP machines, no?  In which 
case (for userspace) the locking mechanisms (mutexes, etc.) should do 
The Right Thing to ensure visibility between cpus.

The C standard requires the use of volatile for signal handlers and setjmp.

For userspace at least the whole discussion of "barriers" is sort of 
moot--there are no memory barriers defined in the C language, which 
makes it kind of hard to write portable code that uses them.

Only a couple months ago Dave Miller mentioned setting variables 
modified in signal handlers as "volatile", and you didn't complain.  If 
fact you added that they should be of type "sigatomic_t".

(Pardon the long URL)

http://groups.google.com/group/linux.kernel/browse_frm/thread/18a59e3c9d8f6310/84881a7e53038b0e?lnk=st&q=sigatomic_t&rnum=8&hl=en#84881a7e53038b0e


Chris


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:33                                         ` Chris Friesen
@ 2006-07-06 19:37                                           ` Mark Lord
  2006-07-06 20:28                                             ` Chris Friesen
  2006-07-06 19:38                                           ` Arjan van de Ven
                                                             ` (3 subsequent siblings)
  4 siblings, 1 reply; 133+ messages in thread
From: Mark Lord @ 2006-07-06 19:37 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Linus Torvalds, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Chris Friesen wrote:
>
> The "reordered" thing really only matters on SMP machines, no?

Also (very much!) for device drivers.

Cheers

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:33                                         ` Chris Friesen
  2006-07-06 19:37                                           ` Mark Lord
@ 2006-07-06 19:38                                           ` Arjan van de Ven
  2006-07-06 19:41                                           ` Måns Rullgård
                                                             ` (2 subsequent siblings)
  4 siblings, 0 replies; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-06 19:38 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Linus Torvalds, Mark Lord, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

On Thu, 2006-07-06 at 13:33 -0600, Chris Friesen wrote:
> Linus Torvalds wrote:
> 
> > On Thu, 6 Jul 2006, Mark Lord wrote:
> 
> >> A volatile declaration may be used to describe an object corresponding
> >> to a memory-mapped input/output port or an object accessed by an
> >> aysnchronously interrupting function.  Actions on objects so declared
> >> shall not be "optimized out" by an implementation or reordered except
> >> as permitted by the rules for evaluating expressions.
> > 
> > 
> > Note that the "reordered" is totally pointless.
> > 
> > The _hardware_ will re-order accesses. Which is the whole point. 
> > "volatile" is basically never sufficient in itself.
> 
> The "reordered" thing really only matters on SMP machines, no?  In which 
> case (for userspace) the locking mechanisms (mutexes, etc.) should do 
> The Right Thing to ensure visibility between cpus.
> 
> The C standard requires the use of volatile for signal handlers and setjmp.
> 
> For userspace at least the whole discussion of "barriers" is sort of 
> moot--there are no memory barriers defined in the C language, which 
> makes it kind of hard to write portable code that uses them.

You're falling into RBJ's trap. I did not say *MEMORY BARRIER*. While
for some uses of "volatile" that is the right substitute, for others it
is *optimization barrier* which matters. 



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:33                                         ` Chris Friesen
  2006-07-06 19:37                                           ` Mark Lord
  2006-07-06 19:38                                           ` Arjan van de Ven
@ 2006-07-06 19:41                                           ` Måns Rullgård
  2006-07-06 19:42                                           ` Jeff Garzik
  2006-07-06 19:58                                           ` Linus Torvalds
  4 siblings, 0 replies; 133+ messages in thread
From: Måns Rullgård @ 2006-07-06 19:41 UTC (permalink / raw)
  To: linux-kernel

"Chris Friesen" <cfriesen@nortel.com> writes:

> Linus Torvalds wrote:
>
>> On Thu, 6 Jul 2006, Mark Lord wrote:
>
>>> A volatile declaration may be used to describe an object corresponding
>>> to a memory-mapped input/output port or an object accessed by an
>>> aysnchronously interrupting function.  Actions on objects so declared
>>> shall not be "optimized out" by an implementation or reordered except
>>> as permitted by the rules for evaluating expressions.
>> Note that the "reordered" is totally pointless.
>> The _hardware_ will re-order accesses. Which is the whole
>> point. "volatile" is basically never sufficient in itself.
>
> The "reordered" thing really only matters on SMP machines, no?

No, each CPU does write combining and write merging all on its own.

-- 
Måns Rullgård
mru@inprovide.com


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:33                                         ` Chris Friesen
                                                             ` (2 preceding siblings ...)
  2006-07-06 19:41                                           ` Måns Rullgård
@ 2006-07-06 19:42                                           ` Jeff Garzik
  2006-07-06 19:58                                           ` Linus Torvalds
  4 siblings, 0 replies; 133+ messages in thread
From: Jeff Garzik @ 2006-07-06 19:42 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Linus Torvalds, Mark Lord, Arjan van de Ven,
	linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Chris Friesen wrote:
> The "reordered" thing really only matters on SMP machines, no?  In which 

No.

Pentiums have had out-of-order execution for a while now.  Started with 
the Pentium Pro, I think.

	Jeff



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
                                                   ` (2 preceding siblings ...)
  2006-07-06 11:59                                 ` linux-os (Dick Johnson)
@ 2006-07-06 19:56                                 ` Linus Torvalds
  2006-07-06 20:34                                   ` Linus Torvalds
  3 siblings, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 19:56 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, Ingo Molnar wrote:
> 
> yeah. I tried this and it indeed slashed 42K off text size (0.2%):
> 
>  text            data    bss     dec             filename
>  20779489        6073834 3075176 29928499        vmlinux.volatile
>  20736884        6073834 3075176 29885894        vmlinux.non-volatile
> 
> i booted the resulting allyesconfig bzImage and everything seems to be 
> working fine. Find patch below.

Ok, the patch itself looks fine, but looking at some of the usage of the 
spinlocks, I worry that the 'volatile' may actually have hidden a real 
bug.

Here's what __raw_spin_lock() looks like on x86:

        alternative_smp(
                __raw_spin_lock_string,
                __raw_spin_lock_string_up,
                "=m" (lock->slock) : : "memory");

where the actual spinlock sequence itself isn't important.

What's important is what we tell the compiler, and the "=m" in particular.

The compiler will validly think that the spinlock assembly wil overwrite 
the "slock" location _WITHOUT_ caring about the old value. 

And that's dangerous, because it means that in theory a sequence like

	spin_lock_init(&lock);
	spin_lock(&lock);

might end up having the compiler optimize away the "unnecessary" 
initialization code because the compiler (correctly, as far as we've told 
it) thinks that the spin_lock() will overwrite the initial value.

And the "volatile" would have hidden that bug, for totally stupid and 
unrelated reasons..

Now, admittedly, the above kind of code is insane, and possibly doesn't 
exist, but still..

So I _think_ that we should change the "=m" to the much more correct "+m" 
at the same time (or before - it's really a bug-fix regardless) as 
removing the "volatile".

It would be interesting to hear whether that actually changes any code 
generation (hopefully it doesn't, but if it does, that in itself is 
interesting).

Btw, grepping for "=m" shows that semaphores may have the same bug, and in 
fact, we seem to have that "volatile" there too (perhaps, in fact, because 
somebody hit the bug and decided to fix it the simple way with "volatile"? 
Who knows, it might even have been me, back before I realized how badly 
gcc messes up volatiles)

Interestingly (or perhaps not), "atomic_add()" and friends (along with the 
local_t stuff that seems to largely have been copied from the atomic code) 
use a combination of "=m" and "m" in the assembler dst/src to avoid the 
bug. They too would probably be better off using "+m".

I think that's because the whole "+m" thing is fairly new (and not well 
documented either).

Appended is my previous test-program extended to show the difference 
between "=m" and "+m"..

For me, I get:

	horrid:
	        subl    $16, %esp
	        movl    $1, 12(%esp)
	        movl    12(%esp), %eax
	        movl    %eax, a1
	        movl    $1, b1
	#APP
	        # overwrite a1
	        # modify b1
	#NO_APP
	        addl    $16, %esp
	        ret

	notbad:
	        movl    $1, b2
	#APP
	        # overwrite a2
	        # modify b2
	#NO_APP
	        ret

which shows that "horrid" (with volatile) generates truly crapola code due 
to the volatile, but that the "overwrite a1" will not optimize away the 
initializer.

And "notbad" has good code, but exactly because it's good code, the 
compiler has also noticed that the "=m" means that the initialization of 
"a2" is unnecessary, and has thus promptly removed it.

(Removing the initializer is _correct_ for that particular source code - 
it's just that with the current x86 spinlock() macros, it would be 
disastrous, and shows that your "just remove volatile" patch is dangerous 
because of our incorrect inline assembly)

		Linus
---
struct duh {
	volatile int i;
};

void horrid(void)
{
	extern struct duh a1;
	extern struct duh b1;

	a1 = (struct duh) { 1 };
	b1.i = 1;
	asm("# overwrite %0":"=m" (a1.i));
	asm("# modify %0":"+m" (b1.i));
}

struct gaah {
	int i;
};

void notbad(void)
{
	extern struct gaah a2;
	extern struct gaah b2;

	a2 = (struct gaah) { 1 };
	b2.i = 1;
	asm("# overwrite %0":"=m" (a2.i));
	asm("# modify %0":"+m" (b2.i));
}

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:33                                         ` Chris Friesen
                                                             ` (3 preceding siblings ...)
  2006-07-06 19:42                                           ` Jeff Garzik
@ 2006-07-06 19:58                                           ` Linus Torvalds
  2006-07-06 20:27                                             ` Mark Lord
  2006-07-06 20:40                                             ` Chris Friesen
  4 siblings, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 19:58 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel



On Thu, 6 Jul 2006, Chris Friesen wrote:
> 
> The C standard requires the use of volatile for signal handlers and setjmp.

Actually, the C standard requires "sigatomic_t".

> For userspace at least the whole discussion of "barriers" is sort of
> moot--there are no memory barriers defined in the C language, which makes it
> kind of hard to write portable code that uses them.

Any locking primitive BY DEFINITION has a barrier in it.

If it doesn't, it's not a locking primitive, it's a random sequence of 
code that does something pointless.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:32                                       ` Jan Engelhardt
@ 2006-07-06 20:26                                         ` Jeremy Fitzhardinge
  2006-07-06 20:55                                           ` Jan Engelhardt
  0 siblings, 1 reply; 133+ messages in thread
From: Jeremy Fitzhardinge @ 2006-07-06 20:26 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: Linus Torvalds, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel, arjan

Jan Engelhardt wrote:
> $ find linux-2.6.17 -type f -iname '*.[ch]' -print0 | xargs -0 grep 
> volatile | wc -l
> 13948
>
> Tough job.
You need to exclude "asm volatile", which is a completely different thing.

    J

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:58                                           ` Linus Torvalds
@ 2006-07-06 20:27                                             ` Mark Lord
  2006-07-06 20:40                                             ` Chris Friesen
  1 sibling, 0 replies; 133+ messages in thread
From: Mark Lord @ 2006-07-06 20:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chris Friesen, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Linus Torvalds wrote:
> 
> On Thu, 6 Jul 2006, Chris Friesen wrote:
>> The C standard requires the use of volatile for signal handlers and setjmp.
> 
> Actually, the C standard requires "sigatomic_t".

That's sig_atomic_t, which simply guarantees that the data item
can be read or written indivisibly with respect to signal handlers.

The standard goes on to suggest using it in combination with volatile
as needed.

Cheers

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:37                                           ` Mark Lord
@ 2006-07-06 20:28                                             ` Chris Friesen
  2006-07-06 20:32                                               ` Linus Torvalds
  0 siblings, 1 reply; 133+ messages in thread
From: Chris Friesen @ 2006-07-06 20:28 UTC (permalink / raw)
  To: Mark Lord
  Cc: Linus Torvalds, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Mark Lord wrote:
> Chris Friesen wrote:
> 
>>
>> The "reordered" thing really only matters on SMP machines, no?
> 
> 
> Also (very much!) for device drivers.

Certainly...but I was coming at it from the perspective of userspace code.

As long as you're not talking to external devices, each cpu must be 
coherent with respect to itself, no?  It's allowed to execute 
out-of-order, but it needs to make sure that by doing so it doesn't 
cause changes that are visible to software.

Chris

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:28                                             ` Chris Friesen
@ 2006-07-06 20:32                                               ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 20:32 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel



On Thu, 6 Jul 2006, Chris Friesen wrote:
> 
> As long as you're not talking to external devices, each cpu must be coherent
> with respect to itself, no?  It's allowed to execute out-of-order, but it
> needs to make sure that by doing so it doesn't cause changes that are visible
> to software.

Right. But then "volatile" won't really matter either, unless you have 
some _ordering_ constraint, in which case "volatile" is not enough unless 
you're guaranteed to be single-threaded.

In other words, again, "volatile" is almost always the wrong thing to 
have, and just makes you _think_ your code is correct.

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:56                                 ` Linus Torvalds
@ 2006-07-06 20:34                                   ` Linus Torvalds
  2006-07-08 22:50                                     ` Ralf Baechle
  2006-07-09  3:07                                     ` Keith Owens
  0 siblings, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 20:34 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, Linus Torvalds wrote:
> 
> So I _think_ that we should change the "=m" to the much more correct "+m" 
> at the same time (or before - it's really a bug-fix regardless) as 
> removing the "volatile".

Here's a first cut (UNTESTED!) for x86. I didn't check any other 
architectures, I bet they have similar problems.

		Linus

----
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index 4f061fa..51a1662 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -46,8 +46,8 @@ static __inline__ void atomic_add(int i,
 {
 	__asm__ __volatile__(
 		LOCK_PREFIX "addl %1,%0"
-		:"=m" (v->counter)
-		:"ir" (i), "m" (v->counter));
+		:"+m" (v->counter)
+		:"ir" (i));
 }
 
 /**
@@ -61,8 +61,8 @@ static __inline__ void atomic_sub(int i,
 {
 	__asm__ __volatile__(
 		LOCK_PREFIX "subl %1,%0"
-		:"=m" (v->counter)
-		:"ir" (i), "m" (v->counter));
+		:"+m" (v->counter)
+		:"ir" (i));
 }
 
 /**
@@ -80,8 +80,8 @@ static __inline__ int atomic_sub_and_tes
 
 	__asm__ __volatile__(
 		LOCK_PREFIX "subl %2,%0; sete %1"
-		:"=m" (v->counter), "=qm" (c)
-		:"ir" (i), "m" (v->counter) : "memory");
+		:"+m" (v->counter), "=qm" (c)
+		:"ir" (i) : "memory");
 	return c;
 }
 
@@ -95,8 +95,7 @@ static __inline__ void atomic_inc(atomic
 {
 	__asm__ __volatile__(
 		LOCK_PREFIX "incl %0"
-		:"=m" (v->counter)
-		:"m" (v->counter));
+		:"+m" (v->counter));
 }
 
 /**
@@ -109,8 +108,7 @@ static __inline__ void atomic_dec(atomic
 {
 	__asm__ __volatile__(
 		LOCK_PREFIX "decl %0"
-		:"=m" (v->counter)
-		:"m" (v->counter));
+		:"+m" (v->counter));
 }
 
 /**
@@ -127,8 +125,8 @@ static __inline__ int atomic_dec_and_tes
 
 	__asm__ __volatile__(
 		LOCK_PREFIX "decl %0; sete %1"
-		:"=m" (v->counter), "=qm" (c)
-		:"m" (v->counter) : "memory");
+		:"+m" (v->counter), "=qm" (c)
+		: : "memory");
 	return c != 0;
 }
 
@@ -146,8 +144,8 @@ static __inline__ int atomic_inc_and_tes
 
 	__asm__ __volatile__(
 		LOCK_PREFIX "incl %0; sete %1"
-		:"=m" (v->counter), "=qm" (c)
-		:"m" (v->counter) : "memory");
+		:"+m" (v->counter), "=qm" (c)
+		: : "memory");
 	return c != 0;
 }
 
@@ -166,8 +164,8 @@ static __inline__ int atomic_add_negativ
 
 	__asm__ __volatile__(
 		LOCK_PREFIX "addl %2,%0; sets %1"
-		:"=m" (v->counter), "=qm" (c)
-		:"ir" (i), "m" (v->counter) : "memory");
+		:"+m" (v->counter), "=qm" (c)
+		:"ir" (i) : "memory");
 	return c;
 }
 
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 7b8ceef..946d97c 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -20,8 +20,8 @@ #define __futex_atomic_op1(insn, ret, ol
 	.align	8\n\
 	.long	1b,3b\n\
 	.previous"						\
-	: "=r" (oldval), "=r" (ret), "=m" (*uaddr)		\
-	: "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
+	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
+	: "i" (-EFAULT), "0" (oparg), "1" (0))
 
 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
   __asm__ __volatile (						\
@@ -38,9 +38,9 @@ #define __futex_atomic_op2(insn, ret, ol
 	.align	8\n\
 	.long	1b,4b,2b,4b\n\
 	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "=m" (*uaddr),		\
+	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
 	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
+	: "r" (oparg), "i" (-EFAULT), "1" (0))
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -123,7 +123,7 @@ futex_atomic_cmpxchg_inatomic(int __user
 		"	.long   1b,3b				\n"
 		"	.previous				\n"
 
-		: "=a" (oldval), "=m" (*uaddr)
+		: "=a" (oldval), "+m" (*uaddr)
 		: "i" (-EFAULT), "r" (newval), "0" (oldval)
 		: "memory"
 	);
diff --git a/include/asm-i386/local.h b/include/asm-i386/local.h
index 3b4998c..12060e2 100644
--- a/include/asm-i386/local.h
+++ b/include/asm-i386/local.h
@@ -17,32 +17,30 @@ static __inline__ void local_inc(local_t
 {
 	__asm__ __volatile__(
 		"incl %0"
-		:"=m" (v->counter)
-		:"m" (v->counter));
+		:"+m" (v->counter));
 }
 
 static __inline__ void local_dec(local_t *v)
 {
 	__asm__ __volatile__(
 		"decl %0"
-		:"=m" (v->counter)
-		:"m" (v->counter));
+		:"+m" (v->counter));
 }
 
 static __inline__ void local_add(long i, local_t *v)
 {
 	__asm__ __volatile__(
 		"addl %1,%0"
-		:"=m" (v->counter)
-		:"ir" (i), "m" (v->counter));
+		:"+m" (v->counter)
+		:"ir" (i));
 }
 
 static __inline__ void local_sub(long i, local_t *v)
 {
 	__asm__ __volatile__(
 		"subl %1,%0"
-		:"=m" (v->counter)
-		:"ir" (i), "m" (v->counter));
+		:"+m" (v->counter)
+		:"ir" (i));
 }
 
 /* On x86, these are no better than the atomic variants. */
diff --git a/include/asm-i386/posix_types.h b/include/asm-i386/posix_types.h
index 4e47ed0..133e31e 100644
--- a/include/asm-i386/posix_types.h
+++ b/include/asm-i386/posix_types.h
@@ -51,12 +51,12 @@ #if defined(__KERNEL__) || !defined(__GL
 #undef	__FD_SET
 #define __FD_SET(fd,fdsetp) \
 		__asm__ __volatile__("btsl %1,%0": \
-			"=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
+			"+m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
 
 #undef	__FD_CLR
 #define __FD_CLR(fd,fdsetp) \
 		__asm__ __volatile__("btrl %1,%0": \
-			"=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
+			"+m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
 
 #undef	__FD_ISSET
 #define __FD_ISSET(fd,fdsetp) (__extension__ ({ \
diff --git a/include/asm-i386/rwlock.h b/include/asm-i386/rwlock.h
index 94f0019..96b0bef 100644
--- a/include/asm-i386/rwlock.h
+++ b/include/asm-i386/rwlock.h
@@ -37,7 +37,7 @@ #define __build_read_lock_const(rw, help
 			"popl %%eax\n\t" \
 			"1:\n", \
 			"subl $1,%0\n\t", \
-			"=m" (*(volatile int *)rw) : : "memory")
+			"+m" (*(volatile int *)rw) : : "memory")
 
 #define __build_read_lock(rw, helper)	do { \
 						if (__builtin_constant_p(rw)) \
@@ -63,7 +63,7 @@ #define __build_write_lock_const(rw, hel
 			"popl %%eax\n\t" \
 			"1:\n", \
 			"subl $" RW_LOCK_BIAS_STR ",%0\n\t", \
-			"=m" (*(volatile int *)rw) : : "memory")
+			"+m" (*(volatile int *)rw) : : "memory")
 
 #define __build_write_lock(rw, helper)	do { \
 						if (__builtin_constant_p(rw)) \
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 2f07601..43113f5 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -111,8 +111,8 @@ LOCK_PREFIX	"  incl      (%%eax)\n\t" /*
 		"  jmp       1b\n"
 		LOCK_SECTION_END
 		"# ending down_read\n\t"
-		: "=m"(sem->count)
-		: "a"(sem), "m"(sem->count)
+		: "+m" (sem->count)
+		: "a" (sem)
 		: "memory", "cc");
 }
 
@@ -133,8 +133,8 @@ LOCK_PREFIX	"  cmpxchgl  %2,%0\n\t"
 		"  jnz	     1b\n\t"
 		"2:\n\t"
 		"# ending __down_read_trylock\n\t"
-		: "+m"(sem->count), "=&a"(result), "=&r"(tmp)
-		: "i"(RWSEM_ACTIVE_READ_BIAS)
+		: "+m" (sem->count), "=&a" (result), "=&r" (tmp)
+		: "i" (RWSEM_ACTIVE_READ_BIAS)
 		: "memory", "cc");
 	return result>=0 ? 1 : 0;
 }
@@ -161,8 +161,8 @@ LOCK_PREFIX	"  xadd      %%edx,(%%eax)\n
 		"  jmp       1b\n"
 		LOCK_SECTION_END
 		"# ending down_write"
-		: "=m"(sem->count), "=d"(tmp)
-		: "a"(sem), "1"(tmp), "m"(sem->count)
+		: "+m" (sem->count), "=d" (tmp)
+		: "a" (sem), "1" (tmp)
 		: "memory", "cc");
 }
 
@@ -205,8 +205,8 @@ LOCK_PREFIX	"  xadd      %%edx,(%%eax)\n
 		"  jmp       1b\n"
 		LOCK_SECTION_END
 		"# ending __up_read\n"
-		: "=m"(sem->count), "=d"(tmp)
-		: "a"(sem), "1"(tmp), "m"(sem->count)
+		: "+m" (sem->count), "=d" (tmp)
+		: "a" (sem), "1" (tmp)
 		: "memory", "cc");
 }
 
@@ -231,8 +231,8 @@ LOCK_PREFIX	"  xaddl     %%edx,(%%eax)\n
 		"  jmp       1b\n"
 		LOCK_SECTION_END
 		"# ending __up_write\n"
-		: "=m"(sem->count)
-		: "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS), "m"(sem->count)
+		: "+m" (sem->count)
+		: "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
 		: "memory", "cc", "edx");
 }
 
@@ -256,8 +256,8 @@ LOCK_PREFIX	"  addl      %2,(%%eax)\n\t"
 		"  jmp       1b\n"
 		LOCK_SECTION_END
 		"# ending __downgrade_write\n"
-		: "=m"(sem->count)
-		: "a"(sem), "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
+		: "+m" (sem->count)
+		: "a" (sem), "i" (-RWSEM_WAITING_BIAS)
 		: "memory", "cc");
 }
 
@@ -268,8 +268,8 @@ static inline void rwsem_atomic_add(int 
 {
 	__asm__ __volatile__(
 LOCK_PREFIX	"addl %1,%0"
-		: "=m"(sem->count)
-		: "ir"(delta), "m"(sem->count));
+		: "+m" (sem->count)
+		: "ir" (delta));
 }
 
 /*
@@ -280,10 +280,9 @@ static inline int rwsem_atomic_update(in
 	int tmp = delta;
 
 	__asm__ __volatile__(
-LOCK_PREFIX	"xadd %0,(%2)"
-		: "+r"(tmp), "=m"(sem->count)
-		: "r"(sem), "m"(sem->count)
-		: "memory");
+LOCK_PREFIX	"xadd %0,%1"
+		: "+r" (tmp), "+m" (sem->count)
+		: : "memory");
 
 	return tmp+delta;
 }
diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h
index f7a0f31..d51e800 100644
--- a/include/asm-i386/semaphore.h
+++ b/include/asm-i386/semaphore.h
@@ -107,7 +107,7 @@ static inline void down(struct semaphore
 		"call __down_failed\n\t"
 		"jmp 1b\n"
 		LOCK_SECTION_END
-		:"=m" (sem->count)
+		:"+m" (sem->count)
 		:
 		:"memory","ax");
 }
@@ -132,7 +132,7 @@ static inline int down_interruptible(str
 		"call __down_failed_interruptible\n\t"
 		"jmp 1b\n"
 		LOCK_SECTION_END
-		:"=a" (result), "=m" (sem->count)
+		:"=a" (result), "+m" (sem->count)
 		:
 		:"memory");
 	return result;
@@ -157,7 +157,7 @@ static inline int down_trylock(struct se
 		"call __down_failed_trylock\n\t"
 		"jmp 1b\n"
 		LOCK_SECTION_END
-		:"=a" (result), "=m" (sem->count)
+		:"=a" (result), "+m" (sem->count)
 		:
 		:"memory");
 	return result;
@@ -182,7 +182,7 @@ static inline void up(struct semaphore *
 		"jmp 1b\n"
 		LOCK_SECTION_END
 		".subsection 0\n"
-		:"=m" (sem->count)
+		:"+m" (sem->count)
 		:
 		:"memory","ax");
 }
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 87c40f8..d816c62 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -65,7 +65,7 @@ static inline void __raw_spin_lock(raw_s
 	alternative_smp(
 		__raw_spin_lock_string,
 		__raw_spin_lock_string_up,
-		"=m" (lock->slock) : : "memory");
+		"+m" (lock->slock) : : "memory");
 }
 
 /*
@@ -79,7 +79,7 @@ static inline void __raw_spin_lock_flags
 	alternative_smp(
 		__raw_spin_lock_string_flags,
 		__raw_spin_lock_string_up,
-		"=m" (lock->slock) : "r" (flags) : "memory");
+		"+m" (lock->slock) : "r" (flags) : "memory");
 }
 #endif
 
@@ -88,7 +88,7 @@ static inline int __raw_spin_trylock(raw
 	char oldval;
 	__asm__ __volatile__(
 		"xchgb %b0,%1"
-		:"=q" (oldval), "=m" (lock->slock)
+		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
 	return oldval > 0;
 }
@@ -104,7 +104,7 @@ #if !defined(CONFIG_X86_OOSTORE) && !def
 
 #define __raw_spin_unlock_string \
 	"movb $1,%0" \
-		:"=m" (lock->slock) : : "memory"
+		:"+m" (lock->slock) : : "memory"
 
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
@@ -118,7 +118,7 @@ #else
 
 #define __raw_spin_unlock_string \
 	"xchgb %b0, %1" \
-		:"=q" (oldval), "=m" (lock->slock) \
+		:"=q" (oldval), "+m" (lock->slock) \
 		:"0" (oldval) : "memory"
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
@@ -199,13 +199,13 @@ static inline int __raw_write_trylock(ra
 
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
-				 : "=m" (rw->lock) : : "memory");
+				 : "+m" (rw->lock) : : "memory");
 }
 
 #endif /* __ASM_SPINLOCK_H */

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:58                                           ` Linus Torvalds
  2006-07-06 20:27                                             ` Mark Lord
@ 2006-07-06 20:40                                             ` Chris Friesen
  2006-07-06 21:00                                               ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Chris Friesen @ 2006-07-06 20:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Linus Torvalds wrote:
> 
> On Thu, 6 Jul 2006, Chris Friesen wrote:
> 
>>The C standard requires the use of volatile for signal handlers and setjmp.
> 
> 
> Actually, the C standard requires "sigatomic_t".


 From ISO/IEC 9899:TC2, both of these specifically mention volatile:


7.13.2.1 The longjmp function

3 All accessible objects have values, and all other components of the 
abstract machine212) have state, as of the time the longjmp function was 
called, except that the values of objects of automatic storage duration 
that are local to the function containing the invocation of the 
corresponding setjmp macro that do not have volatile-qualified type and 
have been changed between the setjmp invocation and longjmp call are 
indeterminate.


7.14.1.1 The signal function

5 If the signal occurs other than as the result of calling the abort or 
raise function, the behavior is undefined if the signal handler refers 
to any object with static storage duration other than by assigning a 
value to an object declared as volatile sig_atomic_t, ...


>>For userspace at least the whole discussion of "barriers" is sort of
>>moot--there are no memory barriers defined in the C language, which makes it
>>kind of hard to write portable code that uses them.
> 
> 
> Any locking primitive BY DEFINITION has a barrier in it.
> 
> If it doesn't, it's not a locking primitive, it's a random sequence of 
> code that does something pointless.

Sure, so the implementation of the locking primitives requires 
hardware-specific knowledge.

But for someone coding to POSIX, is there any reason why they would use 
barriers?  If so, what API would they use?

Chris

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:26                                         ` Jeremy Fitzhardinge
@ 2006-07-06 20:55                                           ` Jan Engelhardt
  2006-07-06 21:07                                             ` Linus Torvalds
  0 siblings, 1 reply; 133+ messages in thread
From: Jan Engelhardt @ 2006-07-06 20:55 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Linus Torvalds, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel, arjan

>> $ find linux-2.6.17 -type f -iname '*.[ch]' -print0 | xargs -0 grep
>> volatile | wc -l
>> 13948
>> 
>> Tough job.
>
> You need to exclude "asm volatile", which is a completely different thing.

10077.


Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:40                                             ` Chris Friesen
@ 2006-07-06 21:00                                               ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 21:00 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel



On Thu, 6 Jul 2006, Chris Friesen wrote:
> 
> But for someone coding to POSIX, is there any reason why they would use
> barriers?  If so, what API would they use?

The POSIX thread api? 

Anyway, who cares? The final word on the kernel is that "volatile" is 
wrong. Arguing against that standpoint is pointless, especially since 
other real kernel developers have already stood up to back me up on this.

In user programs, you can do whatever you damn well please. I don't care, 
it's not my problem.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 18:00                                         ` Linus Torvalds
@ 2006-07-06 21:02                                           ` J.A. Magallón
  2006-07-06 21:12                                             ` Linus Torvalds
  0 siblings, 1 reply; 133+ messages in thread
From: J.A. Magallón @ 2006-07-06 21:02 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-os (Dick Johnson), Ingo Molnar, Andrew Morton, linux-kernel, arjan

On Thu, 6 Jul 2006 11:00:43 -0700 (PDT), Linus Torvalds <torvalds@osdl.org> wrote:

> 
> 
> On Thu, 6 Jul 2006, linux-os (Dick Johnson) wrote:
> > 
> > Linus, you may have been reading too many novels.
> > 
> > If I have some code that does:
> > 
> > extern int spinner;
> > 
> > funct(){
> >      while(spinner)
> >          ;
> > 
> > The 'C' compiler has no choice but to actually make that memory access
> > and read the variable because the variable is in another module (a.k.a.
> > file).
> 
> You don't know how C works, do you?
> 
> You also have no idea of what out-of-order memory accesses do to OS code, 
> right?
> 

I think you are mixing apples and oranges. Using volatile to control o-o-o
memory accesses is sure wrong.
It just means "don't assume this variable has not changed because you don't
see any access to it" to the compiler. It is designed to prevent high level
optimizations like code movement or dead code elimination, but a _high_ level.
It has nothing to do with memory barriers and so on. That is surely a
misuse of volatile.

--
J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
                                         \         It's better when it's free
Mandriva Linux release 2007.0 (Cooker) for i586
Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:55                                           ` Jan Engelhardt
@ 2006-07-06 21:07                                             ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 21:07 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: Jeremy Fitzhardinge, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Thu, 6 Jul 2006, Jan Engelhardt wrote:
> >
> > You need to exclude "asm volatile", which is a completely different thing.
> 
> 10077.

Yeah, way too many. 

That said, at least _some_ of them are:

 - casts to volatile inside arch-specific code serquences (ie the _good_ 
   kind of volatile - associated with _code_ rather than data). 

   See for example include/asm-i386/io.h for 100% valid examples of this
   kind of usage.

 - function argument values for functions that need to be able to take an 
   arbitrary pointer ("const volatile void *" is the most permissive 
   argument type - anything else the compiler can complain about you 
   dropping qualifiers)

   See include/asm-i386/bitops.h for examples of this kind of volatile.

So I'd expect that maybe one percent of them are actually valid ;)

And I suspect that a huge majority of the truly crapola ones are in 
drivers. Oh, well..

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 21:02                                           ` J.A. Magallón
@ 2006-07-06 21:12                                             ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-06 21:12 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: linux-os (Dick Johnson), Ingo Molnar, Andrew Morton, linux-kernel, arjan

[-- Attachment #1: Type: TEXT/PLAIN, Size: 418 bytes --]



On Thu, 6 Jul 2006, J.A. Magallón wrote:
> 
> I think you are mixing apples and oranges. Using volatile to control o-o-o
> memory accesses is sure wrong.

.. and there _is_ no right way to use it, except the two I've already 
mentioned. 

Why is that so hard to accept?

The fact is, "volatile" was designed in a different era, and tough, it's 
not one of the better parts of the C language.

Get over it.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* RE: [patch] spinlocks: remove 'volatile'
  2006-07-06 12:29                                     ` linux-os (Dick Johnson)
  2006-07-06 12:39                                       ` Arjan van de Ven
  2006-07-06 16:40                                       ` Nick Piggin
@ 2006-07-06 23:19                                       ` David Schwartz
  2 siblings, 0 replies; 133+ messages in thread
From: David Schwartz @ 2006-07-06 23:19 UTC (permalink / raw)
  To: linux-os (Dick Johnson), Arjan van de Ven; +Cc: linux-kernel


> Look at:
>
>  	http://en.wikipedia.org/wiki/Volatile_variable
>
> This is just what is needed to prevent the compiler from making
> non working
> code during optimization.

	That article is totally and completely wrong, in fact it's so wrong it's
harmful. For example, it says:

... [A] variable that might be concurrently modified by multiple
threads (without locks or a similar form of mutual exclusion) should be
declared volatile.

	Without pointing out that the use of 'volatile' is neither required nor
sufficient, this is an utterly false statement. The reference to "mutual
exclusion" is puzzling, since the problem is cached data, not concurrent
accesses.

	It talks about controlling compiler optimizations. What difference does it
make to me whether an optimization that breaks my code is made by the
compiler or the processor?

	The most serious problem with the article is that it does not point out
what is guaranteed behavior and what happens to be true for some particular
platforms. In fact, the only platform I know of where the behavior the
article implies is guaranteed is (at least arguably) actually guaranteed is
Win32. (And I'm not sure of what value a guarantee is that you have to argue
is implied mostly by omission.)

	Sadly, it omits any mention of the *actual* legitimate use of 'volatile'.
That is, the cases where it has guaranteed semantics and actually is both
necessary and sufficient.

	DS



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 17:52                                       ` linux-os (Dick Johnson)
                                                           ` (2 preceding siblings ...)
  2006-07-06 18:16                                         ` Chase Venters
@ 2006-07-07 18:16                                         ` Krzysztof Halasa
  2006-07-07 19:51                                           ` linux-os (Dick Johnson)
  3 siblings, 1 reply; 133+ messages in thread
From: Krzysztof Halasa @ 2006-07-07 18:16 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, linux-kernel, arjan

"linux-os \(Dick Johnson\)" <linux-os@analogic.com> writes:

> extern int spinner;
>
> funct(){
>      while(spinner)
>          ;
>
> The 'C' compiler has no choice but to actually make that memory access
> and read the variable because the variable is in another module (a.k.a.
> file).

defiant:/tmp/khc$ gcc --version
gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
defiant:/tmp/khc$ cat test.c 
extern int spinner;

void funct(void)
{
     while(spinner)
         ;
}
defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
defiant:/tmp/khc$ objdump -d test.o 

test.o:     file format elf32-i386

Disassembly of section .text:

00000000 <funct>:
   0:   a1 00 00 00 00          mov    0x0,%eax
   5:   55                      push   %ebp
   6:   89 e5                   mov    %esp,%ebp
   8:   85 c0                   test   %eax,%eax
   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
  10:   75 fe                   jne    10 <funct+0x10>
  12:   5d                      pop    %ebp
  13:   c3                      ret    

"0x0" is, of course, for relocation.

> However, if I have the same code, but the variable is visible during
> compile time, i.e.,
>
> int spinner=0;
>
> funct(){
>      while(spinner)
>          ;
>
> ... the compiler may eliminate that code altogether because it
> 'knows' that spinner is FALSE, having initialized it to zero
> itself.

defiant:/tmp/khc$ cat test.c 
int spinner = 0;

void funct(void)
{
     while(spinner)
         ;
}
defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
defiant:/tmp/khc$ objdump -d test.o 

00000000 <funct>:
   0:   a1 00 00 00 00          mov    0x0,%eax
   5:   55                      push   %ebp
   6:   89 e5                   mov    %esp,%ebp
   8:   85 c0                   test   %eax,%eax
   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
  10:   75 fe                   jne    10 <funct+0x10>
  12:   5d                      pop    %ebp
  13:   c3                      ret    

> Since spinner is global in scope, somebody surely could have
> changed it before funct() was even called, but the current gcc
> 'C' compiler doesn't care and may optimize it away entirely.

Personally I don't think such C compiler even existed. HISOFT C
on ZX Spectrum could be a good candidate but I think it didn't
have any optimizer :-)

> To
> prevent this, you must declare the variable volatile. To do
> otherwise is a bug.

Nope. Volatile just means that every read (and write) must actually
access the variable. Note that the compiler optimized out accesses
to the variable in the loop - while it has to check at the beginning
of funct(), it knows that the variable is constant through funct().

Note that "volatile" is not exactly what we usually want, but it
does the job (i.e., the program doesn't crash, but the code is
probably suboptimal).

> That said, I think that the current
> implementation of 'volatile' is broken because the compiler
> seems to believe that the variable has moved! It recalculates
> the address of the variable as well as accessing its value.
> This is what makes the code generation problematical.

You must be using a heavily broken compiler:

defiant:/tmp/khc$ cat test.c 
volatile int spinner = 0;

void funct(void)
{
     while(spinner)
         ;
}
defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
defiant:/tmp/khc$ objdump -d test.o 

00000000 <funct>:
   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   a1 00 00 00 00          mov    0x0,%eax
   8:   85 c0                   test   %eax,%eax
   a:   75 f7                   jne    3 <funct+0x3>
   c:   5d                      pop    %ebp
   d:   c3                      ret    
-- 
Krzysztof Halasa

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 18:16                                         ` Krzysztof Halasa
@ 2006-07-07 19:51                                           ` linux-os (Dick Johnson)
  2006-07-07 20:25                                             ` Linus Torvalds
                                                               ` (3 more replies)
  0 siblings, 4 replies; 133+ messages in thread
From: linux-os (Dick Johnson) @ 2006-07-07 19:51 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, linux-kernel, arjan


On Fri, 7 Jul 2006, Krzysztof Halasa wrote:

> "linux-os \(Dick Johnson\)" <linux-os@analogic.com> writes:
>
>> extern int spinner;
>>
>> funct(){
>>      while(spinner)
>>          ;
>>
>> The 'C' compiler has no choice but to actually make that memory access
>> and read the variable because the variable is in another module (a.k.a.
>> file).
>
> defiant:/tmp/khc$ gcc --version
> gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> defiant:/tmp/khc$ cat test.c
> extern int spinner;
>
> void funct(void)
> {
>     while(spinner)
>         ;
> }
> defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> defiant:/tmp/khc$ objdump -d test.o
>
> test.o:     file format elf32-i386
>
> Disassembly of section .text:
>
> 00000000 <funct>:
>   0:   a1 00 00 00 00          mov    0x0,%eax
>   5:   55                      push   %ebp
>   6:   89 e5                   mov    %esp,%ebp
>   8:   85 c0                   test   %eax,%eax
>   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
>  10:   75 fe                   jne    10 <funct+0x10>
>  12:   5d                      pop    %ebp
>  13:   c3                      ret
>
> "0x0" is, of course, for relocation.


So read the code; you have "10:   jne 10", jumping to itself
forever, without even doing anything else to set the flags, much
less reading a variable.


>
>> However, if I have the same code, but the variable is visible during
>> compile time, i.e.,
>>
>> int spinner=0;
>>
>> funct(){
>>      while(spinner)
>>          ;
>>
>> ... the compiler may eliminate that code altogether because it
>> 'knows' that spinner is FALSE, having initialized it to zero
>> itself.
>
> defiant:/tmp/khc$ cat test.c
> int spinner = 0;
>
> void funct(void)
> {
>     while(spinner)
>         ;
> }
> defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> defiant:/tmp/khc$ objdump -d test.o
>
> 00000000 <funct>:
>   0:   a1 00 00 00 00          mov    0x0,%eax
>   5:   55                      push   %ebp
>   6:   89 e5                   mov    %esp,%ebp
>   8:   85 c0                   test   %eax,%eax
>   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
>  10:   75 fe                   jne    10 <funct+0x10>
>  12:   5d                      pop    %ebp
>  13:   c3                      ret
>

Then, you have exactly the same thing here:
   10:   75 fe                   jne    10 <funct+0x10>

Same bad code.

>> Since spinner is global in scope, somebody surely could have
>> changed it before funct() was even called, but the current gcc
>> 'C' compiler doesn't care and may optimize it away entirely.
>
> Personally I don't think such C compiler even existed. HISOFT C
> on ZX Spectrum could be a good candidate but I think it didn't
> have any optimizer :-)
>
>> To
>> prevent this, you must declare the variable volatile. To do
>> otherwise is a bug.
>
> Nope. Volatile just means that every read (and write) must actually
> access the variable. Note that the compiler optimized out accesses
> to the variable in the loop - while it has to check at the beginning
> of funct(), it knows that the variable is constant through funct().
>
> Note that "volatile" is not exactly what we usually want, but it
> does the job (i.e., the program doesn't crash, but the code is
> probably suboptimal).
>
>> That said, I think that the current
>> implementation of 'volatile' is broken because the compiler
>> seems to believe that the variable has moved! It recalculates
>> the address of the variable as well as accessing its value.
>> This is what makes the code generation problematical.
>
> You must be using a heavily broken compiler:
>
> defiant:/tmp/khc$ cat test.c
> volatile int spinner = 0;
>
> void funct(void)
> {
>     while(spinner)
>         ;
> }
> defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> defiant:/tmp/khc$ objdump -d test.o
>
> 00000000 <funct>:
>   0:   55                      push   %ebp
>   1:   89 e5                   mov    %esp,%ebp
>   3:   a1 00 00 00 00          mov    0x0,%eax
>   8:   85 c0                   test   %eax,%eax
>   a:   75 f7                   jne    3 <funct+0x3>
>   c:   5d                      pop    %ebp
>   d:   c3                      ret

This is the only code that works. Guess why it worked? Because
you declared the variable volatile.

Now Linus declares that instead of declaring an object volatile
so that it is actually accessed every time it is referenced, he wants
to use a GNU-ism with assembly that tells the compiler to re-read
__every__ variable existing im memory, instead of just one. Go figure!

Reference:
/usr/src/linux-2.6.16.4/include/linux/compiler-gcc.h:
#define barrier() __asm__ __volatile__("": : :"memory")


> --
> Krzysztof Halasa
>

Cheers,
Dick Johnson
Penguin : Linux version 2.6.16.4 on an i686 machine (5592.89 BogoMips).
New book: http://www.AbominableFirebug.com/
_
\x1a\x04

****************************************************************
The information transmitted in this message is confidential and may be privileged.  Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited.  If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to DeliveryErrors@analogic.com - and destroy all copies of this information, including any attachments, without reading or disclosing them.

Thank you.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 19:51                                           ` linux-os (Dick Johnson)
@ 2006-07-07 20:25                                             ` Linus Torvalds
  2006-07-07 21:22                                               ` linux-os (Dick Johnson)
  2006-07-07 20:39                                             ` Krzysztof Halasa
                                                               ` (2 subsequent siblings)
  3 siblings, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-07 20:25 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Krzysztof Halasa, Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Fri, 7 Jul 2006, linux-os (Dick Johnson) wrote:
> 
> Now Linus declares that instead of declaring an object volatile
> so that it is actually accessed every time it is referenced, he wants
> to use a GNU-ism with assembly that tells the compiler to re-read
> __every__ variable existing im memory, instead of just one. Go figure!

Actually, it's not just me.

Read things like the Intel CPU documentation.

IT IS ACTIVELY WRONG to busy-loop on a variable. It will make the CPU 
potentially over-heat, causing degreaded performance, and you're simply 
not supposed to do it.

> Reference:
> /usr/src/linux-2.6.16.4/include/linux/compiler-gcc.h:
> #define barrier() __asm__ __volatile__("": : :"memory")

Actually, for your kind of stupid loop, you should use

 - include/asm-i386/processor.h:
	#define cpu_relax()        rep_nop()

where rep_nop() is

	/* REP NOP (PAUSE) is a a good thing to insert into busy-wait loops. */
	static inline void rep_nop(void)
	{
		__asm__ __volatile__("rep;nop": : :"memory");
	}

on x86, and can be other things on other CPU's. On ppc64 it is

	#define cpu_relax()     do { HMT_low(); HMT_medium(); barrier(); } while (0)

where those HMT macros adjust thread priority.

In other words, you just don't know what you're talking about. "volatile" 
is simply not useful, and the fact that you cannot seem to grasp that is 
_your_ problem.

Repeat after me (or just shut up about things that you not only don't know 
about, but are apparently not willing to even learn):

	"'volatile' is useless. The things it did 30 years ago are much 
	 more complex these days, and need to be tied to much more 
	 detailed rules that depend on the actual particular problem, 
	 rather than one keyword to the compiler that doesn't actually 
	 give enough information for the compiler to do anything useful"

Comprende?

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 19:51                                           ` linux-os (Dick Johnson)
  2006-07-07 20:25                                             ` Linus Torvalds
@ 2006-07-07 20:39                                             ` Krzysztof Halasa
  2006-07-07 23:06                                             ` Björn Steinbrink
  2006-07-08  8:36                                             ` Avi Kivity
  3 siblings, 0 replies; 133+ messages in thread
From: Krzysztof Halasa @ 2006-07-07 20:39 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, linux-kernel, arjan

"linux-os \(Dick Johnson\)" <linux-os@analogic.com> writes:

>> 00000000 <funct>:
>>   0:   a1 00 00 00 00          mov    0x0,%eax
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>   5:   55                      push   %ebp
>>   6:   89 e5                   mov    %esp,%ebp
>>   8:   85 c0                   test   %eax,%eax
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
>>  10:   75 fe                   jne    10 <funct+0x10>
>>  12:   5d                      pop    %ebp
>>  13:   c3                      ret
>>
>> "0x0" is, of course, for relocation.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

> So read the code; you have "10:   jne 10", jumping to itself
> forever, without even doing anything else to set the flags, much
> less reading a variable.

The variable is tested before entering the loop, of course. But
it _is_ tested and the initial state doesn't matter.
The "0x0" may be misleading so I added the note about relocation.

It _is_ BTW correct machine code.

>> 00000000 <funct>:
>>   0:   55                      push   %ebp
>>   1:   89 e5                   mov    %esp,%ebp
>>   3:   a1 00 00 00 00          mov    0x0,%eax
>>   8:   85 c0                   test   %eax,%eax
>>   a:   75 f7                   jne    3 <funct+0x3>
>>   c:   5d                      pop    %ebp
>>   d:   c3                      ret
>
> This is the only code that works. Guess why it worked? Because
> you declared the variable volatile.

Of course. But I don't see any address recalculations.

> Now Linus declares that instead of declaring an object volatile
> so that it is actually accessed every time it is referenced, he wants
> to use a GNU-ism with assembly that tells the compiler to re-read
> __every__ variable existing im memory, instead of just one. Go figure!

That's probably overkill, I can think of more cases where "volatile"
actually makes sense. Most are probably broken anyway, especially
those that aren't (guaranteed to be) atomic but the author uses them
as if they were.

BTW: barrier() isn't a lock.
-- 
Krzysztof Halasa

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 20:25                                             ` Linus Torvalds
@ 2006-07-07 21:22                                               ` linux-os (Dick Johnson)
  2006-07-07 21:48                                                 ` Chase Venters
                                                                   ` (3 more replies)
  0 siblings, 4 replies; 133+ messages in thread
From: linux-os (Dick Johnson) @ 2006-07-07 21:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Krzysztof Halasa, Ingo Molnar, Andrew Morton, Linux kernel, arjan


On Fri, 7 Jul 2006, Linus Torvalds wrote:

>
>
> On Fri, 7 Jul 2006, linux-os (Dick Johnson) wrote:
>>
>> Now Linus declares that instead of declaring an object volatile
>> so that it is actually accessed every time it is referenced, he wants
>> to use a GNU-ism with assembly that tells the compiler to re-read
>> __every__ variable existing im memory, instead of just one. Go figure!
>
> Actually, it's not just me.
>
> Read things like the Intel CPU documentation.
>
> IT IS ACTIVELY WRONG to busy-loop on a variable. It will make the CPU
> potentially over-heat, causing degreaded performance, and you're simply
> not supposed to do it.

This is a bait and switch argument. The code was displayed to show
the compiler output, not an example of good coding practice.

Furthermore, I'm still waiting for the new spin-friction that's
supposed to cause the increased heat. It doesn't exist and furthermore
no Intel processor instruction manual (that is available for public
inspection) claims that busy-waiting increases any heating, only that
some processors that __can__ fall back into a low-power mode will not
do so if they are spinning (naturally). But this is just another
bait-and-switch as well because I only wrote about volatile and
I even provided references.

>
>> Reference:
>> /usr/src/linux-2.6.16.4/include/linux/compiler-gcc.h:
>> #define barrier() __asm__ __volatile__("": : :"memory")
>
> Actually, for your kind of stupid loop, you should use
>
> - include/asm-i386/processor.h:
> 	#define cpu_relax()        rep_nop()
>

Again, I didn't propose to do that. In fact, your spin-lock
code already inserts "rep nops" and I never implied that they
should be removed. I said only that "volatile" still needs to
be used, not some macro that tells the compiler that everything
in memory probably got trashed. Read what I said, not what you
think some idiot might have said.

> where rep_nop() is
>
> 	/* REP NOP (PAUSE) is a a good thing to insert into busy-wait loops. */
> 	static inline void rep_nop(void)
> 	{
> 		__asm__ __volatile__("rep;nop": : :"memory");
> 	}
>
> on x86, and can be other things on other CPU's. On ppc64 it is
>
> 	#define cpu_relax()     do { HMT_low(); HMT_medium(); barrier(); } while (0)
>
> where those HMT macros adjust thread priority.
>

Not either of these things have anything to do with the topic and
I never even implied that they did. Again, I spoke only of the
well known volatile keyword. Nothing else.

> In other words, you just don't know what you're talking about. "volatile"
> is simply not useful, and the fact that you cannot seem to grasp that is
> _your_ problem.
>

Well, in fact I do know what I'm talking about. Your bait-and-switch
arguments just won't work with me.

> Repeat after me (or just shut up about things that you not only don't know
> about, but are apparently not willing to even learn):
>
> 	"'volatile' is useless. The things it did 30 years ago are much
> 	 more complex these days, and need to be tied to much more
> 	 detailed rules that depend on the actual particular problem,
> 	 rather than one keyword to the compiler that doesn't actually
> 	 give enough information for the compiler to do anything useful"
>
> Comprende?
>
> 		Linus
>

Cheers,
Dick Johnson
Penguin : Linux version 2.6.16.4 on an i686 machine (5592.89 BogoMips).
New book: http://www.AbominableFirebug.com/
_
\x1a\x04

****************************************************************
The information transmitted in this message is confidential and may be privileged.  Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited.  If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to DeliveryErrors@analogic.com - and destroy all copies of this information, including any attachments, without reading or disclosing them.

Thank you.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:22                                               ` linux-os (Dick Johnson)
@ 2006-07-07 21:48                                                 ` Chase Venters
  2006-07-08 10:00                                                   ` Krzysztof Halasa
  2006-07-07 21:59                                                 ` Linus Torvalds
                                                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 133+ messages in thread
From: Chase Venters @ 2006-07-07 21:48 UTC (permalink / raw)
  To: linux-os \\(Dick Johnson\\)
  Cc: Linus Torvalds, Krzysztof Halasa, Ingo Molnar, Andrew Morton,
	Linux kernel, arjan

On Fri, 7 Jul 2006, linux-os \(Dick Johnson\) wrote:

> Again, I didn't propose to do that. In fact, your spin-lock
> code already inserts "rep nops" and I never implied that they
> should be removed. I said only that "volatile" still needs to
> be used, not some macro that tells the compiler that everything
> in memory probably got trashed. Read what I said, not what you
> think some idiot might have said.
>

Dude, are you even paying attention? "volatile" very much does not need to 
be used (and as Linus points out, it is _wrong_). Since we're using GCC's 
inline asm syntax _already_, it is perfectly sufficient to use the same 
syntax to tell GCC that memory should be considered invalid.

Locks are supposed to be syncronization points, which is why they ALREADY 
HAVE "memory" on the clobber list! "memory" IS NECESSARY. The fact 
that "=m" is changing to "+m" in Linus's patches is because "=m" is in 
fact insufficient, because it would let the compiler believe we're just 
going to over-write the value. "volatile" merely hides that bug -- once 
that bug is _fixed_ (by going to "+m"), "volatile" is no longer useful. 
(It wasn't useful before, it just _papered over_ a problem).

If "volatile" is in use elsewhere (other than locks), it's still 
probably wrong. In these cases, you can use a barrier, a volatile cast, or 
an inline asm with a specific clobber.

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:22                                               ` linux-os (Dick Johnson)
  2006-07-07 21:48                                                 ` Chase Venters
@ 2006-07-07 21:59                                                 ` Linus Torvalds
  2006-07-07 22:06                                                   ` Linus Torvalds
  2006-07-08 20:49                                                   ` Pavel Machek
  2006-07-07 22:05                                                 ` J.A. Magallón
  2006-07-07 22:09                                                 ` Ingo Molnar
  3 siblings, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-07 21:59 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Krzysztof Halasa, Ingo Molnar, Andrew Morton, Linux kernel, arjan



On Fri, 7 Jul 2006, linux-os (Dick Johnson) wrote:
>
> This is a bait and switch argument. The code was displayed to show
> the compiler output, not an example of good coding practice.

NO IT IS NOT.

The whole point of my argument is simple:

> > 	"'volatile' is useless. The things it did 30 years ago are much
> > 	 more complex these days, and need to be tied to much more
> > 	 detailed rules that depend on the actual particular problem,
> > 	 rather than one keyword to the compiler that doesn't actually
> > 	 give enough information for the compiler to do anything useful"

And dammit, if you cannot admit that, then you're not worth discussing 
with.

"volatile" is useless. It's a big hammer in a world where the nails aren't 
nails any more, they are screws, thumb-tacks, and spotwelding.

It still makes a difference for code generation, OF COURSE. But it's the 
wrong thing to use.

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:22                                               ` linux-os (Dick Johnson)
  2006-07-07 21:48                                                 ` Chase Venters
  2006-07-07 21:59                                                 ` Linus Torvalds
@ 2006-07-07 22:05                                                 ` J.A. Magallón
  2006-07-07 22:22                                                   ` Chase Venters
  2006-07-07 23:36                                                   ` Davide Libenzi
  2006-07-07 22:09                                                 ` Ingo Molnar
  3 siblings, 2 replies; 133+ messages in thread
From: J.A. Magallón @ 2006-07-07 22:05 UTC (permalink / raw)
  To: linux-os (Dick Johnson), Linus Torvalds, linux-kernel

On Fri, 7 Jul 2006 17:22:31 -0400, "linux-os \(Dick Johnson\)" <linux-os@analogic.com> wrote:

> 
> On Fri, 7 Jul 2006, Linus Torvalds wrote:
> 
> >
> >
> > On Fri, 7 Jul 2006, linux-os (Dick Johnson) wrote:
> >>
> >> Now Linus declares that instead of declaring an object volatile
> >> so that it is actually accessed every time it is referenced, he wants
> >> to use a GNU-ism with assembly that tells the compiler to re-read
> >> __every__ variable existing im memory, instead of just one. Go figure!
> >
> > Actually, it's not just me.
> >
> > Read things like the Intel CPU documentation.
> >
> > IT IS ACTIVELY WRONG to busy-loop on a variable. It will make the CPU
> > potentially over-heat, causing degreaded performance, and you're simply
> > not supposed to do it.
> 
> This is a bait and switch argument. The code was displayed to show
> the compiler output, not an example of good coding practice.
> 

volatile means what it means, is usefull and is right. If it is used
in kernel for other things apart from what it was designed for it is
kernel or programmer responsibility. It does not mention nothing about
locking.

A more real example:

#include <stdint.h>

//volatile
uint32_t spinvar = 1;
uint32_t mtx;

void lock(uint32_t* l)
{   
    *l = 1;
}

void unlock(uint32_t* l)
{
    *l = 0;
}

void spin()
{
    uint32_t local;

    for (;;)
    {
        lock(&mtx);
        local = spinvar;
        unlock(&mtx);
        if (!local)
            break;
    }
}

without the volatile:

spin:
    pushl   %ebp
    movl    spinvar, %eax
    movl    %esp, %ebp
    testl   %eax, %eax
    je  .L7
.L10:
    jmp .L10
.L7:
    movl    $0, mtx
    popl    %ebp
    ret

so the compiler did something like

   local = spinvar;
   if (local)
	for (;;);

(notice the dead lock/unlock inlined code elimination).

With the volatile, the code is correct:

spin:
    pushl   %ebp
    movl    %esp, %ebp
    .p2align 4,,7
.L7:
    movl    spinvar, %eax
    testl   %eax, %eax
    jne .L7
    movl    $0, mtx
    popl    %ebp
    ret

So think about all you inlined spinlocks, mutexes and so on.

And if you do

void lock(volatile uint32_t* l)
...
void unlock(volatile uint32_t* l)
...

the code is even better:

spin:
    pushl   %ebp
    movl    %esp, %ebp
    .p2align 4,,7
.L7:
    movl    $1, mtx    <=========
    movl    spinvar, %eax
    movl    $0, mtx    <=========
    testl   %eax, %eax
    jne .L7
    popl    %ebp
    ret

So volatile just means 'dont trust this does not change even you don't see
why'.

--
J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
                                         \         It's better when it's free
Mandriva Linux release 2007.0 (Cooker) for i586
Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:59                                                 ` Linus Torvalds
@ 2006-07-07 22:06                                                   ` Linus Torvalds
  2006-07-08 20:49                                                   ` Pavel Machek
  1 sibling, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-07 22:06 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Krzysztof Halasa, Ingo Molnar, Andrew Morton, Linux kernel, arjan



On Fri, 7 Jul 2006, Linus Torvalds wrote:
> 
> It still makes a difference for code generation, OF COURSE. But it's the 
> wrong thing to use.

And if you don't realize that my argument wasn't "bait-and-switch", it's 
exactly the same thing. You pointed out a place where "volatile" would 
make the code "work".

AND I POINTED OUT THAT EVEN IN YOUR TRIVIAL EXAMPLE, VOLATILE WAS 
ACTUALLY THE WRONG THING TO DO.

And that's _exactly_ because the language environment (in this case the 
CPU itself) has evolved past the point it was 30 years ago.

And my point is that this is _always_ true. There are basically no valid 
uses where you can use "volatile" today, where there isn't some reason why 
you _should_ have done it another way entirely. Either you should have 
used proper locking, or you should have used the proper IO accessor 
functions, or you should have used something like "cpu_relax()", OR ANY 
NUMBER OF OTHER MECHANISMS.

(I did give a few examples of where "volatile" can be valid, but they are 
very limited)

Yes, "volatile" is convenient - at the cost of making the compiler 
generate crap code even when it shouldn't need to. Yes, "volatile" 
sometimes allows you to be lazy and not do the proper thing. Yes, 
"volatile" can hide bugs when you _tried_ to do the proper thing but 
screwed up.

But can't you see that _none_ of those 'Yes, "volatile" ...' actually 
means that you should _use_ "volatile". 

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:22                                               ` linux-os (Dick Johnson)
                                                                   ` (2 preceding siblings ...)
  2006-07-07 22:05                                                 ` J.A. Magallón
@ 2006-07-07 22:09                                                 ` Ingo Molnar
  2006-07-08  7:36                                                   ` Ingo Molnar
  3 siblings, 1 reply; 133+ messages in thread
From: Ingo Molnar @ 2006-07-07 22:09 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Linus Torvalds, Krzysztof Halasa, Andrew Morton, Linux kernel, arjan


* linux-os (Dick Johnson) <linux-os@analogic.com> wrote:

> >> Now Linus declares that instead of declaring an object volatile
> >> so that it is actually accessed every time it is referenced, he wants
> >> to use a GNU-ism with assembly that tells the compiler to re-read
> >> __every__ variable existing im memory, instead of just one. Go figure!
> >
> > Actually, it's not just me.
> >
> > Read things like the Intel CPU documentation.
> >
> > IT IS ACTIVELY WRONG to busy-loop on a variable. It will make the CPU
> > potentially over-heat, causing degreaded performance, and you're simply
> > not supposed to do it.
> 
> This is a bait and switch argument. [...]

how could it possibly be? Linus has simply replied to your point:

> > > If I have some code that does:
> > > 
> > > extern int spinner;
> > > 
> > > funct(){
> > >      while(spinner)
> > >            ;
> > >
> > > The 'C' compiler has no choice but to actually make that memory 
> > > access and read the variable [...]

Linus observed that the only possible purpose of the code you cited, 
busy-looping, makes no sense.

If you dont intend to busy-loop, why does the code above look like a 
busy-loop, and what do you need the volatile keyword for?

> [...] In fact, your spin-lock code already inserts "rep nops" and I 
> never implied that they should be removed. I said only that "volatile" 
> still needs to be used, not some macro that tells the compiler that 
> everything in memory probably got trashed. [...]

your position here does seem to make much sense to me, so please help me 
understand it. You suggest that the assembly code should be left alone. 
But then why do you need the volatile keyword to begin with?

> Well, in fact I do know what I'm talking about. Your bait-and-switch 
> arguments just won't work with me.

you seem to be quite self-confident. That is a nice thing to have for 
say a pro boxer, but it can be a disadvantage when dealing with a 
complex OS. Just curious, if you turn out to be wrong will you apologize 
for wasting Linus' (and others') time and for insulting him like that? 
(Or is this a totally impossible scenario not even worth discussing?)

> > Repeat after me (or just shut up about things that you not only don't know
> > about, but are apparently not willing to even learn):
> >
> > 	"'volatile' is useless. The things it did 30 years ago are much
> > 	 more complex these days, and need to be tied to much more
> > 	 detailed rules that depend on the actual particular problem,
> > 	 rather than one keyword to the compiler that doesn't actually
> > 	 give enough information for the compiler to do anything useful"
> >
> > Comprende?

here Linus claims that 'volatile' is useless and that the use of 
'volatile' is almost always a sign of bugs. That includes your 
code-samples too. So a proper answer on a technical forum like lkml 
would _not_ be to ignore Linus' point like you did, but to answer it 
directly. There are a couple of ways to answer it:

- you could explain why your sample code _does_ make sense. That's a 
  powerful way of making your point and you'll sure see an answer 
  either rebutting your point or conceding the point.

- or if you know that it makes no sense (minor nit: in which case you 
  should have pointed that out when writing them) then you could either
  correct it, or cite actual kernel code to which your point applies.

- an even better (and by far my most favorite) method of reply would be 
  if you sent an actual kernel patch (ontop of 2.6.18-rc1 plus my 
  volatile-removal patch) that implements your suggestions! That
  would be a perfect way of making your point - one line of patch is 
  worth a thousand words.

	Ingo

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:05                                                 ` J.A. Magallón
@ 2006-07-07 22:22                                                   ` Chase Venters
  2006-07-07 22:37                                                     ` J.A. Magallón
  2006-07-07 22:49                                                     ` J.A. Magallón
  2006-07-07 23:36                                                   ` Davide Libenzi
  1 sibling, 2 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-07 22:22 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: linux-os \(Dick Johnson\), Linus Torvalds, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: TEXT/PLAIN; charset=X-UNKNOWN; format=flowed, Size: 2380 bytes --]

On Sat, 8 Jul 2006, J.A. Magallón wrote:

> #include <stdint.h>
>
> //volatile
> uint32_t spinvar = 1;
> uint32_t mtx;
>
> void lock(uint32_t* l)
> {
>    *l = 1;
> }
>
> void unlock(uint32_t* l)
> {
>    *l = 0;
> }
>
> void spin()
> {
>    uint32_t local;
>
>    for (;;)
>    {
>        lock(&mtx);
>        local = spinvar;
>        unlock(&mtx);
>        if (!local)
>            break;
>    }
> }

This is _totally_ incorrect. Your "lock" functions are broken, because 
they do not introduce syncronization points or locked bus operations. Due 
to this huge failure, the compiler and/or processor is free to re-order 
your loads and stores, resulting in totally unpredictable runtime 
behavior.

> without the volatile:
>
> spin:
>    pushl   %ebp
>    movl    spinvar, %eax
>    movl    %esp, %ebp
>    testl   %eax, %eax
>    je  .L7
> .L10:
>    jmp .L10
> .L7:
>    movl    $0, mtx
>    popl    %ebp
>    ret
>
> so the compiler did something like
>
>   local = spinvar;
>   if (local)
> 	for (;;);
>
> (notice the dead lock/unlock inlined code elimination).

...which indicates that your code is wrong.

> With the volatile, the code is correct:
>
> spin:
>    pushl   %ebp
>    movl    %esp, %ebp
>    .p2align 4,,7
> .L7:
>    movl    spinvar, %eax
>    testl   %eax, %eax
>    jne .L7
>    movl    $0, mtx
>    popl    %ebp
>    ret

Actually, it's not. It's never setting "mtx" to 1, and it's certainly not 
doing any sync or locked ops.

> So think about all you inlined spinlocks, mutexes and so on.

Yes, you got it wrong, and the current code gets it right. (Linus's patch 
of =m to +m, combined with -volatile, is best)

> And if you do
>
> void lock(volatile uint32_t* l)
> ...
> void unlock(volatile uint32_t* l)
> ...
>
> the code is even better:
>
> spin:
>    pushl   %ebp
>    movl    %esp, %ebp
>    .p2align 4,,7
> .L7:
>    movl    $1, mtx    <=========
>    movl    spinvar, %eax
>    movl    $0, mtx    <=========
>    testl   %eax, %eax
>    jne .L7
>    popl    %ebp
>    ret

NO! It's not better. You're still not syncing or locking the bus! If you 
refer to the fact that the "movl $1" has magically appeared, that's 
because you've just PAPERED OVER THE PROBLEM WITH "volatile", which is 
_exactly_ what Linus is telling you NOT TO DO.

> So volatile just means 'dont trust this does not change even you don't see
> why'.
>

No.

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:22                                                   ` Chase Venters
@ 2006-07-07 22:37                                                     ` J.A. Magallón
  2006-07-08  9:33                                                       ` David Schwartz
  2006-07-07 22:49                                                     ` J.A. Magallón
  1 sibling, 1 reply; 133+ messages in thread
From: J.A. Magallón @ 2006-07-07 22:37 UTC (permalink / raw)
  To: Chase Venters; +Cc: linux-os \(Dick Johnson\), Linus Torvalds, linux-kernel

On Fri, 7 Jul 2006 17:22:22 -0500 (CDT), Chase Venters <chase.venters@clientec.com> wrote:

> On Sat, 8 Jul 2006, J.A. Magallón wrote:
> 
> > #include <stdint.h>
> >
> > //volatile
> > uint32_t spinvar = 1;
> > uint32_t mtx;
> >
> > void lock(uint32_t* l)
> > {
> >    *l = 1;
> > }
> >
> > void unlock(uint32_t* l)
> > {
> >    *l = 0;
> > }
> >
> > void spin()
> > {
> >    uint32_t local;
> >
> >    for (;;)
> >    {
> >        lock(&mtx);
> >        local = spinvar;
> >        unlock(&mtx);
> >        if (!local)
> >            break;
> >    }
> > }
> 
> This is _totally_ incorrect. Your "lock" functions are broken, because 
> they do not introduce syncronization points or locked bus operations. 

But why should I do that ? I write C, a high level language, and I don't
mind about buses or whatever. I just have to know that a 32 bit store is
atomic in a 32bit arch. There is a nice high level and portable language
feature to say 'reload this variable here'. And it lets the compiler do its
optimization work as it best can asuming it has to reload that variable.
Instead, you prefer to lock all the bus for that.
Kernel developers spent a full release to get rid of
the 'big kernel lock'. Perhaps there is another release needed to get
rid of the 'big memory barrier'.

> Due 
> to this huge failure, the compiler and/or processor is free to re-order 
> your loads and stores, resulting in totally unpredictable runtime 
> behavior.

Nope. If I want it not to reorder, put the volatile in the parameters.
So I just invalidate that variable, not everything.

> 
> > without the volatile:
> >
> > spin:
> >    pushl   %ebp
> >    movl    spinvar, %eax
> >    movl    %esp, %ebp
> >    testl   %eax, %eax
> >    je  .L7
> > .L10:
> >    jmp .L10
> > .L7:
> >    movl    $0, mtx
> >    popl    %ebp
> >    ret
> >
> > so the compiler did something like
> >
> >   local = spinvar;
> >   if (local)
> > 	for (;;);
> >
> > (notice the dead lock/unlock inlined code elimination).
> 
> ...which indicates that your code is wrong.

My code probably is wrong without the volatile, but the compiler did the
right(tm) thing.

> 
> > With the volatile, the code is correct:
> >
> > spin:
> >    pushl   %ebp
> >    movl    %esp, %ebp
> >    .p2align 4,,7
> > .L7:
> >    movl    spinvar, %eax
> >    testl   %eax, %eax
> >    jne .L7
> >    movl    $0, mtx
> >    popl    %ebp
> >    ret
> 
> Actually, it's not. It's never setting "mtx" to 1,

What is correct regarding the code I gave it.
Inlined code is:

    for (;;)
    {
        mtx = 1;
        local = spinvar;
        mtx = 0;
        if (!local)
            break;
    }

I just change mtx and don't use it for anything. So the compiler is free to
optimize out it.
If I don't want the compiler to kill it, I put the volatile.

> and it's certainly not 
> doing any sync or locked ops.
> 
> > So think about all you inlined spinlocks, mutexes and so on.
> 
> Yes, you got it wrong, and the current code gets it right. (Linus's patch 
> of =m to +m, combined with -volatile, is best)

Nope, I did the same thing with a high level and portable way.
Its like the choice between writing inlined SSE assembler or using the
vector and +,-,* builtins in gcc.

> 
> > And if you do
> >
> > void lock(volatile uint32_t* l)
> > ...
> > void unlock(volatile uint32_t* l)
> > ...
> >
> > the code is even better:
> >
> > spin:
> >    pushl   %ebp
> >    movl    %esp, %ebp
> >    .p2align 4,,7
> > .L7:
> >    movl    $1, mtx    <=========
> >    movl    spinvar, %eax
> >    movl    $0, mtx    <=========
> >    testl   %eax, %eax
> >    jne .L7
> >    popl    %ebp
> >    ret
> 
> NO! It's not better. You're still not syncing or locking the bus! If you 
> refer to the fact that the "movl $1" has magically appeared, that's 
> because you've just PAPERED OVER THE PROBLEM WITH "volatile", which is 
> _exactly_ what Linus is telling you NOT TO DO.
> 
> > So volatile just means 'dont trust this does not change even you don't see
> > why'.
> >
> 
> No.
> 
> Thanks,
> Chase


--
J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
                                         \         It's better when it's free
Mandriva Linux release 2007.0 (Cooker) for i586
Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:22                                                   ` Chase Venters
  2006-07-07 22:37                                                     ` J.A. Magallón
@ 2006-07-07 22:49                                                     ` J.A. Magallón
  2006-07-07 22:59                                                       ` Vadim Lobanov
  2006-07-07 23:18                                                       ` Chase Venters
  1 sibling, 2 replies; 133+ messages in thread
From: J.A. Magallón @ 2006-07-07 22:49 UTC (permalink / raw)
  To: Chase Venters; +Cc: linux-os \(Dick Johnson\), Linus Torvalds, linux-kernel

On Fri, 7 Jul 2006 17:22:22 -0500 (CDT), Chase Venters <chase.venters@clientec.com> wrote:

> > .L7:
> >    movl    $1, mtx    <=========
> >    movl    spinvar, %eax
> >    movl    $0, mtx    <=========
> >    testl   %eax, %eax
> >    jne .L7
> >    popl    %ebp
> >    ret
> 
> NO! It's not better. You're still not syncing or locking the bus! If you 
> refer to the fact that the "movl $1" has magically appeared, that's 
> because you've just PAPERED OVER THE PROBLEM WITH "volatile", which is 
> _exactly_ what Linus is telling you NOT TO DO.
> 

BTW, I really don't mind if a given architecnture has to lock the bus or
say a prayer to Budha to reload a variable. I want it to be reloaded at
every (or a certain, in case of a (volatile)mtx cast) usage. The compiler
is the responsible of knowing what to do. What if nextgen P4 Xeon do not
need a bus lock ? Will you rewrite the kernel ?

--
J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
                                         \         It's better when it's free
Mandriva Linux release 2007.0 (Cooker) for i586
Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:49                                                     ` J.A. Magallón
@ 2006-07-07 22:59                                                       ` Vadim Lobanov
  2006-07-07 23:18                                                       ` Chase Venters
  1 sibling, 0 replies; 133+ messages in thread
From: Vadim Lobanov @ 2006-07-07 22:59 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: Chase Venters, linux-os \\(Dick Johnson\\), Linus Torvalds, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: TEXT/PLAIN; charset=X-UNKNOWN, Size: 1535 bytes --]

On Sat, 8 Jul 2006, J.A. [UTF-8] Magallón wrote:

> On Fri, 7 Jul 2006 17:22:22 -0500 (CDT), Chase Venters <chase.venters@clientec.com> wrote:
>
> > > .L7:
> > >    movl    $1, mtx    <=========
> > >    movl    spinvar, %eax
> > >    movl    $0, mtx    <=========
> > >    testl   %eax, %eax
> > >    jne .L7
> > >    popl    %ebp
> > >    ret
> >
> > NO! It's not better. You're still not syncing or locking the bus! If you
> > refer to the fact that the "movl $1" has magically appeared, that's
> > because you've just PAPERED OVER THE PROBLEM WITH "volatile", which is
> > _exactly_ what Linus is telling you NOT TO DO.
> >
>
> BTW, I really don't mind if a given architecnture has to lock the bus or
> say a prayer to Budha to reload a variable. I want it to be reloaded at
> every (or a certain, in case of a (volatile)mtx cast) usage. The compiler
> is the responsible of knowing what to do. What if nextgen P4 Xeon do not
> need a bus lock ? Will you rewrite the kernel ?

Looks like you need to read Documentation/memory-barriers.txt. That file
explains why the above assembly code is not correct.

Bonus question: what stops the processor from coalescing or rearranging
the three movl instructions in that assembly?

> --
> J.A. Magallon <jamagallon()ono!com>     \               Software is like sex:
>                                          \         It's better when it's free
> Mandriva Linux release 2007.0 (Cooker) for i586
> Linux 2.6.17-jam01 (gcc 4.1.1 20060518 (prerelease)) #2 SMP PREEMPT Wed

-- Vadim Lobanov

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 19:51                                           ` linux-os (Dick Johnson)
  2006-07-07 20:25                                             ` Linus Torvalds
  2006-07-07 20:39                                             ` Krzysztof Halasa
@ 2006-07-07 23:06                                             ` Björn Steinbrink
  2006-07-08  8:36                                             ` Avi Kivity
  3 siblings, 0 replies; 133+ messages in thread
From: Björn Steinbrink @ 2006-07-07 23:06 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Krzysztof Halasa, Linus Torvalds, Ingo Molnar, Andrew Morton,
	linux-kernel, arjan

On 2006.07.07 15:51:11 -0400, linux-os (Dick Johnson) wrote:
> 
> On Fri, 7 Jul 2006, Krzysztof Halasa wrote:
> 
> > "linux-os \(Dick Johnson\)" <linux-os@analogic.com> writes:
> >
> >> extern int spinner;
> >>
> >> funct(){
> >>      while(spinner)
> >>          ;
> >>
> >> The 'C' compiler has no choice but to actually make that memory access
> >> and read the variable because the variable is in another module (a.k.a.
> >> file).
> >
> > defiant:/tmp/khc$ gcc --version
> > gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> > defiant:/tmp/khc$ cat test.c
> > extern int spinner;
> >
> > void funct(void)
> > {
> >     while(spinner)
> >         ;
> > }
> > defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> > defiant:/tmp/khc$ objdump -d test.o
> >
> > test.o:     file format elf32-i386
> >
> > Disassembly of section .text:
> >
> > 00000000 <funct>:
> >   0:   a1 00 00 00 00          mov    0x0,%eax
> >   5:   55                      push   %ebp
> >   6:   89 e5                   mov    %esp,%ebp
> >   8:   85 c0                   test   %eax,%eax
> >   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
> >  10:   75 fe                   jne    10 <funct+0x10>
> >  12:   5d                      pop    %ebp
> >  13:   c3                      ret
> >
> > "0x0" is, of course, for relocation.
> 
> 
> So read the code; you have "10:   jne 10", jumping to itself
> forever, without even doing anything else to set the flags, much
> less reading a variable.

Well, you said that the compiler is forced to make the memory access,
given the topic of this discussion probably noone assumed that you meant
exactly one memory access. Of course that code is broken, but you seemed
to say that it is not.

> >> However, if I have the same code, but the variable is visible during
> >> compile time, i.e.,
> >>
> >> int spinner=0;
> >>
> >> funct(){
> >>      while(spinner)
> >>          ;
> >>
> >> ... the compiler may eliminate that code altogether because it
> >> 'knows' that spinner is FALSE, having initialized it to zero
> >> itself.
> >
> > defiant:/tmp/khc$ cat test.c
> > int spinner = 0;
> >
> > void funct(void)
> > {
> >     while(spinner)
> >         ;
> > }
> > defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> > defiant:/tmp/khc$ objdump -d test.o
> >
> > 00000000 <funct>:
> >   0:   a1 00 00 00 00          mov    0x0,%eax
> >   5:   55                      push   %ebp
> >   6:   89 e5                   mov    %esp,%ebp
> >   8:   85 c0                   test   %eax,%eax
> >   a:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
> >  10:   75 fe                   jne    10 <funct+0x10>
> >  12:   5d                      pop    %ebp
> >  13:   c3                      ret
> >
> 
> Then, you have exactly the same thing here:
>    10:   75 fe                   jne    10 <funct+0x10>
> 
> Same bad code.

Which proves you wrong, you said the compiler would optimize it away o.O

> >> Since spinner is global in scope, somebody surely could have
> >> changed it before funct() was even called, but the current gcc
> >> 'C' compiler doesn't care and may optimize it away entirely.
> >
> > Personally I don't think such C compiler even existed. HISOFT C
> > on ZX Spectrum could be a good candidate but I think it didn't
> > have any optimizer :-)
> >
> >> To
> >> prevent this, you must declare the variable volatile. To do
> >> otherwise is a bug.
> >
> > Nope. Volatile just means that every read (and write) must actually
> > access the variable. Note that the compiler optimized out accesses
> > to the variable in the loop - while it has to check at the beginning
> > of funct(), it knows that the variable is constant through funct().
> >
> > Note that "volatile" is not exactly what we usually want, but it
> > does the job (i.e., the program doesn't crash, but the code is
> > probably suboptimal).
> >
> >> That said, I think that the current
> >> implementation of 'volatile' is broken because the compiler
> >> seems to believe that the variable has moved! It recalculates
> >> the address of the variable as well as accessing its value.
> >> This is what makes the code generation problematical.
> >
> > You must be using a heavily broken compiler:
> >
> > defiant:/tmp/khc$ cat test.c
> > volatile int spinner = 0;
> >
> > void funct(void)
> > {
> >     while(spinner)
> >         ;
> > }
> > defiant:/tmp/khc$ gcc -Wall -O2 -c test.c
> > defiant:/tmp/khc$ objdump -d test.o
> >
> > 00000000 <funct>:
> >   0:   55                      push   %ebp
> >   1:   89 e5                   mov    %esp,%ebp
> >   3:   a1 00 00 00 00          mov    0x0,%eax
> >   8:   85 c0                   test   %eax,%eax
> >   a:   75 f7                   jne    3 <funct+0x3>
> >   c:   5d                      pop    %ebp
> >   d:   c3                      ret
> 
> This is the only code that works. Guess why it worked? Because
> you declared the variable volatile.

The inline assembly works as well, it is made volatile and gcc will not
mess with it.

> Now Linus declares that instead of declaring an object volatile
> so that it is actually accessed every time it is referenced, he wants
> to use a GNU-ism with assembly that tells the compiler to re-read
> __every__ variable existing im memory, instead of just one. Go figure!

That wasn't Linus. Arjan suggested using barrier() in the right places.
What Linus did was just pointing out that volatile is the wrong thing to
use and that some inline assembly code had "=m" outputs instead of "+m".

The memory clobber was already there for the locking primitives and
AFAICT it is required.

lock(foo_lock);
foo = 5;
unlock(foo_lock);

// do something else

lock(foo_lock);
while (foo = 5);
unlock(foo_lock);

foo might have changed while the cpu was doing something else, but
without the memory clobber, the compiler might assume that foo is
unchanged. Making foo_lock volatile won't help here and making
everything that requires a lock volatile will get you nothing but crappy
code  (but hey, you can avoid the memory clobber!).

> Reference:
> /usr/src/linux-2.6.16.4/include/linux/compiler-gcc.h:
> #define barrier() __asm__ __volatile__("": : :"memory")

Björn

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:49                                                     ` J.A. Magallón
  2006-07-07 22:59                                                       ` Vadim Lobanov
@ 2006-07-07 23:18                                                       ` Chase Venters
  1 sibling, 0 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-07 23:18 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: linux-os \(Dick Johnson\), Linus Torvalds, linux-kernel

On Friday 07 July 2006 17:49, J.A. Magallón wrote:
> On Fri, 7 Jul 2006 17:22:22 -0500 (CDT), Chase Venters 
<chase.venters@clientec.com> wrote:
> > > .L7:
> > >    movl    $1, mtx    <=========
> > >    movl    spinvar, %eax
> > >    movl    $0, mtx    <=========
> > >    testl   %eax, %eax
> > >    jne .L7
> > >    popl    %ebp
> > >    ret
> >
> > NO! It's not better. You're still not syncing or locking the bus! If you
> > refer to the fact that the "movl $1" has magically appeared, that's
> > because you've just PAPERED OVER THE PROBLEM WITH "volatile", which is
> > _exactly_ what Linus is telling you NOT TO DO.
>
> BTW, I really don't mind if a given architecnture has to lock the bus or
> say a prayer to Budha to reload a variable. I want it to be reloaded at
> every (or a certain, in case of a (volatile)mtx cast) usage. The compiler
> is the responsible of knowing what to do. What if nextgen P4 Xeon do not
> need a bus lock ? Will you rewrite the kernel ?

You are missing the point. Your code is obviously trying to implement locks, 
which is why you've called your functions lock() and unlock(). It is 
impossible, on any architecture, to implement correct locks in pure C. Your 
locks are incapable of working in the kernel _or_ in user space.

Barriers are a different but related point. If you want to know more about 
barriers, and why they exist, please read Documentation/memory-barriers.txt 
in full. (FYI, Locked bus operations on x86 imply memory barriers.)

Locks are supposed to prevent sections of your code from being interrupted by 
something that touches the same data. Because both the compiler and the 
processor reserve the right to re-order your loads and stores, any lock that 
does not form a memory barrier both for the CPU and for the compiler is 
broken. Because you cannot form a CPU memory barrier from C (much less the 
ATOMIC COMPARE AND SWAP you need to implement a lock), any lock implemented 
in pure C is broken by definition. 

Your code is wrong. Barring some fundamental change to the way things work, 
locks and/or barriers are always going to be necessary.

Your usage of 'volatile' of course changes the generated code output. It will 
force the compiler to reload the variable. The problem with your lock code is 
that reloading the variable _IS NOT ENOUGH_. Hence your usage of volatile is 
a broken way to paper over the problem.

'volatile' in general, for the kernel, is wrong because it isn't specific 
enough and it causes the compiler to generate bad code.

But hey, maybe some day we'll all be proven wrong. Some magical new computer 
architecture will appear, and on that day, we will rewrite the kernel.

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:05                                                 ` J.A. Magallón
  2006-07-07 22:22                                                   ` Chase Venters
@ 2006-07-07 23:36                                                   ` Davide Libenzi
  1 sibling, 0 replies; 133+ messages in thread
From: Davide Libenzi @ 2006-07-07 23:36 UTC (permalink / raw)
  To: J.A. Magallón
  Cc: linux-os (Dick Johnson), Linus Torvalds, Linux Kernel Mailing List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: TEXT/PLAIN; CHARSET=X-UNKNOWN; format=flowed, Size: 1869 bytes --]

On Sat, 8 Jul 2006, J.A. Magallón wrote:

> On Fri, 7 Jul 2006 17:22:31 -0400, "linux-os \(Dick Johnson\)" <linux-os@analogic.com> wrote:
>
>>
>> On Fri, 7 Jul 2006, Linus Torvalds wrote:
>>
>>>
>>> On Fri, 7 Jul 2006, linux-os (Dick Johnson) wrote:
>>>>
>>>> Now Linus declares that instead of declaring an object volatile
>>>> so that it is actually accessed every time it is referenced, he wants
>>>> to use a GNU-ism with assembly that tells the compiler to re-read
>>>> __every__ variable existing im memory, instead of just one. Go figure!
>>>
>>> Actually, it's not just me.
>>>
>>> Read things like the Intel CPU documentation.
>>>
>>> IT IS ACTIVELY WRONG to busy-loop on a variable. It will make the CPU
>>> potentially over-heat, causing degreaded performance, and you're simply
>>> not supposed to do it.
>>
>> This is a bait and switch argument. The code was displayed to show
>> the compiler output, not an example of good coding practice.
>>
>
> volatile means what it means, is usefull and is right. If it is used
> in kernel for other things apart from what it was designed for it is
> kernel or programmer responsibility. It does not mention nothing about
> locking.

(looking at your code ...)
I think you guys mixed the concepts about *if* a memory access happens 
(volatile), and *where* the memory access happens (barrier).
As far as kernel coding goes (or MT userspace), if you happen to care *if*
a memory access happens, you probably want to care even *where* the memory 
access happens. And modern CPUs and compilers do not respect the WYSIWYG 
property ;)
This is not always true (*if* -> *where*), but it's very frequent.
And using "volatile" can make your code work in some cases, and misbehave 
in others.
Can we now all move on to a more refreshing "C++ kernel rewrite" thread :)



- Davide

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:09                                                 ` Ingo Molnar
@ 2006-07-08  7:36                                                   ` Ingo Molnar
  0 siblings, 0 replies; 133+ messages in thread
From: Ingo Molnar @ 2006-07-08  7:36 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Linus Torvalds, Krzysztof Halasa, Andrew Morton, Linux kernel, arjan


* Ingo Molnar <mingo@elte.hu> wrote:

> > [...] In fact, your spin-lock code already inserts "rep nops" and I 
> > never implied that they should be removed. I said only that "volatile" 
> > still needs to be used, not some macro that tells the compiler that 
> > everything in memory probably got trashed. [...]
> 
> your position here does seem to make much sense to me, so please help me 
                         ^--- not
> understand it. You suggest that the assembly code should be left alone. 
> But then why do you need the volatile keyword to begin with?

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 19:51                                           ` linux-os (Dick Johnson)
                                                               ` (2 preceding siblings ...)
  2006-07-07 23:06                                             ` Björn Steinbrink
@ 2006-07-08  8:36                                             ` Avi Kivity
  3 siblings, 0 replies; 133+ messages in thread
From: Avi Kivity @ 2006-07-08  8:36 UTC (permalink / raw)
  To: linux-os (Dick Johnson)
  Cc: Krzysztof Halasa, Linus Torvalds, Ingo Molnar, Andrew Morton,
	linux-kernel, arjan

linux-os (Dick Johnson) wrote:
>
> >> The 'C' compiler has no choice but to actually make that memory access
> >> and read the variable because the variable is in another module 
> (a.k.a.
> >> file).
>

[code showing the compiler may cache the access]

> So read the code; you have "10:   jne 10", jumping to itself
> forever, without even doing anything else to set the flags, much
> less reading a variable.
>

Short attention span, eh? He's proven you wrong and you go on and talk 
about something else.

> >
> >> However, if I have the same code, but the variable is visible during
> >> compile time, i.e.,
> >>
> >> int spinner=0;
> >>
> >> funct(){
> >>      while(spinner)
> >>          ;
> >>
> >> ... the compiler may eliminate that code altogether because it
> >> 'knows' that spinner is FALSE, having initialized it to zero
> >> itself.
> >
>

[code showing that defining the variable in the same translation unit 
makes no difference]

> Then, you have exactly the same thing here:
>    10:   75 fe                   jne    10 <funct+0x10>
>
> Same bad code.
>

You seem to have forgotten that you claimed different code would be 
generated.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 19:15                                       ` Linus Torvalds
  2006-07-06 19:33                                         ` Chris Friesen
@ 2006-07-08  8:40                                         ` Avi Kivity
  2006-07-08  8:51                                           ` Arjan van de Ven
  1 sibling, 1 reply; 133+ messages in thread
From: Avi Kivity @ 2006-07-08  8:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Mark Lord, Arjan van de Ven, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Linus Torvalds wrote:
> On Thu, 6 Jul 2006, Mark Lord wrote:
>
> >
> > I'm still browsing a copy here, but so far have only really found this:
> >
> >  A volatile declaration may be used to describe an object corresponding
> >  to a memory-mapped input/output port or an object accessed by an
> >  aysnchronously interrupting function.  Actions on objects so declared
> >  shall not be "optimized out" by an implementation or reordered except
> >  as permitted by the rules for evaluating expressions.
>
> Note that the "reordered" is totally pointless.
>
> The _hardware_ will re-order accesses. Which is the whole point.
> "volatile" is basically never sufficient in itself.
>
> So the definition of "volatile" literally made sense three or four 
> decades
> ago. It's not sensible any more.            
>

It could be argued that gcc's implementation of volatile is wrong, and 
that gcc should add the appropriate serializing instructions before and 
after volatile accesses.

Of course, that would make volatile even more suboptimal, but at least 
correct.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08  8:40                                         ` Avi Kivity
@ 2006-07-08  8:51                                           ` Arjan van de Ven
  2006-07-08  9:20                                             ` Avi Kivity
  0 siblings, 1 reply; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-08  8:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Linus Torvalds, Mark Lord, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

On Sat, 2006-07-08 at 11:40 +0300, Avi Kivity wrote:
> Linus Torvalds wrote:
> > On Thu, 6 Jul 2006, Mark Lord wrote:
> >
> > >
> > > I'm still browsing a copy here, but so far have only really found this:
> > >
> > >  A volatile declaration may be used to describe an object corresponding
> > >  to a memory-mapped input/output port or an object accessed by an
> > >  aysnchronously interrupting function.  Actions on objects so declared
> > >  shall not be "optimized out" by an implementation or reordered except
> > >  as permitted by the rules for evaluating expressions.
> >
> > Note that the "reordered" is totally pointless.
> >
> > The _hardware_ will re-order accesses. Which is the whole point.
> > "volatile" is basically never sufficient in itself.
> >
> > So the definition of "volatile" literally made sense three or four 
> > decades
> > ago. It's not sensible any more.            
> >
> 
> It could be argued that gcc's implementation of volatile is wrong, and 
> that gcc should add the appropriate serializing instructions before and 
> after volatile accesses.
> 
> Of course, that would make volatile even more suboptimal, but at least 
> correct.

with PCI, and the PCI posting rules, there is no "one" serializing
instruction, you need to know the specifics of the device in question to
cause the flush. So at least there is no universal possible
implementation of volatile as you suggest ;-)



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08  8:51                                           ` Arjan van de Ven
@ 2006-07-08  9:20                                             ` Avi Kivity
  2006-07-08  9:51                                               ` Arjan van de Ven
  0 siblings, 1 reply; 133+ messages in thread
From: Avi Kivity @ 2006-07-08  9:20 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Linus Torvalds, Mark Lord, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Arjan van de Ven wrote:
>
> >
> > It could be argued that gcc's implementation of volatile is wrong, and
> > that gcc should add the appropriate serializing instructions before and
> > after volatile accesses.
> >
> > Of course, that would make volatile even more suboptimal, but at least
> > correct.
>
> with PCI, and the PCI posting rules, there is no "one" serializing
> instruction, you need to know the specifics of the device in question to
> cause the flush. So at least there is no universal possible
> implementation of volatile as you suggest ;-)
>

A serializing volatile makes it possible to write portable code to 
access pci mmio.  You'd just follow a write with a read or whatever the 
rules say.

Of course, it would still generate horrible code, and would still be 
unable to express notions like atomic accesses, so there is not much 
point in it.

One point which was raised, is that optimization barriers also somewhat 
pessimize the code. I wonder how useful a partial memory clobber could be:

  #define spin_lock_data(lock, lock_protected_data...) \
      do { __spin_lock(lock); asm volatile ( "" : : : 
"memory"(lock_protected_data) ); } while(0)

Where __spin_lock has a partial memory clobber only on the lock variable 
itself.

It would take a lot of work, but it can eliminate the instructions to 
save and reload registers.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* RE: [patch] spinlocks: remove 'volatile'
  2006-07-07 22:37                                                     ` J.A. Magallón
@ 2006-07-08  9:33                                                       ` David Schwartz
  0 siblings, 0 replies; 133+ messages in thread
From: David Schwartz @ 2006-07-08  9:33 UTC (permalink / raw)
  To: jamagallon; +Cc: linux-kernel


> > This is _totally_ incorrect. Your "lock" functions are broken, because 
> > they do not introduce syncronization points or locked bus operations. 
 
> But why should I do that ? I write C, a high level language, and I don't
> mind about buses or whatever. I just have to know that a 32 bit store is
> atomic in a 32bit arch. There is a nice high level and portable language
> feature to say 'reload this variable here'. And it lets the 
> compiler do its
> optimization work as it best can asuming it has to reload that variable.
> Instead, you prefer to lock all the bus for that.
> Kernel developers spent a full release to get rid of
> the 'big kernel lock'. Perhaps there is another release needed to get
> rid of the 'big memory barrier'.

	Wow, no. Not even close.

	If you need a full memory barrier because you want general lock/mutex semantics, then you need it and can't avoid it. If you just need atomic 32-bit operations, and you know that the platform has them, just use them directly with inline assembly.

	The proof that 'volatile' does not solve the problem is:

volatile int i;
i=i+1;

	Is this an atomic operation or not? Just because the platform has an atomic operation that will do this, does 'volatile' guarantee that I get it?

	If you are programming in a high-level language and need the atomicity guarantees that particular assembly instructions are known to give you, then you *must* specify those instructions. The 'volatile' keyword has *never* guaranteed atomicity even where such atomicity is possible.

	So both parts of your argument is wrong. The alternative to 'volatile' is not invalidating memory, we only invalidate memory when those are the specific semantics we need. And volatile is neither necessary nor sufficient to get atomicity when that is possible on the platform.

	You are actively advocating for coding practices that are known (from years of painful experience) to be disastrous.

> BTW, I really don't mind if a given architecnture has to lock the bus or
> say a prayer to Budha to reload a variable. I want it to be reloaded at
> every (or a certain, in case of a (volatile)mtx cast) usage. The compiler
> is the responsible of knowing what to do. What if nextgen P4 Xeon do not
> need a bus lock ? Will you rewrite the kernel ?

	What if the nextgen P4 Xeon needs something else in addition to what's needed to get the guaranteed semantics of 'volatile' (which are just signals and longjmp on a single CPU). Suppose it required something very expensive when accesses might occur on another CPU that wasn't needed for any of the defined uses of 'volatile'. Why would gcc provide that and slow down all the programs that use 'volatile' correctly?

	The whole problem with your use of 'volatile' that it's in the 'just happens to work' category. It just happens to work because what's needed for signals and longjmp is also what's needed for SMP. If something extre were needed for SMP that wasn't needed for the defined uses of 'volatile', you'd be screwed. That's why 'volatile' does *not* put in memory barriers by default, even though they're arguably needed if 'volatile' were supposed to prevent reordering of operations (which for locks is what we need!).

	DS



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08  9:20                                             ` Avi Kivity
@ 2006-07-08  9:51                                               ` Arjan van de Ven
  2006-07-08 10:18                                                 ` Avi Kivity
  0 siblings, 1 reply; 133+ messages in thread
From: Arjan van de Ven @ 2006-07-08  9:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Linus Torvalds, Mark Lord, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

On Sat, 2006-07-08 at 12:20 +0300, Avi Kivity wrote:
> Arjan van de Ven wrote:
> >
> > >
> > > It could be argued that gcc's implementation of volatile is wrong, and
> > > that gcc should add the appropriate serializing instructions before and
> > > after volatile accesses.
> > >
> > > Of course, that would make volatile even more suboptimal, but at least
> > > correct.
> >
> > with PCI, and the PCI posting rules, there is no "one" serializing
> > instruction, you need to know the specifics of the device in question to
> > cause the flush. So at least there is no universal possible
> > implementation of volatile as you suggest ;-)
> >
> 
> A serializing volatile makes it possible to write portable code to 
> access pci mmio.  You'd just follow a write with a read or whatever the 
> rules say.

yeah except that the compiler cannot know what to read; reading back the
same memory location is NOT correct nor safe. It's device specific, for
some devices it'll be safe, for others you have to read some OTHER
location.



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:48                                                 ` Chase Venters
@ 2006-07-08 10:00                                                   ` Krzysztof Halasa
  2006-07-08 13:41                                                     ` Chase Venters
  0 siblings, 1 reply; 133+ messages in thread
From: Krzysztof Halasa @ 2006-07-08 10:00 UTC (permalink / raw)
  To: Chase Venters
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

Chase Venters <chase.venters@clientec.com> writes:

> Locks are supposed to be syncronization points, which is why they
> ALREADY HAVE "memory" on the clobber list! "memory" IS NECESSARY. The
> fact that "=m" is changing to "+m" in Linus's patches is because "=m"
> is in fact insufficient, because it would let the compiler believe
> we're just going to over-write the value. "volatile" merely hides that
> bug -- once that bug is _fixed_ (by going to "+m"), "volatile" is no
> longer useful.

This is a completely different story. "volatile", barrier() and "+m"/"=m"
aren't sync points. If the variable access isn't atomic you must use
locking even with volatiles, barriers etc.

> If "volatile" is in use elsewhere (other than locks), it's still
> probably wrong. In these cases, you can use a barrier, a volatile
> cast, or an inline asm with a specific clobber.

A volatile cast is just a volatile, moved from data declaration to
all access points. It doesn't buy you anything.
barrier() is basically "all-volatile". All of them operate on the same,
compiler level.

If the "volatile" is used the wrong way (which is probably true for most
cases), then volatile cast and barrier() will be wrong as well. You need
locks or atomic access, meaningful on hardware level.
-- 
Krzysztof Halasa

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08  9:51                                               ` Arjan van de Ven
@ 2006-07-08 10:18                                                 ` Avi Kivity
  2006-07-08 10:28                                                   ` Thomas Gleixner
  2006-07-09  4:19                                                   ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 133+ messages in thread
From: Avi Kivity @ 2006-07-08 10:18 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Linus Torvalds, Mark Lord, linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Arjan van de Ven wrote:
>
> >
> > > with PCI, and the PCI posting rules, there is no "one" serializing
> > > instruction, you need to know the specifics of the device in 
> question to
> > > cause the flush. So at least there is no universal possible
> > > implementation of volatile as you suggest ;-)
> > >
> >
> > A serializing volatile makes it possible to write portable code to
> > access pci mmio.  You'd just follow a write with a read or whatever the
> > rules say.
>
> yeah except that the compiler cannot know what to read; reading back the
> same memory location is NOT correct nor safe. It's device specific, for
> some devices it'll be safe, for others you have to read some OTHER
> location.
>

I didn't suggest the compiler could or should do it, just that it would 
be possible (for the _user_) to write portable ISO C code to access PCI 
mmio registers, if volatile's implementation serialized access.

With the current implementation of volatile in gcc, it is impossible - 
you need to resort to inline assembly for some architectures, which is 
not an ISO C feature.

And I'm not suggesting that it would be a good idea to use volatile even 
if it was corrected - it would have to take a worst-case approach and 
thus would generate very bad code.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 10:18                                                 ` Avi Kivity
@ 2006-07-08 10:28                                                   ` Thomas Gleixner
  2006-07-09  4:19                                                   ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 133+ messages in thread
From: Thomas Gleixner @ 2006-07-08 10:28 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Arjan van de Ven, Linus Torvalds, Mark Lord,
	linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

On Sat, 2006-07-08 at 13:18 +0300, Avi Kivity wrote:

> I didn't suggest the compiler could or should do it, just that it would 
> be possible (for the _user_) to write portable ISO C code to access PCI 
> mmio registers, if volatile's implementation serialized access.

And how is this portable on complex multibus architectures, where the
PCI access goes through a couple of transports before hitting the PCI
hardware ?

volatile has no notion of serialization. volatile guarantees that the
compiler does not optimize and cache seemingly static values, i.e. it
disables compiler optimizations.

	tglx



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 10:00                                                   ` Krzysztof Halasa
@ 2006-07-08 13:41                                                     ` Chase Venters
  2006-07-08 20:09                                                       ` Krzysztof Halasa
  0 siblings, 1 reply; 133+ messages in thread
From: Chase Venters @ 2006-07-08 13:41 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

On Saturday 08 July 2006 04:59, Krzysztof Halasa wrote:
> Chase Venters <chase.venters@clientec.com> writes:
> > Locks are supposed to be syncronization points, which is why they
> > ALREADY HAVE "memory" on the clobber list! "memory" IS NECESSARY. The
> > fact that "=m" is changing to "+m" in Linus's patches is because "=m"
> > is in fact insufficient, because it would let the compiler believe
> > we're just going to over-write the value. "volatile" merely hides that
> > bug -- once that bug is _fixed_ (by going to "+m"), "volatile" is no
> > longer useful.
>
> This is a completely different story. "volatile", barrier() and "+m"/"=m"
> aren't sync points. If the variable access isn't atomic you must use
> locking even with volatiles, barriers etc.

You're mincing my words. The reason "memory" is on the clobber list is because 
a lock is supposed to synchronize all memory accesses up to that point. It's 
a fence / a barrier, because if the compiler re-orders your loads/stores 
across the lock, you're in trouble. That's exactly what I was pointing out. 

> > If "volatile" is in use elsewhere (other than locks), it's still
> > probably wrong. In these cases, you can use a barrier, a volatile
> > cast, or an inline asm with a specific clobber.
>
> A volatile cast is just a volatile, moved from data declaration to
> all access points. It doesn't buy you anything.
> barrier() is basically "all-volatile". All of them operate on the same,
> compiler level.

A volatile cast lets you prevent the compiler from always treating the 
variable as volatile.

> If the "volatile" is used the wrong way (which is probably true for most
> cases), then volatile cast and barrier() will be wrong as well. You need
> locks or atomic access, meaningful on hardware level.

No. Linus already described what some example invalid uses of "volatile" are. 
One example is the very one this whole thread is about - using 'volatile' on 
the declaration of the spinlock counter. That usage is _wrong_, and barrier() 
would not be. (Of course, it doesn't directly apply here, because barrier() 
already existed, by necessity, due to the "memory" clobber. And the lock's 
not done in pure C.)

Volatile originally existed to tell the compiler a variable could change at 
will. Because of reordering, it's almost never sufficient with our modern 
compilers and CPUs. That's precisely where barrier() (and/or its hardware 
equivalents) help in places where 'volatile' is wrong. Your statement is 
additionally wrong because one use-case of memory barriers is to safely write 
lock-free code.

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 13:41                                                     ` Chase Venters
@ 2006-07-08 20:09                                                       ` Krzysztof Halasa
  2006-07-08 20:40                                                         ` Chase Venters
  0 siblings, 1 reply; 133+ messages in thread
From: Krzysztof Halasa @ 2006-07-08 20:09 UTC (permalink / raw)
  To: Chase Venters
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

Chase Venters <chase.venters@clientec.com> writes:

> You're mincing my words. The reason "memory" is on the clobber list is
> because 
> a lock is supposed to synchronize all memory accesses up to that point. It's 
> a fence / a barrier, because if the compiler re-orders your loads/stores 
> across the lock, you're in trouble. That's exactly what I was pointing out.

Sure, but a barrier alone isn't enough. You have to use assembler and
it's beyond scope of C volatile.

> A volatile cast lets you prevent the compiler from always treating the 
> variable as volatile.

Yes, if that's what you really want.

>> If the "volatile" is used the wrong way (which is probably true for most
>> cases), then volatile cast and barrier() will be wrong as well. You need
>> locks or atomic access, meaningful on hardware level.
>
> No. Linus already described what some example invalid uses of "volatile"
> are. 
> One example is the very one this whole thread is about - using 'volatile' on 
> the declaration of the spinlock counter. That usage is _wrong_, and
> barrier() 
> would not be.

That's a special case, because you want to invalidate all variables,
but you still need locking. I.e., barrier() alone doesn't buy you
anything WRT to hardware.

> Volatile originally existed to tell the compiler a variable could change at 
> will. Because of reordering, it's almost never sufficient with our modern 
> compilers and CPUs. That's precisely where barrier() (and/or its hardware 
> equivalents) help in places where 'volatile' is wrong.

How does barrier() help here? Some example, maybe?
What do you consider a barrier() hardware equivalent?
Don't you think you're mixing compiler optimization and operation of
the hardware?

> Your statement is 
> additionally wrong because one use-case of memory barriers is to safely
> write 
> lock-free code.

You can't safely write lock-free code in C, if you have to deal
with hardware or SMP. C don't know about hardware.
-- 
Krzysztof Halasa

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 20:09                                                       ` Krzysztof Halasa
@ 2006-07-08 20:40                                                         ` Chase Venters
  2006-07-08 20:47                                                           ` Chase Venters
  2006-07-09 10:57                                                           ` Krzysztof Halasa
  0 siblings, 2 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-08 20:40 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

On Saturday 08 July 2006 15:08, Krzysztof Halasa wrote:
> Chase Venters <chase.venters@clientec.com> writes:
> > You're mincing my words. The reason "memory" is on the clobber list is
> > because
> > a lock is supposed to synchronize all memory accesses up to that point.
> > It's a fence / a barrier, because if the compiler re-orders your
> > loads/stores across the lock, you're in trouble. That's exactly what I
> > was pointing out.
>
> Sure, but a barrier alone isn't enough. You have to use assembler and
> it's beyond scope of C volatile.

Right, which is why volatile is wrong. Which is what we've all been saying. 
Constantly.

You need the barrier for both the CPU and the compiler. The CPU barrier comes 
from an instruction like '*fence' on x86 (or a locked bus op), while the 
compiler barrier comes from the memory clobber. Because the spinlocks already 
_must_ have both of these (including the other constraints in the inline 
asm), 'volatile' on the spinlock ctr is useless.

> > A volatile cast lets you prevent the compiler from always treating the
> > variable as volatile.
>
> Yes, if that's what you really want.
>
> >> If the "volatile" is used the wrong way (which is probably true for most
> >> cases), then volatile cast and barrier() will be wrong as well. You need
> >> locks or atomic access, meaningful on hardware level.
> >
> > No. Linus already described what some example invalid uses of "volatile"
> > are.
> > One example is the very one this whole thread is about - using 'volatile'
> > on the declaration of the spinlock counter. That usage is _wrong_, and
> > barrier()
> > would not be.
>
> That's a special case, because you want to invalidate all variables,
> but you still need locking. I.e., barrier() alone doesn't buy you
> anything WRT to hardware.

Special case? It's the topic of this thread.

As for barrier() being a compiler boundary versus a hardware boundary, I don't 
think anyone is disputing that. Which is why I speak of both compiler and 
hardware barriers:

> > Volatile originally existed to tell the compiler a variable could change
> > at will. Because of reordering, it's almost never sufficient with our
> > modern compilers and CPUs. That's precisely where barrier() (and/or its
> > hardware equivalents) help in places where 'volatile' is wrong.
>
> How does barrier() help here? Some example, maybe?
> What do you consider a barrier() hardware equivalent?
> Don't you think you're mixing compiler optimization and operation of
> the hardware?

Documentation/memory-barriers.txt covers barriers pretty thoroughly. barrier() 
is a Linux macro that is part of a suite of macros to implement various kinds 
of barriers in various situations. Hardware equivalent? How about mb()?

> > Your statement is
> > additionally wrong because one use-case of memory barriers is to safely
> > write
> > lock-free code.
>
> You can't safely write lock-free code in C, if you have to deal
> with hardware or SMP. C don't know about hardware.

Documentation/memory-barriers.txt lists situations like the ones I describe. 
It's not pure C -- you end up having to insert a hardware barrier too, but 
Linux makes many macros available to do so. (Of course, these macros are 
using inline asm). Notice I specifically mention hardware barriers in the 
paragraph above. When I say one use-case of memory barriers is to safely 
write lock-free code, I'm talking about the use of both hardware and compiler 
barriers (indeed, _both_ types of barriers are documented in 
Documentation/memory-barriers.txt, along with examples of lock-free C code 
supported by Linux barrier macros).

Thanks,
Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 20:40                                                         ` Chase Venters
@ 2006-07-08 20:47                                                           ` Chase Venters
  2006-07-09 10:57                                                           ` Krzysztof Halasa
  1 sibling, 0 replies; 133+ messages in thread
From: Chase Venters @ 2006-07-08 20:47 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

On Saturday 08 July 2006 15:40, Chase Venters wrote:
> You need the barrier for both the CPU and the compiler. The CPU barrier
> comes from an instruction like '*fence' on x86 (or a locked bus op), while
> the compiler barrier comes from the memory clobber. Because the spinlocks
> already _must_ have both of these (including the other constraints in the
> inline asm), 'volatile' on the spinlock ctr is useless.

Btw, perhaps what is going on here is a misunderstanding of 
terminology? "Barrier" or "Memory barrier" can refer to both a hardware or 
compiler barrier, which is why Documentation/memory-barriers.txt speaks of 
both in the same file. Indeed, you often have both in the same spot, and the 
names are even similar:

barrier() -> compiler memory barrier
wmb() -> write memory barrier
...

Sometimes you'll see "optimization barrier", but you should remember that 
barrier() boils down to:

asm volatile ("" ::: "memory")

...because it's preventing memory caching/reordering across the unpredictable 
memory clobber.

See?

>
> Thanks,
> Chase

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-07 21:59                                                 ` Linus Torvalds
  2006-07-07 22:06                                                   ` Linus Torvalds
@ 2006-07-08 20:49                                                   ` Pavel Machek
  2006-07-08 21:43                                                     ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Pavel Machek @ 2006-07-08 20:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-os (Dick Johnson),
	Krzysztof Halasa, Ingo Molnar, Andrew Morton, Linux kernel,
	arjan

Hi!

> > This is a bait and switch argument. The code was displayed to show
> > the compiler output, not an example of good coding practice.
> 
> NO IT IS NOT.
> 
> The whole point of my argument is simple:
> 
> > > 	"'volatile' is useless. The things it did 30 years ago are much
> > > 	 more complex these days, and need to be tied to much more
> > > 	 detailed rules that depend on the actual particular problem,
> > > 	 rather than one keyword to the compiler that doesn't actually
> > > 	 give enough information for the compiler to do anything useful"
> 
> And dammit, if you cannot admit that, then you're not worth discussing 
> with.
> 
> "volatile" is useless. It's a big hammer in a world where the nails aren't 
> nails any more, they are screws, thumb-tacks, and spotwelding.

Actually, because volatile is big hammer, it can be used to work
around compiler bugs. If compiler dies at internal error in function
foo, just sprinkle few volatiles into it, and you can usually work
around that compiler problem.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 20:49                                                   ` Pavel Machek
@ 2006-07-08 21:43                                                     ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-08 21:43 UTC (permalink / raw)
  To: Pavel Machek
  Cc: linux-os (Dick Johnson),
	Krzysztof Halasa, Ingo Molnar, Andrew Morton, Linux kernel,
	arjan



On Sat, 8 Jul 2006, Pavel Machek wrote:
> 
> Actually, because volatile is big hammer, it can be used to work
> around compiler bugs. If compiler dies at internal error in function
> foo, just sprinkle few volatiles into it, and you can usually work
> around that compiler problem.

Heh. That's probably an even better use of "volatile" than using it for 
hiding bugs in the sources. The bugs in the sources you'd be better off 
just _fixing_, while the compiler problems you may have a much harder time 
working around..

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:34                                   ` Linus Torvalds
@ 2006-07-08 22:50                                     ` Ralf Baechle
  2006-07-09  3:09                                       ` Linus Torvalds
  2006-07-09  3:07                                     ` Keith Owens
  1 sibling, 1 reply; 133+ messages in thread
From: Ralf Baechle @ 2006-07-08 22:50 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan

On Thu, Jul 06, 2006 at 01:34:14PM -0700, Linus Torvalds wrote:

> On Thu, 6 Jul 2006, Linus Torvalds wrote:
> > 
> > So I _think_ that we should change the "=m" to the much more correct "+m" 
> > at the same time (or before - it's really a bug-fix regardless) as 
> > removing the "volatile".
> 
> Here's a first cut (UNTESTED!) for x86. I didn't check any other 
> architectures, I bet they have similar problems.

I tried the same on MIPS, for lazyness sake at first only in atomic.h.  With
gcc 3.3 the code size is exactly the same with both "=m" and "+m", so I
didn't look into details of the generated code.  With gcc 4.1 "+m" results
in a size increase of about 1K for the ip27_defconfig kernel.  For example:

<unlock_kernel>:
       df830000        ld      v1,0(gp)
       8c620028        lw      v0,40(v1)
       04400014        bltz    v0,a80000000029944c <unlock_kernel+0x5c>
       00000000        nop
       2442ffff        subiu   v0,v0,1
       ac620028        sw      v0,40(v1)	# current->lock_depth
       8c630028        lw      v1,40(v1)	# current->lock_depth
       0461000b        bgez    v1,a80000000029943c <unlock_kernel+0x4c>

The poinless load isn't generated with "=m".  The interesting thing is
that in all the instances of bloat I looked at it was actually happening
not as part of the asm statement itself, so maybe gcc's reload is getting
a little confused.

  Ralf

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-06 20:34                                   ` Linus Torvalds
  2006-07-08 22:50                                     ` Ralf Baechle
@ 2006-07-09  3:07                                     ` Keith Owens
  2006-07-09  3:29                                       ` Linus Torvalds
  2006-07-09  3:58                                       ` Linus Torvalds
  1 sibling, 2 replies; 133+ messages in thread
From: Keith Owens @ 2006-07-09  3:07 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan

Linus Torvalds (on Thu, 6 Jul 2006 13:34:14 -0700 (PDT)) wrote:
>
>
>On Thu, 6 Jul 2006, Linus Torvalds wrote:
>> 
>> So I _think_ that we should change the "=m" to the much more correct "+m" 
>> at the same time (or before - it's really a bug-fix regardless) as 
>> removing the "volatile".
>
>Here's a first cut (UNTESTED!) for x86. I didn't check any other 
>architectures, I bet they have similar problems.
>
>		Linus
>
>----
>diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
>index 4f061fa..51a1662 100644
>--- a/include/asm-i386/atomic.h
>+++ b/include/asm-i386/atomic.h
>@@ -46,8 +46,8 @@ static __inline__ void atomic_add(int i,
> {
> 	__asm__ __volatile__(
> 		LOCK_PREFIX "addl %1,%0"
>-		:"=m" (v->counter)
>-		:"ir" (i), "m" (v->counter));
>+		:"+m" (v->counter)
>+		:"ir" (i));
> }

This disagrees with the gcc (4.1.0) docs.  info --index-search='Extended Asm' gcc

  The ordinary output operands must be write-only; GCC will assume
  that the values in these operands before the instruction are dead and
  need not be generated.  Extended asm supports input-output or
  read-write operands.  Use the constraint character `+' to indicate
  such an operand and list it with the output operands.  You should
  only use read-write operands when the constraints for the operand (or
  the operand in which only some of the bits are to be changed) allow a
  register.

All spinlocks must be in memory, which conflicts with the "allow a
register" above.  I hope that the gcc docs are wrong, and read/write
operands can be used on memory as well as registers.  I would hate for
a future gcc to be more strict about this check and break the spinlock
code.  It is interesting that the next paragraph has no such
restriction.

  You may, as an alternative, logically split its function into two
  separate operands, one input operand and one write-only output
  operand.  The connection between them is expressed by constraints
  which say they need to be in the same location when the instruction
  executes.  You can use the same C expression for both operands, or
  different expressions.  For example, here we write the (fictitious)
  `combine' instruction with `bar' as its read-only source operand and
  `foo' as its read-write destination:

     asm ("combine %2,%0" : "=r" (foo) : "0" (foo), "g" (bar));

  The constraint `"0"' for operand 1 says that it must occupy the same
  location as operand 0.  A number in constraint is allowed only in an
  input operand and it must refer to an output operand.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 22:50                                     ` Ralf Baechle
@ 2006-07-09  3:09                                       ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-09  3:09 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Sat, 8 Jul 2006, Ralf Baechle wrote:
> 
> I tried the same on MIPS, for lazyness sake at first only in atomic.h.  With
> gcc 3.3 the code size is exactly the same with both "=m" and "+m", so I
> didn't look into details of the generated code.  With gcc 4.1 "+m" results
> in a size increase of about 1K for the ip27_defconfig kernel.  For example:
> 
> <unlock_kernel>:
>        df830000        ld      v1,0(gp)
>        8c620028        lw      v0,40(v1)
>        04400014        bltz    v0,a80000000029944c <unlock_kernel+0x5c>
>        00000000        nop
>        2442ffff        subiu   v0,v0,1
>        ac620028        sw      v0,40(v1)	# current->lock_depth
>        8c630028        lw      v1,40(v1)	# current->lock_depth
>        0461000b        bgez    v1,a80000000029943c <unlock_kernel+0x4c>
> 
> The poinless load isn't generated with "=m".  The interesting thing is
> that in all the instances of bloat I looked at it was actually happening
> not as part of the asm statement itself, so maybe gcc's reload is getting
> a little confused.

Indeed, that looks like gcc confusion, because that pointless load is 
literally just re-loading the "task->lock_depth" that is all part of 
perfecly normal C code _before_ the inline assembly even is reached.

Of course, if a "+m" causes gcc confusion, that's bad in itself, and 
indicates that "=m" + "m" may actually be preferable due to some internal 
gcc buglet.

I do _not_ see the same extra load on ppc64 or indeed x86 (gcc-4.1.1 in 
both cases), so there seems to be something MIPS-specific here.

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  3:07                                     ` Keith Owens
@ 2006-07-09  3:29                                       ` Linus Torvalds
  2006-07-09  3:43                                         ` Keith Owens
  2006-07-09  3:58                                       ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Linus Torvalds @ 2006-07-09  3:29 UTC (permalink / raw)
  To: Keith Owens; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan



On Sun, 9 Jul 2006, Keith Owens wrote:
> 
> This disagrees with the gcc (4.1.0) docs.  info --index-search='Extended Asm' gcc
> 
>   The ordinary output operands must be write-only; GCC will assume
>   that the values in these operands before the instruction are dead and
>   need not be generated.  Extended asm supports input-output or
>   read-write operands.  Use the constraint character `+' to indicate
>   such an operand and list it with the output operands.  You should
>   only use read-write operands when the constraints for the operand (or
>   the operand in which only some of the bits are to be changed) allow a
>   register.

I'm fairly sure the docs are outdated (but may well be correct for older 
gcc versions - as I already discussed elsewhere, that "+" thing was not 
historically useful).

We've been using "+m" for some time in the kernel on several 
architectures.

git aficionados can do

	git grep -1 '"+m"' v2.6.17

to see the pre-existing usage of this (most of them go back a lot further, 
although some of them are newer - the <asm-i386/bitops.h> ones were added 
in January.

So if "+m" didn't work, we've been in trouble for at least the last year.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  3:29                                       ` Linus Torvalds
@ 2006-07-09  3:43                                         ` Keith Owens
  0 siblings, 0 replies; 133+ messages in thread
From: Keith Owens @ 2006-07-09  3:43 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, arjan

Linus Torvalds (on Sat, 8 Jul 2006 20:29:12 -0700 (PDT)) wrote:
>
>
>On Sun, 9 Jul 2006, Keith Owens wrote:
>> 
>> This disagrees with the gcc (4.1.0) docs.  info --index-search='Extended Asm' gcc
>> 
>>   The ordinary output operands must be write-only; GCC will assume
>>   that the values in these operands before the instruction are dead and
>>   need not be generated.  Extended asm supports input-output or
>>   read-write operands.  Use the constraint character `+' to indicate
>>   such an operand and list it with the output operands.  You should
>>   only use read-write operands when the constraints for the operand (or
>>   the operand in which only some of the bits are to be changed) allow a
>>   register.
>
>I'm fairly sure the docs are outdated (but may well be correct for older 
>gcc versions - as I already discussed elsewhere, that "+" thing was not 
>historically useful).
>
>We've been using "+m" for some time in the kernel on several 
>architectures.
>
>git aficionados can do
>
>	git grep -1 '"+m"' v2.6.17
>
>to see the pre-existing usage of this (most of them go back a lot further, 
>although some of them are newer - the <asm-i386/bitops.h> ones were added 
>in January.
>
>So if "+m" didn't work, we've been in trouble for at least the last year.

The oldest gcc I can access quickly is 3.2.3 (May 2005) which has
significantly different wording.

  The ordinary output operands must be write-only; GCC will assume that
  the values in these operands before the instruction are dead and need
  not be generated.  Extended asm supports input-output or read-write
  operands.  Use the constraint character `+' to indicate such an
  operand and list it with the output operands.

  When the constraints for the read-write operand (or the operand in
  which only some of the bits are to be changed) allows a register, you
  may, as an alternative, logically split its function into two
  separate operands, one input operand and one write-only output
  operand.  The connection between them is expressed by constraints
  which say they need to be in the same location when the instruction
  executes.

In 3.2.3, the '+' form allows any constraint, only the split notation
requires that the operand be in a register.  In 4.1.0, the docs now say
that the '+' form requires a register.  I suspect that you are right
and the gcc docs are incorrect, but it does not hurt to check.  Would
anybody from the gcc team like to pipe up and give an opinion?


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  3:07                                     ` Keith Owens
  2006-07-09  3:29                                       ` Linus Torvalds
@ 2006-07-09  3:58                                       ` Linus Torvalds
  2006-07-09  6:13                                         ` David Miller
  2006-07-09 14:28                                         ` Roman Zippel
  1 sibling, 2 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-09  3:58 UTC (permalink / raw)
  To: Keith Owens, Andi Kleen, Jan Hubicka, David S. Miller, Richard Henderson
  Cc: Ingo Molnar, Andrew Morton, Linux Kernel Mailing List, arjan



On Sun, 9 Jul 2006, Keith Owens wrote:
> 
>   			 "... Extended asm supports input-output or
>   read-write operands.  Use the constraint character `+' to indicate
>   such an operand and list it with the output operands.  You should
>   only use read-write operands when the constraints for the operand (or
>   the operand in which only some of the bits are to be changed) allow a
>   register."

Btw, gcc-4.1.1 docs seem to also have this language, although when you 
actually go to the "Constraint Modifier Characters" section, that thing 
doesn't actually say anything about "only for registers".

It would be good to have the gcc docs fixed. As mentioned, we've been 
using "+m" for at least a year (most of our current "+m" usage was there 
in 2.6.13), and some of those uses have actually been added by people that 
are at least active on the gcc development lists (eg Andi Kleen).

But let's add a few more people who are more deeply involved with gcc. 
Jan? Richard? Davem? Who would be the right person to check this out?

We can certainly write

	...
	:"=m" (*ptr)
	:"m" (*ptr)
	...

instead of the much simpler

	:"+m" (*ptr)

but we've been using that "+m" format for a long time already (and I 
_think_ we did so at the suggestion of gcc people), and it would be much 
better if the gcc documentation was just fixed here.

			Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 10:18                                                 ` Avi Kivity
  2006-07-08 10:28                                                   ` Thomas Gleixner
@ 2006-07-09  4:19                                                   ` Benjamin Herrenschmidt
  2006-07-09 12:47                                                     ` Avi Kivity
  1 sibling, 1 reply; 133+ messages in thread
From: Benjamin Herrenschmidt @ 2006-07-09  4:19 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Arjan van de Ven, Linus Torvalds, Mark Lord,
	linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel


> I didn't suggest the compiler could or should do it, just that it would 
> be possible (for the _user_) to write portable ISO C code to access PCI 
> mmio registers, if volatile's implementation serialized access.

That isn't possible neither. How to actually serialize access can
require different set of primitives depending on the storage class of
the memory you are accessing, which the C compiler has 0 knowledge
about. (For example, cacheable storage vs. write-through vs.
non-cacheable guarded vs. non-cacheable non-guarded on powerpc, there
are different issues on other architectures).

> With the current implementation of volatile in gcc, it is impossible - 
> you need to resort to inline assembly for some architectures, which is 
> not an ISO C feature.

ISO C has never been about writing device drivers. There is simply no
choice here. You need an atchitecture specific set of accessors. If you
want portable code, then pick a library like libpci and make sure it
contains all you need on all the architectures you need. Then write
portable code on top of it. 

> And I'm not suggesting that it would be a good idea to use volatile even 
> if it was corrected - it would have to take a worst-case approach and 
> thus would generate very bad code.

So what is the point ?

Ben.



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  3:58                                       ` Linus Torvalds
@ 2006-07-09  6:13                                         ` David Miller
  2006-07-09 14:28                                         ` Roman Zippel
  1 sibling, 0 replies; 133+ messages in thread
From: David Miller @ 2006-07-09  6:13 UTC (permalink / raw)
  To: torvalds; +Cc: kaos, ak, jh, rth, mingo, akpm, linux-kernel, arjan

From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 8 Jul 2006 20:58:37 -0700 (PDT)

> We can certainly write
> 
> 	...
> 	:"=m" (*ptr)
> 	:"m" (*ptr)
> 	...
> 
> instead of the much simpler
> 
> 	:"+m" (*ptr)
> 
> but we've been using that "+m" format for a long time already (and I 
> _think_ we did so at the suggestion of gcc people), and it would be much 
> better if the gcc documentation was just fixed here.

I honestly don't know why that language is there about '+' saying to
only use it when the constraints allow a register.  Perhaps there are
some implications for reload.

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-08 20:40                                                         ` Chase Venters
  2006-07-08 20:47                                                           ` Chase Venters
@ 2006-07-09 10:57                                                           ` Krzysztof Halasa
  1 sibling, 0 replies; 133+ messages in thread
From: Krzysztof Halasa @ 2006-07-09 10:57 UTC (permalink / raw)
  To: Chase Venters
  Cc: linux-os \\(Dick Johnson\\),
	Linus Torvalds, Ingo Molnar, Andrew Morton, Linux kernel, arjan

Chase Venters <chase.venters@clientec.com> writes:

>> Sure, but a barrier alone isn't enough. You have to use assembler and
>> it's beyond scope of C volatile.
>
> Right, which is why volatile is wrong.

In this case (and not only this). Of course. But not always.

> You need the barrier for both the CPU and the compiler.

For spinlocks, yes.

For other things... Sometimes you need a barrier for the compiler
only. Sometimes you don't need any general barrier, you only need
to make sure a single variable isn't being cached (by the compiler).
That's what volatile is for.

Saying that volatile is always wrong looks to me like saying that goto
is always wrong :-) And yes, there are people who say that every single
goto in the universe is wrong.
-- 
Krzysztof Halasa

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  4:19                                                   ` Benjamin Herrenschmidt
@ 2006-07-09 12:47                                                     ` Avi Kivity
  2006-07-09 19:16                                                       ` David Schwartz
  0 siblings, 1 reply; 133+ messages in thread
From: Avi Kivity @ 2006-07-09 12:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Arjan van de Ven, Linus Torvalds, Mark Lord,
	linux-os (Dick Johnson),
	Ingo Molnar, Andrew Morton, linux-kernel

Benjamin Herrenschmidt wrote:
>
> > I didn't suggest the compiler could or should do it, just that it would
> > be possible (for the _user_) to write portable ISO C code to access PCI
> > mmio registers, if volatile's implementation serialized access.
>
> That isn't possible neither. How to actually serialize access can
> require different set of primitives depending on the storage class of
> the memory you are accessing, which the C compiler has 0 knowledge
> about. (For example, cacheable storage vs. write-through vs.
> non-cacheable guarded vs. non-cacheable non-guarded on powerpc, there
> are different issues on other architectures).
>

Okay.  I guess this limits portable volatile to main memory.  Thanks for 
the clarification.

> > With the current implementation of volatile in gcc, it is impossible -
> > you need to resort to inline assembly for some architectures, which is
> > not an ISO C feature.
>
> ISO C has never been about writing device drivers. There is simply no
> choice here. You need an atchitecture specific set of accessors. If you
> want portable code, then pick a library like libpci and make sure it
> contains all you need on all the architectures you need. Then write
> portable code on top of it.
>

Indeed, I see no other way now.

> > And I'm not suggesting that it would be a good idea to use volatile 
> even
> > if it was corrected - it would have to take a worst-case approach and
> > thus would generate very bad code.
>
> So what is the point ?
>

Volatile is useful for non device driver work, for example VJ-style 
channels.  A portable volatile can help to code such things in a 
compiler-neutral and platform-neutral way.  Linux doesn't care about 
compiler neutrality, being coded in GNU C, and about platform 
neutrality, having a per-arch abstraction layer, but other programs may 
wish to run on multiple compilers and multiple platforms without 
per-platform glue layers.

Adding barriers to volatile can take it from dangerously useless to 
somewhat* useful.  Not for Linux, but other projects do exist.

* possibly barriers are still required, if volatile data points to 
non-volatile data.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09  3:58                                       ` Linus Torvalds
  2006-07-09  6:13                                         ` David Miller
@ 2006-07-09 14:28                                         ` Roman Zippel
  2006-07-09 15:27                                           ` Linus Torvalds
  1 sibling, 1 reply; 133+ messages in thread
From: Roman Zippel @ 2006-07-09 14:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Keith Owens, Andi Kleen, Jan Hubicka, David S. Miller,
	Richard Henderson, Ingo Molnar, Andrew Morton,
	Linux Kernel Mailing List, arjan

Hi,

On Sat, 8 Jul 2006, Linus Torvalds wrote:

> but we've been using that "+m" format for a long time already (and I 
> _think_ we did so at the suggestion of gcc people), and it would be much 
> better if the gcc documentation was just fixed here.

IIRC early 4.0 had problems with "+m" and did what the documentation says, 
but that got quickly fixed.
Here is the old thread http://marc.theaimsgroup.com/?t=108384517900001&r=1&w=2

bye, Roman

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09 14:28                                         ` Roman Zippel
@ 2006-07-09 15:27                                           ` Linus Torvalds
  0 siblings, 0 replies; 133+ messages in thread
From: Linus Torvalds @ 2006-07-09 15:27 UTC (permalink / raw)
  To: Roman Zippel
  Cc: Keith Owens, Andi Kleen, Jan Hubicka, David S. Miller,
	Richard Henderson, Ingo Molnar, Andrew Morton,
	Linux Kernel Mailing List, arjan



On Sun, 9 Jul 2006, Roman Zippel wrote:
> 
> On Sat, 8 Jul 2006, Linus Torvalds wrote:
> > 
> > but we've been using that "+m" format for a long time already (and I 
> > _think_ we did so at the suggestion of gcc people), and it would be much 
> > better if the gcc documentation was just fixed here.
> 
> IIRC early 4.0 had problems with "+m" and did what the documentation says, 
> but that got quickly fixed.
> Here is the old thread http://marc.theaimsgroup.com/?t=108384517900001&r=1&w=2

Thanks. That one implies that we had some strange warnings (but it 
apparently still worked) with 3.4, and it's since gotten fixed. Goodie. 

I'll leave the "+m" alone then, both the old and the new ones.

		Linus

^ permalink raw reply	[flat|nested] 133+ messages in thread

* RE: [patch] spinlocks: remove 'volatile'
  2006-07-09 12:47                                                     ` Avi Kivity
@ 2006-07-09 19:16                                                       ` David Schwartz
  2006-07-09 19:51                                                         ` Theodore Tso
  0 siblings, 1 reply; 133+ messages in thread
From: David Schwartz @ 2006-07-09 19:16 UTC (permalink / raw)
  To: Linux-Kernel@Vger. Kernel. Org


> Volatile is useful for non device driver work, for example VJ-style
> channels.  A portable volatile can help to code such things in a
> compiler-neutral and platform-neutral way.  Linux doesn't care about
> compiler neutrality, being coded in GNU C, and about platform
> neutrality, having a per-arch abstraction layer, but other programs may
> wish to run on multiple compilers and multiple platforms without
> per-platform glue layers.

	There is a portable volatile, it's called 'pthread_mutex_lock'. It allows
you to code such things in a compiler-neutral and platform-neutral way. You
don't have to worry about what the compiler might do, what the hardware
might do, what atomic operations the CPU supports, or anything like that.
The atomicity issues I've mentioned in my other posts make any attempt at
creating a 'portable volatile' for shared memory more or less doomed from
the start.

	DS



^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [patch] spinlocks: remove 'volatile'
  2006-07-09 19:16                                                       ` David Schwartz
@ 2006-07-09 19:51                                                         ` Theodore Tso
  2006-07-09 20:40                                                           ` [OT] 'volatile' in userspace Rutger Nijlunsing
  0 siblings, 1 reply; 133+ messages in thread
From: Theodore Tso @ 2006-07-09 19:51 UTC (permalink / raw)
  To: David Schwartz; +Cc: Linux-Kernel@Vger. Kernel. Org

On Sun, Jul 09, 2006 at 12:16:15PM -0700, David Schwartz wrote:
> > Volatile is useful for non device driver work, for example VJ-style
> > channels.  A portable volatile can help to code such things in a
> > compiler-neutral and platform-neutral way.  Linux doesn't care about
> > compiler neutrality, being coded in GNU C, and about platform
> > neutrality, having a per-arch abstraction layer, but other programs may
> > wish to run on multiple compilers and multiple platforms without
> > per-platform glue layers.
> 
> 	There is a portable volatile, it's called 'pthread_mutex_lock'. It allows
> you to code such things in a compiler-neutral and platform-neutral way. You
> don't have to worry about what the compiler might do, what the hardware
> might do, what atomic operations the CPU supports, or anything like that.
> The atomicity issues I've mentioned in my other posts make any attempt at
> creating a 'portable volatile' for shared memory more or less doomed from
> the start.

The other thing to add here is that if you're outside of the kernel,
you *don't* want to be implementing your own spinlock if for no other
reason than it's a performance disaster.  The scheduler doesn't know
that you are spinning waiting for a lock, so you will be scheduled and
burning cpu time (and energy, and heat budget) while waiting for the
the lock.  I once spent over a week on-site at a customer complaining
about Linux performance problems, and it turned out the reason why was
because they thought they were being "smart" by implementing their own
spinlock in userspace.  Bad bad bad.

So if a userspace progam ever uses volatile, it's almost certainly a
bug, one way or another.

						- Ted

^ permalink raw reply	[flat|nested] 133+ messages in thread

* [OT] 'volatile' in userspace
  2006-07-09 19:51                                                         ` Theodore Tso
@ 2006-07-09 20:40                                                           ` Rutger Nijlunsing
  2006-07-10  3:42                                                             ` Theodore Tso
  0 siblings, 1 reply; 133+ messages in thread
From: Rutger Nijlunsing @ 2006-07-09 20:40 UTC (permalink / raw)
  To: Theodore Tso, David Schwartz, Linux-Kernel@Vger. Kernel. Org

On Sun, Jul 09, 2006 at 03:51:14PM -0400, Theodore Tso wrote:
> On Sun, Jul 09, 2006 at 12:16:15PM -0700, David Schwartz wrote:
> > > Volatile is useful for non device driver work, for example VJ-style
> > > channels.  A portable volatile can help to code such things in a
> > > compiler-neutral and platform-neutral way.  Linux doesn't care about
> > > compiler neutrality, being coded in GNU C, and about platform
> > > neutrality, having a per-arch abstraction layer, but other programs may
> > > wish to run on multiple compilers and multiple platforms without
> > > per-platform glue layers.
> > 
> > 	There is a portable volatile, it's called 'pthread_mutex_lock'. It allows
> > you to code such things in a compiler-neutral and platform-neutral way. You
> > don't have to worry about what the compiler might do, what the hardware
> > might do, what atomic operations the CPU supports, or anything like that.
> > The atomicity issues I've mentioned in my other posts make any attempt at
> > creating a 'portable volatile' for shared memory more or less doomed from
> > the start.
> 
> The other thing to add here is that if you're outside of the kernel,
> you *don't* want to be implementing your own spinlock if for no other
> reason than it's a performance disaster.  The scheduler doesn't know
> that you are spinning waiting for a lock, so you will be scheduled and
> burning cpu time (and energy, and heat budget) while waiting for the
> the lock.  I once spent over a week on-site at a customer complaining
> about Linux performance problems, and it turned out the reason why was
> because they thought they were being "smart" by implementing their own
> spinlock in userspace.  Bad bad bad.
> 
> So if a userspace progam ever uses volatile, it's almost certainly a
> bug, one way or another.

Without 'volatile' and disabling optimizations altogether, how do we
prevent gcc from optimizing away pointers? As can be seen on
http://wiki.rubygarden.org/Ruby/page/show/GCAndExtensions (at
'Compiler over-optimisations and "volatile"'), volatile is used to
prevent a specific type of optimization. This is because of the
garbage collector, which scans the stack and registers to find
referenced objects. So you don't want local variables containing
references to objects optimized away.

While it is again a side-effect of volatile which is being (mis)used,
it is sometimes the only (easy & known) way to achieve this intented
effect...

-- 
Rutger Nijlunsing ---------------------------------- eludias ed dse.nl
never attribute to a conspiracy which can be explained by incompetence
----------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-09 20:40                                                           ` [OT] 'volatile' in userspace Rutger Nijlunsing
@ 2006-07-10  3:42                                                             ` Theodore Tso
  2006-07-10 17:00                                                               ` Joshua Hudson
  0 siblings, 1 reply; 133+ messages in thread
From: Theodore Tso @ 2006-07-10  3:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: David Schwartz, Linux-Kernel@Vger. Kernel. Org

On Sun, Jul 09, 2006 at 10:40:06PM +0200, Rutger Nijlunsing wrote:
> > So if a userspace progam ever uses volatile, it's almost certainly a
> > bug, one way or another.
> 
> Without 'volatile' and disabling optimizations altogether, how do we
> prevent gcc from optimizing away pointers? As can be seen on
> http://wiki.rubygarden.org/Ruby/page/show/GCAndExtensions (at
> 'Compiler over-optimisations and "volatile"'), volatile is used to
> prevent a specific type of optimization. This is because of the
> garbage collector, which scans the stack and registers to find
> referenced objects. So you don't want local variables containing
> references to objects optimized away.

Well, if you look at the Wiki, it admits that this is a bug:

	(Warning: This section is not strictly correct. volatile
	instructs the C compiler that it should not do certain
	optimisations to code that accesses the variable - the value
	cannot be stored in a register and must be read from memory
	each time it is accessed. It is still perfectly legal for the
	compiler to overwrite the VALUE's stack location with other
	data, if the compiler decides there are no further uses of the
	VALUE. Fortunately, a side effect of volatile in common C
	compilers like GCC and Visual Studio is to prevent the
	dangerous optimisation described above. The Ruby source itself
	uses volatile for this purpose, so it is an "accepted hack"
	for Ruby C extensions.)

"Accepted hack" is basically another way of saying bug.  Some day GCC
will be made smart enough to optimize the use of str on the stack, and
then the Ruby will be screwed.  (Mental note to self: don't use Ruby
in any future project.)

This is really an architectural bug.  RSTRING() should have explicitly
bumped a use pointer, which the C code should have then explicitly
decremented, to protect the underlying pointer from getting GC'ed
unexpectedly.  It would have made RSTRING() more difficult to use, but
that's the price you pay when you try to graft a garbage-collected
paradigm into C code, since the C language really was never designed
for it.

So this would tend to confirm the rule of thumb: use of "volatile" in
a userspace progam tends to indicate a bug.  

						- Ted

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10  3:42                                                             ` Theodore Tso
@ 2006-07-10 17:00                                                               ` Joshua Hudson
  2006-07-10 17:54                                                                 ` Nick Piggin
  2006-07-10 18:54                                                                 ` Jeff Dike
  0 siblings, 2 replies; 133+ messages in thread
From: Joshua Hudson @ 2006-07-10 17:00 UTC (permalink / raw)
  To: linux-kernel

>
> So this would tend to confirm the rule of thumb: use of "volatile" in
> a userspace progam tends to indicate a bug.
>
>                                                 - Ted

No. vfork(), setjmp(), signal().

Yes, I use vfork. So far, the only way I have found for the parent to
know whether or not the child's exec() failed is this way:

volatile int failed;
pid_t pid;

failed = 0;
if (0 == (pid = vfork())) {
   execve(argv[0], argv, envp);
   failed = errno;
   _exit(0);
}
if (pid < 0) {
   /* can't fork */
}
if (failed) {
   /* wait for pid (clean up zombie) */
   errno = failed;
   /* can't exec: update state */
}

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10 17:00                                                               ` Joshua Hudson
@ 2006-07-10 17:54                                                                 ` Nick Piggin
  2006-07-11  7:48                                                                   ` Jan Engelhardt
  2006-07-10 18:54                                                                 ` Jeff Dike
  1 sibling, 1 reply; 133+ messages in thread
From: Nick Piggin @ 2006-07-10 17:54 UTC (permalink / raw)
  To: Joshua Hudson; +Cc: linux-kernel

Joshua Hudson wrote:
>>
>> So this would tend to confirm the rule of thumb: use of "volatile" in
>> a userspace progam tends to indicate a bug.
>>
>>                                                 - Ted
> 
> 
> No. vfork(), setjmp(), signal().
> 
> Yes, I use vfork. So far, the only way I have found for the parent to
> know whether or not the child's exec() failed is this way:
> 
> volatile int failed;
> pid_t pid;
> 
> failed = 0;
> if (0 == (pid = vfork())) {
>   execve(argv[0], argv, envp);
>   failed = errno;
>   _exit(0);
> }
> if (pid < 0) {
>   /* can't fork */
> }
> if (failed) {
>   /* wait for pid (clean up zombie) */
>   errno = failed;
>   /* can't exec: update state */
> }

May not be portable because you're apparently not supposed to assume
anything about the memory sharing semantics (eg. it may share memory
or it may not -- AFAIK if your code doesn't work correctly after
replacing vfork with fork, then it is buggy).

What's wrong with _exit(exec() == -1 ? 0 : errno);
and picking up the status with wait(2) ?

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10 17:00                                                               ` Joshua Hudson
  2006-07-10 17:54                                                                 ` Nick Piggin
@ 2006-07-10 18:54                                                                 ` Jeff Dike
  2006-07-10 20:09                                                                   ` Philippe Troin
  1 sibling, 1 reply; 133+ messages in thread
From: Jeff Dike @ 2006-07-10 18:54 UTC (permalink / raw)
  To: Joshua Hudson; +Cc: linux-kernel

On Mon, Jul 10, 2006 at 10:00:37AM -0700, Joshua Hudson wrote:
> Yes, I use vfork. So far, the only way I have found for the parent to
> know whether or not the child's exec() failed is this way:

What I do in UML is make a pipe between the parent and child, set it
CLOEXEC, and write a byte down it if the exec fails.

The parent reads it - if it gets no bytes, the exec succeeded, if it
gets one byte, it failed.

child -
	execvp(argv[0], argv);
	errval = errno;
	write(data->fd, &errval, sizeof(errval));

parent -
	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
	fcntl(fds[1], F_SETFD, flag);
	pid = clone(child, NULL, SIGCHLD, NULL);
	if(pid < 0){
		...
	}

	close(fds[1]);

	/* Read the errno value from the child, if the exec failed, or get 0 if
	 * the exec succeeded because the pipe fd was set as close-on-exec.
	 */
	n = read(fds[0], &ret, sizeof(ret));
	if (n < 0) {
	} else if(n != 0){
		/* exec failed */
	} else {
		/* exec succeeded */
	}

				Jeff

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10 18:54                                                                 ` Jeff Dike
@ 2006-07-10 20:09                                                                   ` Philippe Troin
  2006-07-10 20:52                                                                     ` Jeff Dike
  0 siblings, 1 reply; 133+ messages in thread
From: Philippe Troin @ 2006-07-10 20:09 UTC (permalink / raw)
  To: Jeff Dike; +Cc: Joshua Hudson, linux-kernel

Jeff Dike <jdike@addtoit.com> writes:

> On Mon, Jul 10, 2006 at 10:00:37AM -0700, Joshua Hudson wrote:
> > Yes, I use vfork. So far, the only way I have found for the parent to
> > know whether or not the child's exec() failed is this way:
> 
> What I do in UML is make a pipe between the parent and child, set it
> CLOEXEC, and write a byte down it if the exec fails.
> 
> The parent reads it - if it gets no bytes, the exec succeeded, if it
> gets one byte, it failed.

I usually do the same, except that I write the errno in case of
failure.  This way the parent knows *why* exec failed ;-)

Phil.


^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10 20:09                                                                   ` Philippe Troin
@ 2006-07-10 20:52                                                                     ` Jeff Dike
  0 siblings, 0 replies; 133+ messages in thread
From: Jeff Dike @ 2006-07-10 20:52 UTC (permalink / raw)
  To: Philippe Troin; +Cc: Joshua Hudson, linux-kernel

On Mon, Jul 10, 2006 at 01:09:47PM -0700, Philippe Troin wrote:
> I usually do the same, except that I write the errno in case of
> failure.  This way the parent knows *why* exec failed ;-)

If you look at the code I posted, it does exactly that.

				Jeff

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-10 17:54                                                                 ` Nick Piggin
@ 2006-07-11  7:48                                                                   ` Jan Engelhardt
  2006-07-11 11:53                                                                     ` Nick Piggin
  0 siblings, 1 reply; 133+ messages in thread
From: Jan Engelhardt @ 2006-07-11  7:48 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Joshua Hudson, linux-kernel

>
> What's wrong with _exit(exec() == -1 ? 0 : errno);
> and picking up the status with wait(2) ?
>
The exec'd application may return regular error codes, which would 
interfere. IIRC /usr/sbin/useradd has different exit codes depending on 
what failed (providing some option, failure to create account, failure to 
create home dir, etc.). Now if you exit(errno) instead, you have an 
overlap.
And your code is somewhat wrong. Given that exec() would stand for 
execve(someprogram_and_args_here), if it returned -1 you would return 0, 
indicating success. Can't be. And if exec() does not return -1, which it 
never should, you return errno, which never reaches anyone.


Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-11  7:48                                                                   ` Jan Engelhardt
@ 2006-07-11 11:53                                                                     ` Nick Piggin
  2006-07-11 19:09                                                                       ` Björn Steinbrink
  2006-07-11 20:55                                                                       ` Jeff Dike
  0 siblings, 2 replies; 133+ messages in thread
From: Nick Piggin @ 2006-07-11 11:53 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Joshua Hudson, linux-kernel

Jan Engelhardt wrote:
>>What's wrong with _exit(exec() == -1 ? 0 : errno);
>>and picking up the status with wait(2) ?
>>
> 
> The exec'd application may return regular error codes, which would 
> interfere. IIRC /usr/sbin/useradd has different exit codes depending on 
> what failed (providing some option, failure to create account, failure to 
> create home dir, etc.). Now if you exit(errno) instead, you have an 
> overlap.

You're right. Maybe you could return -ve or with a high bit set,
but I guess you may not know what the app will return.

But I don't see how the volatile or pipe solutions are any better
though: it would seem that both result in undefined behaviour
according to my vfork man page. At least the wait() solution is
defined (and workable, if you know what the target might return).

> And your code is somewhat wrong. Given that exec() would stand for 
> execve(someprogram_and_args_here), if it returned -1 you would return 0, 
> indicating success. Can't be. And if exec() does not return -1, which it 
> never should, you return errno, which never reaches anyone.

Yeah, thinko.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-11 11:53                                                                     ` Nick Piggin
@ 2006-07-11 19:09                                                                       ` Björn Steinbrink
  2006-07-11 20:55                                                                       ` Jeff Dike
  1 sibling, 0 replies; 133+ messages in thread
From: Björn Steinbrink @ 2006-07-11 19:09 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Jan Engelhardt, Joshua Hudson, linux-kernel

On 2006.07.11 21:53:53 +1000, Nick Piggin wrote:
> Jan Engelhardt wrote:
> >>What's wrong with _exit(exec() == -1 ? 0 : errno);
> >>and picking up the status with wait(2) ?
> >>
> >
> >The exec'd application may return regular error codes, which would 
> >interfere. IIRC /usr/sbin/useradd has different exit codes depending on 
> >what failed (providing some option, failure to create account, failure to 
> >create home dir, etc.). Now if you exit(errno) instead, you have an 
> >overlap.
> 
> You're right. Maybe you could return -ve or with a high bit set,
> but I guess you may not know what the app will return.
> 
> But I don't see how the volatile or pipe solutions are any better
> though: it would seem that both result in undefined behaviour
> according to my vfork man page. At least the wait() solution is
> defined (and workable, if you know what the target might return).

The volatile solution is buggy according to the vfork man page, but the
pipe solution is fine, it doesn't use vfork at all ;)

Björn

^ permalink raw reply	[flat|nested] 133+ messages in thread

* Re: [OT] 'volatile' in userspace
  2006-07-11 11:53                                                                     ` Nick Piggin
  2006-07-11 19:09                                                                       ` Björn Steinbrink
@ 2006-07-11 20:55                                                                       ` Jeff Dike
  1 sibling, 0 replies; 133+ messages in thread
From: Jeff Dike @ 2006-07-11 20:55 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Jan Engelhardt, Joshua Hudson, linux-kernel

On Tue, Jul 11, 2006 at 09:53:53PM +1000, Nick Piggin wrote:
> But I don't see how the volatile or pipe solutions are any better
> though: it would seem that both result in undefined behaviour
> according to my vfork man page.

What undefined behavior does the pipe solution result in, considering
it doesn't use vfork?

				Jeff

^ permalink raw reply	[flat|nested] 133+ messages in thread

end of thread, other threads:[~2006-07-11 20:57 UTC | newest]

Thread overview: 133+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-05  8:49 [patch] uninline init_waitqueue_*() functions Ingo Molnar
2006-07-05  9:31 ` Andrew Morton
2006-07-05  9:32   ` Ingo Molnar
2006-07-05  9:53     ` Andrew Morton
2006-07-05 10:26       ` Ingo Molnar
2006-07-05 11:30         ` Ingo Molnar
2006-07-05 11:46           ` Ingo Molnar
2006-07-05 17:10             ` Andrew Morton
2006-07-05 19:35               ` Ingo Molnar
2006-07-05 20:18                 ` Andrew Morton
2006-07-05 20:36                   ` Linus Torvalds
2006-07-05 20:47                     ` Ingo Molnar
2006-07-05 21:15                       ` Ingo Molnar
2006-07-05 21:21                       ` Linus Torvalds
2006-07-05 21:45                         ` Ingo Molnar
2006-07-05 21:58                           ` Randy.Dunlap
2006-07-05 22:00                             ` Ingo Molnar
2006-07-05 22:10                               ` Randy.Dunlap
2006-07-05 22:27                                 ` Ingo Molnar
2006-07-05 23:15                               ` Adrian Bunk
2006-07-05 22:09                           ` Linus Torvalds
2006-07-05 22:30                             ` Ingo Molnar
2006-07-05 23:11                             ` Linus Torvalds
2006-07-06  8:16                               ` [patch] spinlocks: remove 'volatile' Ingo Molnar
2006-07-06  8:23                                 ` Ingo Molnar
2006-07-06  9:27                                 ` Heiko Carstens
2006-07-06  9:34                                   ` Arjan van de Ven
2006-07-06 11:59                                 ` linux-os (Dick Johnson)
2006-07-06 12:01                                   ` Arjan van de Ven
2006-07-06 12:29                                     ` linux-os (Dick Johnson)
2006-07-06 12:39                                       ` Arjan van de Ven
2006-07-06 13:39                                         ` J.A. Magallón
2006-07-06 13:43                                           ` Arjan van de Ven
2006-07-06 14:05                                           ` Chase Venters
2006-07-06 14:26                                           ` Andreas Schwab
2006-07-06 16:40                                       ` Nick Piggin
2006-07-06 23:19                                       ` David Schwartz
2006-07-06 18:15                                     ` Mark Lord
2006-07-06 19:15                                       ` Linus Torvalds
2006-07-06 19:33                                         ` Chris Friesen
2006-07-06 19:37                                           ` Mark Lord
2006-07-06 20:28                                             ` Chris Friesen
2006-07-06 20:32                                               ` Linus Torvalds
2006-07-06 19:38                                           ` Arjan van de Ven
2006-07-06 19:41                                           ` Måns Rullgård
2006-07-06 19:42                                           ` Jeff Garzik
2006-07-06 19:58                                           ` Linus Torvalds
2006-07-06 20:27                                             ` Mark Lord
2006-07-06 20:40                                             ` Chris Friesen
2006-07-06 21:00                                               ` Linus Torvalds
2006-07-08  8:40                                         ` Avi Kivity
2006-07-08  8:51                                           ` Arjan van de Ven
2006-07-08  9:20                                             ` Avi Kivity
2006-07-08  9:51                                               ` Arjan van de Ven
2006-07-08 10:18                                                 ` Avi Kivity
2006-07-08 10:28                                                   ` Thomas Gleixner
2006-07-09  4:19                                                   ` Benjamin Herrenschmidt
2006-07-09 12:47                                                     ` Avi Kivity
2006-07-09 19:16                                                       ` David Schwartz
2006-07-09 19:51                                                         ` Theodore Tso
2006-07-09 20:40                                                           ` [OT] 'volatile' in userspace Rutger Nijlunsing
2006-07-10  3:42                                                             ` Theodore Tso
2006-07-10 17:00                                                               ` Joshua Hudson
2006-07-10 17:54                                                                 ` Nick Piggin
2006-07-11  7:48                                                                   ` Jan Engelhardt
2006-07-11 11:53                                                                     ` Nick Piggin
2006-07-11 19:09                                                                       ` Björn Steinbrink
2006-07-11 20:55                                                                       ` Jeff Dike
2006-07-10 18:54                                                                 ` Jeff Dike
2006-07-10 20:09                                                                   ` Philippe Troin
2006-07-10 20:52                                                                     ` Jeff Dike
2006-07-06 16:07                                   ` [patch] spinlocks: remove 'volatile' Linus Torvalds
2006-07-06 16:13                                     ` Linus Torvalds
2006-07-06 17:04                                       ` Jeff Garzik
2006-07-06 17:52                                       ` linux-os (Dick Johnson)
2006-07-06 18:00                                         ` Linus Torvalds
2006-07-06 21:02                                           ` J.A. Magallón
2006-07-06 21:12                                             ` Linus Torvalds
2006-07-06 18:10                                         ` Michael Buesch
2006-07-06 18:16                                         ` Chase Venters
2006-07-07 18:16                                         ` Krzysztof Halasa
2006-07-07 19:51                                           ` linux-os (Dick Johnson)
2006-07-07 20:25                                             ` Linus Torvalds
2006-07-07 21:22                                               ` linux-os (Dick Johnson)
2006-07-07 21:48                                                 ` Chase Venters
2006-07-08 10:00                                                   ` Krzysztof Halasa
2006-07-08 13:41                                                     ` Chase Venters
2006-07-08 20:09                                                       ` Krzysztof Halasa
2006-07-08 20:40                                                         ` Chase Venters
2006-07-08 20:47                                                           ` Chase Venters
2006-07-09 10:57                                                           ` Krzysztof Halasa
2006-07-07 21:59                                                 ` Linus Torvalds
2006-07-07 22:06                                                   ` Linus Torvalds
2006-07-08 20:49                                                   ` Pavel Machek
2006-07-08 21:43                                                     ` Linus Torvalds
2006-07-07 22:05                                                 ` J.A. Magallón
2006-07-07 22:22                                                   ` Chase Venters
2006-07-07 22:37                                                     ` J.A. Magallón
2006-07-08  9:33                                                       ` David Schwartz
2006-07-07 22:49                                                     ` J.A. Magallón
2006-07-07 22:59                                                       ` Vadim Lobanov
2006-07-07 23:18                                                       ` Chase Venters
2006-07-07 23:36                                                   ` Davide Libenzi
2006-07-07 22:09                                                 ` Ingo Molnar
2006-07-08  7:36                                                   ` Ingo Molnar
2006-07-07 20:39                                             ` Krzysztof Halasa
2006-07-07 23:06                                             ` Björn Steinbrink
2006-07-08  8:36                                             ` Avi Kivity
2006-07-06 19:32                                       ` Jan Engelhardt
2006-07-06 20:26                                         ` Jeremy Fitzhardinge
2006-07-06 20:55                                           ` Jan Engelhardt
2006-07-06 21:07                                             ` Linus Torvalds
2006-07-06 19:56                                 ` Linus Torvalds
2006-07-06 20:34                                   ` Linus Torvalds
2006-07-08 22:50                                     ` Ralf Baechle
2006-07-09  3:09                                       ` Linus Torvalds
2006-07-09  3:07                                     ` Keith Owens
2006-07-09  3:29                                       ` Linus Torvalds
2006-07-09  3:43                                         ` Keith Owens
2006-07-09  3:58                                       ` Linus Torvalds
2006-07-09  6:13                                         ` David Miller
2006-07-09 14:28                                         ` Roman Zippel
2006-07-09 15:27                                           ` Linus Torvalds
2006-07-06  8:18                               ` [patch] lockdep: clean up completion initializer in smpboot.c Ingo Molnar
2006-07-06  8:23                               ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
2006-07-06  9:02                                 ` Andrew Morton
2006-07-06  9:12                                   ` [patch] uninline init_waitqueue_head() Ingo Molnar
2006-07-05 20:45                   ` [patch] uninline init_waitqueue_*() functions Ingo Molnar
2006-07-05 10:37     ` Christoph Hellwig
2006-07-05 10:44       ` Andrew Morton
2006-07-05 10:46         ` Andrew Morton
2006-07-05 10:47         ` Ingo Molnar
2006-07-05  9:54 ` [patch] uninline init_waitqueue_*() functions, fix Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.