linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* out-of-line x86 "put_user()" implementation
@ 2005-02-07  6:23 Linus Torvalds
  2005-02-07 11:44 ` Ingo Molnar
  2005-03-12 14:55 ` Coywolf Qi Hunt
  0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-07  6:23 UTC (permalink / raw)
  To: Kernel Mailing List; +Cc: Manfred Spraul


I was looking at some of the code we generate, and happened to notice that 
we have this strange situation where the x86 "get_user()" macros generate 
out-of-line code to do all the address verification etc, but the 
"put_user()" ones do not, and do everything inline.

I also noticed that (probably as a result of this), our "put_user()" on 
old i386 machines does not do the full magic manual page-following. Which 
means that copy-on-write doesn't necessarily work right due to the broken 
paging hw on the original 386 core.

I didn't fix the second part, but at least making things out-of-line makes
it possible. And making "put_user()" be out-of-line seemed quite doable.

I no longer use x86 as my main machine, so this patch is totally untested. 
I've compiled it to see that things look somewhat sane, but that doesn't 
mean much. If I forgot some register or screwed something else up, this 
will result in a totally nonworking kernel, but I thought that maybe 
somebody else would be interested in looking at whether this (a) works, 
(b) migth even shrink the kernel and (c) might make us able to DTRT wrt 
the page table following crud (old i386 cores may be hard to find these 
days, so maybe people don't care).

		Linus

----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/06 22:10:04-08:00 torvalds@evo.osdl.org 
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
# 
# arch/i386/lib/putuser.S
#   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +87 -0
# 
# include/asm-i386/uaccess.h
#   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +27 -3
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
# 
# arch/i386/lib/putuser.S
#   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +0 -0
#   BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
# 
# arch/i386/lib/Makefile
#   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +1 -1
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
# 
# arch/i386/kernel/i386_ksyms.c
#   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +5 -0
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
# 
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c	2005-02-06 22:12:01 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c	2005-02-06 22:12:01 -08:00
@@ -97,6 +97,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile	2005-02-06 22:12:01 -08:00
+++ b/arch/i386/lib/Makefile	2005-02-06 22:12:01 -08:00
@@ -3,7 +3,7 @@
 #
 
 
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S	2005-02-06 22:12:01 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *		%ecx is corrupted
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; popl %ecx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	addl $1,%ecx
+	jc bad_put_user
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+2:	movw %ax,-1(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	addl $3,%ecx
+	jc bad_put_user
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+3:	movl %eax,-3(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	addl $7,%ecx
+	jc bad_put_user
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+3:	movl %eax,-7(%ecx)
+4:	movl %edx,-3(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h	2005-02-06 22:12:01 -08:00
+++ b/include/asm-i386/uaccess.h	2005-02-06 22:12:01 -08:00
@@ -185,6 +185,21 @@
 
 extern void __put_user_bad(void);
 
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -201,9 +216,18 @@
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr)						\
+({	int __ret_pu;						\
+	__chk_user_ptr(ptr);					\
+	switch(sizeof(*(ptr))) {				\
+	case 1: __put_user_1(x, ptr); break;			\
+	case 2: __put_user_2(x, ptr); break;			\
+	case 4: __put_user_4(x, ptr); break;			\
+	case 8: __put_user_8(x, ptr); break;			\
+	default:__put_user_X(x, ptr); break;			\
+	}							\
+	__ret_pu;						\
+})
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-07  6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
@ 2005-02-07 11:44 ` Ingo Molnar
  2005-02-08  1:20   ` Linus Torvalds
  2005-03-12 14:55 ` Coywolf Qi Hunt
  1 sibling, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2005-02-07 11:44 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kernel Mailing List, Manfred Spraul


* Linus Torvalds <torvalds@osdl.org> wrote:

> I no longer use x86 as my main machine, so this patch is totally
> untested.  I've compiled it to see that things look somewhat sane, but
> that doesn't mean much. If I forgot some register or screwed something
> else up, this will result in a totally nonworking kernel, but I
> thought that maybe somebody else would be interested in looking at
> whether this (a) works, (b) migth even shrink the kernel and (c) might
> make us able to DTRT wrt the page table following crud (old i386 cores
> may be hard to find these days, so maybe people don't care).

boots fine and shrinks the image size quite noticeably:

  [Nr] Name     Type        Addr     Off    Size
  [ 1] .text    PROGBITS    c0100000 001000 2771a9   [vmlinux-orig]
  [ 1] .text    PROGBITS    c0100000 001000 2742dd   [vmlinux-patched]

that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
This patch we want ...

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-07 11:44 ` Ingo Molnar
@ 2005-02-08  1:20   ` Linus Torvalds
  2005-02-08 17:43     ` Pavel Machek
                       ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-08  1:20 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Kernel Mailing List, Manfred Spraul



On Mon, 7 Feb 2005, Ingo Molnar wrote:
>
> boots fine and shrinks the image size quite noticeably:
> 
>   [Nr] Name     Type        Addr     Off    Size
>   [ 1] .text    PROGBITS    c0100000 001000 2771a9   [vmlinux-orig]
>   [ 1] .text    PROGBITS    c0100000 001000 2742dd   [vmlinux-patched]
> 
> that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> This patch we want ...

Goodie. Here's a slightly more recent version, where I cleaned up the
assembly code (no need to save %ecx if we just update %ebx instead, which
makes the code a bit more readable too - and doing it this way should
hopefully make it easier for an out-of-order CPU to start the memops
earlier too. Who knows..)

I'm not going to put this into 2.6.11, since I worry about compiler 
interactions, but the more people who test it anyway, the better.

		Linus

-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/07 08:14:28-08:00 torvalds@evo.osdl.org 
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
#   
#   Ingo tests and reports: this shrinks his kernel text size by
#   about 12kB (roughly 0.5%)
# 
# arch/i386/lib/putuser.S
#   2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +86 -0
# 
# include/asm-i386/uaccess.h
#   2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +27 -3
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/lib/putuser.S
#   2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +0 -0
#   BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
# 
# arch/i386/lib/Makefile
#   2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +1 -1
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/kernel/i386_ksyms.c
#   2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +5 -0
#   x86: make "put_user()" be out-of-line
# 
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c	2005-02-07 17:16:32 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c	2005-02-07 17:16:32 -08:00
@@ -97,6 +97,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile	2005-02-07 17:16:32 -08:00
+++ b/arch/i386/lib/Makefile	2005-02-07 17:16:32 -08:00
@@ -3,7 +3,7 @@
 #
 
 
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S	2005-02-07 17:16:32 -08:00
@@ -0,0 +1,86 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $1,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+2:	movw %ax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $3,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $7,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+4:	movl %edx,4(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h	2005-02-07 17:16:32 -08:00
+++ b/include/asm-i386/uaccess.h	2005-02-07 17:16:32 -08:00
@@ -185,6 +185,21 @@
 
 extern void __put_user_bad(void);
 
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -201,9 +216,18 @@
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr)						\
+({	int __ret_pu;						\
+	__chk_user_ptr(ptr);					\
+	switch(sizeof(*(ptr))) {				\
+	case 1: __put_user_1(x, ptr); break;			\
+	case 2: __put_user_2(x, ptr); break;			\
+	case 4: __put_user_4(x, ptr); break;			\
+	case 8: __put_user_8(x, ptr); break;			\
+	default:__put_user_X(x, ptr); break;			\
+	}							\
+	__ret_pu;						\
+})
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-08  1:20   ` Linus Torvalds
@ 2005-02-08 17:43     ` Pavel Machek
  2005-02-08 19:35     ` Valdis.Kletnieks
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Pavel Machek @ 2005-02-08 17:43 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul

Hi!

> > boots fine and shrinks the image size quite noticeably:
> > 
> >   [Nr] Name     Type        Addr     Off    Size
> >   [ 1] .text    PROGBITS    c0100000 001000 2771a9   [vmlinux-orig]
> >   [ 1] .text    PROGBITS    c0100000 001000 2742dd   [vmlinux-patched]
> > 
> > that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> > This patch we want ...
> 
> Goodie. Here's a slightly more recent version, where I cleaned up the
> assembly code (no need to save %ecx if we just update %ebx instead, which
> makes the code a bit more readable too - and doing it this way should
> hopefully make it easier for an out-of-order CPU to start the memops
> earlier too. Who knows..)
> 
> I'm not going to put this into 2.6.11, since I worry about compiler 
> interactions, but the more people who test it anyway, the better.

Put it in -mm tree :-)))
								Pavel

-- 
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-08  1:20   ` Linus Torvalds
  2005-02-08 17:43     ` Pavel Machek
@ 2005-02-08 19:35     ` Valdis.Kletnieks
  2005-02-09  1:25     ` Richard Henderson
  2005-02-09  1:27     ` Richard Henderson
  3 siblings, 0 replies; 18+ messages in thread
From: Valdis.Kletnieks @ 2005-02-08 19:35 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul

[-- Attachment #1: Type: text/plain, Size: 683 bytes --]

On Mon, 07 Feb 2005 17:20:06 PST, Linus Torvalds said:

> I'm not going to put this into 2.6.11, since I worry about compiler 
> interactions, but the more people who test it anyway, the better.

Well, since I'm a known glutton for punishment. ;)

a 2.6.11-rc3-RT tree I had handy from last night shows about a 0.5% size reduction:
   text    data     bss     dec     hex filename
3033417  680824  436188 4150429  3f549d vmlinux.before
3017443  681068  436188 4134699  3f172b vmlinux

Am running a 2.6.11-rc3-mm1-RT kernel with it applied, and have probably 40-50
hours of uptime on it with no noticed issues on a Dell laptop.  Sorry, don't
have any comparative performance stats...

[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-08  1:20   ` Linus Torvalds
  2005-02-08 17:43     ` Pavel Machek
  2005-02-08 19:35     ` Valdis.Kletnieks
@ 2005-02-09  1:25     ` Richard Henderson
  2005-02-09  2:08       ` Linus Torvalds
  2005-02-09  1:27     ` Richard Henderson
  3 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09  1:25 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul

On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +3:	movl %eax,(%ecx)
...
> +3:	movl %eax,(%ecx)
> +4:	movl %edx,4(%ecx)
...
> +	.long 3b,bad_put_user
> +	.long 4b,bad_put_user

The first 3 gets lost.


r~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-08  1:20   ` Linus Torvalds
                       ` (2 preceding siblings ...)
  2005-02-09  1:25     ` Richard Henderson
@ 2005-02-09  1:27     ` Richard Henderson
  2005-02-09  2:16       ` Linus Torvalds
  3 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09  1:27 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul

On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))

This is not constrained enough.  The compiler could choose to put the
return value in edx.  You want

  __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
			: "A" ((typeof(*(ptr)))(x)), "c" (ptr))


r~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  1:25     ` Richard Henderson
@ 2005-02-09  2:08       ` Linus Torvalds
  2005-02-09  2:24         ` Andrew Morton
  0 siblings, 1 reply; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09  2:08 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul



On Tue, 8 Feb 2005, Richard Henderson wrote:
> 
> The first 3 gets lost.

Thanks. So here's v3 (which also removes the now stale __put_user_check() 
macro).

Andrew - do you want to put it in -mm? 

		Linus

---
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/08 18:05:45-08:00 torvalds@evo.osdl.org 
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
#   
#   Ingo tests and reports: this shrinks his kernel text size by
#   about 12kB (roughly 0.5%)
# 
# arch/i386/lib/putuser.S
#   2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +87 -0
# 
# include/asm-i386/uaccess.h
#   2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +27 -13
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/lib/putuser.S
#   2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +0 -0
#   BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
# 
# arch/i386/lib/Makefile
#   2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +1 -1
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/kernel/i386_ksyms.c
#   2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +5 -0
#   x86: make "put_user()" be out-of-line
# 
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c	2005-02-08 18:06:14 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c	2005-02-08 18:06:14 -08:00
@@ -97,6 +97,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile	2005-02-08 18:06:14 -08:00
+++ b/arch/i386/lib/Makefile	2005-02-08 18:06:14 -08:00
@@ -3,7 +3,7 @@
 #
 
 
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S	2005-02-08 18:06:14 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $1,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+2:	movw %ax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $3,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $7,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+4:	movl %eax,(%ecx)
+5:	movl %edx,4(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+	.long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h	2005-02-08 18:06:14 -08:00
+++ b/include/asm-i386/uaccess.h	2005-02-08 18:06:14 -08:00
@@ -185,6 +185,21 @@
 
 extern void __put_user_bad(void);
 
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -201,9 +216,18 @@
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr)						\
+({	int __ret_pu;						\
+	__chk_user_ptr(ptr);					\
+	switch(sizeof(*(ptr))) {				\
+	case 1: __put_user_1(x, ptr); break;			\
+	case 2: __put_user_2(x, ptr); break;			\
+	case 4: __put_user_4(x, ptr); break;			\
+	case 8: __put_user_8(x, ptr); break;			\
+	default:__put_user_X(x, ptr); break;			\
+	}							\
+	__ret_pu;						\
+})
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
 	__pu_err;						\
 })
 
-
-#define __put_user_check(x,ptr,size)					\
-({									\
-	long __pu_err = -EFAULT;					\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	might_sleep();						\
-	if (access_ok(VERIFY_WRITE,__pu_addr,size))			\
-		__put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT);	\
-	__pu_err;							\
-})							
 
 #define __put_user_u64(x, addr, err)				\
 	__asm__ __volatile__(					\

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  1:27     ` Richard Henderson
@ 2005-02-09  2:16       ` Linus Torvalds
  2005-02-09  2:27         ` Linus Torvalds
  2005-02-09  2:36         ` Richard Henderson
  0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09  2:16 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul



On Tue, 8 Feb 2005, Richard Henderson wrote:
>
> On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> > +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> 
> This is not constrained enough.  The compiler could choose to put the
> return value in edx.  You want
> 
>   __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
> 			: "A" ((typeof(*(ptr)))(x)), "c" (ptr))

Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
edx"?

IOW, as far as I know, I'm telling the compiler that the asm is writing a
64-bit value into %eax:%edx, and that that __ret_pu gets that 64-bit value 
assigned to it (and truncated, at that point). No?

Note that he code that actually writes to the register is the assembly 
code, and that one always writes to %eax. So the compiler doesn't get to 
"put the return value" anywhere. It gets told where it is.

I'd happily use your version, but I thought that some versions of gcc
require that input output registers cannot overlap, and would refuse to do 
that thing? But if you tell me differently..

			Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:08       ` Linus Torvalds
@ 2005-02-09  2:24         ` Andrew Morton
  2005-02-09  2:32           ` Linus Torvalds
  0 siblings, 1 reply; 18+ messages in thread
From: Andrew Morton @ 2005-02-09  2:24 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: rth, mingo, linux-kernel, manfred

Linus Torvalds <torvalds@osdl.org> wrote:
>
> Andrew - do you want to put it in -mm? 

I'll take patches from anyone ;)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:16       ` Linus Torvalds
@ 2005-02-09  2:27         ` Linus Torvalds
  2005-02-09  2:33           ` Richard Henderson
  2005-02-09  2:36         ` Richard Henderson
  1 sibling, 1 reply; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09  2:27 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul



On Tue, 8 Feb 2005, Linus Torvalds wrote:
> 
> Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
> edx"?

Ahh. Some testing shows that gcc really seems to think of it as "eax or 
edx", ie you can do

	asm("uglee %0 %1": :"a" (1), "A" (2));

and it will output

	movl    $1, %eax
	movl    $2, %edx
	uglee %eax %edx

without complaining. 

Live and learn. 

That brings up another issue: if I don't care which registers a 64-bit 
value goes into, can I get the "low reg" and "high reg" names some way? In 
this case I'm constrained, and do want eactly %eax:%edx, but in some other 
cases I might not care, and wouldn't mind having a

	asm("addl %2,%L0 ; adcl $0,%H0"
		:"=r" (ll_result)
		:"r" (increment), "0" (ll_input));

to add a 32-bit number to a 64-bit one (this is totally made up, since gcc 
can probably generate this sequence itself, but you get the idea - 
allowing the asm to treat the low/high parts of the 64-bit entity 
separately, without forcing them into one specific pair).

v4 with your asm constrain fix appended.

			Linus

-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/08 18:18:46-08:00 torvalds@evo.osdl.org 
#   x86: make "put_user()" be out-of-line
#   
#   It's really too big to be inlined.
#   
#   Ingo tests and reports: this shrinks his kernel text size by
#   about 12kB (roughly 0.5%).
#   
#   Updated on input from Richard Henderson on how the "A" constraint
#   works on x86.
# 
# arch/i386/lib/putuser.S
#   2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +87 -0
# 
# include/asm-i386/uaccess.h
#   2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +27 -13
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/lib/putuser.S
#   2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +0 -0
#   BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
# 
# arch/i386/lib/Makefile
#   2005/02/08 18:18:34-08:00 torvalds@evo.osdl.org +1 -1
#   x86: make "put_user()" be out-of-line
# 
# arch/i386/kernel/i386_ksyms.c
#   2005/02/08 18:18:34-08:00 torvalds@evo.osdl.org +5 -0
#   x86: make "put_user()" be out-of-line
# 
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c	2005-02-08 18:25:22 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c	2005-02-08 18:25:22 -08:00
@@ -97,6 +97,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile	2005-02-08 18:25:22 -08:00
+++ b/arch/i386/lib/Makefile	2005-02-08 18:25:22 -08:00
@@ -3,7 +3,7 @@
 #
 
 
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S	2005-02-08 18:25:22 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $1,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+2:	movw %ax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $3,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $7,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+4:	movl %eax,(%ecx)
+5:	movl %edx,4(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+	.long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h	2005-02-08 18:25:22 -08:00
+++ b/include/asm-i386/uaccess.h	2005-02-08 18:25:22 -08:00
@@ -185,6 +185,21 @@
 
 extern void __put_user_bad(void);
 
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -201,9 +216,18 @@
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr)						\
+({	int __ret_pu;						\
+	__chk_user_ptr(ptr);					\
+	switch(sizeof(*(ptr))) {				\
+	case 1: __put_user_1(x, ptr); break;			\
+	case 2: __put_user_2(x, ptr); break;			\
+	case 4: __put_user_4(x, ptr); break;			\
+	case 8: __put_user_8(x, ptr); break;			\
+	default:__put_user_X(x, ptr); break;			\
+	}							\
+	__ret_pu;						\
+})
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
 	__pu_err;						\
 })
 
-
-#define __put_user_check(x,ptr,size)					\
-({									\
-	long __pu_err = -EFAULT;					\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	might_sleep();						\
-	if (access_ok(VERIFY_WRITE,__pu_addr,size))			\
-		__put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT);	\
-	__pu_err;							\
-})							
 
 #define __put_user_u64(x, addr, err)				\
 	__asm__ __volatile__(					\

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:24         ` Andrew Morton
@ 2005-02-09  2:32           ` Linus Torvalds
  0 siblings, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09  2:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: rth, mingo, linux-kernel, manfred



On Tue, 8 Feb 2005, Andrew Morton wrote:
> 
> I'll take patches from anyone ;)

You'll never live it down. Once you get a name for being easy, you'll
always be known as Andrew "patch-ho" Morton.

		Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:27         ` Linus Torvalds
@ 2005-02-09  2:33           ` Richard Henderson
  2005-02-11 22:15             ` H. Peter Anvin
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09  2:33 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul

On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> That brings up another issue: if I don't care which registers a 64-bit 
> value goes into, can I get the "low reg" and "high reg" names some way?

Nope.  We never needed one in the i386 backend itself, so we never
added anything like that.


r~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:16       ` Linus Torvalds
  2005-02-09  2:27         ` Linus Torvalds
@ 2005-02-09  2:36         ` Richard Henderson
  1 sibling, 0 replies; 18+ messages in thread
From: Richard Henderson @ 2005-02-09  2:36 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul

On Tue, Feb 08, 2005 at 06:16:15PM -0800, Linus Torvalds wrote:
> I'd happily use your version, but I thought that some versions of gcc
> require that input output registers cannot overlap, and would refuse to do 
> that thing? But if you tell me differently..

No, you're thinking of

	asm("" : "=a" (x) : : "eax")

where the clobber overlaps the inputs or outputs.


r~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-09  2:33           ` Richard Henderson
@ 2005-02-11 22:15             ` H. Peter Anvin
  0 siblings, 0 replies; 18+ messages in thread
From: H. Peter Anvin @ 2005-02-11 22:15 UTC (permalink / raw)
  To: linux-kernel

Followup to:  <20050209023346.GA13911@twiddle.net>
By author:    Richard Henderson <rth@twiddle.net>
In newsgroup: linux.dev.kernel
>
> On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> > That brings up another issue: if I don't care which registers a 64-bit 
> > value goes into, can I get the "low reg" and "high reg" names some way?
> 
> Nope.  We never needed one in the i386 backend itself, so we never
> added anything like that.
> 

I would really like to see that feature.  I've missed it more than a
few times.

	-hpa

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-07  6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
  2005-02-07 11:44 ` Ingo Molnar
@ 2005-03-12 14:55 ` Coywolf Qi Hunt
  1 sibling, 0 replies; 18+ messages in thread
From: Coywolf Qi Hunt @ 2005-03-12 14:55 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kernel Mailing List, Manfred Spraul, fdavis, fdavis112

On Sun, 6 Feb 2005 22:23:51 -0800 (PST), Linus Torvalds
<torvalds@osdl.org> wrote:
> 
> I was looking at some of the code we generate, and happened to notice that
> we have this strange situation where the x86 "get_user()" macros generate
> out-of-line code to do all the address verification etc, but the
> "put_user()" ones do not, and do everything inline.
> 
> I also noticed that (probably as a result of this), our "put_user()" on
> old i386 machines does not do the full magic manual page-following. Which
> means that copy-on-write doesn't necessarily work right due to the broken
> paging hw on the original 386 core.

I've noticed that "put_user()" ones do generate out-of-line code in 2.2.x and 
later got dropped out in patch-2.3.99-pre9.
( perhaps by http://www.uwsg.iu.edu/hypermail/linux/kernel/0005.2/0487.html )

At that time "put_user is broken for i386 machines (security) - sem
stuff may be wrong too " was put on the job list. Not sure what bug it
was.


--coywolf

> 
> I didn't fix the second part, but at least making things out-of-line makes
> it possible. And making "put_user()" be out-of-line seemed quite doable.
> 
> I no longer use x86 as my main machine, so this patch is totally untested.
> I've compiled it to see that things look somewhat sane, but that doesn't
> mean much. If I forgot some register or screwed something else up, this
> will result in a totally nonworking kernel, but I thought that maybe
> somebody else would be interested in looking at whether this (a) works,
> (b) migth even shrink the kernel and (c) might make us able to DTRT wrt
> the page table following crud (old i386 cores may be hard to find these
> days, so maybe people don't care).
> 
>                 Linus
> 
> ----
> # This is a BitKeeper generated diff -Nru style patch.
> #
> # ChangeSet
> #   2005/02/06 22:10:04-08:00 torvalds@evo.osdl.org
> #   x86: make "put_user()" be out-of-line
> #
> #   It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> #   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +87 -0
> #
> # include/asm-i386/uaccess.h
> #   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +27 -3
> #   x86: make "put_user()" be out-of-line
> #
> #   It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> #   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +0 -0
> #   BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
> #
> # arch/i386/lib/Makefile
> #   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +1 -1
> #   x86: make "put_user()" be out-of-line
> #
> #   It's really too big to be inlined.
> #
> # arch/i386/kernel/i386_ksyms.c
> #   2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +5 -0
> #   x86: make "put_user()" be out-of-line
> #
> #   It's really too big to be inlined.
> #
> diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
> --- a/arch/i386/kernel/i386_ksyms.c     2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/kernel/i386_ksyms.c     2005-02-06 22:12:01 -08:00
> @@ -97,6 +97,11 @@
>  EXPORT_SYMBOL(__get_user_2);
>  EXPORT_SYMBOL(__get_user_4);
> 
> +EXPORT_SYMBOL(__put_user_1);
> +EXPORT_SYMBOL(__put_user_2);
> +EXPORT_SYMBOL(__put_user_4);
> +EXPORT_SYMBOL(__put_user_8);
> +
>  EXPORT_SYMBOL(strpbrk);
>  EXPORT_SYMBOL(strstr);
> 
> diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
> --- a/arch/i386/lib/Makefile    2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/lib/Makefile    2005-02-06 22:12:01 -08:00
> @@ -3,7 +3,7 @@
>  #
> 
> -lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
> +lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
>         bitops.o
> 
>  lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
> diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
> --- /dev/null   Wed Dec 31 16:00:00 196900
> +++ b/arch/i386/lib/putuser.S   2005-02-06 22:12:01 -08:00
> @@ -0,0 +1,87 @@
> +/*
> + * __put_user functions.
> + *
> + * (C) Copyright 2005 Linus Torvalds
> + *
> + * These functions have a non-standard call interface
> + * to make them more efficient, especially as they
> + * return an error value in addition to the "real"
> + * return value.
> + */
> +#include <asm/thread_info.h>
> +
> +
> +/*
> + * __put_user_X
> + *
> + * Inputs:     %eax[:%edx] contains the data
> + *             %ecx contains the address
> + *
> + * Outputs:    %eax is error code (0 or -EFAULT)
> + *             %ecx is corrupted
> + *
> + * These functions should not modify any other registers,
> + * as they get called from within inline assembly.
> + */
> +
> +#define ENTER  pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
> +#define EXIT   popl %ebx ; popl %ecx ; ret
> +
> +.text
> +.align 4
> +.globl __put_user_1
> +__put_user_1:
> +       ENTER
> +       cmpl TI_addr_limit(%ebx),%ecx
> +       jae bad_put_user
> +1:     movb %al,(%ecx)
> +       xorl %eax,%eax
> +       EXIT
> +
> +.align 4
> +.globl __put_user_2
> +__put_user_2:
> +       ENTER
> +       addl $1,%ecx
> +       jc bad_put_user
> +       cmpl TI_addr_limit(%ebx),%ecx
> +       jae bad_put_user
> +2:     movw %ax,-1(%ecx)
> +       xorl %eax,%eax
> +       EXIT
> +
> +.align 4
> +.globl __put_user_4
> +__put_user_4:
> +       ENTER
> +       addl $3,%ecx
> +       jc bad_put_user
> +       cmpl TI_addr_limit(%ebx),%ecx
> +       jae bad_put_user
> +3:     movl %eax,-3(%ecx)
> +       xorl %eax,%eax
> +       EXIT
> +
> +.align 4
> +.globl __put_user_8
> +__put_user_8:
> +       ENTER
> +       addl $7,%ecx
> +       jc bad_put_user
> +       cmpl TI_addr_limit(%ebx),%ecx
> +       jae bad_put_user
> +3:     movl %eax,-7(%ecx)
> +4:     movl %edx,-3(%ecx)
> +       xorl %eax,%eax
> +       EXIT
> +
> +bad_put_user:
> +       movl $-14,%eax
> +       EXIT
> +
> +.section __ex_table,"a"
> +       .long 1b,bad_put_user
> +       .long 2b,bad_put_user
> +       .long 3b,bad_put_user
> +       .long 4b,bad_put_user
> +.previous
> diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
> --- a/include/asm-i386/uaccess.h        2005-02-06 22:12:01 -08:00
> +++ b/include/asm-i386/uaccess.h        2005-02-06 22:12:01 -08:00
> @@ -185,6 +185,21 @@
> 
>  extern void __put_user_bad(void);
> 
> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +
>  /**
>   * put_user: - Write a simple value into user space.
>   * @x:   Value to copy to user space.
> @@ -201,9 +216,18 @@
>   *
>   * Returns zero on success, or -EFAULT on error.
>   */
> -#define put_user(x,ptr)                                                        \
> -  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
> -
> +#define put_user(x,ptr)                                                \
> +({     int __ret_pu;                                           \
> +       __chk_user_ptr(ptr);                                    \
> +       switch(sizeof(*(ptr))) {                                \
> +       case 1: __put_user_1(x, ptr); break;                    \
> +       case 2: __put_user_2(x, ptr); break;                    \
> +       case 4: __put_user_4(x, ptr); break;                    \
> +       case 8: __put_user_8(x, ptr); break;                    \
> +       default:__put_user_X(x, ptr); break;                    \
> +       }                                                       \
> +       __ret_pu;                                               \
> +})
> 
>  /**
>   * __get_user: - Get a simple variable from user space, with less checking.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 


-- 
Coywolf Qi Hunt
Homepage http://sosdg.org/~coywolf/

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
  2005-02-12  1:55 Chuck Ebbert
@ 2005-02-12  2:03 ` Linus Torvalds
  0 siblings, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-12  2:03 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Andrew Morton, Manfred Spraul, linux-kernel, Richard Henderson,
	Ingo Molnar



On Fri, 11 Feb 2005, Chuck Ebbert wrote:
> 
>   And in any case is it too much to ask for an 80-column limit? ;)

Yes. Dammit, especially for something like this, the long-line version is 
just _so_ much more readable. Compare my and your version wrt being able 
to tell what the differences between the four different cases are.

In the single-long-line version, the differences are trivially visible. In 
the "prettified" version (aka "I'm still living in the 60's, and proud of 
it" version), it's impossible to pick out the differences.

If you don't like long lines, use a helper #define for the common part or 
something.

		Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: out-of-line x86 "put_user()" implementation
@ 2005-02-12  1:55 Chuck Ebbert
  2005-02-12  2:03 ` Linus Torvalds
  0 siblings, 1 reply; 18+ messages in thread
From: Chuck Ebbert @ 2005-02-12  1:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Manfred Spraul, linux-kernel, Richard Henderson,
	Ingo Molnar

On Tue, 8 Feb 2005 at 18:27:08 -0800, Linus Torvalds wrote:

> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +

  Should "cc" be on the clobber list since all the called functions alter EFLAGS?

  And in any case is it too much to ask for an 80-column limit? ;)

#define __put_user_1(x, ptr)                    \
__asm__ __volatile__(                           \
        "call __put_user_1"                     \
        : "=a" (__ret_pu)                       \
        : "0" ((typeof(*(ptr)))(x)), "c" (ptr)  \
        : "cc")
#define __put_user_2(x, ptr)                    \
__asm__ __volatile__(                           \
        "call __put_user_2"                     \
        : "=a" (__ret_pu)                       \
        : "0" ((typeof(*(ptr)))(x)), "c" (ptr)  \
        : "cc")
#define __put_user_4(x, ptr)                    \
__asm__ __volatile__(                           \
        "call __put_user_4"                     \
        : "=a" (__ret_pu)                       \
        : "0" ((typeof(*(ptr)))(x)), "c" (ptr)  \
        : "cc")
#define __put_user_8(x, ptr)                    \
__asm__ __volatile__(                           \
        "call __put_user_8"                     \
        : "=a" (__ret_pu)                       \
        : "A" ((typeof(*(ptr)))(x)), "c" (ptr)  \
        : "cc")
#define __put_user_X(x, ptr)                    \
__asm__ __volatile__(                           \
        "call __put_user_X"                     \
        : "=a" (__ret_pu)                       \
        : "c" (ptr)                             \
        : "cc")

--
Chuck

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2005-03-12 14:56 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-07  6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
2005-02-07 11:44 ` Ingo Molnar
2005-02-08  1:20   ` Linus Torvalds
2005-02-08 17:43     ` Pavel Machek
2005-02-08 19:35     ` Valdis.Kletnieks
2005-02-09  1:25     ` Richard Henderson
2005-02-09  2:08       ` Linus Torvalds
2005-02-09  2:24         ` Andrew Morton
2005-02-09  2:32           ` Linus Torvalds
2005-02-09  1:27     ` Richard Henderson
2005-02-09  2:16       ` Linus Torvalds
2005-02-09  2:27         ` Linus Torvalds
2005-02-09  2:33           ` Richard Henderson
2005-02-11 22:15             ` H. Peter Anvin
2005-02-09  2:36         ` Richard Henderson
2005-03-12 14:55 ` Coywolf Qi Hunt
2005-02-12  1:55 Chuck Ebbert
2005-02-12  2:03 ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).