* out-of-line x86 "put_user()" implementation
@ 2005-02-07 6:23 Linus Torvalds
2005-02-07 11:44 ` Ingo Molnar
2005-03-12 14:55 ` Coywolf Qi Hunt
0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-07 6:23 UTC (permalink / raw)
To: Kernel Mailing List; +Cc: Manfred Spraul
I was looking at some of the code we generate, and happened to notice that
we have this strange situation where the x86 "get_user()" macros generate
out-of-line code to do all the address verification etc, but the
"put_user()" ones do not, and do everything inline.
I also noticed that (probably as a result of this), our "put_user()" on
old i386 machines does not do the full magic manual page-following. Which
means that copy-on-write doesn't necessarily work right due to the broken
paging hw on the original 386 core.
I didn't fix the second part, but at least making things out-of-line makes
it possible. And making "put_user()" be out-of-line seemed quite doable.
I no longer use x86 as my main machine, so this patch is totally untested.
I've compiled it to see that things look somewhat sane, but that doesn't
mean much. If I forgot some register or screwed something else up, this
will result in a totally nonworking kernel, but I thought that maybe
somebody else would be interested in looking at whether this (a) works,
(b) migth even shrink the kernel and (c) might make us able to DTRT wrt
the page table following crud (old i386 cores may be hard to find these
days, so maybe people don't care).
Linus
----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/06 22:10:04-08:00 torvalds@evo.osdl.org
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/lib/putuser.S
# 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +27 -3
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/lib/putuser.S
# 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +1 -1
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +5 -0
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
@@ -3,7 +3,7 @@
#
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-06 22:12:01 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ * %ecx is corrupted
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; popl %ecx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ addl $1,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+2: movw %ax,-1(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ addl $3,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+3: movl %eax,-3(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ addl $7,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+3: movl %eax,-7(%ecx)
+4: movl %edx,-3(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
@@ -185,6 +185,21 @@
extern void __put_user_bad(void);
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})
/**
* __get_user: - Get a simple variable from user space, with less checking.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-07 6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
@ 2005-02-07 11:44 ` Ingo Molnar
2005-02-08 1:20 ` Linus Torvalds
2005-03-12 14:55 ` Coywolf Qi Hunt
1 sibling, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2005-02-07 11:44 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Kernel Mailing List, Manfred Spraul
* Linus Torvalds <torvalds@osdl.org> wrote:
> I no longer use x86 as my main machine, so this patch is totally
> untested. I've compiled it to see that things look somewhat sane, but
> that doesn't mean much. If I forgot some register or screwed something
> else up, this will result in a totally nonworking kernel, but I
> thought that maybe somebody else would be interested in looking at
> whether this (a) works, (b) migth even shrink the kernel and (c) might
> make us able to DTRT wrt the page table following crud (old i386 cores
> may be hard to find these days, so maybe people don't care).
boots fine and shrinks the image size quite noticeably:
[Nr] Name Type Addr Off Size
[ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
[ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]
that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
This patch we want ...
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-07 11:44 ` Ingo Molnar
@ 2005-02-08 1:20 ` Linus Torvalds
2005-02-08 17:43 ` Pavel Machek
` (3 more replies)
0 siblings, 4 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-08 1:20 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Kernel Mailing List, Manfred Spraul
On Mon, 7 Feb 2005, Ingo Molnar wrote:
>
> boots fine and shrinks the image size quite noticeably:
>
> [Nr] Name Type Addr Off Size
> [ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
> [ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]
>
> that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> This patch we want ...
Goodie. Here's a slightly more recent version, where I cleaned up the
assembly code (no need to save %ecx if we just update %ebx instead, which
makes the code a bit more readable too - and doing it this way should
hopefully make it easier for an out-of-order CPU to start the memops
earlier too. Who knows..)
I'm not going to put this into 2.6.11, since I worry about compiler
interactions, but the more people who test it anyway, the better.
Linus
-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/07 08:14:28-08:00 torvalds@evo.osdl.org
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%)
#
# arch/i386/lib/putuser.S
# 2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +86 -0
#
# include/asm-i386/uaccess.h
# 2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +27 -3
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/07 08:14:17-08:00 torvalds@evo.osdl.org +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-07 17:16:32 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-07 17:16:32 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-07 17:16:32 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-07 17:16:32 -08:00
@@ -3,7 +3,7 @@
#
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-07 17:16:32 -08:00
@@ -0,0 +1,86 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+4: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-07 17:16:32 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-07 17:16:32 -08:00
@@ -185,6 +185,21 @@
extern void __put_user_bad(void);
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})
/**
* __get_user: - Get a simple variable from user space, with less checking.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-08 1:20 ` Linus Torvalds
@ 2005-02-08 17:43 ` Pavel Machek
2005-02-08 19:35 ` Valdis.Kletnieks
` (2 subsequent siblings)
3 siblings, 0 replies; 18+ messages in thread
From: Pavel Machek @ 2005-02-08 17:43 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
Hi!
> > boots fine and shrinks the image size quite noticeably:
> >
> > [Nr] Name Type Addr Off Size
> > [ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
> > [ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]
> >
> > that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> > This patch we want ...
>
> Goodie. Here's a slightly more recent version, where I cleaned up the
> assembly code (no need to save %ecx if we just update %ebx instead, which
> makes the code a bit more readable too - and doing it this way should
> hopefully make it easier for an out-of-order CPU to start the memops
> earlier too. Who knows..)
>
> I'm not going to put this into 2.6.11, since I worry about compiler
> interactions, but the more people who test it anyway, the better.
Put it in -mm tree :-)))
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-08 1:20 ` Linus Torvalds
2005-02-08 17:43 ` Pavel Machek
@ 2005-02-08 19:35 ` Valdis.Kletnieks
2005-02-09 1:25 ` Richard Henderson
2005-02-09 1:27 ` Richard Henderson
3 siblings, 0 replies; 18+ messages in thread
From: Valdis.Kletnieks @ 2005-02-08 19:35 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
[-- Attachment #1: Type: text/plain, Size: 683 bytes --]
On Mon, 07 Feb 2005 17:20:06 PST, Linus Torvalds said:
> I'm not going to put this into 2.6.11, since I worry about compiler
> interactions, but the more people who test it anyway, the better.
Well, since I'm a known glutton for punishment. ;)
a 2.6.11-rc3-RT tree I had handy from last night shows about a 0.5% size reduction:
text data bss dec hex filename
3033417 680824 436188 4150429 3f549d vmlinux.before
3017443 681068 436188 4134699 3f172b vmlinux
Am running a 2.6.11-rc3-mm1-RT kernel with it applied, and have probably 40-50
hours of uptime on it with no noticed issues on a Dell laptop. Sorry, don't
have any comparative performance stats...
[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-08 1:20 ` Linus Torvalds
2005-02-08 17:43 ` Pavel Machek
2005-02-08 19:35 ` Valdis.Kletnieks
@ 2005-02-09 1:25 ` Richard Henderson
2005-02-09 2:08 ` Linus Torvalds
2005-02-09 1:27 ` Richard Henderson
3 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09 1:25 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +3: movl %eax,(%ecx)
...
> +3: movl %eax,(%ecx)
> +4: movl %edx,4(%ecx)
...
> + .long 3b,bad_put_user
> + .long 4b,bad_put_user
The first 3 gets lost.
r~
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-08 1:20 ` Linus Torvalds
` (2 preceding siblings ...)
2005-02-09 1:25 ` Richard Henderson
@ 2005-02-09 1:27 ` Richard Henderson
2005-02-09 2:16 ` Linus Torvalds
3 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09 1:27 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
This is not constrained enough. The compiler could choose to put the
return value in edx. You want
__asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
: "A" ((typeof(*(ptr)))(x)), "c" (ptr))
r~
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 1:25 ` Richard Henderson
@ 2005-02-09 2:08 ` Linus Torvalds
2005-02-09 2:24 ` Andrew Morton
0 siblings, 1 reply; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09 2:08 UTC (permalink / raw)
To: Richard Henderson
Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul
On Tue, 8 Feb 2005, Richard Henderson wrote:
>
> The first 3 gets lost.
Thanks. So here's v3 (which also removes the now stale __put_user_check()
macro).
Andrew - do you want to put it in -mm?
Linus
---
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/08 18:05:45-08:00 torvalds@evo.osdl.org
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%)
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +27 -13
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/08 18:05:32-08:00 torvalds@evo.osdl.org +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:06:14 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:06:14 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-08 18:06:14 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-08 18:06:14 -08:00
@@ -3,7 +3,7 @@
#
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-08 18:06:14 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+4: movl %eax,(%ecx)
+5: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+ .long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-08 18:06:14 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-08 18:06:14 -08:00
@@ -185,6 +185,21 @@
extern void __put_user_bad(void);
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})
/**
* __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
__pu_err; \
})
-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- might_sleep(); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
- __pu_err; \
-})
#define __put_user_u64(x, addr, err) \
__asm__ __volatile__( \
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 1:27 ` Richard Henderson
@ 2005-02-09 2:16 ` Linus Torvalds
2005-02-09 2:27 ` Linus Torvalds
2005-02-09 2:36 ` Richard Henderson
0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09 2:16 UTC (permalink / raw)
To: Richard Henderson; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
On Tue, 8 Feb 2005, Richard Henderson wrote:
>
> On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> > +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
>
> This is not constrained enough. The compiler could choose to put the
> return value in edx. You want
>
> __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
> : "A" ((typeof(*(ptr)))(x)), "c" (ptr))
Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
edx"?
IOW, as far as I know, I'm telling the compiler that the asm is writing a
64-bit value into %eax:%edx, and that that __ret_pu gets that 64-bit value
assigned to it (and truncated, at that point). No?
Note that he code that actually writes to the register is the assembly
code, and that one always writes to %eax. So the compiler doesn't get to
"put the return value" anywhere. It gets told where it is.
I'd happily use your version, but I thought that some versions of gcc
require that input output registers cannot overlap, and would refuse to do
that thing? But if you tell me differently..
Linus
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:08 ` Linus Torvalds
@ 2005-02-09 2:24 ` Andrew Morton
2005-02-09 2:32 ` Linus Torvalds
0 siblings, 1 reply; 18+ messages in thread
From: Andrew Morton @ 2005-02-09 2:24 UTC (permalink / raw)
To: Linus Torvalds; +Cc: rth, mingo, linux-kernel, manfred
Linus Torvalds <torvalds@osdl.org> wrote:
>
> Andrew - do you want to put it in -mm?
I'll take patches from anyone ;)
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:16 ` Linus Torvalds
@ 2005-02-09 2:27 ` Linus Torvalds
2005-02-09 2:33 ` Richard Henderson
2005-02-09 2:36 ` Richard Henderson
1 sibling, 1 reply; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09 2:27 UTC (permalink / raw)
To: Richard Henderson
Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul
On Tue, 8 Feb 2005, Linus Torvalds wrote:
>
> Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
> edx"?
Ahh. Some testing shows that gcc really seems to think of it as "eax or
edx", ie you can do
asm("uglee %0 %1": :"a" (1), "A" (2));
and it will output
movl $1, %eax
movl $2, %edx
uglee %eax %edx
without complaining.
Live and learn.
That brings up another issue: if I don't care which registers a 64-bit
value goes into, can I get the "low reg" and "high reg" names some way? In
this case I'm constrained, and do want eactly %eax:%edx, but in some other
cases I might not care, and wouldn't mind having a
asm("addl %2,%L0 ; adcl $0,%H0"
:"=r" (ll_result)
:"r" (increment), "0" (ll_input));
to add a 32-bit number to a 64-bit one (this is totally made up, since gcc
can probably generate this sequence itself, but you get the idea -
allowing the asm to treat the low/high parts of the 64-bit entity
separately, without forcing them into one specific pair).
v4 with your asm constrain fix appended.
Linus
-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/08 18:18:46-08:00 torvalds@evo.osdl.org
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%).
#
# Updated on input from Richard Henderson on how the "A" constraint
# works on x86.
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +27 -13
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:18:35-08:00 torvalds@evo.osdl.org +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/08 18:18:34-08:00 torvalds@evo.osdl.org +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/08 18:18:34-08:00 torvalds@evo.osdl.org +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:25:22 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:25:22 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-08 18:25:22 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-08 18:25:22 -08:00
@@ -3,7 +3,7 @@
#
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-08 18:25:22 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+4: movl %eax,(%ecx)
+5: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+ .long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-08 18:25:22 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-08 18:25:22 -08:00
@@ -185,6 +185,21 @@
extern void __put_user_bad(void);
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})
/**
* __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
__pu_err; \
})
-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- might_sleep(); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
- __pu_err; \
-})
#define __put_user_u64(x, addr, err) \
__asm__ __volatile__( \
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:24 ` Andrew Morton
@ 2005-02-09 2:32 ` Linus Torvalds
0 siblings, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-09 2:32 UTC (permalink / raw)
To: Andrew Morton; +Cc: rth, mingo, linux-kernel, manfred
On Tue, 8 Feb 2005, Andrew Morton wrote:
>
> I'll take patches from anyone ;)
You'll never live it down. Once you get a name for being easy, you'll
always be known as Andrew "patch-ho" Morton.
Linus
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:27 ` Linus Torvalds
@ 2005-02-09 2:33 ` Richard Henderson
2005-02-11 22:15 ` H. Peter Anvin
0 siblings, 1 reply; 18+ messages in thread
From: Richard Henderson @ 2005-02-09 2:33 UTC (permalink / raw)
To: Linus Torvalds
Cc: Ingo Molnar, Andrew Morton, Kernel Mailing List, Manfred Spraul
On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> That brings up another issue: if I don't care which registers a 64-bit
> value goes into, can I get the "low reg" and "high reg" names some way?
Nope. We never needed one in the i386 backend itself, so we never
added anything like that.
r~
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:16 ` Linus Torvalds
2005-02-09 2:27 ` Linus Torvalds
@ 2005-02-09 2:36 ` Richard Henderson
1 sibling, 0 replies; 18+ messages in thread
From: Richard Henderson @ 2005-02-09 2:36 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Ingo Molnar, Kernel Mailing List, Manfred Spraul
On Tue, Feb 08, 2005 at 06:16:15PM -0800, Linus Torvalds wrote:
> I'd happily use your version, but I thought that some versions of gcc
> require that input output registers cannot overlap, and would refuse to do
> that thing? But if you tell me differently..
No, you're thinking of
asm("" : "=a" (x) : : "eax")
where the clobber overlaps the inputs or outputs.
r~
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-09 2:33 ` Richard Henderson
@ 2005-02-11 22:15 ` H. Peter Anvin
0 siblings, 0 replies; 18+ messages in thread
From: H. Peter Anvin @ 2005-02-11 22:15 UTC (permalink / raw)
To: linux-kernel
Followup to: <20050209023346.GA13911@twiddle.net>
By author: Richard Henderson <rth@twiddle.net>
In newsgroup: linux.dev.kernel
>
> On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> > That brings up another issue: if I don't care which registers a 64-bit
> > value goes into, can I get the "low reg" and "high reg" names some way?
>
> Nope. We never needed one in the i386 backend itself, so we never
> added anything like that.
>
I would really like to see that feature. I've missed it more than a
few times.
-hpa
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-07 6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
2005-02-07 11:44 ` Ingo Molnar
@ 2005-03-12 14:55 ` Coywolf Qi Hunt
1 sibling, 0 replies; 18+ messages in thread
From: Coywolf Qi Hunt @ 2005-03-12 14:55 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Kernel Mailing List, Manfred Spraul, fdavis, fdavis112
On Sun, 6 Feb 2005 22:23:51 -0800 (PST), Linus Torvalds
<torvalds@osdl.org> wrote:
>
> I was looking at some of the code we generate, and happened to notice that
> we have this strange situation where the x86 "get_user()" macros generate
> out-of-line code to do all the address verification etc, but the
> "put_user()" ones do not, and do everything inline.
>
> I also noticed that (probably as a result of this), our "put_user()" on
> old i386 machines does not do the full magic manual page-following. Which
> means that copy-on-write doesn't necessarily work right due to the broken
> paging hw on the original 386 core.
I've noticed that "put_user()" ones do generate out-of-line code in 2.2.x and
later got dropped out in patch-2.3.99-pre9.
( perhaps by http://www.uwsg.iu.edu/hypermail/linux/kernel/0005.2/0487.html )
At that time "put_user is broken for i386 machines (security) - sem
stuff may be wrong too " was put on the job list. Not sure what bug it
was.
--coywolf
>
> I didn't fix the second part, but at least making things out-of-line makes
> it possible. And making "put_user()" be out-of-line seemed quite doable.
>
> I no longer use x86 as my main machine, so this patch is totally untested.
> I've compiled it to see that things look somewhat sane, but that doesn't
> mean much. If I forgot some register or screwed something else up, this
> will result in a totally nonworking kernel, but I thought that maybe
> somebody else would be interested in looking at whether this (a) works,
> (b) migth even shrink the kernel and (c) might make us able to DTRT wrt
> the page table following crud (old i386 cores may be hard to find these
> days, so maybe people don't care).
>
> Linus
>
> ----
> # This is a BitKeeper generated diff -Nru style patch.
> #
> # ChangeSet
> # 2005/02/06 22:10:04-08:00 torvalds@evo.osdl.org
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> # 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +87 -0
> #
> # include/asm-i386/uaccess.h
> # 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +27 -3
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> # 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +0 -0
> # BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
> #
> # arch/i386/lib/Makefile
> # 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +1 -1
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/kernel/i386_ksyms.c
> # 2005/02/06 22:09:53-08:00 torvalds@evo.osdl.org +5 -0
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
> --- a/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
> @@ -97,6 +97,11 @@
> EXPORT_SYMBOL(__get_user_2);
> EXPORT_SYMBOL(__get_user_4);
>
> +EXPORT_SYMBOL(__put_user_1);
> +EXPORT_SYMBOL(__put_user_2);
> +EXPORT_SYMBOL(__put_user_4);
> +EXPORT_SYMBOL(__put_user_8);
> +
> EXPORT_SYMBOL(strpbrk);
> EXPORT_SYMBOL(strstr);
>
> diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
> --- a/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
> @@ -3,7 +3,7 @@
> #
>
> -lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
> +lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
> bitops.o
>
> lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
> diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
> --- /dev/null Wed Dec 31 16:00:00 196900
> +++ b/arch/i386/lib/putuser.S 2005-02-06 22:12:01 -08:00
> @@ -0,0 +1,87 @@
> +/*
> + * __put_user functions.
> + *
> + * (C) Copyright 2005 Linus Torvalds
> + *
> + * These functions have a non-standard call interface
> + * to make them more efficient, especially as they
> + * return an error value in addition to the "real"
> + * return value.
> + */
> +#include <asm/thread_info.h>
> +
> +
> +/*
> + * __put_user_X
> + *
> + * Inputs: %eax[:%edx] contains the data
> + * %ecx contains the address
> + *
> + * Outputs: %eax is error code (0 or -EFAULT)
> + * %ecx is corrupted
> + *
> + * These functions should not modify any other registers,
> + * as they get called from within inline assembly.
> + */
> +
> +#define ENTER pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
> +#define EXIT popl %ebx ; popl %ecx ; ret
> +
> +.text
> +.align 4
> +.globl __put_user_1
> +__put_user_1:
> + ENTER
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +1: movb %al,(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_2
> +__put_user_2:
> + ENTER
> + addl $1,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +2: movw %ax,-1(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_4
> +__put_user_4:
> + ENTER
> + addl $3,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +3: movl %eax,-3(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_8
> +__put_user_8:
> + ENTER
> + addl $7,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +3: movl %eax,-7(%ecx)
> +4: movl %edx,-3(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +bad_put_user:
> + movl $-14,%eax
> + EXIT
> +
> +.section __ex_table,"a"
> + .long 1b,bad_put_user
> + .long 2b,bad_put_user
> + .long 3b,bad_put_user
> + .long 4b,bad_put_user
> +.previous
> diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
> --- a/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
> +++ b/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
> @@ -185,6 +185,21 @@
>
> extern void __put_user_bad(void);
>
> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +
> /**
> * put_user: - Write a simple value into user space.
> * @x: Value to copy to user space.
> @@ -201,9 +216,18 @@
> *
> * Returns zero on success, or -EFAULT on error.
> */
> -#define put_user(x,ptr) \
> - __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
> -
> +#define put_user(x,ptr) \
> +({ int __ret_pu; \
> + __chk_user_ptr(ptr); \
> + switch(sizeof(*(ptr))) { \
> + case 1: __put_user_1(x, ptr); break; \
> + case 2: __put_user_2(x, ptr); break; \
> + case 4: __put_user_4(x, ptr); break; \
> + case 8: __put_user_8(x, ptr); break; \
> + default:__put_user_X(x, ptr); break; \
> + } \
> + __ret_pu; \
> +})
>
> /**
> * __get_user: - Get a simple variable from user space, with less checking.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
Coywolf Qi Hunt
Homepage http://sosdg.org/~coywolf/
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
2005-02-12 1:55 Chuck Ebbert
@ 2005-02-12 2:03 ` Linus Torvalds
0 siblings, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2005-02-12 2:03 UTC (permalink / raw)
To: Chuck Ebbert
Cc: Andrew Morton, Manfred Spraul, linux-kernel, Richard Henderson,
Ingo Molnar
On Fri, 11 Feb 2005, Chuck Ebbert wrote:
>
> And in any case is it too much to ask for an 80-column limit? ;)
Yes. Dammit, especially for something like this, the long-line version is
just _so_ much more readable. Compare my and your version wrt being able
to tell what the differences between the four different cases are.
In the single-long-line version, the differences are trivially visible. In
the "prettified" version (aka "I'm still living in the 60's, and proud of
it" version), it's impossible to pick out the differences.
If you don't like long lines, use a helper #define for the common part or
something.
Linus
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: out-of-line x86 "put_user()" implementation
@ 2005-02-12 1:55 Chuck Ebbert
2005-02-12 2:03 ` Linus Torvalds
0 siblings, 1 reply; 18+ messages in thread
From: Chuck Ebbert @ 2005-02-12 1:55 UTC (permalink / raw)
To: Linus Torvalds
Cc: Andrew Morton, Manfred Spraul, linux-kernel, Richard Henderson,
Ingo Molnar
On Tue, 8 Feb 2005 at 18:27:08 -0800, Linus Torvalds wrote:
> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +
Should "cc" be on the clobber list since all the called functions alter EFLAGS?
And in any case is it too much to ask for an 80-column limit? ;)
#define __put_user_1(x, ptr) \
__asm__ __volatile__( \
"call __put_user_1" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_2(x, ptr) \
__asm__ __volatile__( \
"call __put_user_2" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_4(x, ptr) \
__asm__ __volatile__( \
"call __put_user_4" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_8(x, ptr) \
__asm__ __volatile__( \
"call __put_user_8" \
: "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_X(x, ptr) \
__asm__ __volatile__( \
"call __put_user_X" \
: "=a" (__ret_pu) \
: "c" (ptr) \
: "cc")
--
Chuck
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2005-03-12 14:56 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-07 6:23 out-of-line x86 "put_user()" implementation Linus Torvalds
2005-02-07 11:44 ` Ingo Molnar
2005-02-08 1:20 ` Linus Torvalds
2005-02-08 17:43 ` Pavel Machek
2005-02-08 19:35 ` Valdis.Kletnieks
2005-02-09 1:25 ` Richard Henderson
2005-02-09 2:08 ` Linus Torvalds
2005-02-09 2:24 ` Andrew Morton
2005-02-09 2:32 ` Linus Torvalds
2005-02-09 1:27 ` Richard Henderson
2005-02-09 2:16 ` Linus Torvalds
2005-02-09 2:27 ` Linus Torvalds
2005-02-09 2:33 ` Richard Henderson
2005-02-11 22:15 ` H. Peter Anvin
2005-02-09 2:36 ` Richard Henderson
2005-03-12 14:55 ` Coywolf Qi Hunt
2005-02-12 1:55 Chuck Ebbert
2005-02-12 2:03 ` Linus Torvalds
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).