* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-12 15:42 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-12 15:42 UTC (permalink / raw)
To: Russell King
Cc: Daniel Thompson, Rob Clark, Nicolas Pitre, Arnd Bergmann,
linux-arm-kernel, linux-kernel, patches, linaro-kernel
A new atomic modeset/pageflip ioctl being developed in DRM requires
get_user() to work for 64bit types (in addition to just put_user()).
v1: original
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v3: fix a couple of checkpatch issues
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
Notes:
I'd like to wake this patch up again. It was rejected back in 2012 on
the grounds that other architectures (notably x86-32) didn't implement
this so adding for ARM risked portability problems in drivers. However
shortly after the discussion (in fact I believe that as a *result* of
that discussion) support for 64-bit get_user() was added for x86-32.
A quick review of different architectures' uaccess.h shows that ARM is
in the minority (even after excluding 64-bit architectures) in not
implementing this feature.
The reasons to wake it up are the same as before. Recent contributions,
including to DRM[1] and binder[2], would prefer to use the 64-bit values
in their interfaces without gotchas like having to use copy_from_user().
[1] http://thread.gmane.org/gmane.comp.video.dri.devel/102135/focus=102149
[2] http://thread.gmane.org/gmane.linux.kernel/1653448/focus=1653449
arch/arm/include/asm/uaccess.h | 18 +++++++++++++++++-
arch/arm/lib/getuser.S | 17 ++++++++++++++++-
2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..5f7db3fb 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,7 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +116,7 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +127,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef BIG_ENDIAN
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, (uintptr_t)__p + 4, __e, __l, __s)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +152,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..ed98707 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,6 +66,19 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_8)
+
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
@@ -77,4 +90,6 @@ ENDPROC(__get_user_bad)
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad
+ .long 6b, __get_user_bad
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-12 15:42 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-12 15:42 UTC (permalink / raw)
To: linux-arm-kernel
A new atomic modeset/pageflip ioctl being developed in DRM requires
get_user() to work for 64bit types (in addition to just put_user()).
v1: original
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v3: fix a couple of checkpatch issues
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
Notes:
I'd like to wake this patch up again. It was rejected back in 2012 on
the grounds that other architectures (notably x86-32) didn't implement
this so adding for ARM risked portability problems in drivers. However
shortly after the discussion (in fact I believe that as a *result* of
that discussion) support for 64-bit get_user() was added for x86-32.
A quick review of different architectures' uaccess.h shows that ARM is
in the minority (even after excluding 64-bit architectures) in not
implementing this feature.
The reasons to wake it up are the same as before. Recent contributions,
including to DRM[1] and binder[2], would prefer to use the 64-bit values
in their interfaces without gotchas like having to use copy_from_user().
[1] http://thread.gmane.org/gmane.comp.video.dri.devel/102135/focus=102149
[2] http://thread.gmane.org/gmane.linux.kernel/1653448/focus=1653449
arch/arm/include/asm/uaccess.h | 18 +++++++++++++++++-
arch/arm/lib/getuser.S | 17 ++++++++++++++++-
2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..5f7db3fb 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,7 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +116,7 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +127,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef BIG_ENDIAN
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, (uintptr_t)__p + 4, __e, __l, __s)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +152,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..ed98707 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,6 +66,19 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_8)
+
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
@@ -77,4 +90,6 @@ ENDPROC(__get_user_bad)
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad
+ .long 6b, __get_user_bad
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-12 15:42 ` Daniel Thompson
@ 2014-06-12 15:58 ` Russell King - ARM Linux
-1 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-12 15:58 UTC (permalink / raw)
To: Daniel Thompson
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On Thu, Jun 12, 2014 at 04:42:35PM +0100, Daniel Thompson wrote:
> A new atomic modeset/pageflip ioctl being developed in DRM requires
> get_user() to work for 64bit types (in addition to just put_user()).
>
> v1: original
> v2: pass correct size to check_uaccess, and better handling of narrowing
> double word read with __get_user_xb() (Russell King's suggestion)
> v3: fix a couple of checkpatch issues
This is still unsafe.
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
So, __r2 becomes the type of 'x'. If 'x' is a 64-bit type, and *p is
an 8-bit, 16-bit, or 32-bit type, this fails horribly by leaving the
upper word of __r2 undefined.
__r2 must follow the size of the value we are reading. I think the
last solution which was proposed was this:
arch/arm/include/asm/uaccess.h | 17 +++++++++++++----
arch/arm/lib/getuser.S | 33 ++++++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 72abdc5..747f2cb 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -93,6 +93,9 @@ static inline void set_fs(mm_segment_t fs)
: "cc"); \
flag; })
+#define __inttype(x) \
+ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
+
/*
* Single-value transfer routines. They automatically use the right
* size if we just have the right pointer type. Note that the functions
@@ -107,14 +110,16 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_8(void *);
-#define __GUP_CLOBBER_1 "lr", "cc"
+#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
-#define __GUP_CLOBBER_2 "ip", "lr", "cc"
+#define __GUP_CLOBBER_2 "ip", "lr", "cc"
#else
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
-#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -129,7 +134,7 @@ extern int __get_user_4(void *);
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register __inttype(*p) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +147,9 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
@@ -150,6 +158,7 @@ extern int __get_user_4(void *);
#define get_user(x,p) \
({ \
+ __chk_user_ptr(ptr); \
might_fault(); \
__get_user_check(x,p); \
})
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..3583c83 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2/r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -32,6 +32,14 @@
#include <asm/errno.h>
#include <asm/domain.h>
+#ifdef __ARMEB__
+#define rlo8 r3
+#define rhi8 r2
+#else
+#define rlo8 r2
+#define rhi8 r3
+#endif
+
ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
@@ -66,15 +74,38 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 4, r1, r2, __get_user_bad8
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define GU8_FIXUPS 5b, 6b
+5: ldrt rlo8, [r0], #4
+6: ldrt rhi8, [r0], #0
+#elif __LINUX_ARM_ARCH__ >= 6
+#define GU8_FIXUPS 5b
+5: ldrd r2, [r0]
+#else
+#define GU8_FIXUPS 5b, 6b
+5: ldr rlo8, [r0, #0]
+6: ldr rhi8, [r0, #4]
+#endif
+ mov r0, #0
+ mov pc, lr
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .irp param, GU8_FIXUPS
+ .long \param, __get_user_bad8
+ .endr
.popsection
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-12 15:58 ` Russell King - ARM Linux
0 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-12 15:58 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Jun 12, 2014 at 04:42:35PM +0100, Daniel Thompson wrote:
> A new atomic modeset/pageflip ioctl being developed in DRM requires
> get_user() to work for 64bit types (in addition to just put_user()).
>
> v1: original
> v2: pass correct size to check_uaccess, and better handling of narrowing
> double word read with __get_user_xb() (Russell King's suggestion)
> v3: fix a couple of checkpatch issues
This is still unsafe.
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
So, __r2 becomes the type of 'x'. If 'x' is a 64-bit type, and *p is
an 8-bit, 16-bit, or 32-bit type, this fails horribly by leaving the
upper word of __r2 undefined.
__r2 must follow the size of the value we are reading. I think the
last solution which was proposed was this:
arch/arm/include/asm/uaccess.h | 17 +++++++++++++----
arch/arm/lib/getuser.S | 33 ++++++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 72abdc5..747f2cb 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -93,6 +93,9 @@ static inline void set_fs(mm_segment_t fs)
: "cc"); \
flag; })
+#define __inttype(x) \
+ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
+
/*
* Single-value transfer routines. They automatically use the right
* size if we just have the right pointer type. Note that the functions
@@ -107,14 +110,16 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_8(void *);
-#define __GUP_CLOBBER_1 "lr", "cc"
+#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
-#define __GUP_CLOBBER_2 "ip", "lr", "cc"
+#define __GUP_CLOBBER_2 "ip", "lr", "cc"
#else
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
-#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -129,7 +134,7 @@ extern int __get_user_4(void *);
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register __inttype(*p) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +147,9 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
@@ -150,6 +158,7 @@ extern int __get_user_4(void *);
#define get_user(x,p) \
({ \
+ __chk_user_ptr(ptr); \
might_fault(); \
__get_user_check(x,p); \
})
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..3583c83 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2/r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -32,6 +32,14 @@
#include <asm/errno.h>
#include <asm/domain.h>
+#ifdef __ARMEB__
+#define rlo8 r3
+#define rhi8 r2
+#else
+#define rlo8 r2
+#define rhi8 r3
+#endif
+
ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
@@ -66,15 +74,38 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 4, r1, r2, __get_user_bad8
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define GU8_FIXUPS 5b, 6b
+5: ldrt rlo8, [r0], #4
+6: ldrt rhi8, [r0], #0
+#elif __LINUX_ARM_ARCH__ >= 6
+#define GU8_FIXUPS 5b
+5: ldrd r2, [r0]
+#else
+#define GU8_FIXUPS 5b, 6b
+5: ldr rlo8, [r0, #0]
+6: ldr rhi8, [r0, #4]
+#endif
+ mov r0, #0
+ mov pc, lr
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .irp param, GU8_FIXUPS
+ .long \param, __get_user_bad8
+ .endr
.popsection
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-12 15:42 ` Daniel Thompson
@ 2014-06-12 17:04 ` Arnd Bergmann
-1 siblings, 0 replies; 22+ messages in thread
From: Arnd Bergmann @ 2014-06-12 17:04 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Daniel Thompson, Russell King, Nicolas Pitre, linaro-kernel,
patches, linux-kernel, Rob Clark, Arnd Bergmann
On Thursday 12 June 2014 16:42:35 Daniel Thompson wrote:
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +152,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4); \
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8); \
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
>
I don't think there is a way to do this without copying the
__builtin_choose_expr() hack from x86.
Arnd
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-12 17:04 ` Arnd Bergmann
0 siblings, 0 replies; 22+ messages in thread
From: Arnd Bergmann @ 2014-06-12 17:04 UTC (permalink / raw)
To: linux-arm-kernel
On Thursday 12 June 2014 16:42:35 Daniel Thompson wrote:
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +152,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4); \
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8); \
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
>
I don't think there is a way to do this without copying the
__builtin_choose_expr() hack from x86.
Arnd
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-12 15:58 ` Russell King - ARM Linux
@ 2014-06-17 10:17 ` Daniel Thompson
-1 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 10:17 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On 12/06/14 16:58, Russell King - ARM Linux wrote:
> On Thu, Jun 12, 2014 at 04:42:35PM +0100, Daniel Thompson wrote:
>> A new atomic modeset/pageflip ioctl being developed in DRM requires
>> get_user() to work for 64bit types (in addition to just put_user()).
>>
>> v1: original
>> v2: pass correct size to check_uaccess, and better handling of narrowing
>> double word read with __get_user_xb() (Russell King's suggestion)
>> v3: fix a couple of checkpatch issues
>
> This is still unsafe.
>
>> #define __get_user_check(x,p) \
>> ({ \
>> unsigned long __limit = current_thread_info()->addr_limit - 1; \
>> register const typeof(*(p)) __user *__p asm("r0") = (p);\
>> - register unsigned long __r2 asm("r2"); \
>> + register typeof(x) __r2 asm("r2"); \
>
> So, __r2 becomes the type of 'x'. If 'x' is a 64-bit type, and *p is
> an 8-bit, 16-bit, or 32-bit type, this fails horribly by leaving the
> upper word of __r2 undefined.
It is true that after the switch statement the contents of r3 are
undefined. However...
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +152,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4);\
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8);\
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
... at this point there is a narrowing cast followed by an implicit
widening. This results in compiler either ignoring r3 altogether or, if
spilling to the stack, generating code to set r3 to zero before doing
the store.
I think this approach also makes 8-bit and 16-bit get_user() faster in
some cases where the type of *p and x are similar 8- or 16-bit types.
This is because the compiler will never generate redundant narrowings.
Note that the speed improvement looks extremely marginal; the size of
the .text section (for a multi_v7_defconfig kernel) only gets 96 bytes
smaller (a.k.a. 0.0015%).
Daniel.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-17 10:17 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 10:17 UTC (permalink / raw)
To: linux-arm-kernel
On 12/06/14 16:58, Russell King - ARM Linux wrote:
> On Thu, Jun 12, 2014 at 04:42:35PM +0100, Daniel Thompson wrote:
>> A new atomic modeset/pageflip ioctl being developed in DRM requires
>> get_user() to work for 64bit types (in addition to just put_user()).
>>
>> v1: original
>> v2: pass correct size to check_uaccess, and better handling of narrowing
>> double word read with __get_user_xb() (Russell King's suggestion)
>> v3: fix a couple of checkpatch issues
>
> This is still unsafe.
>
>> #define __get_user_check(x,p) \
>> ({ \
>> unsigned long __limit = current_thread_info()->addr_limit - 1; \
>> register const typeof(*(p)) __user *__p asm("r0") = (p);\
>> - register unsigned long __r2 asm("r2"); \
>> + register typeof(x) __r2 asm("r2"); \
>
> So, __r2 becomes the type of 'x'. If 'x' is a 64-bit type, and *p is
> an 8-bit, 16-bit, or 32-bit type, this fails horribly by leaving the
> upper word of __r2 undefined.
It is true that after the switch statement the contents of r3 are
undefined. However...
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +152,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4);\
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8);\
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
... at this point there is a narrowing cast followed by an implicit
widening. This results in compiler either ignoring r3 altogether or, if
spilling to the stack, generating code to set r3 to zero before doing
the store.
I think this approach also makes 8-bit and 16-bit get_user() faster in
some cases where the type of *p and x are similar 8- or 16-bit types.
This is because the compiler will never generate redundant narrowings.
Note that the speed improvement looks extremely marginal; the size of
the .text section (for a multi_v7_defconfig kernel) only gets 96 bytes
smaller (a.k.a. 0.0015%).
Daniel.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-17 10:17 ` Daniel Thompson
@ 2014-06-17 11:09 ` Russell King - ARM Linux
-1 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-17 11:09 UTC (permalink / raw)
To: Daniel Thompson
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
> ... at this point there is a narrowing cast followed by an implicit
> widening. This results in compiler either ignoring r3 altogether or, if
> spilling to the stack, generating code to set r3 to zero before doing
> the store.
In actual fact, there's very little difference between the two
implementations in terms of generated code.
The difference between them is what happens on the 64-bit big endian
narrowing case, where we use __get_user_4 with your version. This
adds one additional instruction.
The little endian case results in identical code except for register
usage - for example, with my test for a 32-bit being widened to 64-bit:
str lr, [sp, #-4]!
- mov r3, r0
+ mov ip, r0
mov r0, r1
#APP
@ 280 "t-getuser.c" 1
bl __get_user_4
@ 0 "" 2
- str r2, [r3, #0]
- mov r2, #0
- str r2, [r3, #4]
+ mov r3, #0
+ str r2, [ip, #0]
+ str r3, [ip, #4]
ldr pc, [sp], #4
and 64-bit narrowed to 32-bit:
str lr, [sp, #-4]!
- mov ip, r0
+ mov r3, r0
mov r0, r1
#APP
@ 275 "t-getuser.c" 1
- bl __get_user_8
+ bl __get_user_4
@ 0 "" 2
- str r2, [ip, #0]
+ str r2, [r3, #0]
ldr pc, [sp], #4
In terms of type checking, both seem to get it correct (which is something
I'm concerned about by any implementation since this is just as important
as the generated code).
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-17 11:09 ` Russell King - ARM Linux
0 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-17 11:09 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
> ... at this point there is a narrowing cast followed by an implicit
> widening. This results in compiler either ignoring r3 altogether or, if
> spilling to the stack, generating code to set r3 to zero before doing
> the store.
In actual fact, there's very little difference between the two
implementations in terms of generated code.
The difference between them is what happens on the 64-bit big endian
narrowing case, where we use __get_user_4 with your version. This
adds one additional instruction.
The little endian case results in identical code except for register
usage - for example, with my test for a 32-bit being widened to 64-bit:
str lr, [sp, #-4]!
- mov r3, r0
+ mov ip, r0
mov r0, r1
#APP
@ 280 "t-getuser.c" 1
bl __get_user_4
@ 0 "" 2
- str r2, [r3, #0]
- mov r2, #0
- str r2, [r3, #4]
+ mov r3, #0
+ str r2, [ip, #0]
+ str r3, [ip, #4]
ldr pc, [sp], #4
and 64-bit narrowed to 32-bit:
str lr, [sp, #-4]!
- mov ip, r0
+ mov r3, r0
mov r0, r1
#APP
@ 275 "t-getuser.c" 1
- bl __get_user_8
+ bl __get_user_4
@ 0 "" 2
- str r2, [ip, #0]
+ str r2, [r3, #0]
ldr pc, [sp], #4
In terms of type checking, both seem to get it correct (which is something
I'm concerned about by any implementation since this is just as important
as the generated code).
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-17 11:09 ` Russell King - ARM Linux
@ 2014-06-17 13:28 ` Daniel Thompson
-1 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 13:28 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On 17/06/14 12:09, Russell King - ARM Linux wrote:
> On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
>> ... at this point there is a narrowing cast followed by an implicit
>> widening. This results in compiler either ignoring r3 altogether or, if
>> spilling to the stack, generating code to set r3 to zero before doing
>> the store.
>
> In actual fact, there's very little difference between the two
> implementations in terms of generated code.
>
> The difference between them is what happens on the 64-bit big endian
> narrowing case, where we use __get_user_4 with your version. This
> adds one additional instruction.
Good point.
> and 64-bit narrowed to 32-bit:
>
> str lr, [sp, #-4]!
> - mov ip, r0
> + mov r3, r0
> mov r0, r1
> #APP
> @ 275 "t-getuser.c" 1
> - bl __get_user_8
> + bl __get_user_4
> @ 0 "" 2
> - str r2, [ip, #0]
> + str r2, [r3, #0]
> ldr pc, [sp], #4
The latter case avoids allocating r3 for the __get_user_x and should
reduce register pressure and, potentially, saves a few instructions
elsewhere (one of my rather large test functions does demonstrate this
effect).
I don't know if we care about that. If we do I'm certainly happy to put
a patch together that exploits this (whilst avoiding the add in the big
endian case).
Daniel.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-17 13:28 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 13:28 UTC (permalink / raw)
To: linux-arm-kernel
On 17/06/14 12:09, Russell King - ARM Linux wrote:
> On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
>> ... at this point there is a narrowing cast followed by an implicit
>> widening. This results in compiler either ignoring r3 altogether or, if
>> spilling to the stack, generating code to set r3 to zero before doing
>> the store.
>
> In actual fact, there's very little difference between the two
> implementations in terms of generated code.
>
> The difference between them is what happens on the 64-bit big endian
> narrowing case, where we use __get_user_4 with your version. This
> adds one additional instruction.
Good point.
> and 64-bit narrowed to 32-bit:
>
> str lr, [sp, #-4]!
> - mov ip, r0
> + mov r3, r0
> mov r0, r1
> #APP
> @ 275 "t-getuser.c" 1
> - bl __get_user_8
> + bl __get_user_4
> @ 0 "" 2
> - str r2, [ip, #0]
> + str r2, [r3, #0]
> ldr pc, [sp], #4
The latter case avoids allocating r3 for the __get_user_x and should
reduce register pressure and, potentially, saves a few instructions
elsewhere (one of my rather large test functions does demonstrate this
effect).
I don't know if we care about that. If we do I'm certainly happy to put
a patch together that exploits this (whilst avoiding the add in the big
endian case).
Daniel.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-17 13:28 ` Daniel Thompson
@ 2014-06-17 13:36 ` Russell King - ARM Linux
-1 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-17 13:36 UTC (permalink / raw)
To: Daniel Thompson
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On Tue, Jun 17, 2014 at 02:28:44PM +0100, Daniel Thompson wrote:
> On 17/06/14 12:09, Russell King - ARM Linux wrote:
> > On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
> >> ... at this point there is a narrowing cast followed by an implicit
> >> widening. This results in compiler either ignoring r3 altogether or, if
> >> spilling to the stack, generating code to set r3 to zero before doing
> >> the store.
> >
> > In actual fact, there's very little difference between the two
> > implementations in terms of generated code.
> >
> > The difference between them is what happens on the 64-bit big endian
> > narrowing case, where we use __get_user_4 with your version. This
> > adds one additional instruction.
>
> Good point.
>
>
> > and 64-bit narrowed to 32-bit:
> >
> > str lr, [sp, #-4]!
> > - mov ip, r0
> > + mov r3, r0
> > mov r0, r1
> > #APP
> > @ 275 "t-getuser.c" 1
> > - bl __get_user_8
> > + bl __get_user_4
> > @ 0 "" 2
> > - str r2, [ip, #0]
> > + str r2, [r3, #0]
> > ldr pc, [sp], #4
>
> The later case avoids allocating r3 for the __get_user_x and should
> reduce register pressure and, potentially, saves a few instructions
> elsewhere (one of my rather large test functions does demonstrate this
> effect).
>
> I don't know if we care about that. If we do I'm certainly happy to put
> a patch together than exploits this (whilst avoiding the add in the big
> endian case).
No need - the + case is your version, the - case is my version. So your
version wins on this point. :)
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-17 13:36 ` Russell King - ARM Linux
0 siblings, 0 replies; 22+ messages in thread
From: Russell King - ARM Linux @ 2014-06-17 13:36 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Jun 17, 2014 at 02:28:44PM +0100, Daniel Thompson wrote:
> On 17/06/14 12:09, Russell King - ARM Linux wrote:
> > On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
> >> ... at this point there is a narrowing cast followed by an implicit
> >> widening. This results in compiler either ignoring r3 altogether or, if
> >> spilling to the stack, generating code to set r3 to zero before doing
> >> the store.
> >
> > In actual fact, there's very little difference between the two
> > implementations in terms of generated code.
> >
> > The difference between them is what happens on the 64-bit big endian
> > narrowing case, where we use __get_user_4 with your version. This
> > adds one additional instruction.
>
> Good point.
>
>
> > and 64-bit narrowed to 32-bit:
> >
> > str lr, [sp, #-4]!
> > - mov ip, r0
> > + mov r3, r0
> > mov r0, r1
> > #APP
> > @ 275 "t-getuser.c" 1
> > - bl __get_user_8
> > + bl __get_user_4
> > @ 0 "" 2
> > - str r2, [ip, #0]
> > + str r2, [r3, #0]
> > ldr pc, [sp], #4
>
> The later case avoids allocating r3 for the __get_user_x and should
> reduce register pressure and, potentially, saves a few instructions
> elsewhere (one of my rather large test functions does demonstrate this
> effect).
>
> I don't know if we care about that. If we do I'm certainly happy to put
> a patch together than exploits this (whilst avoiding the add in the big
> endian case).
No need - the + case is your version, the - case is my version. So your
version wins on this point. :)
--
FTTC broadband for 0.8mile line: now at 9.7Mbps down 460kbps up... slowly
improving, and getting towards what was expected from it.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3] ARM: add get_user() support for 8 byte types
2014-06-17 13:36 ` Russell King - ARM Linux
@ 2014-06-17 13:54 ` Daniel Thompson
-1 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 13:54 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Rob Clark, Nicolas Pitre, Arnd Bergmann, linux-arm-kernel,
linux-kernel, patches, linaro-kernel
On 17/06/14 14:36, Russell King - ARM Linux wrote:
> On Tue, Jun 17, 2014 at 02:28:44PM +0100, Daniel Thompson wrote:
>> On 17/06/14 12:09, Russell King - ARM Linux wrote:
>>> On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
>>>> ... at this point there is a narrowing cast followed by an implicit
>>>> widening. This results in compiler either ignoring r3 altogether or, if
>>>> spilling to the stack, generating code to set r3 to zero before doing
>>>> the store.
>>>
>>> In actual fact, there's very little difference between the two
>>> implementations in terms of generated code.
>>>
>>> The difference between them is what happens on the 64-bit big endian
>>> narrowing case, where we use __get_user_4 with your version. This
>>> adds one additional instruction.
>>
>> Good point.
>>
>>
>>> and 64-bit narrowed to 32-bit:
>>>
>>> str lr, [sp, #-4]!
>>> - mov ip, r0
>>> + mov r3, r0
>>> mov r0, r1
>>> #APP
>>> @ 275 "t-getuser.c" 1
>>> - bl __get_user_8
>>> + bl __get_user_4
>>> @ 0 "" 2
>>> - str r2, [ip, #0]
>>> + str r2, [r3, #0]
>>> ldr pc, [sp], #4
>>
>> The later case avoids allocating r3 for the __get_user_x and should
>> reduce register pressure and, potentially, saves a few instructions
>> elsewhere (one of my rather large test functions does demonstrate this
>> effect).
>>
>> I don't know if we care about that. If we do I'm certainly happy to put
>> a patch together than exploits this (whilst avoiding the add in the big
>> endian case).
>
> No need - the + case is your version, the - case is my version. So your
> version wins on this point. :)
:) Thanks, although credit really goes to Rob Clark...
I think currently:
1. Rob's patch is better for register pressure in the narrowing case
(above).
2. Your patch is probably better for big endian due to the add in Rob's
version. I say probably because, without proof, I suspect the cost
of the add would in most cases outweigh the register pressure
benefit.
3. Your patch has better implementation of __get_user_8 (it uses ldrd).
Hence I suspect we need to combine elements from both patches.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3] ARM: add get_user() support for 8 byte types
@ 2014-06-17 13:54 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-17 13:54 UTC (permalink / raw)
To: linux-arm-kernel
On 17/06/14 14:36, Russell King - ARM Linux wrote:
> On Tue, Jun 17, 2014 at 02:28:44PM +0100, Daniel Thompson wrote:
>> On 17/06/14 12:09, Russell King - ARM Linux wrote:
>>> On Tue, Jun 17, 2014 at 11:17:23AM +0100, Daniel Thompson wrote:
>>>> ... at this point there is a narrowing cast followed by an implicit
>>>> widening. This results in compiler either ignoring r3 altogether or, if
>>>> spilling to the stack, generating code to set r3 to zero before doing
>>>> the store.
>>>
>>> In actual fact, there's very little difference between the two
>>> implementations in terms of generated code.
>>>
>>> The difference between them is what happens on the 64-bit big endian
>>> narrowing case, where we use __get_user_4 with your version. This
>>> adds one additional instruction.
>>
>> Good point.
>>
>>
>>> and 64-bit narrowed to 32-bit:
>>>
>>> str lr, [sp, #-4]!
>>> - mov ip, r0
>>> + mov r3, r0
>>> mov r0, r1
>>> #APP
>>> @ 275 "t-getuser.c" 1
>>> - bl __get_user_8
>>> + bl __get_user_4
>>> @ 0 "" 2
>>> - str r2, [ip, #0]
>>> + str r2, [r3, #0]
>>> ldr pc, [sp], #4
>>
>> The later case avoids allocating r3 for the __get_user_x and should
>> reduce register pressure and, potentially, saves a few instructions
>> elsewhere (one of my rather large test functions does demonstrate this
>> effect).
>>
>> I don't know if we care about that. If we do I'm certainly happy to put
>> a patch together than exploits this (whilst avoiding the add in the big
>> endian case).
>
> No need - the + case is your version, the - case is my version. So your
> version wins on this point. :)
:) Thanks, although credit really goes to Rob Clark...
I think currently:
1. Rob's patch is better for register pressure in the narrowing case
(above).
2. Your patch is probably better for big endian due to the add in Rob's
version. I say probably because, without proof, I suspect the cost
of the add would in most cases outweigh the register pressure
benefit.
3. Your patch has better implementation of __get_user_8 (it uses ldrd).
Hence I suspect we need to combine elements from both patches.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4] ARM: add get_user() support for 8 byte types
2014-06-12 15:42 ` Daniel Thompson
@ 2014-06-20 10:01 ` Daniel Thompson
-1 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-20 10:01 UTC (permalink / raw)
To: Russell King
Cc: Daniel Thompson, Rob Clark, Nicolas Pitre, Arnd Bergmann,
John Stultz, linux-arm-kernel, linux-kernel, patches,
linaro-kernel
Recent contributions, including to DRM and binder, introduce 64-bit
values in their interfaces. A common motivation for this is to allow
the same ABI for 32- and 64-bit userspaces (and therefore also a shared
ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
avoid gotchas like having to use copy_from_user().
This feature is already implemented on x86-32 and the majority of other
32-bit architectures. The current list of get_user_8 hold out
architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
mn10300, sh.
Credit:
My name sits rather uneasily at the top of this patch. The v1 and
v2 versions of the patch were written by Rob Clark and to produce v4
I mostly copied code from Russell King and H. Peter Anvin. However I
have mangled the patch sufficiently that *blame* is rightfully mine
even if credit should be more widely shared.
Changelog:
v4: remove an inlined add on big endian systems (spotted by Russell King),
used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
cleared r3 on EFAULT during __get_user_8.
v3: fix a couple of checkpatch issues
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v1: original
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..7057cf8 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..82b024a 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,15 +66,50 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH v4] ARM: add get_user() support for 8 byte types
@ 2014-06-20 10:01 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-06-20 10:01 UTC (permalink / raw)
To: linux-arm-kernel
Recent contributions, including to DRM and binder, introduce 64-bit
values in their interfaces. A common motivation for this is to allow
the same ABI for 32- and 64-bit userspaces (and therefore also a shared
ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
avoid gotchas like having to use copy_from_user().
This feature is already implemented on x86-32 and the majority of other
32-bit architectures. The current list of get_user_8 hold out
architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
mn10300, sh.
Credit:
My name sits rather uneasily at the top of this patch. The v1 and
v2 versions of the patch were written by Rob Clark and to produce v4
I mostly copied code from Russell King and H. Peter Anvin. However I
have mangled the patch sufficiently that *blame* is rightfully mine
even if credit should be more widely shared.
Changelog:
v4: remove an inlined add on big endian systems (spotted by Russell King),
used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
cleared r3 on EFAULT during __get_user_8.
v3: fix a couple of checkpatch issues
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v1: original
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..7057cf8 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb4..82b024a 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,15 +66,50 @@ ENTRY(__get_user_4)
mov pc, lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ mov pc, lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 3.16.0-rc3-rmk v5] ARM: add get_user() support for 8 byte types
2014-06-20 10:01 ` Daniel Thompson
@ 2014-07-10 19:47 ` Daniel Thompson
-1 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-07-10 19:47 UTC (permalink / raw)
To: Russell King
Cc: Daniel Thompson, Rob Clark, Nicolas Pitre, Arnd Bergmann,
John Stultz, linux-arm-kernel, linux-kernel, patches,
linaro-kernel
Recent contributions, including to DRM and binder, introduce 64-bit
values in their interfaces. A common motivation for this is to allow
the same ABI for 32- and 64-bit userspaces (and therefore also a shared
ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
avoid gotchas like having to use copy_from_user().
This feature is already implemented on x86-32 and the majority of other
32-bit architectures. The current list of get_user_8 hold out
architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
mn10300, sh.
Credit:
My name sits rather uneasily at the top of this patch. The v1 and
v2 versions of the patch were written by Rob Clark and to produce v4
I mostly copied code from Russell King and H. Peter Anvin. However I
have mangled the patch sufficiently that *blame* is rightfully mine
even if credit should be more widely shared.
Changelog:
v5: updated to use the ret macro (requested by Russell King)
v4: remove an inlined add on big endian systems (spotted by Russell King),
used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
cleared r3 on EFAULT during __get_user_8.
v3: fix a couple of checkpatch issues
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v1: original
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..7057cf8 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 0f958e3..9386000 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,15 +66,50 @@ ENTRY(__get_user_4)
ret lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
ret lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 3.16.0-rc3-rmk v5] ARM: add get_user() support for 8 byte types
@ 2014-07-10 19:47 ` Daniel Thompson
0 siblings, 0 replies; 22+ messages in thread
From: Daniel Thompson @ 2014-07-10 19:47 UTC (permalink / raw)
To: linux-arm-kernel
Recent contributions, including to DRM and binder, introduce 64-bit
values in their interfaces. A common motivation for this is to allow
the same ABI for 32- and 64-bit userspaces (and therefore also a shared
ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
avoid gotchas like having to use copy_from_user().
This feature is already implemented on x86-32 and the majority of other
32-bit architectures. The current list of get_user_8 hold out
architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
mn10300, sh.
Credit:
My name sits rather uneasily at the top of this patch. The v1 and
v2 versions of the patch were written by Rob Clark and to produce v4
I mostly copied code from Russell King and H. Peter Anvin. However I
have mangled the patch sufficiently that *blame* is rightfully mine
even if credit should be more widely shared.
Changelog:
v5: updated to use the ret macro (requested by Russell King)
v4: remove an inlined add on big endian systems (spotted by Russell King),
used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
cleared r3 on EFAULT during __get_user_8.
v3: fix a couple of checkpatch issues
v2: pass correct size to check_uaccess, and better handling of narrowing
double word read with __get_user_xb() (Russell King's suggestion)
v1: original
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 75d9579..7057cf8 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 0f958e3..9386000 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -66,15 +66,50 @@ ENTRY(__get_user_4)
ret lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
ret lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
--
1.9.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 3.16.0-rc3-rmk v5] ARM: add get_user() support for 8 byte types
2014-07-10 19:47 ` Daniel Thompson
@ 2014-08-21 5:36 ` Victor Kamensky
-1 siblings, 0 replies; 22+ messages in thread
From: Victor Kamensky @ 2014-08-21 5:36 UTC (permalink / raw)
To: Daniel Thompson
Cc: Russell King, Nicolas Pitre, linaro-kernel, Patch Tracking,
open list, Rob Clark, John Stultz, Arnd Bergmann,
linux-arm-kernel
On 10 July 2014 12:47, Daniel Thompson <daniel.thompson@linaro.org> wrote:
> Recent contributions, including to DRM and binder, introduce 64-bit
> values in their interfaces. A common motivation for this is to allow
> the same ABI for 32- and 64-bit userspaces (and therefore also a shared
> ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
> avoid gotchas like having to use copy_from_user().
>
> This feature is already implemented on x86-32 and the majority of other
> 32-bit architectures. The current list of get_user_8 hold out
> architectures are: arm, avr32, blackfin, m32r, metag, microblaze,
> mn10300, sh.
>
> Credit:
>
> My name sits rather uneasily at the top of this patch. The v1 and
> v2 versions of the patch were written by Rob Clark and to produce v4
> I mostly copied code from Russell King and H. Peter Anvin. However I
> have mangled the patch sufficiently that *blame* is rightfully mine
> even if credit should be more widely shared.
>
> Changelog:
>
> v5: updated to use the ret macro (requested by Russell King)
> v4: remove an inlined add on big endian systems (spotted by Russell King),
> used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
> cleared r3 on EFAULT during __get_user_8.
> v3: fix a couple of checkpatch issues
> v2: pass correct size to check_uaccess, and better handling of narrowing
> double word read with __get_user_xb() (Russell King's suggestion)
> v1: original
>
> Signed-off-by: Rob Clark <robdclark@gmail.com>
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
> ---
> arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
> arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
> 2 files changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
> index 75d9579..7057cf8 100644
> --- a/arch/arm/include/asm/uaccess.h
> +++ b/arch/arm/include/asm/uaccess.h
> @@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
> extern int __get_user_1(void *);
> extern int __get_user_2(void *);
> extern int __get_user_4(void *);
> +extern int __get_user_lo8(void *);
> +extern int __get_user_8(void *);
>
> #define __GUP_CLOBBER_1 "lr", "cc"
> #ifdef CONFIG_CPU_USE_DOMAINS
> @@ -115,6 +117,8 @@ extern int __get_user_4(void *);
> #define __GUP_CLOBBER_2 "lr", "cc"
> #endif
> #define __GUP_CLOBBER_4 "lr", "cc"
> +#define __GUP_CLOBBER_lo8 "lr", "cc"
> +#define __GUP_CLOBBER_8 "lr", "cc"
>
> #define __get_user_x(__r2,__p,__e,__l,__s) \
> __asm__ __volatile__ ( \
> @@ -125,11 +129,19 @@ extern int __get_user_4(void *);
> : "0" (__p), "r" (__l) \
> : __GUP_CLOBBER_##__s)
>
> +/* narrowing a double-word get into a single 32bit word register: */
> +#ifdef __ARMEB__
> +#define __get_user_xb(__r2, __p, __e, __l, __s) \
> + __get_user_x(__r2, __p, __e, __l, lo8)
> +#else
> +#define __get_user_xb __get_user_x
> +#endif
> +
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
The above breaks the V7 BE case when get_user() is called with a target
variable that is 64 bits in size but '*__p' is 32 bits or smaller. Please
see [1] for more details.
Thanks,
Victor
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-August/280806.html
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +154,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4); \
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8); \
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
> index 0f958e3..9386000 100644
> --- a/arch/arm/lib/getuser.S
> +++ b/arch/arm/lib/getuser.S
> @@ -18,7 +18,7 @@
> * Inputs: r0 contains the address
> * r1 contains the address limit, which must be preserved
> * Outputs: r0 is the error code
> - * r2 contains the zero-extended value
> + * r2, r3 contains the zero-extended value
> * lr corrupted
> *
> * No other registers must be altered. (see <asm/uaccess.h>
> @@ -66,15 +66,50 @@ ENTRY(__get_user_4)
> ret lr
> ENDPROC(__get_user_4)
>
> +ENTRY(__get_user_8)
> + check_uaccess r0, 8, r1, r2, __get_user_bad
> +#ifdef CONFIG_THUMB2_KERNEL
> +5: TUSER(ldr) r2, [r0]
> +6: TUSER(ldr) r3, [r0, #4]
> +#else
> +5: TUSER(ldr) r2, [r0], #4
> +6: TUSER(ldr) r3, [r0]
> +#endif
> + mov r0, #0
> + ret lr
> +ENDPROC(__get_user_8)
> +
> +#ifdef __ARMEB__
> +ENTRY(__get_user_lo8)
> + check_uaccess r0, 8, r1, r2, __get_user_bad
> +#ifdef CONFIG_CPU_USE_DOMAINS
> + add r0, r0, #4
> +7: ldrt r2, [r0]
> +#else
> +7: ldr r2, [r0, #4]
> +#endif
> + mov r0, #0
> + ret lr
> +ENDPROC(__get_user_lo8)
> +#endif
> +
> +__get_user_bad8:
> + mov r3, #0
> __get_user_bad:
> mov r2, #0
> mov r0, #-EFAULT
> ret lr
> ENDPROC(__get_user_bad)
> +ENDPROC(__get_user_bad8)
>
> .pushsection __ex_table, "a"
> .long 1b, __get_user_bad
> .long 2b, __get_user_bad
> .long 3b, __get_user_bad
> .long 4b, __get_user_bad
> + .long 5b, __get_user_bad8
> + .long 6b, __get_user_bad8
> +#ifdef __ARMEB__
> + .long 7b, __get_user_bad
> +#endif
> .popsection
> --
> 1.9.3
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 3.16.0-rc3-rmk v5] ARM: add get_user() support for 8 byte types
@ 2014-08-21 5:36 ` Victor Kamensky
0 siblings, 0 replies; 22+ messages in thread
From: Victor Kamensky @ 2014-08-21 5:36 UTC (permalink / raw)
To: linux-arm-kernel
On 10 July 2014 12:47, Daniel Thompson <daniel.thompson@linaro.org> wrote:
> Recent contributions, including to DRM and binder, introduce 64-bit
> values in their interfaces. A common motivation for this is to allow
> the same ABI for 32- and 64-bit userspaces (and therefore also a shared
> ABI for 32/64 hybrid userspaces). Anyhow, the developers would like to
> avoid gotchas like having to use copy_from_user().
>
> This feature is already implemented on x86-32 and the majority of other
> 32-bit architectures. The current list of get_user_8 hold-out
> architectures is: arm, avr32, blackfin, m32r, metag, microblaze,
> mn10300, sh.
>
> Credit:
>
> My name sits rather uneasily at the top of this patch. The v1 and
> v2 versions of the patch were written by Rob Clark and to produce v4
> I mostly copied code from Russell King and H. Peter Anvin. However I
> have mangled the patch sufficiently that *blame* is rightfully mine
> even if credit should be more widely shared.
>
> Changelog:
>
> v5: updated to use the ret macro (requested by Russell King)
> v4: remove an inlined add on big endian systems (spotted by Russell King),
> used __ARMEB__ rather than BIG_ENDIAN (to match rest of file),
> cleared r3 on EFAULT during __get_user_8.
> v3: fix a couple of checkpatch issues
> v2: pass correct size to check_uaccess, and better handling of narrowing
> double word read with __get_user_xb() (Russell King's suggestion)
> v1: original
>
> Signed-off-by: Rob Clark <robdclark@gmail.com>
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
> ---
> arch/arm/include/asm/uaccess.h | 20 +++++++++++++++++++-
> arch/arm/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++++++-
> 2 files changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
> index 75d9579..7057cf8 100644
> --- a/arch/arm/include/asm/uaccess.h
> +++ b/arch/arm/include/asm/uaccess.h
> @@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
> extern int __get_user_1(void *);
> extern int __get_user_2(void *);
> extern int __get_user_4(void *);
> +extern int __get_user_lo8(void *);
> +extern int __get_user_8(void *);
>
> #define __GUP_CLOBBER_1 "lr", "cc"
> #ifdef CONFIG_CPU_USE_DOMAINS
> @@ -115,6 +117,8 @@ extern int __get_user_4(void *);
> #define __GUP_CLOBBER_2 "lr", "cc"
> #endif
> #define __GUP_CLOBBER_4 "lr", "cc"
> +#define __GUP_CLOBBER_lo8 "lr", "cc"
> +#define __GUP_CLOBBER_8 "lr", "cc"
>
> #define __get_user_x(__r2,__p,__e,__l,__s) \
> __asm__ __volatile__ ( \
> @@ -125,11 +129,19 @@ extern int __get_user_4(void *);
> : "0" (__p), "r" (__l) \
> : __GUP_CLOBBER_##__s)
>
> +/* narrowing a double-word get into a single 32bit word register: */
> +#ifdef __ARMEB__
> +#define __get_user_xb(__r2, __p, __e, __l, __s) \
> + __get_user_x(__r2, __p, __e, __l, lo8)
> +#else
> +#define __get_user_xb __get_user_x
> +#endif
> +
> #define __get_user_check(x,p) \
> ({ \
> unsigned long __limit = current_thread_info()->addr_limit - 1; \
> register const typeof(*(p)) __user *__p asm("r0") = (p);\
> - register unsigned long __r2 asm("r2"); \
> + register typeof(x) __r2 asm("r2"); \
The above breaks the V7 BE case when get_user() is called with a target
variable that is 64 bits in size but '*__p' is 32 bits or smaller. Please
see [1] for more details.
Thanks,
Victor
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-August/280806.html
> register unsigned long __l asm("r1") = __limit; \
> register int __e asm("r0"); \
> switch (sizeof(*(__p))) { \
> @@ -142,6 +154,12 @@ extern int __get_user_4(void *);
> case 4: \
> __get_user_x(__r2, __p, __e, __l, 4); \
> break; \
> + case 8: \
> + if (sizeof((x)) < 8) \
> + __get_user_xb(__r2, __p, __e, __l, 4); \
> + else \
> + __get_user_x(__r2, __p, __e, __l, 8); \
> + break; \
> default: __e = __get_user_bad(); break; \
> } \
> x = (typeof(*(p))) __r2; \
> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
> index 0f958e3..9386000 100644
> --- a/arch/arm/lib/getuser.S
> +++ b/arch/arm/lib/getuser.S
> @@ -18,7 +18,7 @@
> * Inputs: r0 contains the address
> * r1 contains the address limit, which must be preserved
> * Outputs: r0 is the error code
> - * r2 contains the zero-extended value
> + * r2, r3 contains the zero-extended value
> * lr corrupted
> *
> * No other registers must be altered. (see <asm/uaccess.h>
> @@ -66,15 +66,50 @@ ENTRY(__get_user_4)
> ret lr
> ENDPROC(__get_user_4)
>
> +ENTRY(__get_user_8)
> + check_uaccess r0, 8, r1, r2, __get_user_bad
> +#ifdef CONFIG_THUMB2_KERNEL
> +5: TUSER(ldr) r2, [r0]
> +6: TUSER(ldr) r3, [r0, #4]
> +#else
> +5: TUSER(ldr) r2, [r0], #4
> +6: TUSER(ldr) r3, [r0]
> +#endif
> + mov r0, #0
> + ret lr
> +ENDPROC(__get_user_8)
> +
> +#ifdef __ARMEB__
> +ENTRY(__get_user_lo8)
> + check_uaccess r0, 8, r1, r2, __get_user_bad
> +#ifdef CONFIG_CPU_USE_DOMAINS
> + add r0, r0, #4
> +7: ldrt r2, [r0]
> +#else
> +7: ldr r2, [r0, #4]
> +#endif
> + mov r0, #0
> + ret lr
> +ENDPROC(__get_user_lo8)
> +#endif
> +
> +__get_user_bad8:
> + mov r3, #0
> __get_user_bad:
> mov r2, #0
> mov r0, #-EFAULT
> ret lr
> ENDPROC(__get_user_bad)
> +ENDPROC(__get_user_bad8)
>
> .pushsection __ex_table, "a"
> .long 1b, __get_user_bad
> .long 2b, __get_user_bad
> .long 3b, __get_user_bad
> .long 4b, __get_user_bad
> + .long 5b, __get_user_bad8
> + .long 6b, __get_user_bad8
> +#ifdef __ARMEB__
> + .long 7b, __get_user_bad
> +#endif
> .popsection
> --
> 1.9.3
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2014-08-21 5:37 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-12 15:42 [PATCH v3] ARM: add get_user() support for 8 byte types Daniel Thompson
2014-06-12 15:42 ` Daniel Thompson
2014-06-12 15:58 ` Russell King - ARM Linux
2014-06-12 15:58 ` Russell King - ARM Linux
2014-06-17 10:17 ` Daniel Thompson
2014-06-17 10:17 ` Daniel Thompson
2014-06-17 11:09 ` Russell King - ARM Linux
2014-06-17 11:09 ` Russell King - ARM Linux
2014-06-17 13:28 ` Daniel Thompson
2014-06-17 13:28 ` Daniel Thompson
2014-06-17 13:36 ` Russell King - ARM Linux
2014-06-17 13:36 ` Russell King - ARM Linux
2014-06-17 13:54 ` Daniel Thompson
2014-06-17 13:54 ` Daniel Thompson
2014-06-12 17:04 ` Arnd Bergmann
2014-06-12 17:04 ` Arnd Bergmann
2014-06-20 10:01 ` [PATCH v4] " Daniel Thompson
2014-06-20 10:01 ` Daniel Thompson
2014-07-10 19:47 ` [PATCH 3.16.0-rc3-rmk v5] " Daniel Thompson
2014-07-10 19:47 ` Daniel Thompson
2014-08-21 5:36 ` Victor Kamensky
2014-08-21 5:36 ` Victor Kamensky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.