* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
@ 2013-02-11 12:57 Ivan Djelic
2013-03-06 19:15 ` Dirk Behme
0 siblings, 1 reply; 14+ messages in thread
From: Ivan Djelic @ 2013-02-11 12:57 UTC (permalink / raw)
To: linux-arm-kernel
Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
assumptions about the implementation of memset and similar functions.
The current ARM optimized memset code does not return the value of
its first argument, as is usually expected from standard implementations.
For instance in the following function:
void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
INIT_LIST_HEAD(&waiter->list);
}
compiled as:
800554d0 <debug_mutex_lock_common>:
800554d0: e92d4008 push {r3, lr}
800554d4: e1a00001 mov r0, r1
800554d8: e3a02010 mov r2, #16 ; 0x10
800554dc: e3a01011 mov r1, #17 ; 0x11
800554e0: eb04426e bl 80165ea0 <memset>
800554e4: e1a03000 mov r3, r0
800554e8: e583000c str r0, [r3, #12]
800554ec: e5830000 str r0, [r3]
800554f0: e5830004 str r0, [r3, #4]
800554f4: e8bd8008 pop {r3, pc}
GCC assumes memset returns the value of pointer 'waiter' in register r0; causing
register/memory corruptions.
This patch fixes the return value of the assembly version of memset.
It adds a 'mov' instruction and merges an additional load+store into
existing load/store instructions.
For ease of review, here is a breakdown of the patch into 4 simple steps:
Step 1
======
Perform the following substitutions:
ip -> r8, then
r0 -> ip,
and insert 'mov ip, r0' as the first statement of the function.
At this point, we have a memset() implementation returning the proper result,
but corrupting r8 on some paths (the ones that were using ip).
Step 2
======
Make sure r8 is saved and restored when (! CALGN(1)+0) == 1:
save r8:
- str lr, [sp, #-4]!
+ stmfd sp!, {r8, lr}
and restore r8 on both exit paths:
- ldmeqfd sp!, {pc} @ Now <64 bytes to go.
+ ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
(...)
tst r2, #16
stmneia ip!, {r1, r3, r8, lr}
- ldr lr, [sp], #4
+ ldmfd sp!, {r8, lr}
Step 3
======
Make sure r8 is saved and restored when (! CALGN(1)+0) == 0:
save r8:
- stmfd sp!, {r4-r7, lr}
+ stmfd sp!, {r4-r8, lr}
and restore r8 on both exit paths:
bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
+ ldmeqfd sp!, {r4-r8, pc}
(...)
tst r2, #16
stmneia ip!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
+ ldmfd sp!, {r4-r8, lr}
Step 4
======
Rewrite register list "r4-r7, r8" as "r4-r8".
Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
---
v2 changelog:
- added patch breakdown explanation to commit text
arch/arm/lib/memset.S | 85 +++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 41 deletions(-)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d592..d912e73 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,9 +19,9 @@
1: subs r2, r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
- strltb r1, [r0], #1 @ 1
- strleb r1, [r0], #1 @ 1
- strb r1, [r0], #1 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
/*
* The pointer is now aligned and the length is adjusted. Try doing the
@@ -29,10 +29,14 @@
*/
ENTRY(memset)
- ands r3, r0, #3 @ 1 unaligned?
+/*
+ * Preserve the contents of r0 for the return value.
+ */
+ mov ip, r0
+ ands r3, ip, #3 @ 1 unaligned?
bne 1b @ 1
/*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
*/
orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
@@ -43,29 +47,28 @@ ENTRY(memset)
#if ! CALGN(1)+0
/*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
*/
- str lr, [sp, #-4]!
- mov ip, r1
+ stmfd sp!, {r8, lr}
+ mov r8, r1
mov lr, r1
2: subs r2, r2, #64
- stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
bgt 2b
- ldmeqfd sp!, {pc} @ Now <64 bytes to go.
+ ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
- stmneia r0!, {r1, r3, ip, lr}
- stmneia r0!, {r1, r3, ip, lr}
+ stmneia ip!, {r1, r3, r8, lr}
+ stmneia ip!, {r1, r3, r8, lr}
tst r2, #16
- stmneia r0!, {r1, r3, ip, lr}
- ldr lr, [sp], #4
+ stmneia ip!, {r1, r3, r8, lr}
+ ldmfd sp!, {r8, lr}
#else
@@ -74,54 +77,54 @@ ENTRY(memset)
* whole cache lines@once.
*/
- stmfd sp!, {r4-r7, lr}
+ stmfd sp!, {r4-r8, lr}
mov r4, r1
mov r5, r1
mov r6, r1
mov r7, r1
- mov ip, r1
+ mov r8, r1
mov lr, r1
cmp r2, #96
- tstgt r0, #31
+ tstgt ip, #31
ble 3f
- and ip, r0, #31
- rsb ip, ip, #32
- sub r2, r2, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- tst ip, #(1 << 30)
- mov ip, r1
- strne r1, [r0], #4
+ and r8, ip, #31
+ rsb r8, r8, #32
+ sub r2, r2, r8
+ movs r8, r8, lsl #(32 - 4)
+ stmcsia ip!, {r4, r5, r6, r7}
+ stmmiia ip!, {r4, r5}
+ tst r8, #(1 << 30)
+ mov r8, r1
+ strne r1, [ip], #4
3: subs r2, r2, #64
- stmgeia r0!, {r1, r3-r7, ip, lr}
- stmgeia r0!, {r1, r3-r7, ip, lr}
+ stmgeia ip!, {r1, r3-r8, lr}
+ stmgeia ip!, {r1, r3-r8, lr}
bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
+ ldmeqfd sp!, {r4-r8, pc}
tst r2, #32
- stmneia r0!, {r1, r3-r7, ip, lr}
+ stmneia ip!, {r1, r3-r8, lr}
tst r2, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
+ stmneia ip!, {r4-r7}
+ ldmfd sp!, {r4-r8, lr}
#endif
4: tst r2, #8
- stmneia r0!, {r1, r3}
+ stmneia ip!, {r1, r3}
tst r2, #4
- strne r1, [r0], #4
+ strne r1, [ip], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
- strneb r1, [r0], #1
- strneb r1, [r0], #1
+ strneb r1, [ip], #1
+ strneb r1, [ip], #1
tst r2, #1
- strneb r1, [r0], #1
+ strneb r1, [ip], #1
mov pc, lr
ENDPROC(memset)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-02-11 12:57 [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations Ivan Djelic
@ 2013-03-06 19:15 ` Dirk Behme
2013-03-07 8:13 ` Ivan Djelic
2013-03-07 15:17 ` Russell King - ARM Linux
0 siblings, 2 replies; 14+ messages in thread
From: Dirk Behme @ 2013-03-06 19:15 UTC (permalink / raw)
To: linux-arm-kernel
Am 11.02.2013 13:57, schrieb Ivan Djelic:
> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> assumptions about the implementation of memset and similar functions.
> The current ARM optimized memset code does not return the value of
> its first argument, as is usually expected from standard implementations.
>
> For instance in the following function:
>
> void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
> {
> memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
> waiter->magic = waiter;
> INIT_LIST_HEAD(&waiter->list);
> }
>
> compiled as:
>
> 800554d0 <debug_mutex_lock_common>:
> 800554d0: e92d4008 push {r3, lr}
> 800554d4: e1a00001 mov r0, r1
> 800554d8: e3a02010 mov r2, #16 ; 0x10
> 800554dc: e3a01011 mov r1, #17 ; 0x11
> 800554e0: eb04426e bl 80165ea0 <memset>
> 800554e4: e1a03000 mov r3, r0
> 800554e8: e583000c str r0, [r3, #12]
> 800554ec: e5830000 str r0, [r3]
> 800554f0: e5830004 str r0, [r3, #4]
> 800554f4: e8bd8008 pop {r3, pc}
>
> GCC assumes memset returns the value of pointer 'waiter' in register r0; causing
> register/memory corruptions.
>
> This patch fixes the return value of the assembly version of memset.
> It adds a 'mov' instruction and merges an additional load+store into
> existing load/store instructions.
> For ease of review, here is a breakdown of the patch into 4 simple steps:
>
> Step 1
> ======
> Perform the following substitutions:
> ip -> r8, then
> r0 -> ip,
> and insert 'mov ip, r0' as the first statement of the function.
> At this point, we have a memset() implementation returning the proper result,
> but corrupting r8 on some paths (the ones that were using ip).
>
> Step 2
> ======
> Make sure r8 is saved and restored when (! CALGN(1)+0) == 1:
>
> save r8:
> - str lr, [sp, #-4]!
> + stmfd sp!, {r8, lr}
>
> and restore r8 on both exit paths:
> - ldmeqfd sp!, {pc} @ Now <64 bytes to go.
> + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
> (...)
> tst r2, #16
> stmneia ip!, {r1, r3, r8, lr}
> - ldr lr, [sp], #4
> + ldmfd sp!, {r8, lr}
>
> Step 3
> ======
> Make sure r8 is saved and restored when (! CALGN(1)+0) == 0:
>
> save r8:
> - stmfd sp!, {r4-r7, lr}
> + stmfd sp!, {r4-r8, lr}
>
> and restore r8 on both exit paths:
> bgt 3b
> - ldmeqfd sp!, {r4-r7, pc}
> + ldmeqfd sp!, {r4-r8, pc}
> (...)
> tst r2, #16
> stmneia ip!, {r4-r7}
> - ldmfd sp!, {r4-r7, lr}
> + ldmfd sp!, {r4-r8, lr}
>
> Step 4
> ======
> Rewrite register list "r4-r7, r8" as "r4-r8".
>
> Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com>
> Reviewed-by: Nicolas Pitre <nico@linaro.org>
Sent as 7668/1 to rmk's patch system.
Thanks
Dirk
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-06 19:15 ` Dirk Behme
@ 2013-03-07 8:13 ` Ivan Djelic
2013-03-07 15:17 ` Russell King - ARM Linux
1 sibling, 0 replies; 14+ messages in thread
From: Ivan Djelic @ 2013-03-07 8:13 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Mar 06, 2013 at 07:15:17PM +0000, Dirk Behme wrote:
> Am 11.02.2013 13:57, schrieb Ivan Djelic:
> > Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> > assumptions about the implementation of memset and similar functions.
> > The current ARM optimized memset code does not return the value of
> > its first argument, as is usually expected from standard implementations.
> >
> > For instance in the following function:
> >
> > void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
> > {
> > memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
> > waiter->magic = waiter;
> > INIT_LIST_HEAD(&waiter->list);
> > }
> >
> > compiled as:
> >
> > 800554d0 <debug_mutex_lock_common>:
> > 800554d0: e92d4008 push {r3, lr}
> > 800554d4: e1a00001 mov r0, r1
> > 800554d8: e3a02010 mov r2, #16 ; 0x10
> > 800554dc: e3a01011 mov r1, #17 ; 0x11
> > 800554e0: eb04426e bl 80165ea0 <memset>
> > 800554e4: e1a03000 mov r3, r0
> > 800554e8: e583000c str r0, [r3, #12]
> > 800554ec: e5830000 str r0, [r3]
> > 800554f0: e5830004 str r0, [r3, #4]
> > 800554f4: e8bd8008 pop {r3, pc}
> >
> > GCC assumes memset returns the value of pointer 'waiter' in register r0; causing
> > register/memory corruptions.
> >
> > This patch fixes the return value of the assembly version of memset.
> > It adds a 'mov' instruction and merges an additional load+store into
> > existing load/store instructions.
> > For ease of review, here is a breakdown of the patch into 4 simple steps:
> >
> > Step 1
> > ======
> > Perform the following substitutions:
> > ip -> r8, then
> > r0 -> ip,
> > and insert 'mov ip, r0' as the first statement of the function.
> > At this point, we have a memset() implementation returning the proper result,
> > but corrupting r8 on some paths (the ones that were using ip).
> >
> > Step 2
> > ======
> > Make sure r8 is saved and restored when (! CALGN(1)+0) == 1:
> >
> > save r8:
> > - str lr, [sp, #-4]!
> > + stmfd sp!, {r8, lr}
> >
> > and restore r8 on both exit paths:
> > - ldmeqfd sp!, {pc} @ Now <64 bytes to go.
> > + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
> > (...)
> > tst r2, #16
> > stmneia ip!, {r1, r3, r8, lr}
> > - ldr lr, [sp], #4
> > + ldmfd sp!, {r8, lr}
> >
> > Step 3
> > ======
> > Make sure r8 is saved and restored when (! CALGN(1)+0) == 0:
> >
> > save r8:
> > - stmfd sp!, {r4-r7, lr}
> > + stmfd sp!, {r4-r8, lr}
> >
> > and restore r8 on both exit paths:
> > bgt 3b
> > - ldmeqfd sp!, {r4-r7, pc}
> > + ldmeqfd sp!, {r4-r8, pc}
> > (...)
> > tst r2, #16
> > stmneia ip!, {r4-r7}
> > - ldmfd sp!, {r4-r7, lr}
> > + ldmfd sp!, {r4-r8, lr}
> >
> > Step 4
> > ======
> > Rewrite register list "r4-r7, r8" as "r4-r8".
> >
> > Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com>
> > Reviewed-by: Nicolas Pitre <nico@linaro.org>
>
> Sent as 7668/1 to rmk's patch system.
OK, thanks a lot !
--
Ivan
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-06 19:15 ` Dirk Behme
2013-03-07 8:13 ` Ivan Djelic
@ 2013-03-07 15:17 ` Russell King - ARM Linux
2013-03-10 17:06 ` Alexander Holler
1 sibling, 1 reply; 14+ messages in thread
From: Russell King - ARM Linux @ 2013-03-07 15:17 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
> Am 11.02.2013 13:57, schrieb Ivan Djelic:
>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
>> assumptions about the implementation of memset and similar functions.
>> The current ARM optimized memset code does not return the value of
>> its first argument, as is usually expected from standard implementations.
>>
>> For instance in the following function:
>>
>> void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
>> {
>> memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
>> waiter->magic = waiter;
>> INIT_LIST_HEAD(&waiter->list);
>> }
>>
>> compiled as:
>>
>> 800554d0 <debug_mutex_lock_common>:
>> 800554d0: e92d4008 push {r3, lr}
>> 800554d4: e1a00001 mov r0, r1
>> 800554d8: e3a02010 mov r2, #16 ; 0x10
>> 800554dc: e3a01011 mov r1, #17 ; 0x11
>> 800554e0: eb04426e bl 80165ea0 <memset>
>> 800554e4: e1a03000 mov r3, r0
>> 800554e8: e583000c str r0, [r3, #12]
>> 800554ec: e5830000 str r0, [r3]
>> 800554f0: e5830004 str r0, [r3, #4]
>> 800554f4: e8bd8008 pop {r3, pc}
>>
>> GCC assumes memset returns the value of pointer 'waiter' in register r0; causing
>> register/memory corruptions.
>>
>> This patch fixes the return value of the assembly version of memset.
>> It adds a 'mov' instruction and merges an additional load+store into
>> existing load/store instructions.
>> For ease of review, here is a breakdown of the patch into 4 simple steps:
>>
>> Step 1
>> ======
>> Perform the following substitutions:
>> ip -> r8, then
>> r0 -> ip,
>> and insert 'mov ip, r0' as the first statement of the function.
>> At this point, we have a memset() implementation returning the proper result,
>> but corrupting r8 on some paths (the ones that were using ip).
>>
>> Step 2
>> ======
>> Make sure r8 is saved and restored when (! CALGN(1)+0) == 1:
>>
>> save r8:
>> - str lr, [sp, #-4]!
>> + stmfd sp!, {r8, lr}
>>
>> and restore r8 on both exit paths:
>> - ldmeqfd sp!, {pc} @ Now <64 bytes to go.
>> + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
>> (...)
>> tst r2, #16
>> stmneia ip!, {r1, r3, r8, lr}
>> - ldr lr, [sp], #4
>> + ldmfd sp!, {r8, lr}
>>
>> Step 3
>> ======
>> Make sure r8 is saved and restored when (! CALGN(1)+0) == 0:
>>
>> save r8:
>> - stmfd sp!, {r4-r7, lr}
>> + stmfd sp!, {r4-r8, lr}
>>
>> and restore r8 on both exit paths:
>> bgt 3b
>> - ldmeqfd sp!, {r4-r7, pc}
>> + ldmeqfd sp!, {r4-r8, pc}
>> (...)
>> tst r2, #16
>> stmneia ip!, {r4-r7}
>> - ldmfd sp!, {r4-r7, lr}
>> + ldmfd sp!, {r4-r8, lr}
>>
>> Step 4
>> ======
>> Rewrite register list "r4-r7, r8" as "r4-r8".
>>
>> Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com>
>> Reviewed-by: Nicolas Pitre <nico@linaro.org>
>
> Sent as 7668/1 to rmk's patch system.
Thanks, except for one minor detail. As you are the one sending it to me,
you are "handling" the patch, so it should have your sign-off too.
Rather than resubmit, please send me an email followup with the sign-off
tag included. Thanks.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-07 15:17 ` Russell King - ARM Linux
@ 2013-03-10 17:06 ` Alexander Holler
2013-03-10 17:28 ` Russell King - ARM Linux
0 siblings, 1 reply; 14+ messages in thread
From: Alexander Holler @ 2013-03-10 17:06 UTC (permalink / raw)
To: linux-arm-kernel
Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
>>> assumptions about the implementation of memset and similar functions.
>>> The current ARM optimized memset code does not return the value of
>>> its first argument, as is usually expected from standard implementations.
I've just tried this patch with kernel 4.8.2 on an armv5-system where I
use gcc 4.7.2 since several months and where most parts of the system
are compiled with gcc 4.7.2 too.
And I had at least one problem which manifested itself with
[ 181.198559] pts1: unknown flag 212
while trying to establish a btle-connection.
So I assume either the patch is wrong, the patch isn't the whole story,
an existing bug is now triggered, or ...
I don't know.
Regards,
Alexander
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 17:06 ` Alexander Holler
@ 2013-03-10 17:28 ` Russell King - ARM Linux
2013-03-10 17:41 ` Alexander Holler
` (3 more replies)
0 siblings, 4 replies; 14+ messages in thread
From: Russell King - ARM Linux @ 2013-03-10 17:28 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
> Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
>> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
>>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
>>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
>>>> assumptions about the implementation of memset and similar functions.
>>>> The current ARM optimized memset code does not return the value of
>>>> its first argument, as is usually expected from standard implementations.
>
> I've just tried this patch with kernel 4.8.2 on an armv5-system where I
> use gcc 4.7.2 since several months and where most parts of the system
> are compiled with gcc 4.7.2 too.
>
> And I had at least one problem which manifested itself with
Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
of it, but the patch is definitely wrong. Look carefully at this
fragment of code:
1: subs r2, r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
strltb r1, [ip], #1 @ 1
strleb r1, [ip], #1 @ 1
strb r1, [ip], #1 @ 1
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
/*
* The pointer is now aligned and the length is adjusted. Try doing the
* memset again.
*/
ENTRY(memset)
/*
* Preserve the contents of r0 for the return value.
*/
mov ip, r0
ands r3, ip, #3 @ 1 unaligned?
bne 1b @ 1
and consider what happens when 'r0' is not aligned to a word... We end
up aligning the pointer in "1:" and then fall through into memset again
which reloads the old misaligned pointer.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 17:28 ` Russell King - ARM Linux
@ 2013-03-10 17:41 ` Alexander Holler
2013-03-10 18:46 ` Nicolas Pitre
` (2 subsequent siblings)
3 siblings, 0 replies; 14+ messages in thread
From: Alexander Holler @ 2013-03-10 17:41 UTC (permalink / raw)
To: linux-arm-kernel
Am 10.03.2013 18:28, schrieb Russell King - ARM Linux:
> On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
>> Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
>>> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
>>>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
>>>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
>>>>> assumptions about the implementation of memset and similar functions.
>>>>> The current ARM optimized memset code does not return the value of
>>>>> its first argument, as is usually expected from standard implementations.
>>
>> I've just tried this patch with kernel 4.8.2 on an armv5-system where I
>> use gcc 4.7.2 since several months and where most parts of the system
>> are compiled with gcc 4.7.2 too.
>>
>> And I had at least one problem which manifested itself with
>
> Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
> of it, but the patch is definitely wrong. Look carefully at this
> fragment of code:
>
> 1: subs r2, r2, #4 @ 1 do we have enough
> blt 5f @ 1 bytes to align with?
> cmp r3, #2 @ 1
> strltb r1, [ip], #1 @ 1
> strleb r1, [ip], #1 @ 1
> strb r1, [ip], #1 @ 1
> add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
> /*
> * The pointer is now aligned and the length is adjusted. Try doing the
> * memset again.
> */
>
> ENTRY(memset)
> /*
> * Preserve the contents of r0 for the return value.
> */
> mov ip, r0
> ands r3, ip, #3 @ 1 unaligned?
> bne 1b @ 1
>
> and consider what happens when 'r0' is not aligned to a word... We end
> up aligning the pointer in "1:" and then fall through into memset again
> which reloads the old misaligned pointer.
Thanks a lot for the very fast answer. I myself wasn't in the mood to go
through arm-assembler (which I don't read that often), sorry.
Regards,
Alexander
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 17:28 ` Russell King - ARM Linux
2013-03-10 17:41 ` Alexander Holler
@ 2013-03-10 18:46 ` Nicolas Pitre
2013-03-10 19:23 ` Nicolas Pitre
2013-03-10 20:46 ` Ivan Djelic
2013-03-11 4:10 ` Nicolas Pitre
3 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2013-03-10 18:46 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, 10 Mar 2013, Russell King - ARM Linux wrote:
> On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
> > Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
> >> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
> >>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
> >>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> >>>> assumptions about the implementation of memset and similar functions.
> >>>> The current ARM optimized memset code does not return the value of
> >>>> its first argument, as is usually expected from standard implementations.
> >
> > I've just tried this patch with kernel 4.8.2 on an armv5-system where I
> > use gcc 4.7.2 since several months and where most parts of the system
> > are compiled with gcc 4.7.2 too.
> >
> > And I had at least one problem which manifested itself with
>
> Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
> of it, but the patch is definitely wrong. Look carefully at this
> fragment of code:
Dang. Indeed.
Sorry about that.
Nicolas
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 18:46 ` Nicolas Pitre
@ 2013-03-10 19:23 ` Nicolas Pitre
0 siblings, 0 replies; 14+ messages in thread
From: Nicolas Pitre @ 2013-03-10 19:23 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, 11 Mar 2013, Nicolas Pitre wrote:
> On Sun, 10 Mar 2013, Russell King - ARM Linux wrote:
>
> > On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
> > > Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
> > >> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
> > >>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
> > >>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> > >>>> assumptions about the implementation of memset and similar functions.
> > >>>> The current ARM optimized memset code does not return the value of
> > >>>> its first argument, as is usually expected from standard implementations.
> > >
> > > I've just tried this patch with kernel 4.8.2 on an armv5-system where I
> > > use gcc 4.7.2 since several months and where most parts of the system
> > > are compiled with gcc 4.7.2 too.
> > >
> > > And I had at least one problem which manifested itself with
> >
> > Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
> > of it, but the patch is definitely wrong. Look carefully at this
> > fragment of code:
>
> Dang. Indeed.
>
> Sorry about that.
Here's what I'd fold into the original patch to fix it. I also moved
the alignment fixup code to the end as the entry alignment isn't right
with Thumb mode anyway. And reworked the initial test to help dual
issue pipelines.
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index d912e7397e..cf34788237 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -14,31 +14,15 @@
.text
.align 5
- .word 0
-
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [ip], #1 @ 1
- strleb r1, [ip], #1 @ 1
- strb r1, [ip], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
ENTRY(memset)
-/*
- * Preserve the contents of r0 for the return value.
- */
- mov ip, r0
- ands r3, ip, #3 @ 1 unaligned?
- bne 1b @ 1
+ ands r3, r0, #3 @ 1 unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
- orr r1, r1, r1, lsl #8
+1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
cmp r2, #16
@@ -127,4 +111,13 @@ ENTRY(memset)
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
+
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5f @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b
ENDPROC(memset)
>
>
> Nicolas
>
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 17:28 ` Russell King - ARM Linux
2013-03-10 17:41 ` Alexander Holler
2013-03-10 18:46 ` Nicolas Pitre
@ 2013-03-10 20:46 ` Ivan Djelic
2013-03-11 4:10 ` Nicolas Pitre
3 siblings, 0 replies; 14+ messages in thread
From: Ivan Djelic @ 2013-03-10 20:46 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, Mar 10, 2013 at 05:28:54PM +0000, Russell King - ARM Linux wrote:
> On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
> > Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
> >> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
> >>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
> >>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> >>>> assumptions about the implementation of memset and similar functions.
> >>>> The current ARM optimized memset code does not return the value of
> >>>> its first argument, as is usually expected from standard implementations.
> >
> > I've just tried this patch with kernel 4.8.2 on an armv5-system where I
> > use gcc 4.7.2 since several months and where most parts of the system
> > are compiled with gcc 4.7.2 too.
> >
> > And I had at least one problem which manifested itself with
>
> Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
> of it, but the patch is definitely wrong. Look carefully at this
> fragment of code:
>
> 1: subs r2, r2, #4 @ 1 do we have enough
> blt 5f @ 1 bytes to align with?
> cmp r3, #2 @ 1
> strltb r1, [ip], #1 @ 1
> strleb r1, [ip], #1 @ 1
> strb r1, [ip], #1 @ 1
> add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
> /*
> * The pointer is now aligned and the length is adjusted. Try doing the
> * memset again.
> */
>
> ENTRY(memset)
> /*
> * Preserve the contents of r0 for the return value.
> */
> mov ip, r0
> ands r3, ip, #3 @ 1 unaligned?
> bne 1b @ 1
>
> and consider what happens when 'r0' is not aligned to a word... We end
> up aligning the pointer in "1:" and then fall through into memset again
> which reloads the old misaligned pointer.
Oops... Indeed. Thanks very much for catching that and sorry for the regression.
--
Ivan.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-10 17:28 ` Russell King - ARM Linux
` (2 preceding siblings ...)
2013-03-10 20:46 ` Ivan Djelic
@ 2013-03-11 4:10 ` Nicolas Pitre
2013-03-12 10:06 ` Alexander Holler
3 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2013-03-11 4:10 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, 10 Mar 2013, Russell King - ARM Linux wrote:
> On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
> > Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
> >> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
> >>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
> >>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
> >>>> assumptions about the implementation of memset and similar functions.
> >>>> The current ARM optimized memset code does not return the value of
> >>>> its first argument, as is usually expected from standard implementations.
> >
> > I've just tried this patch with kernel 4.8.2 on an armv5-system where I
> > use gcc 4.7.2 since several months and where most parts of the system
> > are compiled with gcc 4.7.2 too.
> >
> > And I had at least one problem which manifested itself with
>
> Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
> of it, but the patch is definitely wrong.
Worse: it is in v3.9-rc2 already.
Here's a fix. Patch system?
---------- >8
Subject: fix the memset fix
Commit 455bd4c430b0 ("ARM: 7668/1: fix memset-related crashes caused by
recent GCC (4.7.2) optimizations") attempted to fix a compliance issue
with the memset return value. However the memset itself was broken by
that patch in the misaligned pointer case.
This fixes the above by branching over the entry code from the
misaligned fixup code to avoid reloading the original pointer.
Also, because the function entry alignment is wrong in the Thumb mode
compilation, that fixup code is moved to the end.
While at it, the entry instructions are slightly reworked to help dual
issue pipelines.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index d912e7397e..94b0650ea9 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -14,31 +14,15 @@
.text
.align 5
- .word 0
-
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [ip], #1 @ 1
- strleb r1, [ip], #1 @ 1
- strb r1, [ip], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
ENTRY(memset)
-/*
- * Preserve the contents of r0 for the return value.
- */
- mov ip, r0
- ands r3, ip, #3 @ 1 unaligned?
- bne 1b @ 1
+ ands r3, r0, #3 @ 1 unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
- orr r1, r1, r1, lsl #8
+1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
cmp r2, #16
@@ -127,4 +111,13 @@ ENTRY(memset)
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
+
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5b @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b
ENDPROC(memset)
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-11 4:10 ` Nicolas Pitre
@ 2013-03-12 10:06 ` Alexander Holler
2014-02-26 0:43 ` Florian Fainelli
0 siblings, 1 reply; 14+ messages in thread
From: Alexander Holler @ 2013-03-12 10:06 UTC (permalink / raw)
To: linux-arm-kernel
Am 11.03.2013 05:10, schrieb Nicolas Pitre:
> On Sun, 10 Mar 2013, Russell King - ARM Linux wrote:
>
>> On Sun, Mar 10, 2013 at 06:06:11PM +0100, Alexander Holler wrote:
>>> Am 07.03.2013 16:17, schrieb Russell King - ARM Linux:
>>>> On Wed, Mar 06, 2013 at 08:15:17PM +0100, Dirk Behme wrote:
>>>>> Am 11.02.2013 13:57, schrieb Ivan Djelic:
>>>>>> Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
>>>>>> assumptions about the implementation of memset and similar functions.
>>>>>> The current ARM optimized memset code does not return the value of
>>>>>> its first argument, as is usually expected from standard implementations.
>>>
>>> I've just tried this patch with kernel 4.8.2 on an armv5-system where I
>>> use gcc 4.7.2 since several months and where most parts of the system
>>> are compiled with gcc 4.7.2 too.
>>>
>>> And I had at least one problem which manifested itself with
>>
>> Yes, the patch _is_ wrong. Reverted. I was trusting Nicolas' review
>> of it, but the patch is definitely wrong.
>
> Worse: it is in v3.9-rc2 already.
>
> Here's a fix. Patch system?
>
> ---------- >8
> Subject: fix the memset fix
>
> Commit 455bd4c430b0 ("ARM: 7668/1: fix memset-related crashes caused by
> recent GCC (4.7.2) optimizations") attempted to fix a compliance issue
> with the memset return value. However the memset itself was broken by
> that patch in the misaligned pointer case.
>
> This fixes the above by branching over the entry code from the
> misaligned fixup code to avoid reloading the original pointer.
>
> Also, because the function entry alignment is wrong in the Thumb mode
> compilation, that fixup code is moved to the end.
>
> While at it, the entry instructions are slightly reworked to help dual
> issue pipelines.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>
> diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
> index d912e7397e..94b0650ea9 100644
> --- a/arch/arm/lib/memset.S
> +++ b/arch/arm/lib/memset.S
> @@ -14,31 +14,15 @@
>
> .text
> .align 5
> - .word 0
> -
> -1: subs r2, r2, #4 @ 1 do we have enough
> - blt 5f @ 1 bytes to align with?
> - cmp r3, #2 @ 1
> - strltb r1, [ip], #1 @ 1
> - strleb r1, [ip], #1 @ 1
> - strb r1, [ip], #1 @ 1
> - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
> -/*
> - * The pointer is now aligned and the length is adjusted. Try doing the
> - * memset again.
> - */
>
> ENTRY(memset)
> -/*
> - * Preserve the contents of r0 for the return value.
> - */
> - mov ip, r0
> - ands r3, ip, #3 @ 1 unaligned?
> - bne 1b @ 1
> + ands r3, r0, #3 @ 1 unaligned?
> + mov ip, r0 @ preserve r0 as return value
> + bne 6f @ 1
> /*
> * we know that the pointer in ip is aligned to a word boundary.
> */
> - orr r1, r1, r1, lsl #8
> +1: orr r1, r1, r1, lsl #8
> orr r1, r1, r1, lsl #16
> mov r3, r1
> cmp r2, #16
> @@ -127,4 +111,13 @@ ENTRY(memset)
> tst r2, #1
> strneb r1, [ip], #1
> mov pc, lr
> +
> +6: subs r2, r2, #4 @ 1 do we have enough
> + blt 5b @ 1 bytes to align with?
> + cmp r3, #2 @ 1
> + strltb r1, [ip], #1 @ 1
> + strleb r1, [ip], #1 @ 1
> + strb r1, [ip], #1 @ 1
> + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
> + b 1b
> ENDPROC(memset)
>
Tested-by: Alexander Holler
It works since 2 days on my busy 24/7 armv5-thingy without me having
seen an error. A quick test on an armv7-system hasn't revealed any
problems too.
Thanks,
Alexander
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2013-03-12 10:06 ` Alexander Holler
@ 2014-02-26 0:43 ` Florian Fainelli
2014-02-26 17:13 ` Dirk Behme
0 siblings, 1 reply; 14+ messages in thread
From: Florian Fainelli @ 2014-02-26 0:43 UTC (permalink / raw)
To: linux-arm-kernel
2013-03-12 3:06 GMT-07:00 Alexander Holler <holler@ahsoftware.de>:
>
> Tested-by: Alexander Holler
>
> It works since 2 days on my busy 24/7 armv5-thingy without me having seen an
> error. A quick test on an armv7-system hasn't revealed any problems too.
This is an old thread, but 3.8.13 is still affected, not quite sure if
there will be a 3.8.14 release, but if there is, we should
definitively get:
ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2)
optimizations
ARM: 7670/1: fix the memset fix
included in 3.8.14.
Thanks!
--
Florian
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
2014-02-26 0:43 ` Florian Fainelli
@ 2014-02-26 17:13 ` Dirk Behme
0 siblings, 0 replies; 14+ messages in thread
From: Dirk Behme @ 2014-02-26 17:13 UTC (permalink / raw)
To: linux-arm-kernel
Am 26.02.2014 01:43, schrieb Florian Fainelli:
> 2013-03-12 3:06 GMT-07:00 Alexander Holler <holler@ahsoftware.de>:
>>
>> Tested-by: Alexander Holler
>>
>> It works since 2 days on my busy 24/7 armv5-thingy without me having seen an
>> error. A quick test on an armv7-system hasn't revealed any problems too.
>
> This is an old thread, but 3.8.13 is still affected, not quite sure if
> there will be a 3.8.14 release, but if there is, we should
> definitively get:
>
> ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2)
> optimizations
> ARM: 7670/1: fix the memset fix
>
> included in 3.8.14.
Ubuntu is continuing support for some kernels with -stable patches:
https://lwn.net/Articles/550581/
http://kernel.ubuntu.com/git?p=ubuntu/linux.git
Having a short check in v3.8.13.18, this patch is not included there,
yet. Maybe you want to ask them if they want to include it.
Best regards
Dirk
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2014-02-26 17:13 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-11 12:57 [PATCH v2] arm: fix memset-related crashes caused by recent GCC (4.7.2) optimizations Ivan Djelic
2013-03-06 19:15 ` Dirk Behme
2013-03-07 8:13 ` Ivan Djelic
2013-03-07 15:17 ` Russell King - ARM Linux
2013-03-10 17:06 ` Alexander Holler
2013-03-10 17:28 ` Russell King - ARM Linux
2013-03-10 17:41 ` Alexander Holler
2013-03-10 18:46 ` Nicolas Pitre
2013-03-10 19:23 ` Nicolas Pitre
2013-03-10 20:46 ` Ivan Djelic
2013-03-11 4:10 ` Nicolas Pitre
2013-03-12 10:06 ` Alexander Holler
2014-02-26 0:43 ` Florian Fainelli
2014-02-26 17:13 ` Dirk Behme
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).