* [PATCH v2 1/5] ARM: head-common.S: speed up startup code
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
@ 2017-08-30 2:55 ` Nicolas Pitre
2017-10-03 12:41 ` Geert Uytterhoeven
2017-08-30 2:55 ` [PATCH v2 2/5] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
` (4 subsequent siblings)
5 siblings, 1 reply; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-30 2:55 UTC (permalink / raw)
To: linux-arm-kernel
Let's use optimized routines such as memcpy to copy .data and memzero
to clear .bss in the startup code instead of doing it one word at a
time. Those routines don't use any global data so they're safe to use
even if .data and .bss segments are not initialized.
In the .data copy case a temporary stack is installed in the .bss area
as the actual kernel stack is located within the copied data area. The
XIP kernel linker script ensures an 8-byte alignment for that purpose.
Finally, make the .data copy and related pointers surrounded by
CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
allow for further cleanups in the non-XIP linker script.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/kernel/head-common.S | 76 ++++++++++++++++++++++-----------------
arch/arm/kernel/vmlinux-xip.lds.S | 2 +-
2 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 8733012d23..bf9c4e38ec 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -79,47 +79,59 @@ ENDPROC(__vet_atags)
*/
__INIT
__mmap_switched:
- adr r3, __mmap_switched_data
-
- ldmia r3!, {r4, r5, r6, r7}
- cmp r4, r5 @ Copy data segment if needed
-1: cmpne r5, r6
- ldrne fp, [r4], #4
- strne fp, [r5], #4
- bne 1b
-
- mov fp, #0 @ Clear BSS (and zero fp)
-1: cmp r6, r7
- strcc fp, [r6],#4
- bcc 1b
-
- ARM( ldmia r3, {r4, r5, r6, r7, sp})
- THUMB( ldmia r3, {r4, r5, r6, r7} )
- THUMB( ldr sp, [r3, #16] )
- str r9, [r4] @ Save processor ID
- str r1, [r5] @ Save machine type
- str r2, [r6] @ Save atags pointer
- cmp r7, #0
- strne r0, [r7] @ Save control register values
+
+ mov r7, r1
+ mov r8, r2
+ mov r10, r0
+
+ adr r4, __mmap_switched_data
+ mov fp, #0
+
+#ifdef CONFIG_XIP_KERNEL
+ ARM( ldmia r4!, {r0, r1, r2, sp} )
+ THUMB( ldmia r4!, {r0, r1, r2, r3} )
+ THUMB( mov sp, r3 )
+ sub r2, r2, r1
+ bl memcpy @ copy .data to RAM
+#endif
+
+ ARM( ldmia r4!, {r0, r1, sp} )
+ THUMB( ldmia r4!, {r0, r1, r3} )
+ THUMB( mov sp, r3 )
+ sub r1, r1, r0
+ bl __memzero @ clear .bss
+
+ ldmia r4, {r0, r1, r2, r3}
+ str r9, [r0] @ Save processor ID
+ str r7, [r1] @ Save machine type
+ str r8, [r2] @ Save atags pointer
+ cmp r3, #0
+ strne r10, [r3] @ Save control register values
b start_kernel
ENDPROC(__mmap_switched)
.align 2
.type __mmap_switched_data, %object
__mmap_switched_data:
- .long __data_loc @ r4
- .long _sdata @ r5
- .long __bss_start @ r6
- .long _end @ r7
- .long processor_id @ r4
- .long __machine_arch_type @ r5
- .long __atags_pointer @ r6
+#ifdef CONFIG_XIP_KERNEL
+ .long _sdata @ r0
+ .long __data_loc @ r1
+ .long _edata_loc @ r2
+ .long __bss_stop @ sp (temporary stack in .bss)
+#endif
+
+ .long __bss_start @ r0
+ .long __bss_stop @ r1
+ .long init_thread_union + THREAD_START_SP @ sp
+
+ .long processor_id @ r0
+ .long __machine_arch_type @ r1
+ .long __atags_pointer @ r2
#ifdef CONFIG_CPU_CP15
- .long cr_alignment @ r7
+ .long cr_alignment @ r3
#else
- .long 0 @ r7
+ .long 0 @ r3
#endif
- .long init_thread_union + THREAD_START_SP @ sp
.size __mmap_switched_data, . - __mmap_switched_data
/*
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 8265b11621..1598caada3 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -301,7 +301,7 @@ SECTIONS
}
#endif
- BSS_SECTION(0, 0, 0)
+ BSS_SECTION(0, 0, 8)
_end = .;
STABS_DEBUG
--
2.9.5
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/5] ARM: head-common.S: speed up startup code
2017-08-30 2:55 ` [PATCH v2 1/5] ARM: head-common.S: speed up startup code Nicolas Pitre
@ 2017-10-03 12:41 ` Geert Uytterhoeven
0 siblings, 0 replies; 18+ messages in thread
From: Geert Uytterhoeven @ 2017-10-03 12:41 UTC (permalink / raw)
To: Nicolas Pitre
Cc: linux-arm-kernel, Chris Brandt, Russell King - ARM Linux,
Arnd Bergmann, Ard Biesheuvel, Linux-Renesas
Hi Nicolas,
On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> Let's use optimized routines such as memcpy to copy .data and memzero
> to clear .bss in the startup code instead of doing it one word at a
> time. Those routines don't use any global data so they're safe to use
> even if .data and .bss segments are not initialized.
>
> In the .data copy case a temporary stack is installed in the .bss area
> as the actual kernel stack is located within the copied data area. The
> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>
> Finally, make the .data copy and related pointers surrounded by
> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
> allow for further cleanups in the non-XIP linker script.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
code") in arm/for-next.
If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
identical messages on various Renesas systems:
unwind: Unknown symbol address c0800300
unwind: Index not found c0800300
I've bisected this to the aforementioned commit.
c0800300 points to the instruction just after the __memzero call, cf. below.
Do you have a clue? Thanks!
> --- a/arch/arm/kernel/head-common.S
> +++ b/arch/arm/kernel/head-common.S
> @@ -79,47 +79,59 @@ ENDPROC(__vet_atags)
> */
> __INIT
> __mmap_switched:
> - adr r3, __mmap_switched_data
> -
> - ldmia r3!, {r4, r5, r6, r7}
> - cmp r4, r5 @ Copy data segment if needed
> -1: cmpne r5, r6
> - ldrne fp, [r4], #4
> - strne fp, [r5], #4
> - bne 1b
> -
> - mov fp, #0 @ Clear BSS (and zero fp)
> -1: cmp r6, r7
> - strcc fp, [r6],#4
> - bcc 1b
> -
> - ARM( ldmia r3, {r4, r5, r6, r7, sp})
> - THUMB( ldmia r3, {r4, r5, r6, r7} )
> - THUMB( ldr sp, [r3, #16] )
> - str r9, [r4] @ Save processor ID
> - str r1, [r5] @ Save machine type
> - str r2, [r6] @ Save atags pointer
> - cmp r7, #0
> - strne r0, [r7] @ Save control register values
> +
> + mov r7, r1
> + mov r8, r2
> + mov r10, r0
> +
> + adr r4, __mmap_switched_data
> + mov fp, #0
> +
> +#ifdef CONFIG_XIP_KERNEL
> + ARM( ldmia r4!, {r0, r1, r2, sp} )
> + THUMB( ldmia r4!, {r0, r1, r2, r3} )
> + THUMB( mov sp, r3 )
> + sub r2, r2, r1
> + bl memcpy @ copy .data to RAM
> +#endif
> +
> + ARM( ldmia r4!, {r0, r1, sp} )
> + THUMB( ldmia r4!, {r0, r1, r3} )
> + THUMB( mov sp, r3 )
> + sub r1, r1, r0
> + bl __memzero @ clear .bss
> +
c0800300 is the address of the next instruction:
> + ldmia r4, {r0, r1, r2, r3}
> + str r9, [r0] @ Save processor ID
> + str r7, [r1] @ Save machine type
> + str r8, [r2] @ Save atags pointer
> + cmp r3, #0
> + strne r10, [r3] @ Save control register values
> b start_kernel
> ENDPROC(__mmap_switched)
>
> .align 2
> .type __mmap_switched_data, %object
> __mmap_switched_data:
> - .long __data_loc @ r4
> - .long _sdata @ r5
> - .long __bss_start @ r6
> - .long _end @ r7
> - .long processor_id @ r4
> - .long __machine_arch_type @ r5
> - .long __atags_pointer @ r6
> +#ifdef CONFIG_XIP_KERNEL
> + .long _sdata @ r0
> + .long __data_loc @ r1
> + .long _edata_loc @ r2
> + .long __bss_stop @ sp (temporary stack in .bss)
> +#endif
> +
> + .long __bss_start @ r0
> + .long __bss_stop @ r1
> + .long init_thread_union + THREAD_START_SP @ sp
> +
> + .long processor_id @ r0
> + .long __machine_arch_type @ r1
> + .long __atags_pointer @ r2
> #ifdef CONFIG_CPU_CP15
> - .long cr_alignment @ r7
> + .long cr_alignment @ r3
> #else
> - .long 0 @ r7
> + .long 0 @ r3
> #endif
> - .long init_thread_union + THREAD_START_SP @ sp
> .size __mmap_switched_data, . - __mmap_switched_data
>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/5] ARM: head-common.S: speed up startup code
@ 2017-10-03 12:41 ` Geert Uytterhoeven
0 siblings, 0 replies; 18+ messages in thread
From: Geert Uytterhoeven @ 2017-10-03 12:41 UTC (permalink / raw)
To: linux-arm-kernel
Hi Nicolas,
On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> Let's use optimized routines such as memcpy to copy .data and memzero
> to clear .bss in the startup code instead of doing it one word at a
> time. Those routines don't use any global data so they're safe to use
> even if .data and .bss segments are not initialized.
>
> In the .data copy case a temporary stack is installed in the .bss area
> as the actual kernel stack is located within the copied data area. The
> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>
> Finally, make the .data copy and related pointers surrounded by
> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
> allow for further cleanups in the non-XIP linker script.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
code") in arm/for-next.
If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
identical messages on various Renesas systems:
unwind: Unknown symbol address c0800300
unwind: Index not found c0800300
I've bisected this to the aforementioned commit.
c0800300 points to the instruction just after the __memzero call, cfr. below.
Do you have a clue? Thanks!
> --- a/arch/arm/kernel/head-common.S
> +++ b/arch/arm/kernel/head-common.S
> @@ -79,47 +79,59 @@ ENDPROC(__vet_atags)
> */
> __INIT
> __mmap_switched:
> - adr r3, __mmap_switched_data
> -
> - ldmia r3!, {r4, r5, r6, r7}
> - cmp r4, r5 @ Copy data segment if needed
> -1: cmpne r5, r6
> - ldrne fp, [r4], #4
> - strne fp, [r5], #4
> - bne 1b
> -
> - mov fp, #0 @ Clear BSS (and zero fp)
> -1: cmp r6, r7
> - strcc fp, [r6],#4
> - bcc 1b
> -
> - ARM( ldmia r3, {r4, r5, r6, r7, sp})
> - THUMB( ldmia r3, {r4, r5, r6, r7} )
> - THUMB( ldr sp, [r3, #16] )
> - str r9, [r4] @ Save processor ID
> - str r1, [r5] @ Save machine type
> - str r2, [r6] @ Save atags pointer
> - cmp r7, #0
> - strne r0, [r7] @ Save control register values
> +
> + mov r7, r1
> + mov r8, r2
> + mov r10, r0
> +
> + adr r4, __mmap_switched_data
> + mov fp, #0
> +
> +#ifdef CONFIG_XIP_KERNEL
> + ARM( ldmia r4!, {r0, r1, r2, sp} )
> + THUMB( ldmia r4!, {r0, r1, r2, r3} )
> + THUMB( mov sp, r3 )
> + sub r2, r2, r1
> + bl memcpy @ copy .data to RAM
> +#endif
> +
> + ARM( ldmia r4!, {r0, r1, sp} )
> + THUMB( ldmia r4!, {r0, r1, r3} )
> + THUMB( mov sp, r3 )
> + sub r1, r1, r0
> + bl __memzero @ clear .bss
> +
c0800300 is the address of the next instruction:
> + ldmia r4, {r0, r1, r2, r3}
> + str r9, [r0] @ Save processor ID
> + str r7, [r1] @ Save machine type
> + str r8, [r2] @ Save atags pointer
> + cmp r3, #0
> + strne r10, [r3] @ Save control register values
> b start_kernel
> ENDPROC(__mmap_switched)
>
> .align 2
> .type __mmap_switched_data, %object
> __mmap_switched_data:
> - .long __data_loc @ r4
> - .long _sdata @ r5
> - .long __bss_start @ r6
> - .long _end @ r7
> - .long processor_id @ r4
> - .long __machine_arch_type @ r5
> - .long __atags_pointer @ r6
> +#ifdef CONFIG_XIP_KERNEL
> + .long _sdata @ r0
> + .long __data_loc @ r1
> + .long _edata_loc @ r2
> + .long __bss_stop @ sp (temporary stack in .bss)
> +#endif
> +
> + .long __bss_start @ r0
> + .long __bss_stop @ r1
> + .long init_thread_union + THREAD_START_SP @ sp
> +
> + .long processor_id @ r0
> + .long __machine_arch_type @ r1
> + .long __atags_pointer @ r2
> #ifdef CONFIG_CPU_CP15
> - .long cr_alignment @ r7
> + .long cr_alignment @ r3
> #else
> - .long 0 @ r7
> + .long 0 @ r3
> #endif
> - .long init_thread_union + THREAD_START_SP @ sp
> .size __mmap_switched_data, . - __mmap_switched_data
>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert at linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/5] ARM: head-common.S: speed up startup code
2017-10-03 12:41 ` Geert Uytterhoeven
@ 2017-10-03 13:15 ` Ard Biesheuvel
-1 siblings, 0 replies; 18+ messages in thread
From: Ard Biesheuvel @ 2017-10-03 13:15 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Nicolas Pitre, linux-arm-kernel, Chris Brandt,
Russell King - ARM Linux, Arnd Bergmann, Linux-Renesas
On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> Hi Nicolas,
>
> On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> Let's use optimized routines such as memcpy to copy .data and memzero
>> to clear .bss in the startup code instead of doing it one word at a
>> time. Those routines don't use any global data so they're safe to use
>> even if .data and .bss segments are not initialized.
>>
>> In the .data copy case a temporary stack is installed in the .bss area
>> as the actual kernel stack is located within the copied data area. The
>> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>>
>> Finally, make the .data copy and related pointers surrounded by
>> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
>> allow for further cleanups in the non-XIP linker script.
>>
>> Signed-off-by: Nicolas Pitre <nico@linaro.org>
>> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>
> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
> code") in arm/for-next.
>
> If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
> identical messages on various Renesas systems:
>
> unwind: Unknown symbol address c0800300
> unwind: Index not found c0800300
>
> I've bisected this to the aforementioned commit.
>
> c0800300 points to the instruction just after the __memzero call, cfr. below.
>
> Do you have a clue? Thanks!
>
Hallo Geert,
It looks like this patch results in start_kernel() being entered with
a different value for lr than before. Could you please try setting it
to zero instead, right before the jump to start_kernel() ?
I don't think the patch itself is to blame here, it simply triggers an
existing issue in the unwinder (if my analysis is correct, of course)
--
Ard.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/5] ARM: head-common.S: speed up startup code
@ 2017-10-03 13:15 ` Ard Biesheuvel
0 siblings, 0 replies; 18+ messages in thread
From: Ard Biesheuvel @ 2017-10-03 13:15 UTC (permalink / raw)
To: linux-arm-kernel
On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> Hi Nicolas,
>
> On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> Let's use optimized routines such as memcpy to copy .data and memzero
>> to clear .bss in the startup code instead of doing it one word at a
>> time. Those routines don't use any global data so they're safe to use
>> even if .data and .bss segments are not initialized.
>>
>> In the .data copy case a temporary stack is installed in the .bss area
>> as the actual kernel stack is located within the copied data area. The
>> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>>
>> Finally, make the .data copy and related pointers surrounded by
>> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
>> allow for further cleanups in the non-XIP linker script.
>>
>> Signed-off-by: Nicolas Pitre <nico@linaro.org>
>> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>
> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
> code") in arm/for-next.
>
> If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
> identical messages on various Renesas systems:
>
> unwind: Unknown symbol address c0800300
> unwind: Index not found c0800300
>
> I've bisected this to the aforementioned commit.
>
> c0800300 points to the instruction just after the __memzero call, cfr. below.
>
> Do you have a clue? Thanks!
>
Hallo Geert,
It looks like this patch results in start_kernel() being entered with
a different value for lr than before. Could you please try setting it
to zero instead, right before the jump to start_kernel() ?
I don't think the patch itself is to blame here, it simply triggers an
existing issue in the unwinder (if my analysis is correct, of course)
--
Ard.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/5] ARM: head-common.S: speed up startup code
2017-10-03 13:15 ` Ard Biesheuvel
@ 2017-10-03 15:24 ` Geert Uytterhoeven
-1 siblings, 0 replies; 18+ messages in thread
From: Geert Uytterhoeven @ 2017-10-03 15:24 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: Nicolas Pitre, linux-arm-kernel, Chris Brandt,
Russell King - ARM Linux, Arnd Bergmann, Linux-Renesas
Hoi Ard,
On Tue, Oct 3, 2017 at 3:15 PM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>> On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>>> Let's use optimized routines such as memcpy to copy .data and memzero
>>> to clear .bss in the startup code instead of doing it one word at a
>>> time. Those routines don't use any global data so they're safe to use
>>> even if .data and .bss segments are not initialized.
>>>
>>> In the .data copy case a temporary stack is installed in the .bss area
>>> as the actual kernel stack is located within the copied data area. The
>>> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>>>
>>> Finally, make the .data copy and related pointers surrounded by
>>> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
>>> allow for further cleanups in the non-XIP linker script.
>>>
>>> Signed-off-by: Nicolas Pitre <nico@linaro.org>
>>> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>
>> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
>> code") in arm/for-next.
>>
>> If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
>> identical messages on various Renesas systems:
>>
>> unwind: Unknown symbol address c0800300
>> unwind: Index not found c0800300
>>
>> I've bisected this to the aforementioned commit.
>>
>> c0800300 points to the instruction just after the __memzero call, cfr. below.
>>
>> Do you have a clue? Thanks!
>
> It looks like this patch results in start_kernel() being entered with
> a different value for lr than before. Could you please try setting it
> to zero instead, right before the jump to start_kernel() ?
>
> I don't think the patch itself is to blame here, it simply triggers an
> existing issue in the unwinder (if my analysis is correct, of course)
Your analysis looks correct to me, thanks! Patch sent.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/5] ARM: head-common.S: speed up startup code
@ 2017-10-03 15:24 ` Geert Uytterhoeven
0 siblings, 0 replies; 18+ messages in thread
From: Geert Uytterhoeven @ 2017-10-03 15:24 UTC (permalink / raw)
To: linux-arm-kernel
Hoi Ard,
On Tue, Oct 3, 2017 at 3:15 PM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>> On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>>> Let's use optimized routines such as memcpy to copy .data and memzero
>>> to clear .bss in the startup code instead of doing it one word at a
>>> time. Those routines don't use any global data so they're safe to use
>>> even if .data and .bss segments are not initialized.
>>>
>>> In the .data copy case a temporary stack is installed in the .bss area
>>> as the actual kernel stack is located within the copied data area. The
>>> XIP kernel linker script ensures a 8 byte alignment for that purpose.
>>>
>>> Finally, make the .data copy and related pointers surrounded by
>>> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
>>> allow for further cleanups in the non-XIP linker script.
>>>
>>> Signed-off-by: Nicolas Pitre <nico@linaro.org>
>>> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>
>> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup
>> code") in arm/for-next.
>>
>> If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred
>> identical messages on various Renesas systems:
>>
>> unwind: Unknown symbol address c0800300
>> unwind: Index not found c0800300
>>
>> I've bisected this to the aforementioned commit.
>>
>> c0800300 points to the instruction just after the __memzero call, cfr. below.
>>
>> Do you have a clue? Thanks!
>
> It looks like this patch results in start_kernel() being entered with
> a different value for lr than before. Could you please try setting it
> to zero instead, right before the jump to start_kernel() ?
>
> I don't think the patch itself is to blame here, it simply triggers an
> existing issue in the unwinder (if my analysis is correct, of course)
Your analysis looks correct to me, thanks! Patch sent.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert at linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 2/5] ARM: vmlinux*.lds.S: some decruftification
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
2017-08-30 2:55 ` [PATCH v2 1/5] ARM: head-common.S: speed up startup code Nicolas Pitre
@ 2017-08-30 2:55 ` Nicolas Pitre
2017-08-30 2:55 ` [PATCH v2 3/5] ARM: vmlinux.lds.S: replace open coded .data sections with generic macros Nicolas Pitre
` (3 subsequent siblings)
5 siblings, 0 replies; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-30 2:55 UTC (permalink / raw)
To: linux-arm-kernel
Remove stuff from vmlinux.lds.S that is relevant only to the XIP build,
and stuff from vmlinux-xip.lds.S related to self-modifying code that
makes no sense in the XIP case.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/kernel/vmlinux-xip.lds.S | 14 --------------
arch/arm/kernel/vmlinux.lds.S | 4 +---
2 files changed, 1 insertion(+), 17 deletions(-)
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 1598caada3..88e8db3979 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -77,9 +77,7 @@ SECTIONS
*(.text.fixup)
*(__ex_table)
#endif
-#ifndef CONFIG_SMP_ON_UP
*(.alt.smp.init)
-#endif
*(.discard)
*(.discard.*)
}
@@ -181,18 +179,6 @@ SECTIONS
*(.taglist.init)
__tagtable_end = .;
}
-#ifdef CONFIG_SMP_ON_UP
- .init.smpalt : {
- __smpalt_begin = .;
- *(.alt.smp.init)
- __smpalt_end = .;
- }
-#endif
- .init.pv_table : {
- __pv_table_begin = .;
- *(.pv_table)
- __pv_table_end = .;
- }
.init.data : {
INIT_SETUP(16)
INIT_CALLS
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index c83a7ba737..4f86b4b7bd 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -236,9 +236,8 @@ SECTIONS
. = ALIGN(THREAD_SIZE);
#endif
__init_end = .;
- __data_loc = .;
- .data : AT(__data_loc) {
+ .data : {
_data = .; /* address in memory */
_sdata = .;
@@ -260,7 +259,6 @@ SECTIONS
_edata = .;
}
- _edata_loc = __data_loc + SIZEOF(.data);
BUG_TABLE
--
2.9.5
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 3/5] ARM: vmlinux.lds.S: replace open coded .data sections with generic macros
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
2017-08-30 2:55 ` [PATCH v2 1/5] ARM: head-common.S: speed up startup code Nicolas Pitre
2017-08-30 2:55 ` [PATCH v2 2/5] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
@ 2017-08-30 2:55 ` Nicolas Pitre
2017-08-30 15:11 ` Ard Biesheuvel
2017-08-30 2:55 ` [PATCH v2 4/5] ARM: vmlinux-xip.lds.S: fix multiple issues Nicolas Pitre
` (2 subsequent siblings)
5 siblings, 1 reply; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-30 2:55 UTC (permalink / raw)
To: linux-arm-kernel
Our .data section is missing PAGE_ALIGNED_DATA() which contains,
amongst other things, the vdso page. This creates a System.map that
looks like this:
c15769a8 D _edata
c1577000 d vdso_data_store
c1578000 D __start___bug_table
c1580544 D __stop___bug_table
c1580544 B __bss_start
By using RW_DATA_SECTION() we pick up whatever generic sections might be
added in the future and have page-aligned data next to other strongly
aligned data. Furthermore we now include the entire thing, including the
bug table, in the data accounting surrounded by _sdata/_edata.
While at it, let's also replace the open-coded .init.data with its
equivalent INIT_DATA_SECTION().
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/kernel/vmlinux.lds.S | 38 ++++++--------------------------------
1 file changed, 6 insertions(+), 32 deletions(-)
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 4f86b4b7bd..f73ba564b5 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -214,14 +214,9 @@ SECTIONS
*(.pv_table)
__pv_table_end = .;
}
- .init.data : {
- INIT_DATA
- INIT_SETUP(16)
- INIT_CALLS
- CON_INITCALL
- SECURITY_INITCALL
- INIT_RAM_FS
- }
+
+ INIT_DATA_SECTION(16)
+
.exit.data : {
ARM_EXIT_KEEP(EXIT_DATA)
}
@@ -237,30 +232,9 @@ SECTIONS
#endif
__init_end = .;
- .data : {
- _data = .; /* address in memory */
- _sdata = .;
-
- /*
- * first, the init task union, aligned
- * to an 8192 byte boundary.
- */
- INIT_TASK_DATA(THREAD_SIZE)
-
- NOSAVE_DATA
- CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
- READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
- /*
- * and the usual data section
- */
- DATA_DATA
- CONSTRUCTORS
-
- _edata = .;
- }
-
- BUG_TABLE
+ _sdata = .;
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ _edata = .;
#ifdef CONFIG_HAVE_TCM
/*
--
2.9.5
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 3/5] ARM: vmlinux.lds.S: replace open coded .data sections with generic macros
2017-08-30 2:55 ` [PATCH v2 3/5] ARM: vmlinux.lds.S: replace open coded .data sections with generic macros Nicolas Pitre
@ 2017-08-30 15:11 ` Ard Biesheuvel
0 siblings, 0 replies; 18+ messages in thread
From: Ard Biesheuvel @ 2017-08-30 15:11 UTC (permalink / raw)
To: linux-arm-kernel
On 30 August 2017 at 03:55, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> Our .data section is missing PAGE_ALIGNED_DATA() which contains,
> amongst other things, the vdso page. This creates a System.map that
> looks like this:
>
> c15769a8 D _edata
> c1577000 d vdso_data_store
> c1578000 D __start___bug_table
> c1580544 D __stop___bug_table
> c1580544 B __bss_start
>
> By using RW_DATA_SECTION() we pick whatever generic sections might be
> added in the future and have page-aligned data next to other strongly
> aligned data. Furthermore we now include the entire thing, including the
> bug table, in the data accounting surrounded by _sdata/_edata.
>
> While at it let's also remplace the open coded .init.data by its
> equivalent INIT_DATA_SECTION().
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> arch/arm/kernel/vmlinux.lds.S | 38 ++++++--------------------------------
> 1 file changed, 6 insertions(+), 32 deletions(-)
>
> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index 4f86b4b7bd..f73ba564b5 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -214,14 +214,9 @@ SECTIONS
> *(.pv_table)
> __pv_table_end = .;
> }
> - .init.data : {
> - INIT_DATA
> - INIT_SETUP(16)
> - INIT_CALLS
> - CON_INITCALL
> - SECURITY_INITCALL
> - INIT_RAM_FS
> - }
> +
> + INIT_DATA_SECTION(16)
> +
> .exit.data : {
> ARM_EXIT_KEEP(EXIT_DATA)
> }
> @@ -237,30 +232,9 @@ SECTIONS
> #endif
> __init_end = .;
>
> - .data : {
> - _data = .; /* address in memory */
> - _sdata = .;
> -
> - /*
> - * first, the init task union, aligned
> - * to an 8192 byte boundary.
> - */
> - INIT_TASK_DATA(THREAD_SIZE)
> -
> - NOSAVE_DATA
> - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
> - READ_MOSTLY_DATA(L1_CACHE_BYTES)
> -
> - /*
> - * and the usual data section
> - */
> - DATA_DATA
> - CONSTRUCTORS
> -
> - _edata = .;
> - }
> -
> - BUG_TABLE
> + _sdata = .;
> + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
> + _edata = .;
>
> #ifdef CONFIG_HAVE_TCM
> /*
> --
> 2.9.5
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 4/5] ARM: vmlinux-xip.lds.S: fix multiple issues
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
` (2 preceding siblings ...)
2017-08-30 2:55 ` [PATCH v2 3/5] ARM: vmlinux.lds.S: replace open coded .data sections with generic macros Nicolas Pitre
@ 2017-08-30 2:55 ` Nicolas Pitre
2017-08-30 15:12 ` Ard Biesheuvel
2017-08-30 2:55 ` [PATCH v2 5/5] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
2017-08-30 21:58 ` [PATCH v2 0/5] make XIP kernel " Chris Brandt
5 siblings, 1 reply; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-30 2:55 UTC (permalink / raw)
To: linux-arm-kernel
The XIP linker script has several problems:
- PAGE_ALIGNED_DATA is missing and is likely to end up somewhere with
the wrong LMA.
- BUG_TABLE definitely has the wrong LMA, it is not copied to RAM, and
its VMA is unaccounted for and likely to clash with dynamic memory
usage.
- TCM usage is similarly broken.
- PERCPU_SECTION is left in ROM despite being written to.
Let's use generic macros for those things and locate them appropriately.
Incidentally, those macros are usable with a LMA != VMA already by
properly defining LOAD_OFFSET.
TCM is not fixed here. It never worked in an XIP configuration anyway, so
that can wait until another round of cleanups.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/kernel/vmlinux-xip.lds.S | 70 +++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 36 deletions(-)
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 88e8db3979..39b1fb470a 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -179,7 +179,7 @@ SECTIONS
*(.taglist.init)
__tagtable_end = .;
}
- .init.data : {
+ .init.rodata : {
INIT_SETUP(16)
INIT_CALLS
CON_INITCALL
@@ -187,48 +187,46 @@ SECTIONS
INIT_RAM_FS
}
-#ifdef CONFIG_SMP
- PERCPU_SECTION(L1_CACHE_BYTES)
-#endif
-
_exiprom = .; /* End of XIP ROM area */
- __data_loc = ALIGN(4); /* location in binary */
- . = PAGE_OFFSET + TEXT_OFFSET;
-
- .data : AT(__data_loc) {
- _data = .; /* address in memory */
- _sdata = .;
- /*
- * first, the init task union, aligned
- * to an 8192 byte boundary.
- */
- INIT_TASK_DATA(THREAD_SIZE)
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+ __data_loc = ALIGN(4); /* location in file */
+ . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
+
+ . = ALIGN(THREAD_SIZE);
+ _sdata = .;
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+ *(.data..ro_after_init)
+ }
+ _edata = .;
- . = ALIGN(PAGE_SIZE);
- __init_begin = .;
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
+ }
+ .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
ARM_EXIT_KEEP(EXIT_DATA)
- . = ALIGN(PAGE_SIZE);
- __init_end = .;
-
- *(.data..ro_after_init)
-
- NOSAVE_DATA
- CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
- READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
- /*
- * and the usual data section
- */
- DATA_DATA
- CONSTRUCTORS
-
- _edata = .;
}
- _edata_loc = __data_loc + SIZEOF(.data);
+#ifdef CONFIG_SMP
+ PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
+
+ /*
+ * End of copied data. We need a dummy section to get its LMA.
+ * Also located before final ALIGN() as trailing padding is not stored
+ * in the resulting binary file and useless to copy.
+ */
+ .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+ _edata_loc = LOADADDR(.data.endmark);
- BUG_TABLE
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
#ifdef CONFIG_HAVE_TCM
/*
--
2.9.5
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 4/5] ARM: vmlinux-xip.lds.S: fix multiple issues
2017-08-30 2:55 ` [PATCH v2 4/5] ARM: vmlinux-xip.lds.S: fix multiple issues Nicolas Pitre
@ 2017-08-30 15:12 ` Ard Biesheuvel
0 siblings, 0 replies; 18+ messages in thread
From: Ard Biesheuvel @ 2017-08-30 15:12 UTC (permalink / raw)
To: linux-arm-kernel
On 30 August 2017 at 03:55, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> The XIP linker script has several problems:
>
> - PAGE_ALIGNED_DATA is missing and is likely to end up somewhere with
> the wrong LMA.
>
> - BUG_TABLE definitely has the wrong LMA, it is not copied to RAM, and
> its VMA is unaccounted for and likely to clash with dynamic memory
> usage.
>
> - TCM usage is similarly broken.
>
> - PERCPU_SECTION is left in ROM despite being written to.
>
> Let's use generic macros for those things and locate them appropriately.
> Incidentally, those macros are usable with a LMA != VMA already by
> properly defining LOAD_OFFSET.
>
> TCM is not fixed here. It never worked in a XIP configuration anyway, so
> that can wait until another round of cleanups.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> arch/arm/kernel/vmlinux-xip.lds.S | 70 +++++++++++++++++++--------------------
> 1 file changed, 34 insertions(+), 36 deletions(-)
>
> diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
> index 88e8db3979..39b1fb470a 100644
> --- a/arch/arm/kernel/vmlinux-xip.lds.S
> +++ b/arch/arm/kernel/vmlinux-xip.lds.S
> @@ -179,7 +179,7 @@ SECTIONS
> *(.taglist.init)
> __tagtable_end = .;
> }
> - .init.data : {
> + .init.rodata : {
> INIT_SETUP(16)
> INIT_CALLS
> CON_INITCALL
> @@ -187,48 +187,46 @@ SECTIONS
> INIT_RAM_FS
> }
>
> -#ifdef CONFIG_SMP
> - PERCPU_SECTION(L1_CACHE_BYTES)
> -#endif
> -
> _exiprom = .; /* End of XIP ROM area */
> - __data_loc = ALIGN(4); /* location in binary */
> - . = PAGE_OFFSET + TEXT_OFFSET;
> -
> - .data : AT(__data_loc) {
> - _data = .; /* address in memory */
> - _sdata = .;
>
> - /*
> - * first, the init task union, aligned
> - * to an 8192 byte boundary.
> - */
> - INIT_TASK_DATA(THREAD_SIZE)
> +/*
> + * From this point, stuff is considered writable and will be copied to RAM
> + */
> + __data_loc = ALIGN(4); /* location in file */
> + . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */
> +#undef LOAD_OFFSET
> +#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
> +
> + . = ALIGN(THREAD_SIZE);
> + _sdata = .;
> + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
> + .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
> + *(.data..ro_after_init)
> + }
> + _edata = .;
>
> - . = ALIGN(PAGE_SIZE);
> - __init_begin = .;
> + . = ALIGN(PAGE_SIZE);
> + __init_begin = .;
> + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
> INIT_DATA
> + }
> + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
> ARM_EXIT_KEEP(EXIT_DATA)
> - . = ALIGN(PAGE_SIZE);
> - __init_end = .;
> -
> - *(.data..ro_after_init)
> -
> - NOSAVE_DATA
> - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
> - READ_MOSTLY_DATA(L1_CACHE_BYTES)
> -
> - /*
> - * and the usual data section
> - */
> - DATA_DATA
> - CONSTRUCTORS
> -
> - _edata = .;
> }
> - _edata_loc = __data_loc + SIZEOF(.data);
> +#ifdef CONFIG_SMP
> + PERCPU_SECTION(L1_CACHE_BYTES)
> +#endif
> +
> + /*
> + * End of copied data. We need a dummy section to get its LMA.
> + * Also located before final ALIGN() as trailing padding is not stored
> + * in the resulting binary file and useless to copy.
> + */
> + .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
> + _edata_loc = LOADADDR(.data.endmark);
>
> - BUG_TABLE
> + . = ALIGN(PAGE_SIZE);
> + __init_end = .;
>
> #ifdef CONFIG_HAVE_TCM
> /*
> --
> 2.9.5
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 5/5] ARM: XIP kernel: store .data compressed in ROM
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
` (3 preceding siblings ...)
2017-08-30 2:55 ` [PATCH v2 4/5] ARM: vmlinux-xip.lds.S: fix multiple issues Nicolas Pitre
@ 2017-08-30 2:55 ` Nicolas Pitre
2017-08-30 21:58 ` [PATCH v2 0/5] make XIP kernel " Chris Brandt
5 siblings, 0 replies; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-30 2:55 UTC (permalink / raw)
To: linux-arm-kernel
The .data segment stored in ROM is only copied to RAM once at boot time
and never referenced afterwards. This is arguably a suboptimal usage of
ROM resources.
This patch allows for compressing the .data segment before storing it
into ROM and decompressing it to RAM rather than simply copying it,
saving on precious ROM space.
Because global data is not available yet (obviously) we must allocate
decompressor workspace memory on the stack. The .bss area is used as a
stack area for that purpose before it is cleared. The required stack
frame is 9568 bytes for __inflate_kernel_data() alone, so make sure
the .bss is large enough to cope with that plus extra room for called
functions or fail the build.
Those numbers were picked arbitrarily based on the above 9568 byte
stack frame:
10240 (2.5 * PAGE_SIZE): used to override -Wframe-larger-than whose
default value is 1024.
12288 (3 * PAGE_SIZE): minimum .bss size to contain the stack.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/Kconfig | 11 +++++++
arch/arm/boot/Makefile | 13 +++++++-
arch/arm/boot/deflate_xip_data.sh | 64 +++++++++++++++++++++++++++++++++++++
arch/arm/kernel/Makefile | 5 +++
arch/arm/kernel/head-common.S | 11 ++++++-
arch/arm/kernel/head-inflate-data.c | 62 +++++++++++++++++++++++++++++++++++
arch/arm/kernel/vmlinux-xip.lds.S | 8 +++++
7 files changed, 172 insertions(+), 2 deletions(-)
create mode 100755 arch/arm/boot/deflate_xip_data.sh
create mode 100644 arch/arm/kernel/head-inflate-data.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb1506..bf79c461bd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
be linked for and stored to. This address is dependent on your
own flash usage.
+config XIP_DEFLATED_DATA
+ bool "Store kernel .data section compressed in ROM"
+ depends on XIP_KERNEL
+ select ZLIB_INFLATE
+ help
+ Before the kernel is actually executed, its .data section has to be
+ copied to RAM from ROM. This option allows for storing that data
+ in compressed form and decompressed to RAM rather than merely being
+ copied, saving some precious ROM space. A possible drawback is a
+ slightly longer boot delay.
+
config KEXEC
bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP)
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 50f8d1be7f..a3af4dc08c 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
ifeq ($(CONFIG_XIP_KERNEL),y)
+cmd_deflate_xip_data = $(CONFIG_SHELL) -c \
+ '$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }'
+
+ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
+quiet_cmd_mkxip = XIPZ $@
+cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
+else
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+endif
+
$(obj)/xipImage: vmlinux FORCE
- $(call if_changed,objcopy)
+ $(call if_changed,mkxip)
@$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
$(obj)/Image $(obj)/zImage: FORCE
diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh
new file mode 100755
index 0000000000..1189598a25
--- /dev/null
+++ b/arch/arm/boot/deflate_xip_data.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# XIP kernel .data segment compressor
+#
+# Created by: Nicolas Pitre, August 2017
+# Copyright: (C) 2017 Linaro Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# This script locates the start of the .data section in xipImage and
+# substitutes it with a compressed version. The needed offsets are obtained
+# from symbol addresses in vmlinux. It is expected that .data extends to
+# the end of xipImage.
+
+set -e
+
+VMLINUX="$1"
+XIPIMAGE="$2"
+
+DD="dd status=none"
+
+# Use "make V=1" to debug this script.
+case "$KBUILD_VERBOSE" in
+*1*)
+ set -x
+ ;;
+esac
+
+sym_val() {
+ # extract hex value for symbol in $1
+ local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
+ [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
+ # convert from hex to decimal
+ echo $((0x$val))
+}
+
+__data_loc=$(sym_val __data_loc)
+_edata_loc=$(sym_val _edata_loc)
+base_offset=$(sym_val _xiprom)
+
+# convert to file based offsets
+data_start=$(($__data_loc - $base_offset))
+data_end=$(($_edata_loc - $base_offset))
+
+# Make sure data occupies the last part of the file.
+file_end=$(stat -c "%s" "$XIPIMAGE")
+if [ "$file_end" != "$data_end" ]; then
+ printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \
+ $(($file_end + $base_offset)) $_edata_loc 2>&1
+ exit 1;
+fi
+
+# be ready to clean up
+trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
+
+# substitute the data section by a compressed version
+$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
+$DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes |
+gzip -9 >> "$XIPIMAGE.tmp"
+
+# replace kernel binary
+mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad325a8c7e..52f437997c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -87,6 +87,11 @@ head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+# This is executed very early using a temporary stack when no memory allocator
+# nor global data is available. Everything has to be allocated on the stack.
+CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
+obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
+
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a
ifeq ($(CONFIG_ARM_PSCI),y)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index bf9c4e38ec..a25027b87a 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -87,7 +87,14 @@ __mmap_switched:
adr r4, __mmap_switched_data
mov fp, #0
-#ifdef CONFIG_XIP_KERNEL
+#if defined(CONFIG_XIP_DEFLATED_DATA)
+ ARM( ldr sp, [r4], #4 )
+ THUMB( ldr sp, [r4] )
+ THUMB( add r4, #4 )
+ bl __inflate_kernel_data @ decompress .data to RAM
+ teq r0, #0
+ bne __error
+#elif defined(CONFIG_XIP_KERNEL)
ARM( ldmia r4!, {r0, r1, r2, sp} )
THUMB( ldmia r4!, {r0, r1, r2, r3} )
THUMB( mov sp, r3 )
@@ -114,9 +121,11 @@ ENDPROC(__mmap_switched)
.type __mmap_switched_data, %object
__mmap_switched_data:
#ifdef CONFIG_XIP_KERNEL
+#ifndef CONFIG_XIP_DEFLATED_DATA
.long _sdata @ r0
.long __data_loc @ r1
.long _edata_loc @ r2
+#endif
.long __bss_stop @ sp (temporary stack in .bss)
#endif
diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
new file mode 100644
index 0000000000..4598b959c9
--- /dev/null
+++ b/arch/arm/kernel/head-inflate-data.c
@@ -0,0 +1,62 @@
+/*
+ * XIP kernel .data segment decompressor
+ *
+ * Created by: Nicolas Pitre, August 2017
+ * Copyright: (C) 2017 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/zutil.h>
+
+/* for struct inflate_state */
+#include "../../../lib/zlib_inflate/inftrees.h"
+#include "../../../lib/zlib_inflate/inflate.h"
+#include "../../../lib/zlib_inflate/infutil.h"
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+/*
+ * This code is called very early during the boot process to decompress
+ * the .data segment stored compressed in ROM. Therefore none of the global
+ * variables are valid yet, hence no kernel services such as memory
+ * allocation is available. Everything must be allocated on the stack and
+ * we must avoid any global data access. We use a temporary stack located
+ * in the .bss area. The linker script makes sure the .bss is big enough
+ * to hold our stack frame plus some room for called functions.
+ *
+ * We mimic the code in lib/decompress_inflate.c to use the smallest work
+ * area possible. And because everything is statically allocated on the
+ * stack then there is no need to clean up before returning.
+ */
+
+int __init __inflate_kernel_data(void)
+{
+ struct z_stream_s stream, *strm = &stream;
+ struct inflate_state state;
+ char *in = __data_loc;
+ int rc;
+
+ /* Check and skip gzip header (assume no filename) */
+ if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
+ return -1;
+ in += 10;
+
+ strm->workspace = &state;
+ strm->next_in = in;
+ strm->avail_in = _edata_loc - __data_loc; /* upper bound */
+ strm->next_out = _sdata;
+ strm->avail_out = _edata_loc - __data_loc;
+ zlib_inflateInit2(strm, -MAX_WBITS);
+ WS(strm)->inflate_state.wsize = 0;
+ WS(strm)->inflate_state.window = NULL;
+ rc = zlib_inflate(strm, Z_FINISH);
+ if (rc == Z_STREAM_END)
+ rc = strm->avail_out; /* should be 0 */
+ return rc;
+}
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 39b1fb470a..7a84431008 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -306,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
*/
ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
"HYP init code too big or misaligned")
+
+#ifdef CONFIG_XIP_DEFLATED_DATA
+/*
+ * The .bss is used as a stack area for __inflate_kernel_data() whose stack
+ * frame is 9568 bytes. Make sure it has extra room left.
+ */
+ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
+#endif
--
2.9.5
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 0/5] make XIP kernel .data compressed in ROM
2017-08-30 2:55 [PATCH v2 0/5] make XIP kernel .data compressed in ROM Nicolas Pitre
` (4 preceding siblings ...)
2017-08-30 2:55 ` [PATCH v2 5/5] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
@ 2017-08-30 21:58 ` Chris Brandt
2017-08-31 0:04 ` Nicolas Pitre
5 siblings, 1 reply; 18+ messages in thread
From: Chris Brandt @ 2017-08-30 21:58 UTC (permalink / raw)
To: linux-arm-kernel
On Tuesday, August 29, 2017, Nicolas Pitre wrote:
> This patch series provides the ability to store the XIP kernel .data
> segment compressed in ROM. It has to be copied to RAM anyway so
> storing it uncompressed is arguably a waste of ROM resources.
>
> While at it, the copying of .data (when not compressed) and the
> clearing of .bss is now performed using optimized string routines
> rather than doing it one word at a time.
Tested on my XIP system (Cortex-A9) and it seems to work OK.
> This work highlighted some deficiencies in linker scripts, especially
> the XIP one. So this also includes linker script cleanups and fixes.
Thank you for this!
If you're curious, here are some numbers from my system:
Original xipImage size: 4,190,068 bytes
xipImage size with CONFIG_XIP_DEFLATED_DATA=y: 3,985,227 bytes
A 204,841 byte reduction in image size (4.89%)
Chris
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 0/5] make XIP kernel .data compressed in ROM
2017-08-30 21:58 ` [PATCH v2 0/5] make XIP kernel " Chris Brandt
@ 2017-08-31 0:04 ` Nicolas Pitre
2017-08-31 0:43 ` Chris Brandt
0 siblings, 1 reply; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-31 0:04 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 30 Aug 2017, Chris Brandt wrote:
> On Tuesday, August 29, 2017, Nicolas Pitre wrote:
> > This patch series provides the ability to store the XIP kernel .data
> > segment compressed in ROM. It has to be copied to RAM anyway so
> > storing it uncompressed is arguably a waste of ROM resources.
> >
> > While at it, the copying of .data (when not compressed) and the
> > clearing of .bss is now performed using optimized string routines
> > rather than doing it one word at a time.
>
> Tested on my XIP system (Cortex-A9) and it seems to work OK.
Good, thanks for testing.
> > This work highlighted some deficiencies in linker scripts, especially
> > the XIP one. So this also includes linker script cleanups and fixes.
>
> Thank you for this!
More will come eventually.
Did this fix your cramfs issue with 4.13-rc7?
> If you're curious, here are some numbers from my system:
>
> Original xipImage size: 4,190,068 bytes
> xipImage size with CONFIG_XIP_DEFLATED_DATA=y: 3,985,227 bytes
>
> A 204,841 byte reduction in image size (4.89%)
In my case it is 1195492 vs 1111632, or a 7% reduction.
This could be improved further by locating most of init.data and
init.text into the compressed zone even if it is read-only. The
decompressed form would be located in RAM temporarily and discarded
after boot. Right now it is left uncompressed in ROM forever. But that's
for a later round.
Nicolas
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 0/5] make XIP kernel .data compressed in ROM
2017-08-31 0:04 ` Nicolas Pitre
@ 2017-08-31 0:43 ` Chris Brandt
2017-08-31 1:02 ` Nicolas Pitre
0 siblings, 1 reply; 18+ messages in thread
From: Chris Brandt @ 2017-08-31 0:43 UTC (permalink / raw)
To: linux-arm-kernel
On Wednesday, August 30, 2017, Nicolas Pitre wrote:
> > > This work highlighted some deficiencies in linker scripts, especially
> > > the XIP one. So this also includes linker script cleanups and fixes.
> >
> > Thank you for this!
>
> More will come eventually.
>
> Did this fix your cramfs issue with 4.13-rc7?
Yes!
I just stacked your latest set of cramfs XIP patches on top of these
and now magically it boots again.
Thank you!
Notice that busybox, libc and ld have physical addresses in Flash (i.e., XIP):
$ cat /proc/self/maps
00008000-000a1000 r-xp 1b005000 00:0c 18192 /bin/busybox
000a9000-000aa000 rw-p 00099000 00:0c 18192 /bin/busybox
000aa000-000ac000 rw-p 00000000 00:00 0 [heap]
b6eed000-b6fc6000 r-xp 1b0bc000 00:0c 766540 /lib/libc-2.18-2013.10.so
b6fc6000-b6fce000 ---p 1b195000 00:0c 766540 /lib/libc-2.18-2013.10.so
b6fce000-b6fd0000 r--p 000d9000 00:0c 766540 /lib/libc-2.18-2013.10.so
b6fd0000-b6fd1000 rw-p 000db000 00:0c 766540 /lib/libc-2.18-2013.10.so
b6fd1000-b6fd4000 rw-p 00000000 00:00 0
b6fd4000-b6feb000 r-xp 1b0a4000 00:0c 670372 /lib/ld-2.18-2013.10.so
b6fee000-b6fef000 rw-p 00000000 00:00 0
b6ff0000-b6ff2000 rw-p 00000000 00:00 0
b6ff2000-b6ff3000 r--p 00016000 00:0c 670372 /lib/ld-2.18-2013.10.so
b6ff3000-b6ff4000 rw-p 00017000 00:0c 670372 /lib/ld-2.18-2013.10.so
bee27000-bee48000 rw-p 00000000 00:00 0 [stack]
beea4000-beea5000 r-xp 00000000 00:00 0 [sigpage]
ffff0000-ffff1000 r-xp 00000000 00:00 0 [vectors]
Chris
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 0/5] make XIP kernel .data compressed in ROM
2017-08-31 0:43 ` Chris Brandt
@ 2017-08-31 1:02 ` Nicolas Pitre
0 siblings, 0 replies; 18+ messages in thread
From: Nicolas Pitre @ 2017-08-31 1:02 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, 31 Aug 2017, Chris Brandt wrote:
> On Wednesday, August 30, 2017, Nicolas Pitre wrote:
> > > > This work highlighted some deficiencies in linker scripts, especially
> > > > the XIP one. So this also includes linker script cleanups and fixes.
> > >
> > > Thank you for this!
> >
> > More will come eventually.
> >
> > Did this fix your cramfs issue with 4.13-rc7?
>
> Yes!
> I just stacked your latest set of cramfs XIP patches on top of these
> and now magically it boots again.
> Thank you!
Excellent! If you could post this conclusion and your Tested-by in the
cramfs thread for the benefit of the audience there, that'd be helpful.
Nicolas
^ permalink raw reply [flat|nested] 18+ messages in thread