All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] make XIP kernel .data compressed in ROM
@ 2017-08-25 16:25 Nicolas Pitre
  2017-08-25 16:25 ` [PATCH 1/3] ARM: head-common.S: speed up startup code Nicolas Pitre
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-25 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This patch series provides the ability to store the kernel .data
segment compressed in ROM. It has to be copied to RAM anyway so
storing it uncompressed is arguably a waste of ROM resources.

While at it, the copying of .data (when not compressed) and the
clearing of .bss is performed using optimized string routines rather
than doing it one word at a time. And throw in small linker script
cleanups for good measure.

This is also available here:

http://git.linaro.org/people/nicolas.pitre/linux xip_zdata

diffstat:

 arch/arm/Kconfig                    | 11 ++++
 arch/arm/boot/Makefile              | 13 ++++-
 arch/arm/boot/deflate_xip_data.sh   | 62 ++++++++++++++++++++++
 arch/arm/kernel/Makefile            |  5 ++
 arch/arm/kernel/head-common.S       | 85 +++++++++++++++++++------------
 arch/arm/kernel/head-inflate-data.c | 63 +++++++++++++++++++++++
 arch/arm/kernel/vmlinux-xip.lds.S   | 22 +++-----
 arch/arm/kernel/vmlinux.lds.S       |  4 +-
 8 files changed, 215 insertions(+), 50 deletions(-)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] ARM: head-common.S: speed up startup code
  2017-08-25 16:25 [PATCH 0/3] make XIP kernel .data compressed in ROM Nicolas Pitre
@ 2017-08-25 16:25 ` Nicolas Pitre
  2017-08-26 10:49   ` Ard Biesheuvel
  2017-08-25 16:25 ` [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-25 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Let's use optimized routines such as memcpy to copy .data and memzero
to clear .bss in the startup code instead of doing it one word at a
time. Those routines don't use any global data so they're safe to use
even if .data and .bss segments are not initialized.

In the .data copy case a temporary stack is installed in the .bss area
as the actual kernel stack is located within the copied data area.

Finally, make the .data copy and related pointers surrounded by
CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
allow for some cleanup in the non-XIP linker script as well.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/kernel/head-common.S | 76 +++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 8733012d23..63a21fe03f 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -79,47 +79,59 @@ ENDPROC(__vet_atags)
  */
 	__INIT
 __mmap_switched:
-	adr	r3, __mmap_switched_data
-
-	ldmia	r3!, {r4, r5, r6, r7}
-	cmp	r4, r5				@ Copy data segment if needed
-1:	cmpne	r5, r6
-	ldrne	fp, [r4], #4
-	strne	fp, [r5], #4
-	bne	1b
-
-	mov	fp, #0				@ Clear BSS (and zero fp)
-1:	cmp	r6, r7
-	strcc	fp, [r6],#4
-	bcc	1b
-
- ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
- THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
- THUMB(	ldr	sp, [r3, #16]		)
-	str	r9, [r4]			@ Save processor ID
-	str	r1, [r5]			@ Save machine type
-	str	r2, [r6]			@ Save atags pointer
-	cmp	r7, #0
-	strne	r0, [r7]			@ Save control register values
+
+	mov	r7, r1
+	mov	r8, r2
+	mov	r10, r0
+
+	adr	r4, __mmap_switched_data
+	mov	fp, #0
+
+#ifdef CONFIG_XIP_KERNEL
+   ARM(	ldmia	r4!, {r0, r1, r2, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r2, r2, r1
+	bl	memcpy				@ copy .data to RAM
+#endif
+
+   ARM(	ldmia	r4!, {r0, r1, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r1, r1, r0
+	bl	__memzero			@ clear .bss
+
+	ldmia	r4, {r0, r1, r2, r3}
+	str	r9, [r0]			@ Save processor ID
+	str	r7, [r1]			@ Save machine type
+	str	r8, [r2]			@ Save atags pointer
+	cmp	r3, #0
+	strne	r10, [r3]			@ Save control register values
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
 	.align	2
 	.type	__mmap_switched_data, %object
 __mmap_switched_data:
-	.long	__data_loc			@ r4
-	.long	_sdata				@ r5
-	.long	__bss_start			@ r6
-	.long	_end				@ r7
-	.long	processor_id			@ r4
-	.long	__machine_arch_type		@ r5
-	.long	__atags_pointer			@ r6
+#ifdef CONFIG_XIP_KERNEL
+	.long	_sdata				@ r0
+	.long	__data_loc			@ r1
+	.long	_edata_loc			@ r2
+	.long	_end				@ sp (temporary stack in .bss)
+#endif
+
+	.long	__bss_start			@ r0
+	.long	_end				@ r1
+	.long	init_thread_union + THREAD_START_SP @ sp
+
+	.long	processor_id			@ r0
+	.long	__machine_arch_type		@ r1
+	.long	__atags_pointer			@ r2
 #ifdef CONFIG_CPU_CP15
-	.long	cr_alignment			@ r7
+	.long	cr_alignment			@ r3
 #else
-	.long	0				@ r7
+	.long	0				@ r3
 #endif
-	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
 /*
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification
  2017-08-25 16:25 [PATCH 0/3] make XIP kernel .data compressed in ROM Nicolas Pitre
  2017-08-25 16:25 ` [PATCH 1/3] ARM: head-common.S: speed up startup code Nicolas Pitre
@ 2017-08-25 16:25 ` Nicolas Pitre
  2017-08-26 10:38   ` Ard Biesheuvel
  2017-08-25 16:26 ` [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
  2017-08-28 17:40 ` [PATCH 0/3] make XIP kernel " Chris Brandt
  3 siblings, 1 reply; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-25 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Remove stuff from vmlinux.lds.S that is relevant only to the XIP build,
and stuff from vmlinux-xip.lds.S related to self-modifying code that
makes no sense in the XIP case.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/kernel/vmlinux-xip.lds.S | 14 --------------
 arch/arm/kernel/vmlinux.lds.S     |  4 +---
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 8265b11621..d6c08a4c30 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -77,9 +77,7 @@ SECTIONS
 		*(.text.fixup)
 		*(__ex_table)
 #endif
-#ifndef CONFIG_SMP_ON_UP
 		*(.alt.smp.init)
-#endif
 		*(.discard)
 		*(.discard.*)
 	}
@@ -181,18 +179,6 @@ SECTIONS
 		*(.taglist.init)
 		__tagtable_end = .;
 	}
-#ifdef CONFIG_SMP_ON_UP
-	.init.smpalt : {
-		__smpalt_begin = .;
-		*(.alt.smp.init)
-		__smpalt_end = .;
-	}
-#endif
-	.init.pv_table : {
-		__pv_table_begin = .;
-		*(.pv_table)
-		__pv_table_end = .;
-	}
 	.init.data : {
 		INIT_SETUP(16)
 		INIT_CALLS
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index c83a7ba737..4f86b4b7bd 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -236,9 +236,8 @@ SECTIONS
 	. = ALIGN(THREAD_SIZE);
 #endif
 	__init_end = .;
-	__data_loc = .;
 
-	.data : AT(__data_loc) {
+	.data : {
 		_data = .;		/* address in memory */
 		_sdata = .;
 
@@ -260,7 +259,6 @@ SECTIONS
 
 		_edata = .;
 	}
-	_edata_loc = __data_loc + SIZEOF(.data);
 
 	BUG_TABLE
 
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM
  2017-08-25 16:25 [PATCH 0/3] make XIP kernel .data compressed in ROM Nicolas Pitre
  2017-08-25 16:25 ` [PATCH 1/3] ARM: head-common.S: speed up startup code Nicolas Pitre
  2017-08-25 16:25 ` [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
@ 2017-08-25 16:26 ` Nicolas Pitre
  2017-08-26 10:58   ` Ard Biesheuvel
  2017-08-28 17:40 ` [PATCH 0/3] make XIP kernel " Chris Brandt
  3 siblings, 1 reply; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-25 16:26 UTC (permalink / raw)
  To: linux-arm-kernel

The .data segment stored in ROM is only copied to RAM once at boot time
and never referenced afterwards. This is arguably a suboptimal usage of
ROM resources.

This patch allows for compressing the .data segment before storing it
into ROM and decompressing it to RAM rather than simply copying it,
saving on precious ROM space.

Because global data is not available yet (obviously) we must allocate
decompressor workspace memory on the stack. The .bss area is used as a
stack area for that purpose before it is cleared. The required stack
frame is 9568 bytes for __inflate_kernel_data() alone, so make sure
the .bss is large enough to cope with that plus extra room for called
functions or fail the build.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/Kconfig                    | 11 +++++++
 arch/arm/boot/Makefile              | 13 +++++++-
 arch/arm/boot/deflate_xip_data.sh   | 62 ++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/Makefile            |  5 +++
 arch/arm/kernel/head-common.S       | 11 ++++++-
 arch/arm/kernel/head-inflate-data.c | 63 +++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/vmlinux-xip.lds.S   |  8 +++++
 7 files changed, 171 insertions(+), 2 deletions(-)
 create mode 100755 arch/arm/boot/deflate_xip_data.sh
 create mode 100644 arch/arm/kernel/head-inflate-data.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb1506..bf79c461bd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
 	  be linked for and stored to.  This address is dependent on your
 	  own flash usage.
 
+config XIP_DEFLATED_DATA
+	bool "Store kernel .data section compressed in ROM"
+	depends on XIP_KERNEL
+	select ZLIB_INFLATE
+	help
+	  Before the kernel is actually executed, its .data section has to be
+	  copied to RAM from ROM. This option allows for storing that data
+	  in compressed form and decompressed to RAM rather than merely being
+	  copied, saving some precious ROM space. A possible drawback is a
+	  slightly longer boot delay.
+
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 50f8d1be7f..8b5d33a524 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
 
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
+cmd_deflate_xip_data = \
+	$(CONFIG_SHELL) $(srctree)/$(src)/deflate_xip_data.sh $< $@
+
+ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
+quiet_cmd_mkxip = XIPZ    $@
+cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
+else
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+endif
+
 $(obj)/xipImage: vmlinux FORCE
-	$(call if_changed,objcopy)
+	$(call if_changed,mkxip)
 	@$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
 
 $(obj)/Image $(obj)/zImage: FORCE
diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh
new file mode 100755
index 0000000000..18c9bc398a
--- /dev/null
+++ b/arch/arm/boot/deflate_xip_data.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+
+# XIP kernel .data segment compressor
+#
+# Created by:	Nicolas Pitre, August 2017
+# Copyright:	(C) 2017  Linaro Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# This script locates the start of the .data section in xipImage and
+# substitutes it with a compressed version. The needed offsets are obtained
+# from symbol addresses in vmlinux. It is expected that .data extends to
+# the end of xipImage.
+
+set -e
+
+VMLINUX="$1"
+XIPIMAGE="$2"
+
+DD="dd status=none"
+
+# Use "make V=1" to debug this script.
+case "$KBUILD_VERBOSE" in
+*1*)
+	set -x
+	;;
+esac
+
+sym_val() {
+	# extract hex value for symbol in $1
+	local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
+	[ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
+	# convert from hex to decimal
+	echo $((0x$val))
+}
+
+base_offset=$(sym_val _xiprom)
+data_start=$(sym_val __data_loc)
+data_end=$(sym_val _edata_loc)
+
+# convert to file based offsets
+data_start=$(($data_start - $base_offset))
+data_end=$(($data_end - $base_offset))
+
+# make sure data occupies the last part of the file
+file_end=$(stat -c "%s" "$XIPIMAGE")
+if [ "$file_end" != "$data_end" ]
+then echo "data segment doesn't match end of xipImage" 2>&1; exit 1;
+fi
+
+# be ready to clean up
+trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
+
+# substitute the data section by a compressed version
+$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
+$DD if="$XIPIMAGE"  skip=$data_start iflag=skip_bytes |
+gzip -9 >> "$XIPIMAGE.tmp"
+
+# replace kernel binary
+mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad325a8c7e..52f437997c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -87,6 +87,11 @@ head-y			:= head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
+# This is executed very early using a temporary stack when no memory allocator
+# nor global data is available. Everything has to be allocated on the stack.
+CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
+obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
+
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 63a21fe03f..d57628f545 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -87,7 +87,14 @@ __mmap_switched:
 	adr	r4, __mmap_switched_data
 	mov	fp, #0
 
-#ifdef CONFIG_XIP_KERNEL
+#if defined(CONFIG_XIP_DEFLATED_DATA)
+   ARM(	ldr	sp, [r4], #4 )
+ THUMB(	ldr	sp, [r4] )
+ THUMB(	add	r4, #4 )
+	bl	__inflate_kernel_data		@ decompress .data to RAM
+	teq	r0, #0
+	bne	__error
+#elif defined(CONFIG_XIP_KERNEL)
    ARM(	ldmia	r4!, {r0, r1, r2, sp} )
  THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
  THUMB(	mov	sp, r3 )
@@ -114,9 +121,11 @@ ENDPROC(__mmap_switched)
 	.type	__mmap_switched_data, %object
 __mmap_switched_data:
 #ifdef CONFIG_XIP_KERNEL
+#ifndef CONFIG_XIP_DEFLATED_DATA
 	.long	_sdata				@ r0
 	.long	__data_loc			@ r1
 	.long	_edata_loc			@ r2
+#endif
 	.long	_end				@ sp (temporary stack in .bss)
 #endif
 
diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
new file mode 100644
index 0000000000..a12d241f0a
--- /dev/null
+++ b/arch/arm/kernel/head-inflate-data.c
@@ -0,0 +1,63 @@
+/*
+ * XIP kernel .data segment decompressor
+ *
+ * Created by:	Nicolas Pitre, August 2017
+ * Copyright:	(C) 2017  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/zutil.h>
+
+/* for struct inflate_state */
+#include "../../../lib/zlib_inflate/inftrees.h"
+#include "../../../lib/zlib_inflate/inflate.h"
+#include "../../../lib/zlib_inflate/infutil.h"
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+extern char _edata[];
+
+/*
+ * This code is called very early during the boot process to decompress
+ * the .data segment stored compressed in ROM. Therefore none of the global
+ * variables are valid yet, hence no kernel services such as memory
+ * allocation is available. Everything must be allocated on the stack and
+ * we must avoid any global data access. We use a temporary stack located
+ * in the .bss area. The linker script makes sure the .bss is big enough
+ * to hold our stack frame plus some room for called functions.
+ *
+ * We mimic the code in lib/decompress_inflate.c to use the smallest work
+ * area possible. And because everything is statically allocated on the
+ * stack then there is no need to clean up before returning.
+ */
+
+int __init __inflate_kernel_data(void)
+{
+	struct z_stream_s stream, *strm = &stream;
+	struct inflate_state state;
+	char *in = __data_loc;
+	int rc;
+
+	/* Check and skip gzip header (assume no filename) */
+	if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
+		return -1;
+	in += 10;
+
+	strm->workspace = &state;
+	strm->next_in = in;
+	strm->avail_in = _edata_loc - __data_loc;  /* upper bound */
+	strm->next_out = _sdata;
+	strm->avail_out = _edata - _sdata;
+	zlib_inflateInit2(strm, -MAX_WBITS);
+	WS(strm)->inflate_state.wsize = 0;
+	WS(strm)->inflate_state.window = NULL;
+	rc = zlib_inflate(strm, Z_FINISH);
+	if (rc != Z_STREAM_END)
+		return -1;
+	return strm->avail_out;  /* should be 0 */
+}
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index d6c08a4c30..3489d54da4 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -308,3 +308,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
  */
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 	"HYP init code too big or misaligned")
+
+#ifdef CONFIG_XIP_DEFLATED_DATA
+/*
+ * The .bss is used as a stack area for __inflate_kernel_data() whose stack
+ * frame is 9568 bytes. Make sure it has extra room left.
+ */
+ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
+#endif
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification
  2017-08-25 16:25 ` [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
@ 2017-08-26 10:38   ` Ard Biesheuvel
  0 siblings, 0 replies; 15+ messages in thread
From: Ard Biesheuvel @ 2017-08-26 10:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 August 2017 at 17:25, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> Remove stuff from vmlinux.lds.S that is relevant only to the XIP build,
> and stuff from vmlinux-xip.lds.S related to self-modifying code that
> makes no sense in the XIP case.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>

Reviewed by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---
>  arch/arm/kernel/vmlinux-xip.lds.S | 14 --------------
>  arch/arm/kernel/vmlinux.lds.S     |  4 +---
>  2 files changed, 1 insertion(+), 17 deletions(-)
>
> diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
> index 8265b11621..d6c08a4c30 100644
> --- a/arch/arm/kernel/vmlinux-xip.lds.S
> +++ b/arch/arm/kernel/vmlinux-xip.lds.S
> @@ -77,9 +77,7 @@ SECTIONS
>                 *(.text.fixup)
>                 *(__ex_table)
>  #endif
> -#ifndef CONFIG_SMP_ON_UP
>                 *(.alt.smp.init)
> -#endif
>                 *(.discard)
>                 *(.discard.*)
>         }
> @@ -181,18 +179,6 @@ SECTIONS
>                 *(.taglist.init)
>                 __tagtable_end = .;
>         }
> -#ifdef CONFIG_SMP_ON_UP
> -       .init.smpalt : {
> -               __smpalt_begin = .;
> -               *(.alt.smp.init)
> -               __smpalt_end = .;
> -       }
> -#endif
> -       .init.pv_table : {
> -               __pv_table_begin = .;
> -               *(.pv_table)
> -               __pv_table_end = .;
> -       }
>         .init.data : {
>                 INIT_SETUP(16)
>                 INIT_CALLS
> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index c83a7ba737..4f86b4b7bd 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -236,9 +236,8 @@ SECTIONS
>         . = ALIGN(THREAD_SIZE);
>  #endif
>         __init_end = .;
> -       __data_loc = .;
>
> -       .data : AT(__data_loc) {
> +       .data : {
>                 _data = .;              /* address in memory */
>                 _sdata = .;
>
> @@ -260,7 +259,6 @@ SECTIONS
>
>                 _edata = .;
>         }
> -       _edata_loc = __data_loc + SIZEOF(.data);
>
>         BUG_TABLE
>
> --
> 2.9.5
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] ARM: head-common.S: speed up startup code
  2017-08-25 16:25 ` [PATCH 1/3] ARM: head-common.S: speed up startup code Nicolas Pitre
@ 2017-08-26 10:49   ` Ard Biesheuvel
  0 siblings, 0 replies; 15+ messages in thread
From: Ard Biesheuvel @ 2017-08-26 10:49 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 August 2017 at 17:25, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> Let's use optimized routines such as memcpy to copy .data and memzero
> to clear .bss in the startup code instead of doing it one word at a
> time. Those routines don't use any global data so they're safe to use
> even if .data and .bss segments are not initialized.
>
> In the .data copy case a temporary stack is installed in the .bss area
> as the actual kernel stack is located within the copied data area.
>
> Finally, make the .data copy and related pointers surrounded by
> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will
> allow for some cleanup in the non-XIP linker script as well.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>

Reviewed by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---
>  arch/arm/kernel/head-common.S | 76 +++++++++++++++++++++++++------------------
>  1 file changed, 44 insertions(+), 32 deletions(-)
>
> diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
> index 8733012d23..63a21fe03f 100644
> --- a/arch/arm/kernel/head-common.S
> +++ b/arch/arm/kernel/head-common.S
> @@ -79,47 +79,59 @@ ENDPROC(__vet_atags)
>   */
>         __INIT
>  __mmap_switched:
> -       adr     r3, __mmap_switched_data
> -
> -       ldmia   r3!, {r4, r5, r6, r7}
> -       cmp     r4, r5                          @ Copy data segment if needed
> -1:     cmpne   r5, r6
> -       ldrne   fp, [r4], #4
> -       strne   fp, [r5], #4
> -       bne     1b
> -
> -       mov     fp, #0                          @ Clear BSS (and zero fp)
> -1:     cmp     r6, r7
> -       strcc   fp, [r6],#4
> -       bcc     1b
> -
> - ARM(  ldmia   r3, {r4, r5, r6, r7, sp})
> - THUMB(        ldmia   r3, {r4, r5, r6, r7}    )
> - THUMB(        ldr     sp, [r3, #16]           )
> -       str     r9, [r4]                        @ Save processor ID
> -       str     r1, [r5]                        @ Save machine type
> -       str     r2, [r6]                        @ Save atags pointer
> -       cmp     r7, #0
> -       strne   r0, [r7]                        @ Save control register values
> +
> +       mov     r7, r1
> +       mov     r8, r2
> +       mov     r10, r0
> +
> +       adr     r4, __mmap_switched_data
> +       mov     fp, #0
> +
> +#ifdef CONFIG_XIP_KERNEL
> +   ARM(        ldmia   r4!, {r0, r1, r2, sp} )
> + THUMB(        ldmia   r4!, {r0, r1, r2, r3} )
> + THUMB(        mov     sp, r3 )
> +       sub     r2, r2, r1
> +       bl      memcpy                          @ copy .data to RAM
> +#endif
> +
> +   ARM(        ldmia   r4!, {r0, r1, sp} )
> + THUMB(        ldmia   r4!, {r0, r1, r3} )
> + THUMB(        mov     sp, r3 )
> +       sub     r1, r1, r0
> +       bl      __memzero                       @ clear .bss
> +
> +       ldmia   r4, {r0, r1, r2, r3}
> +       str     r9, [r0]                        @ Save processor ID
> +       str     r7, [r1]                        @ Save machine type
> +       str     r8, [r2]                        @ Save atags pointer
> +       cmp     r3, #0
> +       strne   r10, [r3]                       @ Save control register values
>         b       start_kernel
>  ENDPROC(__mmap_switched)
>
>         .align  2
>         .type   __mmap_switched_data, %object
>  __mmap_switched_data:
> -       .long   __data_loc                      @ r4
> -       .long   _sdata                          @ r5
> -       .long   __bss_start                     @ r6
> -       .long   _end                            @ r7
> -       .long   processor_id                    @ r4
> -       .long   __machine_arch_type             @ r5
> -       .long   __atags_pointer                 @ r6
> +#ifdef CONFIG_XIP_KERNEL
> +       .long   _sdata                          @ r0
> +       .long   __data_loc                      @ r1
> +       .long   _edata_loc                      @ r2
> +       .long   _end                            @ sp (temporary stack in .bss)
> +#endif
> +
> +       .long   __bss_start                     @ r0
> +       .long   _end                            @ r1
> +       .long   init_thread_union + THREAD_START_SP @ sp
> +
> +       .long   processor_id                    @ r0
> +       .long   __machine_arch_type             @ r1
> +       .long   __atags_pointer                 @ r2
>  #ifdef CONFIG_CPU_CP15
> -       .long   cr_alignment                    @ r7
> +       .long   cr_alignment                    @ r3
>  #else
> -       .long   0                               @ r7
> +       .long   0                               @ r3
>  #endif
> -       .long   init_thread_union + THREAD_START_SP @ sp
>         .size   __mmap_switched_data, . - __mmap_switched_data
>
>  /*
> --
> 2.9.5
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM
  2017-08-25 16:26 ` [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
@ 2017-08-26 10:58   ` Ard Biesheuvel
  2017-08-26 15:41     ` Nicolas Pitre
  0 siblings, 1 reply; 15+ messages in thread
From: Ard Biesheuvel @ 2017-08-26 10:58 UTC (permalink / raw)
  To: linux-arm-kernel

On 25 August 2017 at 17:26, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> The .data segment stored in ROM is only copied to RAM once at boot time
> and never referenced afterwards. This is arguably a suboptimal usage of
> ROM resources.
>
> This patch allows for compressing the .data segment before storing it
> into ROM and decompressing it to RAM rather than simply copying it,
> saving on precious ROM space.
>
> Because global data is not available yet (obviously) we must allocate
> decompressor workspace memory on the stack. The .bss area is used as a
> stack area for that purpose before it is cleared. The required stack
> frame is 9568 bytes for __inflate_kernel_data() alone, so make sure
> the .bss is large enough to cope with that plus extra room for called
> functions or fail the build.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---
>  arch/arm/Kconfig                    | 11 +++++++
>  arch/arm/boot/Makefile              | 13 +++++++-
>  arch/arm/boot/deflate_xip_data.sh   | 62 ++++++++++++++++++++++++++++++++++++
>  arch/arm/kernel/Makefile            |  5 +++
>  arch/arm/kernel/head-common.S       | 11 ++++++-
>  arch/arm/kernel/head-inflate-data.c | 63 +++++++++++++++++++++++++++++++++++++
>  arch/arm/kernel/vmlinux-xip.lds.S   |  8 +++++
>  7 files changed, 171 insertions(+), 2 deletions(-)
>  create mode 100755 arch/arm/boot/deflate_xip_data.sh
>  create mode 100644 arch/arm/kernel/head-inflate-data.c
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 61a0cb1506..bf79c461bd 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
>           be linked for and stored to.  This address is dependent on your
>           own flash usage.
>
> +config XIP_DEFLATED_DATA
> +       bool "Store kernel .data section compressed in ROM"
> +       depends on XIP_KERNEL
> +       select ZLIB_INFLATE
> +       help
> +         Before the kernel is actually executed, its .data section has to be
> +         copied to RAM from ROM. This option allows for storing that data
> +         in compressed form and decompressed to RAM rather than merely being
> +         copied, saving some precious ROM space. A possible drawback is a
> +         slightly longer boot delay.
> +
>  config KEXEC
>         bool "Kexec system call (EXPERIMENTAL)"
>         depends on (!SMP || PM_SLEEP_SMP)
> diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
> index 50f8d1be7f..8b5d33a524 100644
> --- a/arch/arm/boot/Makefile
> +++ b/arch/arm/boot/Makefile
> @@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
>
>  ifeq ($(CONFIG_XIP_KERNEL),y)
>
> +cmd_deflate_xip_data = \
> +       $(CONFIG_SHELL) $(srctree)/$(src)/deflate_xip_data.sh $< $@
> +
> +ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
> +quiet_cmd_mkxip = XIPZ    $@
> +cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
> +else
> +quiet_cmd_mkxip = $(quiet_cmd_objcopy)
> +cmd_mkxip = $(cmd_objcopy)
> +endif
> +
>  $(obj)/xipImage: vmlinux FORCE
> -       $(call if_changed,objcopy)
> +       $(call if_changed,mkxip)
>         @$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
>
>  $(obj)/Image $(obj)/zImage: FORCE
> diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh
> new file mode 100755
> index 0000000000..18c9bc398a
> --- /dev/null
> +++ b/arch/arm/boot/deflate_xip_data.sh
> @@ -0,0 +1,62 @@
> +#!/bin/sh
> +
> +# XIP kernel .data segment compressor
> +#
> +# Created by:  Nicolas Pitre, August 2017
> +# Copyright:   (C) 2017  Linaro Limited
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License version 2 as
> +# published by the Free Software Foundation.
> +
> +# This script locates the start of the .data section in xipImage and
> +# substitutes it with a compressed version. The needed offsets are obtained
> +# from symbol addresses in vmlinux. It is expected that .data extends to
> +# the end of xipImage.
> +
> +set -e
> +
> +VMLINUX="$1"
> +XIPIMAGE="$2"
> +
> +DD="dd status=none"
> +
> +# Use "make V=1" to debug this script.
> +case "$KBUILD_VERBOSE" in
> +*1*)
> +       set -x
> +       ;;
> +esac
> +
> +sym_val() {
> +       # extract hex value for symbol in $1
> +       local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
> +       [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
> +       # convert from hex to decimal
> +       echo $((0x$val))
> +}
> +
> +base_offset=$(sym_val _xiprom)
> +data_start=$(sym_val __data_loc)
> +data_end=$(sym_val _edata_loc)
> +
> +# convert to file based offsets
> +data_start=$(($data_start - $base_offset))
> +data_end=$(($data_end - $base_offset))
> +
> +# make sure data occupies the last part of the file
> +file_end=$(stat -c "%s" "$XIPIMAGE")
> +if [ "$file_end" != "$data_end" ]
> +then echo "data segment doesn't match end of xipImage" 2>&1; exit 1;
> +fi
> +
> +# be ready to clean up
> +trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
> +
> +# substitute the data section by a compressed version
> +$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
> +$DD if="$XIPIMAGE"  skip=$data_start iflag=skip_bytes |
> +gzip -9 >> "$XIPIMAGE.tmp"
> +
> +# replace kernel binary
> +mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
> diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
> index ad325a8c7e..52f437997c 100644
> --- a/arch/arm/kernel/Makefile
> +++ b/arch/arm/kernel/Makefile
> @@ -87,6 +87,11 @@ head-y                       := head$(MMUEXT).o
>  obj-$(CONFIG_DEBUG_LL) += debug.o
>  obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
>
> +# This is executed very early using a temporary stack when no memory allocator
> +# nor global data is available. Everything has to be allocated on the stack.
> +CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
> +obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
> +
>  obj-$(CONFIG_ARM_VIRT_EXT)     += hyp-stub.o
>  AFLAGS_hyp-stub.o              :=-Wa,-march=armv7-a
>  ifeq ($(CONFIG_ARM_PSCI),y)
> diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
> index 63a21fe03f..d57628f545 100644
> --- a/arch/arm/kernel/head-common.S
> +++ b/arch/arm/kernel/head-common.S
> @@ -87,7 +87,14 @@ __mmap_switched:
>         adr     r4, __mmap_switched_data
>         mov     fp, #0
>
> -#ifdef CONFIG_XIP_KERNEL
> +#if defined(CONFIG_XIP_DEFLATED_DATA)
> +   ARM(        ldr     sp, [r4], #4 )
> + THUMB(        ldr     sp, [r4] )
> + THUMB(        add     r4, #4 )
> +       bl      __inflate_kernel_data           @ decompress .data to RAM
> +       teq     r0, #0
> +       bne     __error
> +#elif defined(CONFIG_XIP_KERNEL)
>     ARM(        ldmia   r4!, {r0, r1, r2, sp} )
>   THUMB(        ldmia   r4!, {r0, r1, r2, r3} )
>   THUMB(        mov     sp, r3 )
> @@ -114,9 +121,11 @@ ENDPROC(__mmap_switched)
>         .type   __mmap_switched_data, %object
>  __mmap_switched_data:
>  #ifdef CONFIG_XIP_KERNEL
> +#ifndef CONFIG_XIP_DEFLATED_DATA
>         .long   _sdata                          @ r0
>         .long   __data_loc                      @ r1
>         .long   _edata_loc                      @ r2
> +#endif
>         .long   _end                            @ sp (temporary stack in .bss)
>  #endif
>
> diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
> new file mode 100644
> index 0000000000..a12d241f0a
> --- /dev/null
> +++ b/arch/arm/kernel/head-inflate-data.c
> @@ -0,0 +1,63 @@
> +/*
> + * XIP kernel .data segment decompressor
> + *
> + * Created by: Nicolas Pitre, August 2017
> + * Copyright:  (C) 2017  Linaro Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/zutil.h>
> +
> +/* for struct inflate_state */
> +#include "../../../lib/zlib_inflate/inftrees.h"
> +#include "../../../lib/zlib_inflate/inflate.h"
> +#include "../../../lib/zlib_inflate/infutil.h"
> +
> +extern char __data_loc[];
> +extern char _edata_loc[];
> +extern char _sdata[];
> +extern char _edata[];
> +
> +/*
> + * This code is called very early during the boot process to decompress
> + * the .data segment stored compressed in ROM. Therefore none of the global
> + * variables are valid yet, hence no kernel services such as memory
> + * allocation is available. Everything must be allocated on the stack and
> + * we must avoid any global data access. We use a temporary stack located
> + * in the .bss area. The linker script makes sure the .bss is big enough
> + * to hold our stack frame plus some room for called functions.
> + *
> + * We mimic the code in lib/decompress_inflate.c to use the smallest work
> + * area possible. And because everything is statically allocated on the
> + * stack then there is no need to clean up before returning.
> + */
> +
> +int __init __inflate_kernel_data(void)
> +{
> +       struct z_stream_s stream, *strm = &stream;
> +       struct inflate_state state;
> +       char *in = __data_loc;
> +       int rc;
> +
> +       /* Check and skip gzip header (assume no filename) */
> +       if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
> +               return -1;
> +       in += 10;
> +
> +       strm->workspace = &state;
> +       strm->next_in = in;
> +       strm->avail_in = _edata_loc - __data_loc;  /* upper bound */
> +       strm->next_out = _sdata;
> +       strm->avail_out = _edata - _sdata;
> +       zlib_inflateInit2(strm, -MAX_WBITS);
> +       WS(strm)->inflate_state.wsize = 0;
> +       WS(strm)->inflate_state.window = NULL;
> +       rc = zlib_inflate(strm, Z_FINISH);
> +       if (rc != Z_STREAM_END)
> +               return -1;
> +       return strm->avail_out;  /* should be 0 */
> +}
> diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
> index d6c08a4c30..3489d54da4 100644
> --- a/arch/arm/kernel/vmlinux-xip.lds.S
> +++ b/arch/arm/kernel/vmlinux-xip.lds.S
> @@ -308,3 +308,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
>   */
>  ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
>         "HYP init code too big or misaligned")
> +
> +#ifdef CONFIG_XIP_DEFLATED_DATA
> +/*
> + * The .bss is used as a stack area for __inflate_kernel_data() whose stack
> + * frame is 9568 bytes. Make sure it has extra room left.
> + */
> +ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")

The open coded numbers are a bit confusing here: the stack check uses
10240, the code actually needs >9568 bytes and here, you assert that
there is at least 12288 space between _end and __bss_start.

Other than that, this patch looks correct to me.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM
  2017-08-26 10:58   ` Ard Biesheuvel
@ 2017-08-26 15:41     ` Nicolas Pitre
  2017-08-28  8:29       ` Ard Biesheuvel
  0 siblings, 1 reply; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-26 15:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, 26 Aug 2017, Ard Biesheuvel wrote:

> On 25 August 2017 at 17:26, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > The .data segment stored in ROM is only copied to RAM once at boot time
> > and never referenced afterwards. This is arguably a suboptimal usage of
> > ROM resources.
> >
> > This patch allows for compressing the .data segment before storing it
> > into ROM and decompressing it to RAM rather than simply copying it,
> > saving on precious ROM space.
> >
> > Because global data is not available yet (obviously) we must allocate
> > decompressor workspace memory on the stack. The .bss area is used as a
> > stack area for that purpose before it is cleared. The required stack
> > frame is 9568 bytes for __inflate_kernel_data() alone, so make sure
> > the .bss is large enough to cope with that plus extra room for called
> > functions or fail the build.
> >
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
[...]
> > +#ifdef CONFIG_XIP_DEFLATED_DATA
> > +/*
> > + * The .bss is used as a stack area for __inflate_kernel_data() whose stack
> > + * frame is 9568 bytes. Make sure it has extra room left.
> > + */
> > +ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
> 
> The open coded numbers are a bit confusing here: the stack check uses
> 10240, the code actually needs >9568 bytes and here, you assert that
> there is at least 12288 space between _end and __bss_start.

The 9568 bytes is what my particular gcc version actually uses for 
__inflate_kernel_data(). That is not a tail function so some more space 
is needed. I imagine this may vary a bit from compiler versions.

The 10240 is 2.5x page size and 12288 is 3x page size. Those numbers are 
arbitrary but I had to pick something.


Nicolas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM
  2017-08-26 15:41     ` Nicolas Pitre
@ 2017-08-28  8:29       ` Ard Biesheuvel
  0 siblings, 0 replies; 15+ messages in thread
From: Ard Biesheuvel @ 2017-08-28  8:29 UTC (permalink / raw)
  To: linux-arm-kernel

On 26 August 2017 at 16:41, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Sat, 26 Aug 2017, Ard Biesheuvel wrote:
>
>> On 25 August 2017 at 17:26, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> > The .data segment stored in ROM is only copied to RAM once at boot time
>> > and never referenced afterwards. This is arguably a suboptimal usage of
>> > ROM resources.
>> >
>> > This patch allows for compressing the .data segment before storing it
>> > into ROM and decompressing it to RAM rather than simply copying it,
>> > saving on precious ROM space.
>> >
>> > Because global data is not available yet (obviously) we must allocate
>> > decompressor workspace memory on the stack. The .bss area is used as a
>> > stack area for that purpose before it is cleared. The required stack
>> > frame is 9568 bytes for __inflate_kernel_data() alone, so make sure
>> > the .bss is large enough to cope with that plus extra room for called
>> > functions or fail the build.
>> >
>> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
>> > ---
> [...]
>> > +#ifdef CONFIG_XIP_DEFLATED_DATA
>> > +/*
>> > + * The .bss is used as a stack area for __inflate_kernel_data() whose stack
>> > + * frame is 9568 bytes. Make sure it has extra room left.
>> > + */
>> > +ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
>>
>> The open coded numbers are a bit confusing here: the stack check uses
>> 10240, the code actually needs >9568 bytes and here, you assert that
>> there is at least 12288 space between _end and __bss_start.
>
> The 9568 bytes is what my particular gcc version actually uses for
> __inflate_kernel_data(). That is not a tail function so some more space
> is needed. I imagine this may vary a bit from compiler versions.
>
> The 10240 is 2.5x page size and 12288 is 3x page size. Those numbers are
> arbitrary but I had to pick something.
>

OK, fair enough.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

BTW, I know this is unrelated but I don't think _end is guaranteed to
be suitably aligned.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-25 16:25 [PATCH 0/3] make XIP kernel .data compressed in ROM Nicolas Pitre
                   ` (2 preceding siblings ...)
  2017-08-25 16:26 ` [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
@ 2017-08-28 17:40 ` Chris Brandt
  2017-08-29 19:07   ` Nicolas Pitre
  3 siblings, 1 reply; 15+ messages in thread
From: Chris Brandt @ 2017-08-28 17:40 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday, August 25, 2017 1, Nicolas Pitre wrote:
> This patch series provides the ability to store the kernel .data
> segment compressed in ROM. It has to be copied to RAM anyway so
> storing it uncompressed is arguably a waste of ROM resources.
> 
> While at it, the copying of .data (when not compressed) and the
> clearing of .bss is performed using optimized string routines rather
> than doing it one word at a time. And throw in small linker script
> cleanups for good measure.
> 

I like the idea, but unfortunately it won't build for my RZ/A1 
(CONFIG_ARCH_R7S72100=y) XIP system. It's a Cortex-A9 with MMU (meaning
you have to hack the Kconfig because apparently allowing
CONFIG_ARCH_MULTIPLATFORM=y and CONFIG_XIP_KERNEL=y in the same build
is considered a ridiculous thing to do).


  LD      vmlinux.o
  MODPOST vmlinux.o
  KSYM    .tmp_kallsyms1.o
  KSYM    .tmp_kallsyms2.o
  LD      vmlinux
  SORTEX  vmlinux
  SYSMAP  System.map
  XIPZ    arch/arm/boot/xipImage
data segment doesn't match end of xipImage
../arch/arm/boot/Makefile:46: recipe for target 'arch/arm/boot/xipImage' failed
make[2]: *** [arch/arm/boot/xipImage] Error 1
arch/arm/Makefile:334: recipe for target 'xipImage' failed
make[1]: *** [xipImage] Error 2
make[1]: Leaving directory '/home/renesas/tools/upstream/renesas-drivers/.out'
Makefile:145: recipe for target 'sub-make' failed
make: *** [sub-make] Error 2


I admit, I did not do any debug yet to find out why.
But if you want to see it, you can simply apply this patch below and 
then build the shmobile_defconfig (after you go in and enable XIP_KERNEL=y 
and XIP_DEFLATED_DATA=y of course).


---------------------------

diff --git a/arch/arm/Kconfigb/arch/arm/Kconfig
index 80191e93f09b..eeb4aa37e8e9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -338,7 +338,7 @@ config ARCH_MULTIPLATFORM
 	bool "Allow multiple platforms to be selected"
 	depends on MMU
 	select ARM_HAS_SG_CHAIN
-	select ARM_PATCH_PHYS_VIRT
+	select ARM_PATCH_PHYS_VIRT if !XIP_KERNEL
 	select AUTO_ZRELADDR
 	select TIMER_OF
 	select COMMON_CLK
@@ -1977,7 +1977,7 @@ endchoice
 
 config XIP_KERNEL
 	bool "Kernel Execute-In-Place from ROM"
-	depends on !ARM_LPAE && !ARCH_MULTIPLATFORM
+	depends on !ARM_LPAE
 	help
 	  Execute-In-Place allows the kernel to run from non-volatile storage
 	  directly addressable by the CPU, such as NOR flash. This saves RAM


Chris

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-28 17:40 ` [PATCH 0/3] make XIP kernel " Chris Brandt
@ 2017-08-29 19:07   ` Nicolas Pitre
  2017-08-29 19:58     ` Chris Brandt
  2017-08-29 21:50     ` Russell King - ARM Linux
  0 siblings, 2 replies; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-29 19:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 28 Aug 2017, Chris Brandt wrote:

> On Friday, August 25, 2017 1, Nicolas Pitre wrote:
> > This patch series provides the ability to store the kernel .data
> > segment compressed in ROM. It has to be copied to RAM anyway so
> > storing it uncompressed is arguably a waste of ROM resources.
> > 
> > While at it, the copying of .data (when not compressed) and the
> > clearing of .bss is performed using optimized string routines rather
> > than doing it one word at a time. And throw in small linker script
> > cleanups for good measure.
> > 
> 
> I like the idea, but unfortunately it won't build for my RZ/A1 
> (CONFIG_ARCH_R7S72100=y) XIP system. It's a Cortex-A9 with MMU (meaning
> you have to hack the Kconfig because apparently allowing
> CONFIG_ARCH_MULTIPLATFORM=y and CONFIG_XIP_KERNEL=y in the same build
> is considered a ridiculous thing to do).

If you want XIP, it is likely that your kernel won't have the same 
physical address across different platforms. It is also likely that the 
RAM offset will be different across different platforms. And the very 
nature of XIP means the kernel cannot be be self-modified to adjust 
those discrepencies like it does in the multiplatform case. And Flash is 
a precious resource so why would you waste it with platform code that 
you'll never execute?

If the Kconfig language could let you confirm that only one platform is 
selected then the XIP config option could be offered only in that case.  
Maybe something like:

config PLATFORM_FOO
	bool "blah"
	select MANY_PLATFORMS if ONE_PLATFORM_SELECTED
	select ONE_PLATFORM_SELECTED

config XIP_KERNEL
	bool "blah"
	depends on !MANY_PLATFORMS

But I don't think the above works and that's unfortunate. For having 
spent some time in the kconfig code a while ago, it shouldn't be hard to 
implement something like incrementable variables but that's not in my 
short term plans.

>   LD      vmlinux
>   SORTEX  vmlinux
>   SYSMAP  System.map
>   XIPZ    arch/arm/boot/xipImage
> data segment doesn't match end of xipImage

OK......... I now know why.

Executive summary: our linker script is a complete mess. In your case 
that would be partly solved by the patch that Arnd sent on Jul 28th 2017 
titled "ARM: move __bug_table into .data for XIP_KERNEL". However the 
XIP linker script is still broken for similar reasons.

I've done a first pass at fixing the worst of it and I'll post a new 
series soon.

@Arnd: please don't send that patch upstream. I have a better fix for 
it now.


Nicolas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-29 19:07   ` Nicolas Pitre
@ 2017-08-29 19:58     ` Chris Brandt
  2017-08-29 20:04       ` Nicolas Pitre
  2017-08-29 21:50     ` Russell King - ARM Linux
  1 sibling, 1 reply; 15+ messages in thread
From: Chris Brandt @ 2017-08-29 19:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Tuesday, August 29, 2017, Nicolas Pitre wrote:
> On Mon, 28 Aug 2017, Chris Brandt wrote:
> 
> > On Friday, August 25, 2017 1, Nicolas Pitre wrote:
> > > This patch series provides the ability to store the kernel .data
> > > segment compressed in ROM. It has to be copied to RAM anyway so
> > > storing it uncompressed is arguably a waste of ROM resources.
> > >
> > > While at it, the copying of .data (when not compressed) and the
> > > clearing of .bss is performed using optimized string routines rather
> > > than doing it one word at a time. And throw in small linker script
> > > cleanups for good measure.
> > >
> >
> > I like the idea, but unfortunately it won't build for my RZ/A1
> > (CONFIG_ARCH_R7S72100=y) XIP system. It's a Cortex-A9 with MMU (meaning
> > you have to hack the Kconfig because apparently allowing
> > CONFIG_ARCH_MULTIPLATFORM=y and CONFIG_XIP_KERNEL=y in the same build
> > is considered a ridiculous thing to do).
> 
> If you want XIP, it is likely that your kernel won't have the same
> physical address across different platforms. It is also likely that the
> RAM offset will be different across different platforms. And the very
> nature of XIP means the kernel cannot be be self-modified to adjust
> those discrepencies like it does in the multiplatform case. And Flash is
> a precious resource so why would you waste it with platform code that
> you'll never execute?

It's not really that I plan on running an XIP_KERNEL on multiple 
different images, it's that if you look at an MMU based system, you have no 
choice but to build it as a ARCH_MULTIPLATFORM in the current Kconfig 
structure.
Of course for my .config, I go in and uncheck every other CPU and driver
that has nothing to do with my system which is why my XIP KERNEL binary
is only 4MB which include all static drivers built in.



> If the Kconfig language could let you confirm that only one platform is
> selected then the XIP config option could be offered only in that case.
> Maybe something like:
> 
> config PLATFORM_FOO
> 	bool "blah"
> 	select MANY_PLATFORMS if ONE_PLATFORM_SELECTED
> 	select ONE_PLATFORM_SELECTED
> 
> config XIP_KERNEL
> 	bool "blah"
> 	depends on !MANY_PLATFORMS


Yes, that's the argument. But, I've tried many different ways with the 
current Kconfig system, and submitted many different patches over the 
last year and a half and not everyone is ever completely happy...so I'm 
stuck with hacking the 2 lines in the Kconfig.
But, that's not what this thread is about.


> But I don't think the above works and that's unfortunate. For having
> spent some time in the kconfig code a while ago, it shouldn't be hard to
> implement something like incrementable variables but that's not in my
> short term plans.

The only way that would seem to make people happy is to make some new 
'Kconfig type', but that's a bit more involved than just editing some 
Kconfig files.


> >   LD      vmlinux
> >   SORTEX  vmlinux
> >   SYSMAP  System.map
> >   XIPZ    arch/arm/boot/xipImage
> > data segment doesn't match end of xipImage
> 
> OK......... I now know why.
> 
> Executive summary: our linker script is a complete mess. In your case
> that would be partly solved by the patch that Arnd sent on Jul 28th 2017
> titled "ARM: move __bug_table into .data for XIP_KERNEL". However the
> XIP linker script is still broken for similar reasons.

Ya, I thought that would happen eventually. The only way I could get 
agreement to fix existing broken XIP_KERNEL stuff was to split the linker 
script into XIP and non-XIP. But, trying to follow every vmlinux.lds.S 
change and checking to see if it is needed for vmlinux-xip.lds.S is a bit 
daunting.


Chris

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-29 19:58     ` Chris Brandt
@ 2017-08-29 20:04       ` Nicolas Pitre
  0 siblings, 0 replies; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-29 20:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 29 Aug 2017, Chris Brandt wrote:

> On Tuesday, August 29, 2017, Nicolas Pitre wrote:
> > Executive summary: our linker script is a complete mess. In your case
> > that would be partly solved by the patch that Arnd sent on Jul 28th 2017
> > titled "ARM: move __bug_table into .data for XIP_KERNEL". However the
> > XIP linker script is still broken for similar reasons.
> 
> Ya, I thought that would happen eventually. The only way I could get 
> agreement to fix existing broken XIP_KERNEL stuff was to split the linker 
> script into XIP and non-XIP. But, trying to follow every vmlinux.lds.S 
> change and checking to see if it is needed for vmlinux-xip.lds.S is a bit 
> daunting.

It is. And even in the non-XIP case with regards to changes in 
include/asm-generic/vmlinux.lds.h too.


Nicolas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-29 19:07   ` Nicolas Pitre
  2017-08-29 19:58     ` Chris Brandt
@ 2017-08-29 21:50     ` Russell King - ARM Linux
  2017-08-29 21:52       ` Nicolas Pitre
  1 sibling, 1 reply; 15+ messages in thread
From: Russell King - ARM Linux @ 2017-08-29 21:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Aug 29, 2017 at 03:07:09PM -0400, Nicolas Pitre wrote:
> @Arnd: please don't send that patch upstream. I have a better fix for 
> it now.

It's not really up to Arnd to send it to Linus, it's something that
should come through me...

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/3] make XIP kernel .data compressed in ROM
  2017-08-29 21:50     ` Russell King - ARM Linux
@ 2017-08-29 21:52       ` Nicolas Pitre
  0 siblings, 0 replies; 15+ messages in thread
From: Nicolas Pitre @ 2017-08-29 21:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 29 Aug 2017, Russell King - ARM Linux wrote:

> On Tue, Aug 29, 2017 at 03:07:09PM -0400, Nicolas Pitre wrote:
> > @Arnd: please don't send that patch upstream. I have a better fix for 
> > it now.
> 
> It's not really up to Arnd to send it to Linus, it's something that
> should come through me...

By upstream I did mean you.


Nicolas

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-08-29 21:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-25 16:25 [PATCH 0/3] make XIP kernel .data compressed in ROM Nicolas Pitre
2017-08-25 16:25 ` [PATCH 1/3] ARM: head-common.S: speed up startup code Nicolas Pitre
2017-08-26 10:49   ` Ard Biesheuvel
2017-08-25 16:25 ` [PATCH 2/3] ARM: vmlinux*.lds.S: some decruftification Nicolas Pitre
2017-08-26 10:38   ` Ard Biesheuvel
2017-08-25 16:26 ` [PATCH 3/3] ARM: XIP kernel: store .data compressed in ROM Nicolas Pitre
2017-08-26 10:58   ` Ard Biesheuvel
2017-08-26 15:41     ` Nicolas Pitre
2017-08-28  8:29       ` Ard Biesheuvel
2017-08-28 17:40 ` [PATCH 0/3] make XIP kernel " Chris Brandt
2017-08-29 19:07   ` Nicolas Pitre
2017-08-29 19:58     ` Chris Brandt
2017-08-29 20:04       ` Nicolas Pitre
2017-08-29 21:50     ` Russell King - ARM Linux
2017-08-29 21:52       ` Nicolas Pitre

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.