* [RFT/RFC PATCH 0/6] ARM kernel size fixes
@ 2015-03-12 17:38 Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
                   ` (5 more replies)
  0 siblings, 6 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

This series is a suggested approach to preventing linker failures on large
kernels. It is somewhat unpolished, and is posted primarily for comments and
testing.

The issues were found and reported by Arnd Bergmann, and these patches are
loosely based on his initial approach to work around them.

Ard Biesheuvel (6):
  ARM: replace PROCINFO embedded branch with relative offset
  ARM: move HYP text to end of .text section
  ARM: add macro to perform far branches (b/bl)
  ARM: use bl_far to call __hyp_stub_install_secondary from the .data
    section
  ARM: move the .idmap.text section closer to .head.text
  ARM: keep .text and .fixup regions together

 arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
 arch/arm/kernel/head.S           | 14 ++++++++------
 arch/arm/kernel/sleep.S          |  2 +-
 arch/arm/kernel/vmlinux.lds.S    | 16 ++++++++++------
 arch/arm/kvm/init.S              |  5 +----
 arch/arm/kvm/interrupts.S        |  4 +---
 arch/arm/mm/proc-arm1020.S       |  4 ++--
 arch/arm/mm/proc-arm1020e.S      |  4 ++--
 arch/arm/mm/proc-arm1022.S       |  4 ++--
 arch/arm/mm/proc-arm1026.S       |  4 ++--
 arch/arm/mm/proc-arm720.S        |  4 ++--
 arch/arm/mm/proc-arm740.S        |  4 ++--
 arch/arm/mm/proc-arm7tdmi.S      |  4 ++--
 arch/arm/mm/proc-arm920.S        |  4 ++--
 arch/arm/mm/proc-arm922.S        |  4 ++--
 arch/arm/mm/proc-arm925.S        |  4 ++--
 arch/arm/mm/proc-arm926.S        |  4 ++--
 arch/arm/mm/proc-arm940.S        |  4 ++--
 arch/arm/mm/proc-arm946.S        |  4 ++--
 arch/arm/mm/proc-arm9tdmi.S      |  4 ++--
 arch/arm/mm/proc-fa526.S         |  4 ++--
 arch/arm/mm/proc-feroceon.S      |  5 +++--
 arch/arm/mm/proc-macros.S        |  4 ++++
 arch/arm/mm/proc-mohawk.S        |  4 ++--
 arch/arm/mm/proc-sa110.S         |  4 ++--
 arch/arm/mm/proc-sa1100.S        |  4 ++--
 arch/arm/mm/proc-v6.S            |  4 ++--
 arch/arm/mm/proc-v7.S            |  4 ++--
 arch/arm/mm/proc-v7m.S           |  4 ++--
 arch/arm/mm/proc-xsc3.S          |  4 ++--
 arch/arm/mm/proc-xscale.S        |  4 ++--
 31 files changed, 103 insertions(+), 68 deletions(-)

-- 
1.8.3.2


* [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 20:24   ` Nicolas Pitre
  2015-03-12 20:50   ` Russell King - ARM Linux
  2015-03-12 17:38 ` [RFT/RFC PATCH 2/6] ARM: move HYP text to end of .text section Ard Biesheuvel
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

This patch replaces the 'branch to setup()' instructions embedded
in the PROCINFO structs with the offset to that setup function
relative to the base of the struct. This preserves the position
independent nature of that field, but uses a data item rather
than an instruction.

This is mainly done to prevent linker failures on large kernels,
where the setup function is out of reach for the branch.
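
To illustrate the arithmetic (a sketch only, taking __v7_setup as an
example): the initfn macro added to proc-macros.S emits

	initfn	__v7_setup
	@ which expands to roughly
	.long	__v7_setup - . + PROCINFO_INITFUNC

Since that word lives PROCINFO_INITFUNC bytes past the start of the
proc_info struct, the expression reduces to the offset of __v7_setup
from the struct base, and head.S turns it back into an absolute
address at runtime:

	ldr	r12, [r10, #PROCINFO_INITFUNC]	@ r10 = proc_info base
	add	pc, r12, r10			@ branch to __v7_setup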

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S      | 14 ++++++++------
 arch/arm/mm/proc-arm1020.S  |  4 ++--
 arch/arm/mm/proc-arm1020e.S |  4 ++--
 arch/arm/mm/proc-arm1022.S  |  4 ++--
 arch/arm/mm/proc-arm1026.S  |  4 ++--
 arch/arm/mm/proc-arm720.S   |  4 ++--
 arch/arm/mm/proc-arm740.S   |  4 ++--
 arch/arm/mm/proc-arm7tdmi.S |  4 ++--
 arch/arm/mm/proc-arm920.S   |  4 ++--
 arch/arm/mm/proc-arm922.S   |  4 ++--
 arch/arm/mm/proc-arm925.S   |  4 ++--
 arch/arm/mm/proc-arm926.S   |  4 ++--
 arch/arm/mm/proc-arm940.S   |  4 ++--
 arch/arm/mm/proc-arm946.S   |  4 ++--
 arch/arm/mm/proc-arm9tdmi.S |  4 ++--
 arch/arm/mm/proc-fa526.S    |  4 ++--
 arch/arm/mm/proc-feroceon.S |  5 +++--
 arch/arm/mm/proc-macros.S   |  4 ++++
 arch/arm/mm/proc-mohawk.S   |  4 ++--
 arch/arm/mm/proc-sa110.S    |  4 ++--
 arch/arm/mm/proc-sa1100.S   |  4 ++--
 arch/arm/mm/proc-v6.S       |  4 ++--
 arch/arm/mm/proc-v7.S       |  4 ++--
 arch/arm/mm/proc-v7m.S      |  4 ++--
 arch/arm/mm/proc-xsc3.S     |  4 ++--
 arch/arm/mm/proc-xscale.S   |  4 ++--
 26 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 01963273c07a..698b38bfca8f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -138,8 +138,9 @@ ENTRY(stext)
 						@ mmu has been enabled
 	adr	lr, BSYM(1f)			@ return (PIC) address
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+ ARM(	add	pc, r12, r10			)
+ THUMB(	add	r12, r12, r10			)
  THUMB(	ret	r12				)
 1:	b	__enable_mmu
 ENDPROC(stext)
@@ -386,10 +387,11 @@ ENTRY(secondary_startup)
 	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
 	adr	lr, BSYM(__enable_mmu)		@ return address
 	mov	r13, r12			@ __secondary_switched address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
-						  @ (return control reg)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+ ARM(	add	pc, r12, r10		)	@ initialise processor
+						@ (return control reg)
+ THUMB(	add	r12, r12, r10		)
+ THUMB(	ret	r12			)
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 86ee5d47ce3c..7ddd45d6ca52 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -507,7 +507,7 @@ cpu_arm1020_name:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020_proc_info,#object
 __arm1020_proc_info:
@@ -519,7 +519,7 @@ __arm1020_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020_setup
+	initfn	__arm1020_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index a6331d78601f..d15f556782a8 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -465,7 +465,7 @@ arm1020e_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020e_proc_info,#object
 __arm1020e_proc_info:
@@ -479,7 +479,7 @@ __arm1020e_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020e_setup
+	initfn	__arm1020e_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index a126b7a59928..6245e422e95c 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -448,7 +448,7 @@ arm1022_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1022_proc_info,#object
 __arm1022_proc_info:
@@ -462,7 +462,7 @@ __arm1022_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1022_setup
+	initfn	__arm1022_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index fc294067e977..944114b566ef 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -442,7 +442,7 @@ arm1026_crval:
 	string	cpu_arm1026_name, "ARM1026EJ-S"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1026_proc_info,#object
 __arm1026_proc_info:
@@ -456,7 +456,7 @@ __arm1026_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1026_setup
+	initfn	__arm1026_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 2baa66b3ac9b..4bd5f8dcbc29 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -186,7 +186,7 @@ arm720_crval:
  * See <asm/procinfo.h> for a definition of this structure.
  */
 	
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
 		.type	__\name\()_proc_info,#object
@@ -203,7 +203,7 @@ __\name\()_proc_info:
 			PMD_BIT4 | \
 			PMD_SECT_AP_WRITE | \
 			PMD_SECT_AP_READ
-		b	\cpu_flush				@ cpu_flush
+		initfn	\cpu_flush				@ cpu_flush
 		.long	cpu_arch_name				@ arch_name
 		.long	cpu_elf_name				@ elf_name
 		.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB	@ elf_hwcap
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index ac1ea6b3bce4..1dadba6744ec 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -132,14 +132,14 @@ __arm740_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm740_proc_info,#object
 __arm740_proc_info:
 	.long	0x41807400
 	.long	0xfffffff0
 	.long	0
 	.long	0
-	b	__arm740_setup
+	initfn	__arm740_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index bf6ba4bc30ff..c322a416cb9a 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -76,7 +76,7 @@ __arm7tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
 	extra_hwcaps=0
@@ -86,7 +86,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm7tdmi_setup
+		initfn	__arm7tdmi_setup
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps )
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 22bf8dde4f84..f129bdede5cc 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -448,7 +448,7 @@ arm920_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm920_proc_info,#object
 __arm920_proc_info:
@@ -464,7 +464,7 @@ __arm920_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm920_setup
+	initfn	__arm920_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 0c6d5ac5a6d4..53b5bddb0ee9 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -426,7 +426,7 @@ arm922_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm922_proc_info,#object
 __arm922_proc_info:
@@ -442,7 +442,7 @@ __arm922_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm922_setup
+	initfn	__arm922_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index c32d073282ea..94a88a734210 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -494,7 +494,7 @@ arm925_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -510,7 +510,7 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm925_setup
+	initfn	__arm925_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 252b2503038d..0fe423de5118 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -474,7 +474,7 @@ arm926_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm926_proc_info,#object
 __arm926_proc_info:
@@ -490,7 +490,7 @@ __arm926_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm926_setup
+	initfn	__arm926_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index e5212d489377..7ad2642dfcda 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -354,14 +354,14 @@ __arm940_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm940_proc_info,#object
 __arm940_proc_info:
 	.long	0x41009400
 	.long	0xff00fff0
 	.long	0
-	b	__arm940_setup
+	initfn	__arm940_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index b3dd9b2d0b8e..f129dcadb8e8 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -409,14 +409,14 @@ __arm946_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm946_proc_info,#object
 __arm946_proc_info:
 	.long	0x41009460
 	.long	0xff00fff0
 	.long	0
 	.long	0
-	b	__arm946_setup
+	initfn	__arm946_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 8227322bbb8f..a8828b63a981 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -70,7 +70,7 @@ __arm9tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 		.type	__\name\()_proc_info, #object
@@ -79,7 +79,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm9tdmi_setup
+		initfn	__arm9tdmi_setup
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index c494886892ba..afb100b96081 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -190,7 +190,7 @@ fa526_cr1_set:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__fa526_proc_info,#object
 __fa526_proc_info:
@@ -206,7 +206,7 @@ __fa526_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__fa526_setup
+	initfn	__fa526_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 03a1b75f2e16..5bd769a2bbbe 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -584,7 +584,7 @@ feroceon_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
 	.type	__\name\()_proc_info,#object
@@ -601,7 +601,8 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__feroceon_setup
+	initfn	__feroceon_setup
+	.long __feroceon_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 082b9f2f7e90..5bed45aeac40 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -331,3 +331,7 @@ ENTRY(\name\()_tlb_fns)
 	.globl	\x
 	.equ	\x, \y
 .endm
+
+.macro	initfn, initfunc
+	.long	\initfunc - . + PROCINFO_INITFUNC
+.endm
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 53d393455f13..a15a2c150460 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -427,7 +427,7 @@ mohawk_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__88sv331x_proc_info,#object
 __88sv331x_proc_info:
@@ -443,7 +443,7 @@ __88sv331x_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__mohawk_setup
+	initfn	__mohawk_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 8008a0461cf5..4668067e7a08 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -199,7 +199,7 @@ sa110_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__sa110_proc_info,#object
 __sa110_proc_info:
@@ -213,7 +213,7 @@ __sa110_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa110_setup
+	initfn	__sa110_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 89f97ac648a9..4acd3cbaa7fe 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -242,7 +242,7 @@ sa1100_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 	.type	__\name\()_proc_info,#object
@@ -257,7 +257,7 @@ __\name\()_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa1100_setup
+	initfn	__sa1100_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index d0390f4b3f18..5c05a2948dfa 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -264,7 +264,7 @@ v6_crval:
 	string	cpu_elf_name, "v6"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv6 processor core.
@@ -287,7 +287,7 @@ __v6_proc_info:
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v6_setup
+	initfn	__v6_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	/* See also feat_v6_fixup() for HWCAP_TLS */
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8b4ee5e81c14..eda1eba132c6 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -462,7 +462,7 @@ __v7_setup_stack:
 	string	cpu_elf_name, "v7"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Standard v7 proc info content
@@ -474,7 +474,7 @@ __v7_setup_stack:
 			PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
 	.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
-	W(b)	\initfunc
+	initfn	\initfunc
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index d1e68b553d3b..89119d50f57d 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -135,7 +135,7 @@ __v7m_setup_stack_top:
 	string cpu_elf_name "v7m"
 	string cpu_v7m_name "ARMv7-M"
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv7-M processor core.
@@ -146,7 +146,7 @@ __v7m_proc_info:
 	.long	0x000f0000		@ Mask for ID
 	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
 	.long   0			@ proc_info_list.__cpu_io_mmu_flags
-	b	__v7m_setup		@ proc_info_list.__cpu_flush
+	initfn	__v7m_setup		@ proc_info_list.__cpu_flush
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index f8acdfece036..1b0b945126f8 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -499,7 +499,7 @@ xsc3_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
 	.type	__\name\()_proc_info,#object
@@ -514,7 +514,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xsc3_setup
+	initfn	__xsc3_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index afa2b3c4df4a..7805cd098140 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -612,7 +612,7 @@ xscale_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -627,7 +627,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xscale_setup
+	initfn	__xscale_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
-- 
1.8.3.2


* [RFT/RFC PATCH 2/6] ARM: move HYP text to end of .text section
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl) Ard Biesheuvel
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

The HYP text is essentially a separate binary from the kernel proper,
so it can be moved away from the rest of the kernel. This helps prevent
link failures due to branch relocations exceeding their range.
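
Schematically (a rough sketch of the vmlinux.lds.S change below, not a
literal copy of the script), the .text output section ends up as

	.text : {
		/* ... kernel .text, .got, etc ... */
		ARM_CPU_KEEP(PROC_INFO)
		HYP_TEXT	/* .hyp.idmap.text followed by .hyp.text */
	}

so the HYP code sits at the very end of .text instead of in the middle
of the kernel's own branch ranges.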

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/vmlinux.lds.S | 8 ++++++--
 arch/arm/kvm/init.S           | 5 +----
 arch/arm/kvm/interrupts.S     | 4 +---
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b31aa73e8076..e3b9403bd2d6 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -22,11 +22,14 @@
 	ALIGN_FUNCTION();						\
 	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
 	*(.idmap.text)							\
-	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
+	VMLINUX_SYMBOL(__idmap_text_end) = .;
+
+#define HYP_TEXT							\
 	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
 	*(.hyp.idmap.text)						\
-	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;			\
+	*(.hyp.text)
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define ARM_CPU_DISCARD(x)
@@ -118,6 +121,7 @@ SECTIONS
 		. = ALIGN(4);
 		*(.got)			/* Global offset table		*/
 			ARM_CPU_KEEP(PROC_INFO)
+			HYP_TEXT
 	}
 
 #ifdef CONFIG_DEBUG_RODATA
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72d16ff..7a377d36de5d 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -51,8 +51,7 @@
  *   Switches to the runtime PGD, set stack and vectors.
  */
 
-	.text
-	.pushsection    .hyp.idmap.text,"ax"
+	.section    ".hyp.idmap.text", #alloc
 	.align 5
 __kvm_hyp_init:
 	.globl __kvm_hyp_init
@@ -155,5 +154,3 @@ target:	@ We're now in the trampoline code, switch page tables
 
 	.globl __kvm_hyp_init_end
 __kvm_hyp_init_end:
-
-	.popsection
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79b304a..db22e9bedfcd 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -27,7 +27,7 @@
 #include <asm/vfpmacros.h>
 #include "interrupts_head.S"
 
-	.text
+	.section	".hyp.text", #alloc
 
 __kvm_hyp_code_start:
 	.globl __kvm_hyp_code_start
@@ -316,8 +316,6 @@ THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	eret
 .endm
 
-	.text
-
 	.align 5
 __kvm_hyp_vector:
 	.globl __kvm_hyp_vector
-- 
1.8.3.2


* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 2/6] ARM: move HYP text to end of .text section Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 20:32   ` Nicolas Pitre
  2015-03-12 20:56   ` Russell King - ARM Linux
  2015-03-12 17:38 ` [RFT/RFC PATCH 4/6] ARM: use bl_far to call __hyp_stub_install_secondary from the .data section Ard Biesheuvel
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

These macros execute PC-relative branches, but with a larger
reach than the 24 bits that are available in the b and bl opcodes.
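
Usage is meant to mirror the ordinary instructions, e.g. (a sketch;
__some_far_target is just a placeholder name, and ip is the default
scratch register for bl_far):

	bl_far	__hyp_stub_install_secondary	@ far call, clobbers lr and ip
	b_far	__some_far_target, r4		@ far branch via an explicit temp

Patch #4 uses the bl_far form in sleep.S.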

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index f67fd3afebdf..bd08c3c1b73f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -108,6 +108,35 @@
 	.endm
 #endif
 
+	/*
+	 * Macros to emit relative branches that may exceed the range
+	 * of the 24-bit immediate of the ordinary b/bl instructions.
+	 * NOTE: this doesn't work with locally defined symbols, as they
+	 * might lack the ARM/Thumb annotation (even if they are annotated
+	 * as functions)
+	 */
+	.macro  b_far, target, tmpreg
+#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
+ ARM(	movt	\tmpreg, #:upper16:(\target - (8888f + 8))	)
+ ARM(	movw	\tmpreg, #:lower16:(\target - (8888f + 8))	)
+ THUMB(	movt    \tmpreg, #:upper16:(\target - (8888f + 4))	)
+ THUMB(	movw	\tmpreg, #:lower16:(\target - (8888f + 4))	)
+8888:	add	pc, pc, \tmpreg
+#else
+	ldr	\tmpreg, 8889f
+8888:	add	pc, pc, \tmpreg
+	.align 	2
+8889:
+ ARM(	.word   \target - (8888b + 8)           )
+#endif
+	.endm
+
+	.macro	bl_far, target, tmpreg=ip
+	adr	lr, 8887f
+	b_far	\target, \tmpreg
+8887:
+	.endm
+
 	.macro asm_trace_hardirqs_off
 #if defined(CONFIG_TRACE_IRQFLAGS)
 	stmdb   sp!, {r0-r3, ip, lr}
-- 
1.8.3.2


* [RFT/RFC PATCH 4/6] ARM: use bl_far to call __hyp_stub_install_secondary from the .data section
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2015-03-12 17:38 ` [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl) Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text Ard Biesheuvel
  2015-03-12 17:38 ` [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together Ard Biesheuvel
  5 siblings, 0 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/sleep.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index e1e60e5a7a27..0ea3813fedce 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -128,7 +128,7 @@ ENDPROC(cpu_resume_after_mmu)
 ENTRY(cpu_resume)
 ARM_BE8(setend be)			@ ensure we are in BE mode
 #ifdef CONFIG_ARM_VIRT_EXT
-	bl	__hyp_stub_install_secondary
+	bl_far	__hyp_stub_install_secondary
 #endif
 	safe_svcmode_maskall r1
 	mov	r1, #0
-- 
1.8.3.2


* [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
                   ` (3 preceding siblings ...)
  2015-03-12 17:38 ` [RFT/RFC PATCH 4/6] ARM: use bl_far to call __hyp_stub_install_secondary from the .data section Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 20:33   ` Nicolas Pitre
  2015-03-12 17:38 ` [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together Ard Biesheuvel
  5 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

This moves the .idmap.text section closer to .head.text, so that
relative branches are less likely to go out of range if the kernel
text gets bigger.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/vmlinux.lds.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index e3b9403bd2d6..2e7b2220ef5f 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -103,6 +103,7 @@ SECTIONS
 
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
+			IDMAP_TEXT
 			__exception_text_start = .;
 			*(.exception.text)
 			__exception_text_end = .;
@@ -111,7 +112,6 @@ SECTIONS
 			SCHED_TEXT
 			LOCK_TEXT
 			KPROBES_TEXT
-			IDMAP_TEXT
 #ifdef CONFIG_MMU
 			*(.fixup)
 #endif
-- 
1.8.3.2


* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
                   ` (4 preceding siblings ...)
  2015-03-12 17:38 ` [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text Ard Biesheuvel
@ 2015-03-12 17:38 ` Ard Biesheuvel
  2015-03-12 20:34   ` Nicolas Pitre
  2015-03-12 21:10   ` Russell King - ARM Linux
  5 siblings, 2 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

Fixup snippets are put into a dedicated section so that they don't
bloat cache lines with instructions that are usually not executed.
But there is no reason to put all these snippets together at the far
end of the .text output region, where the branch instruction they
contain could go out of range if the kernel grows in size.

Instead, emit .text and .fixup regions together for each input object.
They should still be out of the way, but not so far that they go out
of range.
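
For reference, this relies on the difference (as I read the ld docs)
between

	*(.text) *(.fixup)	/* all .text first, then all .fixup at the end */
	*(.text .fixup)		/* .text/.fixup interleaved per input object    */

i.e. with the combined pattern, each object's fixup snippets end up
next to the .text of the object they belong to.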

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---

Note that the TEXT_TEXT macro will emit *(.text) again but this should be
harmless.

 arch/arm/kernel/vmlinux.lds.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 2e7b2220ef5f..01630c38fd6c 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -108,13 +108,13 @@ SECTIONS
 			*(.exception.text)
 			__exception_text_end = .;
 			IRQENTRY_TEXT
+#ifdef CONFIG_MMU
+			*(.text .fixup)
+#endif
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
 			KPROBES_TEXT
-#ifdef CONFIG_MMU
-			*(.fixup)
-#endif
 			*(.gnu.warning)
 			*(.glue_7)
 			*(.glue_7t)
-- 
1.8.3.2


* [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset
  2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
@ 2015-03-12 20:24   ` Nicolas Pitre
  2015-03-12 20:50   ` Russell King - ARM Linux
  1 sibling, 0 replies; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 20:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> This patch replaces the 'branch to setup()' instructions embedded
> in the PROCINFO structs with the offset to that setup function
> relative to the base of the struct. This preserves the position
> independent nature of that field, but uses a data item rather
> than an instruction.
> 
> This is mainly done to prevent linker failures on large kernels,
> where the setup function is out of reach for the branch.

Looks fine to me.

Acked-by: Nicolas Pitre <nico@linaro.org>

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm/kernel/head.S      | 14 ++++++++------
>  arch/arm/mm/proc-arm1020.S  |  4 ++--
>  arch/arm/mm/proc-arm1020e.S |  4 ++--
>  arch/arm/mm/proc-arm1022.S  |  4 ++--
>  arch/arm/mm/proc-arm1026.S  |  4 ++--
>  arch/arm/mm/proc-arm720.S   |  4 ++--
>  arch/arm/mm/proc-arm740.S   |  4 ++--
>  arch/arm/mm/proc-arm7tdmi.S |  4 ++--
>  arch/arm/mm/proc-arm920.S   |  4 ++--
>  arch/arm/mm/proc-arm922.S   |  4 ++--
>  arch/arm/mm/proc-arm925.S   |  4 ++--
>  arch/arm/mm/proc-arm926.S   |  4 ++--
>  arch/arm/mm/proc-arm940.S   |  4 ++--
>  arch/arm/mm/proc-arm946.S   |  4 ++--
>  arch/arm/mm/proc-arm9tdmi.S |  4 ++--
>  arch/arm/mm/proc-fa526.S    |  4 ++--
>  arch/arm/mm/proc-feroceon.S |  5 +++--
>  arch/arm/mm/proc-macros.S   |  4 ++++
>  arch/arm/mm/proc-mohawk.S   |  4 ++--
>  arch/arm/mm/proc-sa110.S    |  4 ++--
>  arch/arm/mm/proc-sa1100.S   |  4 ++--
>  arch/arm/mm/proc-v6.S       |  4 ++--
>  arch/arm/mm/proc-v7.S       |  4 ++--
>  arch/arm/mm/proc-v7m.S      |  4 ++--
>  arch/arm/mm/proc-xsc3.S     |  4 ++--
>  arch/arm/mm/proc-xscale.S   |  4 ++--
>  26 files changed, 61 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 01963273c07a..698b38bfca8f 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -138,8 +138,9 @@ ENTRY(stext)
>  						@ mmu has been enabled
>  	adr	lr, BSYM(1f)			@ return (PIC) address
>  	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
> - ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
> - THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
> +	ldr	r12, [r10, #PROCINFO_INITFUNC]
> + ARM(	add	pc, r12, r10			)
> + THUMB(	add	r12, r12, r10			)
>   THUMB(	ret	r12				)
>  1:	b	__enable_mmu
>  ENDPROC(stext)
> @@ -386,10 +387,11 @@ ENTRY(secondary_startup)
>  	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
>  	adr	lr, BSYM(__enable_mmu)		@ return address
>  	mov	r13, r12			@ __secondary_switched address
> - ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
> -						  @ (return control reg)
> - THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
> - THUMB(	ret	r12				)
> +	ldr	r12, [r10, #PROCINFO_INITFUNC]
> + ARM(	add	pc, r12, r10		)	@ initialise processor
> +						@ (return control reg)
> + THUMB(	add	r12, r12, r10		)
> + THUMB(	ret	r12			)
>  ENDPROC(secondary_startup)
>  ENDPROC(secondary_startup_arm)
>  
> diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
> index 86ee5d47ce3c..7ddd45d6ca52 100644
> --- a/arch/arm/mm/proc-arm1020.S
> +++ b/arch/arm/mm/proc-arm1020.S
> @@ -507,7 +507,7 @@ cpu_arm1020_name:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm1020_proc_info,#object
>  __arm1020_proc_info:
> @@ -519,7 +519,7 @@ __arm1020_proc_info:
>  	.long   PMD_TYPE_SECT | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm1020_setup
> +	initfn	__arm1020_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
> index a6331d78601f..d15f556782a8 100644
> --- a/arch/arm/mm/proc-arm1020e.S
> +++ b/arch/arm/mm/proc-arm1020e.S
> @@ -465,7 +465,7 @@ arm1020e_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm1020e_proc_info,#object
>  __arm1020e_proc_info:
> @@ -479,7 +479,7 @@ __arm1020e_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm1020e_setup
> +	initfn	__arm1020e_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
> diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
> index a126b7a59928..6245e422e95c 100644
> --- a/arch/arm/mm/proc-arm1022.S
> +++ b/arch/arm/mm/proc-arm1022.S
> @@ -448,7 +448,7 @@ arm1022_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm1022_proc_info,#object
>  __arm1022_proc_info:
> @@ -462,7 +462,7 @@ __arm1022_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm1022_setup
> +	initfn	__arm1022_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
> diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
> index fc294067e977..944114b566ef 100644
> --- a/arch/arm/mm/proc-arm1026.S
> +++ b/arch/arm/mm/proc-arm1026.S
> @@ -442,7 +442,7 @@ arm1026_crval:
>  	string	cpu_arm1026_name, "ARM1026EJ-S"
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm1026_proc_info,#object
>  __arm1026_proc_info:
> @@ -456,7 +456,7 @@ __arm1026_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm1026_setup
> +	initfn	__arm1026_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
> diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
> index 2baa66b3ac9b..4bd5f8dcbc29 100644
> --- a/arch/arm/mm/proc-arm720.S
> +++ b/arch/arm/mm/proc-arm720.S
> @@ -186,7 +186,7 @@ arm720_crval:
>   * See <asm/procinfo.h> for a definition of this structure.
>   */
>  	
> -		.section ".proc.info.init", #alloc, #execinstr
> +		.section ".proc.info.init", #alloc
>  
>  .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
>  		.type	__\name\()_proc_info,#object
> @@ -203,7 +203,7 @@ __\name\()_proc_info:
>  			PMD_BIT4 | \
>  			PMD_SECT_AP_WRITE | \
>  			PMD_SECT_AP_READ
> -		b	\cpu_flush				@ cpu_flush
> +		initfn	\cpu_flush				@ cpu_flush
>  		.long	cpu_arch_name				@ arch_name
>  		.long	cpu_elf_name				@ elf_name
>  		.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB	@ elf_hwcap
> diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
> index ac1ea6b3bce4..1dadba6744ec 100644
> --- a/arch/arm/mm/proc-arm740.S
> +++ b/arch/arm/mm/proc-arm740.S
> @@ -132,14 +132,14 @@ __arm740_setup:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  	.type	__arm740_proc_info,#object
>  __arm740_proc_info:
>  	.long	0x41807400
>  	.long	0xfffffff0
>  	.long	0
>  	.long	0
> -	b	__arm740_setup
> +	initfn	__arm740_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT
> diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
> index bf6ba4bc30ff..c322a416cb9a 100644
> --- a/arch/arm/mm/proc-arm7tdmi.S
> +++ b/arch/arm/mm/proc-arm7tdmi.S
> @@ -76,7 +76,7 @@ __arm7tdmi_setup:
>  
>  		.align
>  
> -		.section ".proc.info.init", #alloc, #execinstr
> +		.section ".proc.info.init", #alloc
>  
>  .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
>  	extra_hwcaps=0
> @@ -86,7 +86,7 @@ __\name\()_proc_info:
>  		.long	\cpu_mask
>  		.long	0
>  		.long	0
> -		b	__arm7tdmi_setup
> +		initfn	__arm7tdmi_setup
>  		.long	cpu_arch_name
>  		.long	cpu_elf_name
>  		.long	HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps )
> diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
> index 22bf8dde4f84..f129bdede5cc 100644
> --- a/arch/arm/mm/proc-arm920.S
> +++ b/arch/arm/mm/proc-arm920.S
> @@ -448,7 +448,7 @@ arm920_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm920_proc_info,#object
>  __arm920_proc_info:
> @@ -464,7 +464,7 @@ __arm920_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm920_setup
> +	initfn	__arm920_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
> index 0c6d5ac5a6d4..53b5bddb0ee9 100644
> --- a/arch/arm/mm/proc-arm922.S
> +++ b/arch/arm/mm/proc-arm922.S
> @@ -426,7 +426,7 @@ arm922_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm922_proc_info,#object
>  __arm922_proc_info:
> @@ -442,7 +442,7 @@ __arm922_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm922_setup
> +	initfn	__arm922_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
> index c32d073282ea..94a88a734210 100644
> --- a/arch/arm/mm/proc-arm925.S
> +++ b/arch/arm/mm/proc-arm925.S
> @@ -494,7 +494,7 @@ arm925_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
>  	.type	__\name\()_proc_info,#object
> @@ -510,7 +510,7 @@ __\name\()_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm925_setup
> +	initfn	__arm925_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
> index 252b2503038d..0fe423de5118 100644
> --- a/arch/arm/mm/proc-arm926.S
> +++ b/arch/arm/mm/proc-arm926.S
> @@ -474,7 +474,7 @@ arm926_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm926_proc_info,#object
>  __arm926_proc_info:
> @@ -490,7 +490,7 @@ __arm926_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__arm926_setup
> +	initfn	__arm926_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
> diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
> index e5212d489377..7ad2642dfcda 100644
> --- a/arch/arm/mm/proc-arm940.S
> +++ b/arch/arm/mm/proc-arm940.S
> @@ -354,14 +354,14 @@ __arm940_setup:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__arm940_proc_info,#object
>  __arm940_proc_info:
>  	.long	0x41009400
>  	.long	0xff00fff0
>  	.long	0
> -	b	__arm940_setup
> +	initfn	__arm940_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
> index b3dd9b2d0b8e..f129dcadb8e8 100644
> --- a/arch/arm/mm/proc-arm946.S
> +++ b/arch/arm/mm/proc-arm946.S
> @@ -409,14 +409,14 @@ __arm946_setup:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  	.type	__arm946_proc_info,#object
>  __arm946_proc_info:
>  	.long	0x41009460
>  	.long	0xff00fff0
>  	.long	0
>  	.long	0
> -	b	__arm946_setup
> +	initfn	__arm946_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
> diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
> index 8227322bbb8f..a8828b63a981 100644
> --- a/arch/arm/mm/proc-arm9tdmi.S
> +++ b/arch/arm/mm/proc-arm9tdmi.S
> @@ -70,7 +70,7 @@ __arm9tdmi_setup:
>  
>  		.align
>  
> -		.section ".proc.info.init", #alloc, #execinstr
> +		.section ".proc.info.init", #alloc
>  
>  .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
>  		.type	__\name\()_proc_info, #object
> @@ -79,7 +79,7 @@ __\name\()_proc_info:
>  		.long	\cpu_mask
>  		.long	0
>  		.long	0
> -		b	__arm9tdmi_setup
> +		initfn	__arm9tdmi_setup
>  		.long	cpu_arch_name
>  		.long	cpu_elf_name
>  		.long	HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT
> diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
> index c494886892ba..afb100b96081 100644
> --- a/arch/arm/mm/proc-fa526.S
> +++ b/arch/arm/mm/proc-fa526.S
> @@ -190,7 +190,7 @@ fa526_cr1_set:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__fa526_proc_info,#object
>  __fa526_proc_info:
> @@ -206,7 +206,7 @@ __fa526_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__fa526_setup
> +	initfn	__fa526_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF
> diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
> index 03a1b75f2e16..5bd769a2bbbe 100644
> --- a/arch/arm/mm/proc-feroceon.S
> +++ b/arch/arm/mm/proc-feroceon.S
> @@ -584,7 +584,7 @@ feroceon_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
>  	.type	__\name\()_proc_info,#object
> @@ -601,7 +601,8 @@ __\name\()_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__feroceon_setup
> +	initfn	__feroceon_setup
> +	.long __feroceon_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
> diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> index 082b9f2f7e90..5bed45aeac40 100644
> --- a/arch/arm/mm/proc-macros.S
> +++ b/arch/arm/mm/proc-macros.S
> @@ -331,3 +331,7 @@ ENTRY(\name\()_tlb_fns)
>  	.globl	\x
>  	.equ	\x, \y
>  .endm
> +
> +.macro	initfn, initfunc
> +	.long	\initfunc - . + PROCINFO_INITFUNC
> +.endm
> diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
> index 53d393455f13..a15a2c150460 100644
> --- a/arch/arm/mm/proc-mohawk.S
> +++ b/arch/arm/mm/proc-mohawk.S
> @@ -427,7 +427,7 @@ mohawk_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__88sv331x_proc_info,#object
>  __88sv331x_proc_info:
> @@ -443,7 +443,7 @@ __88sv331x_proc_info:
>  		PMD_BIT4 | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__mohawk_setup
> +	initfn	__mohawk_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
> diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
> index 8008a0461cf5..4668067e7a08 100644
> --- a/arch/arm/mm/proc-sa110.S
> +++ b/arch/arm/mm/proc-sa110.S
> @@ -199,7 +199,7 @@ sa110_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	.type	__sa110_proc_info,#object
>  __sa110_proc_info:
> @@ -213,7 +213,7 @@ __sa110_proc_info:
>  	.long   PMD_TYPE_SECT | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__sa110_setup
> +	initfn	__sa110_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
> diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
> index 89f97ac648a9..4acd3cbaa7fe 100644
> --- a/arch/arm/mm/proc-sa1100.S
> +++ b/arch/arm/mm/proc-sa1100.S
> @@ -242,7 +242,7 @@ sa1100_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
>  	.type	__\name\()_proc_info,#object
> @@ -257,7 +257,7 @@ __\name\()_proc_info:
>  	.long   PMD_TYPE_SECT | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__sa1100_setup
> +	initfn	__sa1100_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
> diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
> index d0390f4b3f18..5c05a2948dfa 100644
> --- a/arch/arm/mm/proc-v6.S
> +++ b/arch/arm/mm/proc-v6.S
> @@ -264,7 +264,7 @@ v6_crval:
>  	string	cpu_elf_name, "v6"
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	/*
>  	 * Match any ARMv6 processor core.
> @@ -287,7 +287,7 @@ __v6_proc_info:
>  		PMD_SECT_XN | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__v6_setup
> +	initfn	__v6_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	/* See also feat_v6_fixup() for HWCAP_TLS */
> diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> index 8b4ee5e81c14..eda1eba132c6 100644
> --- a/arch/arm/mm/proc-v7.S
> +++ b/arch/arm/mm/proc-v7.S
> @@ -462,7 +462,7 @@ __v7_setup_stack:
>  	string	cpu_elf_name, "v7"
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	/*
>  	 * Standard v7 proc info content
> @@ -474,7 +474,7 @@ __v7_setup_stack:
>  			PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
>  	.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
> -	W(b)	\initfunc
> +	initfn	\initfunc
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
> diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
> index d1e68b553d3b..89119d50f57d 100644
> --- a/arch/arm/mm/proc-v7m.S
> +++ b/arch/arm/mm/proc-v7m.S
> @@ -135,7 +135,7 @@ __v7m_setup_stack_top:
>  	string cpu_elf_name "v7m"
>  	string cpu_v7m_name "ARMv7-M"
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  	/*
>  	 * Match any ARMv7-M processor core.
> @@ -146,7 +146,7 @@ __v7m_proc_info:
>  	.long	0x000f0000		@ Mask for ID
>  	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
>  	.long   0			@ proc_info_list.__cpu_io_mmu_flags
> -	b	__v7m_setup		@ proc_info_list.__cpu_flush
> +	initfn	__v7m_setup		@ proc_info_list.__cpu_flush
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
> diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
> index f8acdfece036..1b0b945126f8 100644
> --- a/arch/arm/mm/proc-xsc3.S
> +++ b/arch/arm/mm/proc-xsc3.S
> @@ -499,7 +499,7 @@ xsc3_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
>  	.type	__\name\()_proc_info,#object
> @@ -514,7 +514,7 @@ __\name\()_proc_info:
>  	.long	PMD_TYPE_SECT | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__xsc3_setup
> +	initfn	__xsc3_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
> diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
> index afa2b3c4df4a..7805cd098140 100644
> --- a/arch/arm/mm/proc-xscale.S
> +++ b/arch/arm/mm/proc-xscale.S
> @@ -612,7 +612,7 @@ xscale_crval:
>  
>  	.align
>  
> -	.section ".proc.info.init", #alloc, #execinstr
> +	.section ".proc.info.init", #alloc
>  
>  .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
>  	.type	__\name\()_proc_info,#object
> @@ -627,7 +627,7 @@ __\name\()_proc_info:
>  	.long	PMD_TYPE_SECT | \
>  		PMD_SECT_AP_WRITE | \
>  		PMD_SECT_AP_READ
> -	b	__xscale_setup
> +	initfn	__xscale_setup
>  	.long	cpu_arch_name
>  	.long	cpu_elf_name
>  	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
> -- 
> 1.8.3.2
> 
> 


* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 17:38 ` [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl) Ard Biesheuvel
@ 2015-03-12 20:32   ` Nicolas Pitre
  2015-03-12 20:36     ` Ard Biesheuvel
  2015-03-12 20:56   ` Russell King - ARM Linux
  1 sibling, 1 reply; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 20:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> These macros execute PC-relative branches, but with a larger
> reach than the 24 bits that are available in the b and bl opcodes.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index f67fd3afebdf..bd08c3c1b73f 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -108,6 +108,35 @@
>  	.endm
>  #endif
>  
> +	/*
> +	 * Macros to emit relative branches that may exceed the range
> +	 * of the 24-bit immediate of the ordinary b/bl instructions.
> +	 * NOTE: this doesn't work with locally defined symbols, as they
> +	 * might lack the ARM/Thumb annotation (even if they are annotated
> +	 * as functions)

I really hope you won't need a far call with local symbols ever!

> +	 */
> +	.macro  b_far, target, tmpreg
> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> + ARM(	movt	\tmpreg, #:upper16:(\target - (8888f + 8))	)
> + ARM(	movw	\tmpreg, #:lower16:(\target - (8888f + 8))	)
> + THUMB(	movt    \tmpreg, #:upper16:(\target - (8888f + 4))	)
> + THUMB(	movw	\tmpreg, #:lower16:(\target - (8888f + 4))	)
> +8888:	add	pc, pc, \tmpreg
> +#else
> +	ldr	\tmpreg, 8889f
> +8888:	add	pc, pc, \tmpreg
> +	.align 	2
> +8889:
> + ARM(	.word   \target - (8888b + 8)           )

The Thumb relocation value is missing here.

> +#endif
> +	.endm
> +
> +	.macro	bl_far, target, tmpreg=ip
> +	adr	lr, 8887f
> +	b_far	\target, \tmpreg
> +8887:
> +	.endm
> +
>  	.macro asm_trace_hardirqs_off
>  #if defined(CONFIG_TRACE_IRQFLAGS)
>  	stmdb   sp!, {r0-r3, ip, lr}
> -- 
> 1.8.3.2
> 
> 


* [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text
  2015-03-12 17:38 ` [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text Ard Biesheuvel
@ 2015-03-12 20:33   ` Nicolas Pitre
  0 siblings, 0 replies; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 20:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> This moves the .idmap.text section closer to .head.text, so that
> relative branches are less likely to go out of range if the kernel
> text gets bigger.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Acked-by: Nicolas Pitre <nico@linaro.org>

> ---
>  arch/arm/kernel/vmlinux.lds.S | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index e3b9403bd2d6..2e7b2220ef5f 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -103,6 +103,7 @@ SECTIONS
>  
>  	.text : {			/* Real text segment		*/
>  		_stext = .;		/* Text and read-only data	*/
> +			IDMAP_TEXT
>  			__exception_text_start = .;
>  			*(.exception.text)
>  			__exception_text_end = .;
> @@ -111,7 +112,6 @@ SECTIONS
>  			SCHED_TEXT
>  			LOCK_TEXT
>  			KPROBES_TEXT
> -			IDMAP_TEXT
>  #ifdef CONFIG_MMU
>  			*(.fixup)
>  #endif
> -- 
> 1.8.3.2
> 
> 


* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 17:38 ` [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together Ard Biesheuvel
@ 2015-03-12 20:34   ` Nicolas Pitre
  2015-03-12 21:10   ` Russell King - ARM Linux
  1 sibling, 0 replies; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 20:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> Fixup snippets are put into a dedicated section so that they don't
> bloat cache lines with instructions that are usually not executed.
> But there is no reason to put all these snippets together at the far
> end of the .text output region, where the branch instruction they
> contain could go out of range if the kernel grows in size.
> 
> Instead, emit .text and .fixup regions together for each input object.
> They should still be out of the way, but not so far that they go out
> of range.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Acked-by: Nicolas Pitre <nico@linaro.org>

> ---
> 
> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
> harmless.
> 
>  arch/arm/kernel/vmlinux.lds.S | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index 2e7b2220ef5f..01630c38fd6c 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -108,13 +108,13 @@ SECTIONS
>  			*(.exception.text)
>  			__exception_text_end = .;
>  			IRQENTRY_TEXT
> +#ifdef CONFIG_MMU
> +			*(.text .fixup)
> +#endif
>  			TEXT_TEXT
>  			SCHED_TEXT
>  			LOCK_TEXT
>  			KPROBES_TEXT
> -#ifdef CONFIG_MMU
> -			*(.fixup)
> -#endif
>  			*(.gnu.warning)
>  			*(.glue_7)
>  			*(.glue_7t)
> -- 
> 1.8.3.2
> 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 20:32   ` Nicolas Pitre
@ 2015-03-12 20:36     ` Ard Biesheuvel
  2015-03-12 21:03       ` Nicolas Pitre
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 20:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 12 March 2015 at 21:32, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
>
>> These macros execute PC-relative branches, but with a larger
>> reach than the 24 bits that are available in the b and bl opcodes.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
>>  1 file changed, 29 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index f67fd3afebdf..bd08c3c1b73f 100644
>> --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h
>> @@ -108,6 +108,35 @@
>>       .endm
>>  #endif
>>
>> +     /*
>> +      * Macros to emit relative branches that may exceed the range
>> +      * of the 24-bit immediate of the ordinary b/bl instructions.
>> +      * NOTE: this doesn't work with locally defined symbols, as they
>> +      * might lack the ARM/Thumb annotation (even if they are annotated
>> +      * as functions)
>
> I really hope you won't need a far call with local symbols ever!
>

Well, if you use pushsection/popsection, then local, numbered labels
you refer to can be quite far away in the output image, and those will
not have the thumb bit set.
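
For illustration, the kind of situation meant here (a sketch; the section
name and label below are made up, not code from this series):

	.pushsection	".text.far_away", "ax"
8886:	@ emitted into another section, possibly far away in the final image
	ret	lr
	.popsection

	b_far	8886b, ip	@ may exceed b/bl range, and as a local label
				@ 8886b carries no ARM/Thumb annotation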

>> +      */
>> +     .macro  b_far, target, tmpreg
>> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
>> + ARM(        movt    \tmpreg, #:upper16:(\target - (8888f + 8))      )
>> + ARM(        movw    \tmpreg, #:lower16:(\target - (8888f + 8))      )
>> + THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
>> + THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
>> +8888:        add     pc, pc, \tmpreg
>> +#else
>> +     ldr     \tmpreg, 8889f
>> +8888:        add     pc, pc, \tmpreg
>> +     .align  2
>> +8889:
>> + ARM(        .word   \target - (8888b + 8)           )
>
> The Thumb relocation value is missing here.
>

Yes, this is bogus. But Thumb2 implies v7 or v7m, so it is not
actually incorrect in this case.
But I will fix it in the next version

>> +#endif
>> +     .endm
>> +
>> +     .macro  bl_far, target, tmpreg=ip
>> +     adr     lr, 8887f

BTW just realised this needs a BSYM()

>> +     b_far   \target, \tmpreg
>> +8887:
>> +     .endm
>> +
>>       .macro asm_trace_hardirqs_off
>>  #if defined(CONFIG_TRACE_IRQFLAGS)
>>       stmdb   sp!, {r0-r3, ip, lr}
>> --
>> 1.8.3.2
>>
>>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset
  2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
  2015-03-12 20:24   ` Nicolas Pitre
@ 2015-03-12 20:50   ` Russell King - ARM Linux
  2015-03-12 21:00     ` Ard Biesheuvel
  1 sibling, 1 reply; 25+ messages in thread
From: Russell King - ARM Linux @ 2015-03-12 20:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Mar 12, 2015 at 06:38:07PM +0100, Ard Biesheuvel wrote:
> @@ -138,8 +138,9 @@ ENTRY(stext)
>  						@ mmu has been enabled
>  	adr	lr, BSYM(1f)			@ return (PIC) address
>  	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
> - ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
> - THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
> +	ldr	r12, [r10, #PROCINFO_INITFUNC]
> + ARM(	add	pc, r12, r10			)
> + THUMB(	add	r12, r12, r10			)
>   THUMB(	ret	r12				)

Given this change, I'd prefer a slightly different result:

	ldr	r12, [r10, #PROCINFO_INITFUNC]
	add	r12, r12, r10
	ret	r12

>  1:	b	__enable_mmu
>  ENDPROC(stext)
> @@ -386,10 +387,11 @@ ENTRY(secondary_startup)
>  	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
>  	adr	lr, BSYM(__enable_mmu)		@ return address
>  	mov	r13, r12			@ __secondary_switched address
> - ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
> -						  @ (return control reg)
> - THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
> - THUMB(	ret	r12				)
> +	ldr	r12, [r10, #PROCINFO_INITFUNC]
> + ARM(	add	pc, r12, r10		)	@ initialise processor
> +						@ (return control reg)
> + THUMB(	add	r12, r12, r10		)
> + THUMB(	ret	r12			)

and same here.  It means that we have less code to look at, at the expense
of one additional ARM instruction.

> +
> +.macro	initfn, initfunc
> +	.long	\initfunc - . + PROCINFO_INITFUNC
> +.endm

The more I look at this, the more I find it hard to decide whether this
is correct or not, and that means it's bad.  It is correct, but it needs
some thought to confirm that.  I'd prefer a different solution.

The value which we want to place into this location is the difference
between the start of the procinfo structure and the target symbol.  So
let's do that - we have a symbol for each procinfo structure, let's
make "initfn" take that symbol and do the computation using that.
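
For illustration, a minimal sketch of the shape being suggested (the macro
body and the example use site here are assumptions, not the posted patch):

.macro	initfn, func, base
	.long	\func - \base
.endm

	/* at each use site, pass the enclosing proc_info symbol, e.g.: */
	initfn	__v7_setup, __v7_proc_info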

Thanks.

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 17:38 ` [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl) Ard Biesheuvel
  2015-03-12 20:32   ` Nicolas Pitre
@ 2015-03-12 20:56   ` Russell King - ARM Linux
  1 sibling, 0 replies; 25+ messages in thread
From: Russell King - ARM Linux @ 2015-03-12 20:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Mar 12, 2015 at 06:38:09PM +0100, Ard Biesheuvel wrote:
> +	/*
> +	 * Macros to emit relative branches that may exceed the range
> +	 * of the 24-bit immediate of the ordinary b/bl instructions.
> +	 * NOTE: this doesn't work with locally defined symbols, as they
> +	 * might lack the ARM/Thumb annotation (even if they are annotated
> +	 * as functions)
> +	 */
> +	.macro  b_far, target, tmpreg
> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> + ARM(	movt	\tmpreg, #:upper16:(\target - (8888f + 8))	)
> + ARM(	movw	\tmpreg, #:lower16:(\target - (8888f + 8))	)
> + THUMB(	movt    \tmpreg, #:upper16:(\target - (8888f + 4))	)
> + THUMB(	movw	\tmpreg, #:lower16:(\target - (8888f + 4))	)
> +8888:	add	pc, pc, \tmpreg
> +#else
> +	ldr	\tmpreg, 8889f
> +8888:	add	pc, pc, \tmpreg
> +	.align 	2
> +8889:
> + ARM(	.word   \target - (8888b + 8)           )
> +#endif

I'm really hating this.  It's potentially polluting the data cache
(which relies on proximity of data to be effective) with instructions.
I wonder whether we can use ldr \tmpreg, =\target - (8888b + 8) here,
and let the assembler build some literal pools (maybe with appropriate
.ltorg statements if necessary)?

Another reason I'm hating it is the difference in those movt and movw
instructions just because we have a different PC offset between Thumb
and ARM.  A better way to deal with that would be some common definition
somewhere of that offset.
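
For illustration, one way to express that common offset (the PC_BIAS name is
an assumption at this point; Ard adopts it further down the thread):

#ifdef CONFIG_THUMB2_KERNEL
#define PC_BIAS		4	/* reading the pc yields '.' + 4 in Thumb */
#else
#define PC_BIAS		8	/* ... and '.' + 8 in ARM */
#endif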

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset
  2015-03-12 20:50   ` Russell King - ARM Linux
@ 2015-03-12 21:00     ` Ard Biesheuvel
  0 siblings, 0 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 21:00 UTC (permalink / raw)
  To: linux-arm-kernel

On 12 March 2015 at 21:50, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Thu, Mar 12, 2015 at 06:38:07PM +0100, Ard Biesheuvel wrote:
>> @@ -138,8 +138,9 @@ ENTRY(stext)
>>                                               @ mmu has been enabled
>>       adr     lr, BSYM(1f)                    @ return (PIC) address
>>       mov     r8, r4                          @ set TTBR1 to swapper_pg_dir
>> - ARM(        add     pc, r10, #PROCINFO_INITFUNC     )
>> - THUMB(      add     r12, r10, #PROCINFO_INITFUNC    )
>> +     ldr     r12, [r10, #PROCINFO_INITFUNC]
>> + ARM(        add     pc, r12, r10                    )
>> + THUMB(      add     r12, r12, r10                   )
>>   THUMB(      ret     r12                             )
>
> Given this change, I'd prefer a slightly different result:
>
>         ldr     r12, [r10, #PROCINFO_INITFUNC]
>         add     r12, r12, r10
>         ret     r12
>

Good idea

>>  1:   b       __enable_mmu
>>  ENDPROC(stext)
>> @@ -386,10 +387,11 @@ ENTRY(secondary_startup)
>>       ldr     r8, [r7, lr]                    @ get secondary_data.swapper_pg_dir
>>       adr     lr, BSYM(__enable_mmu)          @ return address
>>       mov     r13, r12                        @ __secondary_switched address
>> - ARM(        add     pc, r10, #PROCINFO_INITFUNC     ) @ initialise processor
>> -                                               @ (return control reg)
>> - THUMB(      add     r12, r10, #PROCINFO_INITFUNC    )
>> - THUMB(      ret     r12                             )
>> +     ldr     r12, [r10, #PROCINFO_INITFUNC]
>> + ARM(        add     pc, r12, r10            )       @ initialise processor
>> +                                             @ (return control reg)
>> + THUMB(      add     r12, r12, r10           )
>> + THUMB(      ret     r12                     )
>
> and same here.  It means that we have less code to look at, at the expense
> of one additional ARM instruction.
>
>> +
>> +.macro       initfn, initfunc
>> +     .long   \initfunc - . + PROCINFO_INITFUNC
>> +.endm
>
> The more I look at this, the more I find it hard to decide whether this
> is correct or not, and that means it's bad.  It is correct, but it needs
> some thought to confirm that.  I'd prefer a different solution.
>
> The value which we want to place into this location is the difference
> between the start of the procinfo structure and the target symbol.  So
> let's do that - we have a symbol for each procinfo structure, let's
> make "initfn" take that symbol and do the computation using that.
>

I agree. I'll respin with these changes

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 20:36     ` Ard Biesheuvel
@ 2015-03-12 21:03       ` Nicolas Pitre
  2015-03-12 21:15         ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 21:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> On 12 March 2015 at 21:32, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
> >
> >> These macros execute PC-relative branches, but with a larger
> >> reach than the 24 bits that are available in the b and bl opcodes.
> >>
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
> >>  1 file changed, 29 insertions(+)
> >>
> >> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> >> index f67fd3afebdf..bd08c3c1b73f 100644
> >> --- a/arch/arm/include/asm/assembler.h
> >> +++ b/arch/arm/include/asm/assembler.h
> >> @@ -108,6 +108,35 @@
> >>       .endm
> >>  #endif
> >>
> >> +     /*
> >> +      * Macros to emit relative branches that may exceed the range
> >> +      * of the 24-bit immediate of the ordinary b/bl instructions.
> >> +      * NOTE: this doesn't work with locally defined symbols, as they
> >> +      * might lack the ARM/Thumb annotation (even if they are annotated
> >> +      * as functions)
> >
> > I really hope you won't need a far call with local symbols ever!
> >
> 
> Well, if you use pushsection/popsection, then local, numbered labels
> you refer to can be quite far away in the output image, and those will
> not have the thumb bit set.

Indeed.

> >> +      */
> >> +     .macro  b_far, target, tmpreg
> >> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> >> + ARM(        movt    \tmpreg, #:upper16:(\target - (8888f + 8))      )
> >> + ARM(        movw    \tmpreg, #:lower16:(\target - (8888f + 8))      )
> >> + THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
> >> + THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
> >> +8888:        add     pc, pc, \tmpreg
> >> +#else
> >> +     ldr     \tmpreg, 8889f
> >> +8888:        add     pc, pc, \tmpreg
> >> +     .align  2
> >> +8889:
> >> + ARM(        .word   \target - (8888b + 8)           )
> >
> > The Thumb relocation value is missing here.
> >
> 
> Yes, this is bogus. But Thumb2 implies v7 or v7m, so it is not
> actually incorrect in this case.

The ".align 2" would be redundant in that case too.

> But I will fix it in the next version

Is it worth optimizing the ARM mode with movw/movt on ARMv7?  If not 
then this could be simplified as only:

             .macro  b_far, target, tmpreg
 THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
 THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
 ARM(        ldr     \tmpreg, 8888f+4                                )
 8888:       add     pc, pc, \tmpreg
 ARM(        .word   \target - (8888b + 8)           )
             .endm


Nicolas

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 17:38 ` [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together Ard Biesheuvel
  2015-03-12 20:34   ` Nicolas Pitre
@ 2015-03-12 21:10   ` Russell King - ARM Linux
  2015-03-12 21:18     ` Ard Biesheuvel
  1 sibling, 1 reply; 25+ messages in thread
From: Russell King - ARM Linux @ 2015-03-12 21:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Mar 12, 2015 at 06:38:12PM +0100, Ard Biesheuvel wrote:
> Fixup snippets are put into a dedicated section so that they don't
> bloat cache lines with instructions that are usually not executed.
> But there is no reason to put all these snippets together at the far
> end of the .text output region, where the branch instruction they
> contain could go out of range if the kernel grows in size.
> 
> Instead, emit .text and .fixup regions together for each input object.
> They should still be out of the way, but not so far that they go out
> of range.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> 
> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
> harmless.

However, I wonder if by doing this, we're weakening the ability for
kallsyms final link to succeed:

/* .text section. Map to function alignment to avoid address changes
 * during second ld run in second ld pass when generating System.map */

Can we not just move .fixup before TEXT_TEXT?  The only thing between it
and .text would be .text.hot.
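
For illustration, that reordering would amount to something like the
following in arch/arm/kernel/vmlinux.lds.S (a sketch of the relevant lines
only):

	.text : {
		...
		IRQENTRY_TEXT
#ifdef CONFIG_MMU
		*(.fixup)
#endif
		TEXT_TEXT		/* .text.hot, then *(.text), ... */
		SCHED_TEXT
		...
	}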

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 21:03       ` Nicolas Pitre
@ 2015-03-12 21:15         ` Ard Biesheuvel
  2015-03-12 21:37           ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 21:15 UTC (permalink / raw)
  To: linux-arm-kernel

On 12 March 2015 at 22:03, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
>
>> On 12 March 2015 at 21:32, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> > On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
>> >
>> >> These macros execute PC-relative branches, but with a larger
>> >> reach than the 24 bits that are available in the b and bl opcodes.
>> >>
>> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> >> ---
>> >>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
>> >>  1 file changed, 29 insertions(+)
>> >>
>> >> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> >> index f67fd3afebdf..bd08c3c1b73f 100644
>> >> --- a/arch/arm/include/asm/assembler.h
>> >> +++ b/arch/arm/include/asm/assembler.h
>> >> @@ -108,6 +108,35 @@
>> >>       .endm
>> >>  #endif
>> >>
>> >> +     /*
>> >> +      * Macros to emit relative branches that may exceed the range
>> >> +      * of the 24-bit immediate of the ordinary b/bl instructions.
>> >> +      * NOTE: this doesn't work with locally defined symbols, as they
>> >> +      * might lack the ARM/Thumb annotation (even if they are annotated
>> >> +      * as functions)
>> >
>> > I really hope you won't need a far call with local symbols ever!
>> >
>>
>> Well, if you use pushsection/popsection, then local, numbered labels
>> you refer to can be quite far away in the output image, and those will
>> not have the thumb bit set.
>
> Indeed.
>
>> >> +      */
>> >> +     .macro  b_far, target, tmpreg
>> >> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
>> >> + ARM(        movt    \tmpreg, #:upper16:(\target - (8888f + 8))      )
>> >> + ARM(        movw    \tmpreg, #:lower16:(\target - (8888f + 8))      )
>> >> + THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
>> >> + THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
>> >> +8888:        add     pc, pc, \tmpreg
>> >> +#else
>> >> +     ldr     \tmpreg, 8889f
>> >> +8888:        add     pc, pc, \tmpreg
>> >> +     .align  2
>> >> +8889:
>> >> + ARM(        .word   \target - (8888b + 8)           )
>> >
>> > The Thumb relocation value is missing here.
>> >
>>
>> Yes, this is bogus. But Thumb2 implies v7 or v7m, so it is not
>> actually incorrect in this case.
>
> The ".align 2" would be redundant in that case too.
>

Correct, the #else bit is essentially ARM only

>> But I will fix it in the next version
>
> Is it worth optimizing the ARM mode with movw/movt on ARMv7?  If not
> then this could be simplified as only:
>
>              .macro  b_far, target, tmpreg
>  THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
>  THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
>  ARM(        ldr     \tmpreg, 8888f+4                                )
>  8888:       add     pc, pc, \tmpreg
>  ARM(        .word   \target - (8888b + 8)           )
>              .endm
>

movw/movt is preferred if available, since it circumvents the D-cache.
And actually, I should rewrite the bl_far macro for v7 to use blx
instead of adr+ldr to make better use of the return stack predictor or
whatever it is called in the h/w

And, as Russell points out, I should put a PC_BIAS #define somewhere
that assumes the correct value for the used mode, instead of the +4/+8
immediates.

So I am thinking along the lines of

.macro  b_far, target, tmpreg
#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
movt \tmpreg, #:upper16:(\target - (8888f + PC_BIAS))
movw \tmpreg, #:lower16:(\target - (8888f + PC_BIAS))
8888: add pc, pc, \tmpreg
#else
ldr \tmpreg, =\target - (8888f + PC_BIAS)
8888: add pc, pc, \tmpreg
#endif
.endm

.macro bl_far, target, tmpreg=ip
#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
movt \tmpreg, #:upper16:(\target - (8887f + PC_BIAS))
movw \tmpreg, #:lower16:(\target - (8887f + PC_BIAS))
8887: add \tmpreg, \tmpreg, pc
blx \tmpreg
#else
adr lr, BSYM(8887f)
b_far \target, \tmpreg
8887:
#endif
.endm

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 21:10   ` Russell King - ARM Linux
@ 2015-03-12 21:18     ` Ard Biesheuvel
  2015-03-12 21:22       ` Russell King - ARM Linux
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 21:18 UTC (permalink / raw)
  To: linux-arm-kernel

On 12 March 2015 at 22:10, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Thu, Mar 12, 2015 at 06:38:12PM +0100, Ard Biesheuvel wrote:
>> Fixup snippets are put into a dedicated section so that they don't
>> bloat cache lines with instructions that are usually not executed.
>> But there is no reason to put all these snippets together at the far
>> end of the .text output region, where the branch instruction they
>> contain could go out of range if the kernel grows in size.
>>
>> Instead, emit .text and .fixup regions together for each input object.
>> They should still be out of the way, but not so far that they go out
>> of range.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>
>> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
>> harmless.
>
> However, I wonder if by doing this, we're weakening the ability for
> kallsyms final link to succeed:
>
> /* .text section. Map to function alignment to avoid address changes
>  * during second ld run in second ld pass when generating System.map */
>
> Can we not just move .fixup before TEXT_TEXT?  The only thing between it
> and .text would be .text.hot.
>

Putting .fixup before .text already helps, but not enough for the
.config Arnd gave me that I have been testing this with.

What *(.text .fixup) does (i.e., putting both section names inside the
parentheses), is emitting both sections for each input object file, so
they will always be close to the object that it refers to, so it is
not the same thing.
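
For illustration, the difference between the two spellings in linker-script
terms (a sketch, not the actual hunks):

	/* all .text input sections first, every .fixup gathered at the end: */
	*(.text)
	/* ... */
	*(.fixup)

	/* one list: each input object's .text and .fixup are emitted together: */
	*(.text .fixup)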

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 21:18     ` Ard Biesheuvel
@ 2015-03-12 21:22       ` Russell King - ARM Linux
  2015-03-13 11:18         ` Arnd Bergmann
  0 siblings, 1 reply; 25+ messages in thread
From: Russell King - ARM Linux @ 2015-03-12 21:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Mar 12, 2015 at 10:18:26PM +0100, Ard Biesheuvel wrote:
> On 12 March 2015 at 22:10, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Thu, Mar 12, 2015 at 06:38:12PM +0100, Ard Biesheuvel wrote:
> >> Fixup snippets are put into a dedicated section so that they don't
> >> bloat cache lines with instructions that are usually not executed.
> >> But there is no reason to put all these snippets together at the far
> >> end of the .text output region, where the branch instruction they
> >> contain could go out of range if the kernel grows in size.
> >>
> >> Instead, emit .text and .fixup regions together for each input object.
> >> They should still be out of the way, but not so far that they go out
> >> of range.
> >>
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >>
> >> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
> >> harmless.
> >
> > However, I wonder if by doing this, we're weakening the ability for
> > kallsyms final link to succeed:
> >
> > /* .text section. Map to function alignment to avoid address changes
> >  * during second ld run in second ld pass when generating System.map */
> >
> > Can we not just move .fixup before TEXT_TEXT?  The only thing between it
> > and .text would be .text.hot.
> >
> 
> Putting .fixup before .text already helps, but not enough for the
> .config Arnd gave me that I have been testing this with.
> 
> What *(.text .fixup) does (i.e., putting both section names inside the
> parentheses), is emitting both sections for each input object file, so
> they will always be close to the object that it refers to, so it is
> not the same thing.

I'll suggest a different solution then - how about modifying
asm-generic/vmlinux.lds.h to change *(.text) to *(.text .text.fixup)
and we move all the .fixup sections to .text.fixup ?  Arnd?
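
For illustration, a sketch of what that could look like (the surrounding
TEXT_TEXT contents are assumed here; only the renamed input section matters):

/* include/asm-generic/vmlinux.lds.h */
#define TEXT_TEXT							\
		ALIGN_FUNCTION();					\
		...							\
		*(.text .text.fixup)					\
		...

	/* and the arch/arm fixup emitters would switch over accordingly: */
	.pushsection .text.fixup, "ax"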

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 21:15         ` Ard Biesheuvel
@ 2015-03-12 21:37           ` Ard Biesheuvel
  2015-03-12 22:26             ` Nicolas Pitre
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-12 21:37 UTC (permalink / raw)
  To: linux-arm-kernel

On 12 March 2015 at 22:15, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 12 March 2015 at 22:03, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
>>
>>> On 12 March 2015 at 21:32, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>>> > On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
>>> >
>>> >> These macros execute PC-relative branches, but with a larger
>>> >> reach than the 24 bits that are available in the b and bl opcodes.
>>> >>
>>> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>> >> ---
>>> >>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
>>> >>  1 file changed, 29 insertions(+)
>>> >>
>>> >> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>>> >> index f67fd3afebdf..bd08c3c1b73f 100644
>>> >> --- a/arch/arm/include/asm/assembler.h
>>> >> +++ b/arch/arm/include/asm/assembler.h
>>> >> @@ -108,6 +108,35 @@
>>> >>       .endm
>>> >>  #endif
>>> >>
>>> >> +     /*
>>> >> +      * Macros to emit relative branches that may exceed the range
>>> >> +      * of the 24-bit immediate of the ordinary b/bl instructions.
>>> >> +      * NOTE: this doesn't work with locally defined symbols, as they
>>> >> +      * might lack the ARM/Thumb annotation (even if they are annotated
>>> >> +      * as functions)
>>> >
>>> > I really hope you won't need a far call with local symbols ever!
>>> >
>>>
>>> Well, if you use pushsection/popsection, then local, numbered labels
>>> you refer to can be quite far away in the output image, and those will
>>> not have the thumb bit set.
>>
>> Indeed.
>>
>>> >> +      */
>>> >> +     .macro  b_far, target, tmpreg
>>> >> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
>>> >> + ARM(        movt    \tmpreg, #:upper16:(\target - (8888f + 8))      )
>>> >> + ARM(        movw    \tmpreg, #:lower16:(\target - (8888f + 8))      )
>>> >> + THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
>>> >> + THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
>>> >> +8888:        add     pc, pc, \tmpreg
>>> >> +#else
>>> >> +     ldr     \tmpreg, 8889f
>>> >> +8888:        add     pc, pc, \tmpreg
>>> >> +     .align  2
>>> >> +8889:
>>> >> + ARM(        .word   \target - (8888b + 8)           )
>>> >
>>> > The Thumb relocation value is missing here.
>>> >
>>>
>>> Yes, this is bogus. But Thumb2 implies v7 or v7m, so it is not
>>> actually incorrect in this case.
>>
>> The ".align 2" would be redundant in that case too.
>>
>
> Correct, the #else bit is essentially ARM only
>
>>> But I will fix it in the next version
>>
>> Is it worth optimizing the ARM mode with movw/movt on ARMv7?  If not
>> then this could be simplified as only:
>>
>>              .macro  b_far, target, tmpreg
>>  THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
>>  THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
>>  ARM(        ldr     \tmpreg, 8888f+4                                )
>>  8888:       add     pc, pc, \tmpreg
>>  ARM(        .word   \target - (8888b + 8)           )
>>              .endm
>>
>
> movw/movt is preferred if available, since it circumvents the D-cache.
> And actually, I should rewrite the bl_far macro for v7 to use blx
> instead of adr+ldr to make better use of the return stack predictor or
> whatever it is called in the h/w
>
> And, as Russell points out, I should put a PC_BIAS #define somewhere
> that assumes the correct value for the used mode, instead of the +4/+8
> immediates.
>
> So I am thinking along the lines of
>
> .macro  b_far, target, tmpreg
> #if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> movt \tmpreg, #:upper16:(\target - (8888f + PC_BIAS))
> movw \tmpreg, #:lower16:(\target - (8888f + PC_BIAS))
> 8888: add pc, pc, \tmpreg
> #else
> ldr \tmpreg, =\target - (8888f + PC_BIAS)

Replying to self: this doesn't work

/home/ard/linux-2.6/arch/arm/kernel/sleep.S: Assembler messages:
/home/ard/linux-2.6/arch/arm/kernel/sleep.S:131: Error: constant
expression expected -- `ldr ip,=__hyp_stub_install_secondary-8888f+4'

so the only way this is feasible is with an explicit literal, which
kind of sucks indeed for Dcache performance

Any other ideas?


> 8888: add pc, pc, \tmpreg
> #endif
> .endm
>
> .macro bl_far, target, tmpreg=ip
> #if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> movt \tmpreg, #:upper16:(\target - (8887f + PC_BIAS))
> movw \tmpreg, #:lower16:(\target - (8887f + PC_BIAS))
> 8887: add \tmpreg, \tmpreg, pc
> blx \tmpreg
> #else
> adr lr, BSYM(8887f)
> b_far \target, \tmpreg
> 8887:
> #endif
> .endm

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl)
  2015-03-12 21:37           ` Ard Biesheuvel
@ 2015-03-12 22:26             ` Nicolas Pitre
  0 siblings, 0 replies; 25+ messages in thread
From: Nicolas Pitre @ 2015-03-12 22:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 12 Mar 2015, Ard Biesheuvel wrote:

> On 12 March 2015 at 22:15, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > On 12 March 2015 at 22:03, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >> On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
> >>
> >>> On 12 March 2015 at 21:32, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >>> > On Thu, 12 Mar 2015, Ard Biesheuvel wrote:
> >>> >
> >>> >> These macros execute PC-relative branches, but with a larger
> >>> >> reach than the 24 bits that are available in the b and bl opcodes.
> >>> >>
> >>> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >>> >> ---
> >>> >>  arch/arm/include/asm/assembler.h | 29 +++++++++++++++++++++++++++++
> >>> >>  1 file changed, 29 insertions(+)
> >>> >>
> >>> >> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> >>> >> index f67fd3afebdf..bd08c3c1b73f 100644
> >>> >> --- a/arch/arm/include/asm/assembler.h
> >>> >> +++ b/arch/arm/include/asm/assembler.h
> >>> >> @@ -108,6 +108,35 @@
> >>> >>       .endm
> >>> >>  #endif
> >>> >>
> >>> >> +     /*
> >>> >> +      * Macros to emit relative branches that may exceed the range
> >>> >> +      * of the 24-bit immediate of the ordinary b/bl instructions.
> >>> >> +      * NOTE: this doesn't work with locally defined symbols, as they
> >>> >> +      * might lack the ARM/Thumb annotation (even if they are annotated
> >>> >> +      * as functions)
> >>> >
> >>> > I really hope you won't need a far call with local symbols ever!
> >>> >
> >>>
> >>> Well, if you use pushsection/popsection, then local, numbered labels
> >>> you refer to can be quite far away in the output image, and those will
> >>> not have the thumb bit set.
> >>
> >> Indeed.
> >>
> >>> >> +      */
> >>> >> +     .macro  b_far, target, tmpreg
> >>> >> +#if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> >>> >> + ARM(        movt    \tmpreg, #:upper16:(\target - (8888f + 8))      )
> >>> >> + ARM(        movw    \tmpreg, #:lower16:(\target - (8888f + 8))      )
> >>> >> + THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
> >>> >> + THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
> >>> >> +8888:        add     pc, pc, \tmpreg
> >>> >> +#else
> >>> >> +     ldr     \tmpreg, 8889f
> >>> >> +8888:        add     pc, pc, \tmpreg
> >>> >> +     .align  2
> >>> >> +8889:
> >>> >> + ARM(        .word   \target - (8888b + 8)           )
> >>> >
> >>> > The Thumb relocation value is missing here.
> >>> >
> >>>
> >>> Yes, this is bogus. But Thumb2 implies v7 or v7m, so it is not
> >>> actually incorrect in this case.
> >>
> >> The ".align 2" would be redundant in that case too.
> >>
> >
> > Correct, the #else bit is essentially ARM only
> >
> >>> But I will fix it in the next version
> >>
> >> Is it worth optimizing the ARM mode with movw/movt on ARMv7?  If not
> >> then this could be simplified as only:
> >>
> >>              .macro  b_far, target, tmpreg
> >>  THUMB(      movt    \tmpreg, #:upper16:(\target - (8888f + 4))      )
> >>  THUMB(      movw    \tmpreg, #:lower16:(\target - (8888f + 4))      )
> >>  ARM(        ldr     \tmpreg, 8888f+4                                )
> >>  8888:       add     pc, pc, \tmpreg
> >>  ARM(        .word   \target - (8888b + 8)           )
> >>              .endm
> >>
> >
> > movw/movt is preferred if available, since it circumvents the D-cache.
> > And actually, I should rewrite the bl_far macro for v7 to use blx
> > instead of adr+ldr to make better use of the return stack predictor or
> > whatever it is called in the h/w
> >
> > And, as Russell points out, I should put a PC_BIAS #define somewhere
> > that assumes the correct value for the used mode, instead of the +4/+8
> > immediates.
> >
> > So I am thinking along the lines of
> >
> > .macro  b_far, target, tmpreg
> > #if defined(CONFIG_CPU_32v7) || defined(CONFIG_CPU_32v7M)
> > movt \tmpreg, #:upper16:(\target - (8888f + PC_BIAS))
> > movw \tmpreg, #:lower16:(\target - (8888f + PC_BIAS))
> > 8888: add pc, pc, \tmpreg
> > #else
> > ldr \tmpreg, =\target - (8888f + PC_BIAS)
> 
> Replying to self: this doesn't work
> 
> /home/ard/linux-2.6/arch/arm/kernel/sleep.S: Assembler messages:
> /home/ard/linux-2.6/arch/arm/kernel/sleep.S:131: Error: constant
> expression expected -- `ldr ip,=__hyp_stub_install_secondary-8888f+4'
> 
> so the only way this is feasible is with an explicit literal, which
> kind of sucks indeed for Dcache performance
> 
> Any other ideas?

Let's not go overboard with this if the only place it is used is in
spots that are not cache-performance critical, such as the resume code,
which is the only case so far.


Nicolas

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-12 21:22       ` Russell King - ARM Linux
@ 2015-03-13 11:18         ` Arnd Bergmann
  2015-03-13 11:26           ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Arnd Bergmann @ 2015-03-13 11:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Thursday 12 March 2015 21:22:02 Russell King - ARM Linux wrote:
> On Thu, Mar 12, 2015 at 10:18:26PM +0100, Ard Biesheuvel wrote:
> > >> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
> > >> harmless.
> > >
> > > However, I wonder if by doing this, we're weakening the ability for
> > > kallsyms final link to succeed:
> > >
> > > /* .text section. Map to function alignment to avoid address changes
> > >  * during second ld run in second ld pass when generating System.map */
> > >
> > > Can we not just move .fixup before TEXT_TEXT?  The only thing between it
> > > and .text would be .text.hot.
> > >
> > 
> > Putting .fixup before .text already helps, but not enough for the
> > .config Arnd gave me that I have been testing this with.
> > 
> > What *(.text .fixup) does (i.e., putting both section names inside the
> > parentheses), is emitting both sections for each input object file, so
> > they will always be close to the object that it refers to, so it is
> > not the same thing.
> 
> I'll suggest a different solution then - how about modifying
> asm-generic/vmlinux.lds.h to change *(.text) to *(.text .text.fixup)
> and we move all the .fixup sections to .text.fixup ?  Arnd?

No objections from me, but I really don't know enough about the possible
side-effects this may have on the other architectures, so we need to
run this by the linux-arch mailing list.

	Arnd

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-13 11:18         ` Arnd Bergmann
@ 2015-03-13 11:26           ` Ard Biesheuvel
  2015-03-13 11:52             ` Arnd Bergmann
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2015-03-13 11:26 UTC (permalink / raw)
  To: linux-arm-kernel

On 13 March 2015 at 12:18, Arnd Bergmann <arnd@arndb.de> wrote:
> On Thursday 12 March 2015 21:22:02 Russell King - ARM Linux wrote:
>> On Thu, Mar 12, 2015 at 10:18:26PM +0100, Ard Biesheuvel wrote:
>> > >> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
>> > >> harmless.
>> > >
>> > > However, I wonder if by doing this, we're weakening the ability for
>> > > kallsyms final link to succeed:
>> > >
>> > > /* .text section. Map to function alignment to avoid address changes
>> > >  * during second ld run in second ld pass when generating System.map */
>> > >
>> > > Can we not just move .fixup before TEXT_TEXT?  The only thing between it
>> > > and .text would be .text.hot.
>> > >
>> >
>> > Putting .fixup before .text already helps, but not enough for the
>> > .config Arnd gave me that I have been testing this with.
>> >
>> > What *(.text .fixup) does (i.e., putting both section names inside the
>> > parentheses), is emitting both sections for each input object file, so
>> > they will always be close to the object that it refers to, so it is
>> > not the same thing.
>>
>> I'll suggest a different solution then - how about modifying
>> asm-generic/vmlinux.lds.h to change *(.text) to *(.text .text.fixup)
>> and we move all the .fixup sections to .text.fixup ?  Arnd?
>
> No objections from me, but I really don't know enough about the possible
> side-effects this may have on the other architectures, so we need to
> run this by the linux-arch mailing list.
>

I don't think it could affect any other architecture if nobody is
populating .text.fixup yet.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together
  2015-03-13 11:26           ` Ard Biesheuvel
@ 2015-03-13 11:52             ` Arnd Bergmann
  0 siblings, 0 replies; 25+ messages in thread
From: Arnd Bergmann @ 2015-03-13 11:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday 13 March 2015 12:26:11 Ard Biesheuvel wrote:
> On 13 March 2015 at 12:18, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Thursday 12 March 2015 21:22:02 Russell King - ARM Linux wrote:
> >> On Thu, Mar 12, 2015 at 10:18:26PM +0100, Ard Biesheuvel wrote:
> >> > >> Note that the TEXT_TEXT macro will emit *(.text) again but this should be
> >> > >> harmless.
> >> > >
> >> > > However, I wonder if by doing this, we're weakening the ability for
> >> > > kallsyms final link to succeed:
> >> > >
> >> > > /* .text section. Map to function alignment to avoid address changes
> >> > >  * during second ld run in second ld pass when generating System.map */
> >> > >
> >> > > Can we not just move .fixup before TEXT_TEXT?  The only thing between it
> >> > > and .text would be .text.hot.
> >> > >
> >> >
> >> > Putting .fixup before .text already helps, but not enough for the
> >> > .config Arnd gave me that I have been testing this with.
> >> >
> >> > What *(.text .fixup) does (i.e., putting both section names inside the
> >> > parentheses), is emitting both sections for each input object file, so
> >> > they will always be close to the object that it refers to, so it is
> >> > not the same thing.
> >>
> >> I'll suggest a different solution then - how about modifying
> >> asm-generic/vmlinux.lds.h to change *(.text) to *(.text .text.fixup)
> >> and we move all the .fixup sections to .text.fixup ?  Arnd?
> >
> > No objections from me, but I really don't know enough about the possible
> > side-effects this may have on the other architectures, so we need to
> > run this by the linux-arch mailing list.
> >
> 
> I don't think it could affect any other architecture if nobody is
> populating .text.fixup yet.

Ah, I missed that part of the suggestion. Yes, sounds good to me then.

	Arnd

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2015-03-13 11:52 UTC | newest]

Thread overview: 25+ messages
2015-03-12 17:38 [RFT/RFC PATCH 0/6] ARM kernel size fixes Ard Biesheuvel
2015-03-12 17:38 ` [RFT/RFC PATCH 1/6] ARM: replace PROCINFO embedded branch with relative offset Ard Biesheuvel
2015-03-12 20:24   ` Nicolas Pitre
2015-03-12 20:50   ` Russell King - ARM Linux
2015-03-12 21:00     ` Ard Biesheuvel
2015-03-12 17:38 ` [RFT/RFC PATCH 2/6] ARM: move HYP text to end of .text section Ard Biesheuvel
2015-03-12 17:38 ` [RFT/RFC PATCH 3/6] ARM: add macro to perform far branches (b/bl) Ard Biesheuvel
2015-03-12 20:32   ` Nicolas Pitre
2015-03-12 20:36     ` Ard Biesheuvel
2015-03-12 21:03       ` Nicolas Pitre
2015-03-12 21:15         ` Ard Biesheuvel
2015-03-12 21:37           ` Ard Biesheuvel
2015-03-12 22:26             ` Nicolas Pitre
2015-03-12 20:56   ` Russell King - ARM Linux
2015-03-12 17:38 ` [RFT/RFC PATCH 4/6] ARM: use bl_far to call __hyp_stub_install_secondary from the .data section Ard Biesheuvel
2015-03-12 17:38 ` [RFT/RFC PATCH 5/6] ARM: move the .idmap.text section closer to .head.text Ard Biesheuvel
2015-03-12 20:33   ` Nicolas Pitre
2015-03-12 17:38 ` [RFT/RFC PATCH 6/6] ARM: keep .text and .fixup regions together Ard Biesheuvel
2015-03-12 20:34   ` Nicolas Pitre
2015-03-12 21:10   ` Russell King - ARM Linux
2015-03-12 21:18     ` Ard Biesheuvel
2015-03-12 21:22       ` Russell King - ARM Linux
2015-03-13 11:18         ` Arnd Bergmann
2015-03-13 11:26           ` Ard Biesheuvel
2015-03-13 11:52             ` Arnd Bergmann
