All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] ARM: clean up PC-relative arithmetic
@ 2016-08-03 15:38 Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
                   ` (9 more replies)
  0 siblings, 10 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

There are various places in the ARM kernel where the following pattern
is used to create a PC-relative reference that is valid even before the
MMU is on:

     adr    rX, 1f
     ldr    rY, [rX]
     add    rX, rX, rY
     ...
  1: .long  <symbol> - .   

or
     adr    rX, 1f
     ldmia  rX, {rY .. rY+n}
     sub    rX, rX, rY
     add    rY+1, rY+1, rX
     add    rY+2, rY+2, rX
     ...
  1: .long  .
     .long  <symbolY>
     .long  <symbolY+1>
     ...

Both cases can be greatly simplified by letting the linker do the
calculations for us. This series implements adr_l, ldr_l and str_l
macros, and uses them to simplify a couple of instances of the above
patterns.

Ard Biesheuvel (8):
  ARM: assembler: introduce adr_l, ldr_l and str_l macros
  ARM: head-common.S: use PC-relative insn sequence for __proc_info
  ARM: head-common.S: use PC-relative insn sequence for __turn_mmu_on
  ARM: head.S: use PC-relative insn sequence for secondary_data
  ARM: head: use PC-relative insn sequence for __smp_alt
  ARM: sleep.S: use PC-relative insn sequence for
    sleep_save_sp/mpidr_hash
  ARM: head.S: use PC-relative insn sequences for __fixup_pv_table
  ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET

 arch/arm/include/asm/assembler.h | 59 ++++++++++++++
 arch/arm/kernel/head-common.S    | 22 ++---
 arch/arm/kernel/head.S           | 86 +++++---------------
 arch/arm/kernel/sleep.S          | 15 +---
 4 files changed, 88 insertions(+), 94 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 16:49   ` Dave Martin
  2016-08-03 18:09   ` Russell King - ARM Linux
  2016-08-03 15:38 ` [PATCH 2/8] ARM: head-common.S: use PC-relative insn sequence for __proc_info Ard Biesheuvel
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Like arm64, ARM supports position independent code sequences that
produce symbol references with a greater reach than the ordinary
adr/ldr instructions.

Introduce adr_l, that takes the address of a symbol in a PC relative
manner, and ldr_l/str_l that perform a 32-bit loads/stores from a
PC-relative offset.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 4eaea2173bf8..e1450889f96b 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -512,4 +512,63 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
+ */
+
+	/*
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 */
+	.macro	adr_l, dst, sym
+#ifdef CONFIG_THUMB2_KERNEL
+	movw	\dst, #:lower16:(\sym) - (. + 12)
+	movt	\dst, #:upper16:(\sym) - (. + 8)
+	add	\dst, \dst, pc
+#else
+	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
+	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
+	add	\dst, \dst, #:pc_g2:(\sym)
+#endif
+	.endm
+
+	/*
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 * @tmp: optional scratch register to be used if <dst> == sp, which
+	 *       is not allowed in a Thumb2 ldr instruction
+	 */
+	.macro	ldr_l, dst, sym, tmp
+#ifdef CONFIG_THUMB2_KERNEL
+	.ifnb	\tmp
+	adr_l	\tmp, \sym
+	ldr	\dst, [\tmp]
+	.else
+	adr_l	\dst, \sym
+	ldr	\dst, [\dst]
+	.endif
+#else
+	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
+	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
+	ldr	\dst, [\dst, #:pc_g2:(\sym)]
+#endif
+	.endm
+
+	/*
+	 * @src: source register
+	 * @sym: name of the symbol
+	 * @tmp: mandatory scratch register to calculate the address
+	 *       while <src> needs to be preserved.
+	 */
+	.macro	str_l, src, sym, tmp:req
+#ifdef CONFIG_THUMB2_KERNEL
+	adr_l	\tmp, \sym
+	str	\src, [\tmp]
+#else
+	add	\tmp, pc, #:pc_g0_nc:(\sym) - 8
+	add	\tmp, \tmp, #:pc_g1_nc:(\sym) - 4
+	str	\src, [\tmp, #:pc_g2:(\sym)]
+#endif
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 2/8] ARM: head-common.S: use PC-relative insn sequence for __proc_info
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 3/8] ARM: head-common.S: use PC-relative insn sequence for __turn_mmu_on Ard Biesheuvel
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l
invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head-common.S | 22 ++++++--------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 8733012d231f..5e0cedd870a5 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -150,11 +150,12 @@ ENDPROC(lookup_processor_type)
  *	r9 = cpuid (preserved)
  */
 __lookup_processor_type:
-	adr	r3, __lookup_processor_type_data
-	ldmia	r3, {r4 - r6}
-	sub	r3, r3, r4			@ get offset between virt&phys
-	add	r5, r5, r3			@ convert virt addresses to
-	add	r6, r6, r3			@ physical address space
+	/*
+	 * Look in <asm/procinfo.h> for information about the __proc_info
+	 * structure.
+	 */
+	adr_l	r5, __proc_info_begin		@ __pa(__proc_info_begin)
+	adr_l	r6, __proc_info_end		@ __pa(__proc_info_end)
 1:	ldmia	r5, {r3, r4}			@ value, mask
 	and	r4, r4, r9			@ mask wanted bits
 	teq	r3, r4
@@ -166,17 +167,6 @@ __lookup_processor_type:
 2:	ret	lr
 ENDPROC(__lookup_processor_type)
 
-/*
- * Look in <asm/procinfo.h> for information about the __proc_info structure.
- */
-	.align	2
-	.type	__lookup_processor_type_data, %object
-__lookup_processor_type_data:
-	.long	.
-	.long	__proc_info_begin
-	.long	__proc_info_end
-	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
-
 __error_lpae:
 #ifdef CONFIG_DEBUG_LL
 	adr	r0, str_lpae
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 3/8] ARM: head-common.S: use PC-relative insn sequence for __turn_mmu_on
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 2/8] ARM: head-common.S: use PC-relative insn sequence for __proc_info Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data Ard Biesheuvel
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l
invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 04286fd9e09c..f99360e5b004 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -227,11 +227,8 @@ __create_page_tables:
 	 * Create identity mapping to cater for __enable_mmu.
 	 * This identity mapping will be removed by paging_init().
 	 */
-	adr	r0, __turn_mmu_on_loc
-	ldmia	r0, {r3, r5, r6}
-	sub	r0, r0, r3			@ virt->phys offset
-	add	r5, r5, r0			@ phys __turn_mmu_on
-	add	r6, r6, r0			@ phys __turn_mmu_on_end
+	adr_l	r5, __turn_mmu_on		@ phys __turn_mmu_on
+	adr_l	r6, __turn_mmu_on_end		@ phys __turn_mmu_on_end
 	mov	r5, r5, lsr #SECTION_SHIFT
 	mov	r6, r6, lsr #SECTION_SHIFT
 
@@ -354,11 +351,6 @@ __create_page_tables:
 	ret	lr
 ENDPROC(__create_page_tables)
 	.ltorg
-	.align
-__turn_mmu_on_loc:
-	.long	.
-	.long	__turn_mmu_on
-	.long	__turn_mmu_on_end
 
 #if defined(CONFIG_SMP)
 	.text
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 3/8] ARM: head-common.S: use PC-relative insn sequence for __turn_mmu_on Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 18:06   ` Russell King - ARM Linux
  2016-08-03 15:38 ` [PATCH 5/8] ARM: head: use PC-relative insn sequence for __smp_alt Ard Biesheuvel
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l
and ldr_l invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f99360e5b004..7a14622582de 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -386,40 +386,28 @@ ENTRY(secondary_startup)
 	/*
 	 * Use the page tables supplied from  __cpu_up.
 	 */
-	adr	r4, __secondary_data
-	ldmia	r4, {r5, r7, r12}		@ address to jump to after
-	sub	lr, r4, r5			@ mmu has been enabled
-	add	r3, r7, lr
+	adr_l	r3, secondary_data
 	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
 ARM_BE8(eor	r4, r4, r5)			@ Swap r5 and r4 in BE:
 ARM_BE8(eor	r5, r4, r5)			@ it can be done in 3 steps
 ARM_BE8(eor	r4, r4, r5)			@ without using a temp reg.
 	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
 	badr	lr, __enable_mmu		@ return address
-	mov	r13, r12			@ __secondary_switched address
+	ldr	r13, =__secondary_switched	@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10			@ initialise processor
 						@ (return control reg)
 	ret	r12
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
+	.ltorg
 
-	/*
-	 * r6  = &secondary_data
-	 */
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #12]			@ get secondary_data.stack
+	ldr_l	sp, secondary_data + 12, ip	@ get secondary_data.stack
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
 
-	.align
-
-	.type	__secondary_data, %object
-__secondary_data:
-	.long	.
-	.long	secondary_data
-	.long	__secondary_switched
 #endif /* defined(CONFIG_SMP) */
 
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 5/8] ARM: head: use PC-relative insn sequence for __smp_alt
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (3 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 6/8] ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Ard Biesheuvel
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l
invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 7a14622582de..a9d8f9004dfd 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -522,19 +522,11 @@ ARM_BE8(rev	r0, r0)			@ byteswap if big endian
 	retne	lr
 
 __fixup_smp_on_up:
-	adr	r0, 1f
-	ldmia	r0, {r3 - r5}
-	sub	r3, r0, r3
-	add	r4, r4, r3
-	add	r5, r5, r3
+	adr_l	r4, __smpalt_begin
+	adr_l	r5, __smpalt_end
 	b	__do_fixup_smp_on_up
 ENDPROC(__fixup_smp)
 
-	.align
-1:	.word	.
-	.word	__smpalt_begin
-	.word	__smpalt_end
-
 	.pushsection .data
 	.globl	smp_on_up
 smp_on_up:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 6/8] ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (4 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 5/8] ARM: head: use PC-relative insn sequence for __smp_alt Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 7/8] ARM: head.S: use PC-relative insn sequences for __fixup_pv_table Ard Biesheuvel
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l
invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/sleep.S | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 0f6c1000582c..445011af9162 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -137,9 +137,8 @@ ARM_BE8(setend be)			@ ensure we are in BE mode
 	mov	r1, #0
 	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
 	ALT_UP_B(1f)
-	adr	r2, mpidr_hash_ptr
-	ldr	r3, [r2]
-	add	r2, r2, r3		@ r2 = struct mpidr_hash phys address
+	adr_l	r2, mpidr_hash		@ r2 = struct mpidr_hash phys address
+
 	/*
 	 * This ldmia relies on the memory layout of the mpidr_hash
 	 * struct mpidr_hash.
@@ -147,9 +146,7 @@ ARM_BE8(setend be)			@ ensure we are in BE mode
 	ldmia	r2, { r3-r6 }	@ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
 	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
 1:
-	adr	r0, _sleep_save_sp
-	ldr	r2, [r0]
-	add	r0, r0, r2
+	adr_l	r0, sleep_save_sp
 	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
 	ldr	r0, [r0, r1, lsl #2]
 
@@ -164,12 +161,6 @@ ENDPROC(cpu_resume)
 ENDPROC(cpu_resume_arm)
 #endif
 
-	.align 2
-_sleep_save_sp:
-	.long	sleep_save_sp - .
-mpidr_hash_ptr:
-	.long	mpidr_hash - .			@ mpidr_hash struct offset
-
 	.data
 	.type	sleep_save_sp, #object
 ENTRY(sleep_save_sp)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 7/8] ARM: head.S: use PC-relative insn sequences for __fixup_pv_table
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (5 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 6/8] ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 15:38 ` [PATCH 8/8] ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Ard Biesheuvel
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded PC relative offset calculations with adr_l,
ldr_l and str_l invocations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S | 32 ++++++--------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index a9d8f9004dfd..b9746366b633 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -577,37 +577,28 @@ ENDPROC(fixup_smp)
  */
 	__HEAD
 __fixup_pv_table:
-	adr	r0, 1f
-	ldmia	r0, {r3-r7}
+	adr_l	r7, __pv_offset			@ __pa(__pv_offset)
+	ldr	r3, =__pv_offset		@ __va(__pv_offset)
 	mvn	ip, #0
-	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
-	add	r4, r4, r3	@ adjust table start address
-	add	r5, r5, r3	@ adjust table end address
-	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
-	add	r7, r7, r3	@ adjust __pv_offset address
-	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
-	str	r0, [r6]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
+	subs	r3, r7, r3			@ PHYS_OFFSET - PAGE_OFFSET
+	mov	r0, r8, lsr #PAGE_SHIFT		@ convert to PFN
+	str_l	r0, __pv_phys_pfn_offset, r6	@ save computed PHYS_OFFSET
 	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
 THUMB(	it	ne		@ cross section branch )
 	bne	__error
 	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
+
+	adr_l	r4, __pv_table_begin
+	adr_l	r5, __pv_table_end
 	b	__fixup_a_pv_table
 ENDPROC(__fixup_pv_table)
-
-	.align
-1:	.long	.
-	.long	__pv_table_begin
-	.long	__pv_table_end
-2:	.long	__pv_phys_pfn_offset
-	.long	__pv_offset
+	.ltorg
 
 	.text
 __fixup_a_pv_table:
-	adr	r0, 3f
-	ldr	r6, [r0]
-	add	r6, r6, r3
+	adr_l	r6, __pv_offset
 	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
 	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
 	mov	r6, r6, lsr #24
@@ -675,9 +666,6 @@ ARM_BE8(rev16	ip, ip)
 #endif
 ENDPROC(__fixup_a_pv_table)
 
-	.align
-3:	.long __pv_offset
-
 ENTRY(fixup_pv_table)
 	stmfd	sp!, {r4 - r7, lr}
 	mov	r3, #0			@ no offset
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 8/8] ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (6 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 7/8] ARM: head.S: use PC-relative insn sequences for __fixup_pv_table Ard Biesheuvel
@ 2016-08-03 15:38 ` Ard Biesheuvel
  2016-08-03 18:17 ` [PATCH 0/8] ARM: clean up PC-relative arithmetic Russell King - ARM Linux
  2016-08-03 18:27 ` Nicolas Pitre
  9 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-03 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

Replace the open coded arithmetic with a simple adr_l/sub pair.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/head.S | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index b9746366b633..bd5a6151b3d7 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -106,10 +106,8 @@ ENTRY(stext)
 #endif
 
 #ifndef CONFIG_XIP_KERNEL
-	adr	r3, 2f
-	ldmia	r3, {r4, r8}
-	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
-	add	r8, r8, r4			@ PHYS_OFFSET
+	adr_l	r8, _text
+	sub	r8, r8, #TEXT_OFFSET		@ PHYS_OFFSET
 #else
 	ldr	r8, =PLAT_PHYS_OFFSET		@ always constant in this case
 #endif
@@ -161,10 +159,6 @@ ENTRY(stext)
 1:	b	__enable_mmu
 ENDPROC(stext)
 	.ltorg
-#ifndef CONFIG_XIP_KERNEL
-2:	.long	.
-	.long	PAGE_OFFSET
-#endif
 
 /*
  * Setup the initial page tables.  We only setup the barest
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
@ 2016-08-03 16:49   ` Dave Martin
  2016-08-04  7:40     ` Ard Biesheuvel
  2016-08-04  7:43     ` Ard Biesheuvel
  2016-08-03 18:09   ` Russell King - ARM Linux
  1 sibling, 2 replies; 39+ messages in thread
From: Dave Martin @ 2016-08-03 16:49 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> Like arm64, ARM supports position independent code sequences that
> produce symbol references with a greater reach than the ordinary
> adr/ldr instructions.
> 
> Introduce adr_l, that takes the address of a symbol in a PC relative
> manner, and ldr_l/str_l that perform a 32-bit loads/stores from a
> PC-relative offset.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
>  1 file changed, 59 insertions(+)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 4eaea2173bf8..e1450889f96b 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -512,4 +512,63 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
>  #endif
>  	.endm
>  
> +/*
> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
> + */
> +
> +	/*
> +	 * @dst: destination register
> +	 * @sym: name of the symbol
> +	 */
> +	.macro	adr_l, dst, sym
> +#ifdef CONFIG_THUMB2_KERNEL
> +	movw	\dst, #:lower16:(\sym) - (. + 12)
> +	movt	\dst, #:upper16:(\sym) - (. + 8)
> +	add	\dst, \dst, pc

pc always reads as the address of that add plus 4, right?  I remember
some special case where it gets rounded down to a 4-byte boundary, but
IIRC that only applies to certain ldr ..., [pc, ...] forms.

> +#else
> +	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
> +	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
> +	add	\dst, \dst, #:pc_g2:(\sym)

Whoah.  I've never seen this syntax before...  does this work for any
named reloc, or just for certain blessed relocs?  (I'm also _assuming_
the assembler support for this functionality is ancient -- if not,
there may be toolchain compatibility issues.)

Based on my understanding of how these relocs work, this should do the
right thing, though.


Second question: for arm, this reduces the range addressable to
pc +/- 26-bit offset (24-bit if sym is not word aligned, but that
probably never happens).

I can't remember the de facto limit on the size of vmlinux for arm --
are you sure this extra limitation won't break some cases of huge
initramfs where adr_l gets used for cross-section references?

(For Thumb2, :lower16:/:upper16: give a full 32-bit range, so no problem
there -- sad that this isn't available before ARMv7).

Cheers
---Dave

> +#endif
> +	.endm
> +
> +	/*
> +	 * @dst: destination register
> +	 * @sym: name of the symbol
> +	 * @tmp: optional scratch register to be used if <dst> == sp, which
> +	 *       is not allowed in a Thumb2 ldr instruction
> +	 */
> +	.macro	ldr_l, dst, sym, tmp
> +#ifdef CONFIG_THUMB2_KERNEL
> +	.ifnb	\tmp
> +	adr_l	\tmp, \sym
> +	ldr	\dst, [\tmp]
> +	.else
> +	adr_l	\dst, \sym
> +	ldr	\dst, [\dst]
> +	.endif
> +#else
> +	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
> +	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
> +	ldr	\dst, [\dst, #:pc_g2:(\sym)]
> +#endif
> +	.endm
> +
> +	/*
> +	 * @src: source register
> +	 * @sym: name of the symbol
> +	 * @tmp: mandatory scratch register to calculate the address
> +	 *       while <src> needs to be preserved.
> +	 */
> +	.macro	str_l, src, sym, tmp:req
> +#ifdef CONFIG_THUMB2_KERNEL
> +	adr_l	\tmp, \sym
> +	str	\src, [\tmp]
> +#else
> +	add	\tmp, pc, #:pc_g0_nc:(\sym) - 8
> +	add	\tmp, \tmp, #:pc_g1_nc:(\sym) - 4
> +	str	\src, [\tmp, #:pc_g2:(\sym)]
> +#endif
> +	.endm
> +
>  #endif /* __ASM_ASSEMBLER_H__ */
> -- 
> 2.7.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data
  2016-08-03 15:38 ` [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data Ard Biesheuvel
@ 2016-08-03 18:06   ` Russell King - ARM Linux
  0 siblings, 0 replies; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-03 18:06 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 03, 2016 at 05:38:46PM +0200, Ard Biesheuvel wrote:
> Replace the open coded PC relative offset calculations with adr_l
> and ldr_l invocations.

.. and making it less efficient by doing so.  No thanks, I prefer the
original.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
  2016-08-03 16:49   ` Dave Martin
@ 2016-08-03 18:09   ` Russell King - ARM Linux
  2016-08-04  7:40     ` Ard Biesheuvel
  1 sibling, 1 reply; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-03 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> +	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
> +	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
> +	add	\dst, \dst, #:pc_g2:(\sym)

What's this :pc_g0_nc: stuff?  What binutils versions is it supported
in?  It doesn't appear documented in gas 2.23.91, so I don't think we
can use it.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (7 preceding siblings ...)
  2016-08-03 15:38 ` [PATCH 8/8] ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Ard Biesheuvel
@ 2016-08-03 18:17 ` Russell King - ARM Linux
  2016-08-04  7:17   ` Ard Biesheuvel
  2016-08-03 18:27 ` Nicolas Pitre
  9 siblings, 1 reply; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-03 18:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 03, 2016 at 05:38:42PM +0200, Ard Biesheuvel wrote:
> There are various places in the ARM kernel where the following pattern
> is used to create a PC-relative reference that is valid even before the
> MMU is on:
> 
>      adr    rX, 1f
>      ldr    rY, [rX]
>      add    rX, rX, rY
>      ...
>   1: .long  <symbol> - .   
> 
> or
>      adr    rX, 1f
>      ldmia  rX, {rY .. rY+n}
>      sub    rX, rX, rY
>      add    rY+1, rY+1, rX
>      add    rY+2, rY+2, rX
>      ...
>   1: .long  .
>      .long  <symbolY>
>      .long  <symbolY+1>
>      ...
> 
> Both cases can be greatly simplified by letting the linker do the
> calculations for us. This series implements adr_l, ldr_l and str_l
> macros, and uses them to simplify a couple of instances of the above
> patterns.

I don't buy that argument, sorry, and the argument is actually wrong.
No, we're _not_ letting the linker do the calculations for us, we're
letting the linker do _some_ of the calculation, but not all.

What you're replacing the above with is stuff like (I guess, because
I've no idea what this :pc_g0: notation is):

	add	rX, pc, #(sym - . - 8) & 0xff
	add	rX, rX, #(sym - . - 4) & 0xff00
	add	rX, rX, #(sym - .) & 0xff0000

which I think is a more complex (and less obvious) way to calculate it.
It's also buggy when we end up with a relative offset greater than 16MB,
which we have in multi-zImage kernels.

So no, I don't like this at all.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
                   ` (8 preceding siblings ...)
  2016-08-03 18:17 ` [PATCH 0/8] ARM: clean up PC-relative arithmetic Russell King - ARM Linux
@ 2016-08-03 18:27 ` Nicolas Pitre
  2016-08-04 14:11   ` Ard Biesheuvel
  9 siblings, 1 reply; 39+ messages in thread
From: Nicolas Pitre @ 2016-08-03 18:27 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 3 Aug 2016, Ard Biesheuvel wrote:

> There are various places in the ARM kernel where the following pattern
> is used to create a PC-relative reference that is valid even before the
> MMU is on:
> 
>      adr    rX, 1f
>      ldr    rY, [rX]
>      add    rX, rX, rY
>      ...
>   1: .long  <symbol> - .   
> 
> or
>      adr    rX, 1f
>      ldmia  rX, {rY .. rY+n}
>      sub    rX, rX, rY
>      add    rY+1, rY+1, rX
>      add    rY+2, rY+2, rX
>      ...
>   1: .long  .
>      .long  <symbolY>
>      .long  <symbolY+1>
>      ...
> 
> Both cases can be greatly simplified by letting the linker do the
> calculations for us. This series implements adr_l, ldr_l and str_l
> macros, and uses them to simplify a couple of instances of the above
> patterns.

I echo Dave's concern about the availability of the :pc_g*_nc: relocs.

Could this be used to solve the XIP translation problem in some 
universal way? Right now only a few cases are covered. For reference, see:

http://lists.infradead.org/pipermail/linux-arm-kernel/2015-November/386273.html

commit d7811455 ("ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL")

commit 8ff97fa3 ("ARM: make the physical-relative calculation more obvious")


Nicolas

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-03 18:17 ` [PATCH 0/8] ARM: clean up PC-relative arithmetic Russell King - ARM Linux
@ 2016-08-04  7:17   ` Ard Biesheuvel
  2016-08-04  9:49     ` Russell King - ARM Linux
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04  7:17 UTC (permalink / raw)
  To: linux-arm-kernel

On 3 August 2016 at 20:17, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Wed, Aug 03, 2016 at 05:38:42PM +0200, Ard Biesheuvel wrote:
>> There are various places in the ARM kernel where the following pattern
>> is used to create a PC-relative reference that is valid even before the
>> MMU is on:
>>
>>      adr    rX, 1f
>>      ldr    rY, [rX]
>>      add    rX, rX, rY
>>      ...
>>   1: .long  <symbol> - .
>>
>> or
>>      adr    rX, 1f
>>      ldmia  rX, {rY .. rY+n}
>>      sub    rX, rX, rY
>>      add    rY+1, rY+1, rX
>>      add    rY+2, rY+2, rX
>>      ...
>>   1: .long  .
>>      .long  <symbolY>
>>      .long  <symbolY+1>
>>      ...
>>
>> Both cases can be greatly simplified by letting the linker do the
>> calculations for us. This series implements adr_l, ldr_l and str_l
>> macros, and uses them to simplify a couple of instances of the above
>> patterns.
>
> I don't buy that argument, sorry, and the argument is actually wrong.
> No, we're _not_ letting the linker do the calculations for us, we're
> letting the linker do _some_ of the calculation, but not all.
>
> What you're replacing the above with is stuff like (I guess, because
> I've no idea what this :pc_g0: notation is):
>
>         add     rX, pc, #(sym - . - 8) & 0xff
>         add     rX, rX, #(sym - . - 4) & 0xff00
>         add     rX, rX, #(sym - .) & 0xff0000
>
> which I think is a more complex (and less obvious) way to calculate it.
> It's also buggy when we end up with a relative offset greater than 16MB,
> which we have in multi-zImage kernels.
>

Even if you think this is a more complex way to calculate it, at least
it is encapsulated in a single macro instead of having similar but not
identical open coded instances all over the place.

As for the range: the ldr/str variants have 28 bits of range (2x
scaled 8 bit immediate for the adds and a single unscaled 12 bit
immediate for the ldr/str). The adr variant has 26 bits (3x scaled
immediate counting from bit 2) range for word aligned symbols, which
gives us +/- 64 MB, which should be plenty. The only pathological
outlier is allyesconfig, but that uses Thumb2 anyway.

The relocations documented here
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf

> So no, I don't like this at all
>

Noted

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 16:49   ` Dave Martin
@ 2016-08-04  7:40     ` Ard Biesheuvel
  2016-08-04  9:44       ` Dave Martin
  2016-08-04  7:43     ` Ard Biesheuvel
  1 sibling, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04  7:40 UTC (permalink / raw)
  To: linux-arm-kernel

On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
>> Like arm64, ARM supports position independent code sequences that
>> produce symbol references with a greater reach than the ordinary
>> adr/ldr instructions.
>>
>> Introduce adr_l, that takes the address of a symbol in a PC relative
>> manner, and ldr_l/str_l that perform a 32-bit loads/stores from a
>> PC-relative offset.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
>>  1 file changed, 59 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index 4eaea2173bf8..e1450889f96b 100644
>> --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h
>> @@ -512,4 +512,63 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )
>>  #endif
>>       .endm
>>
>> +/*
>> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
>> + */
>> +
>> +     /*
>> +      * @dst: destination register
>> +      * @sym: name of the symbol
>> +      */
>> +     .macro  adr_l, dst, sym
>> +#ifdef CONFIG_THUMB2_KERNEL
>> +     movw    \dst, #:lower16:(\sym) - (. + 12)
>> +     movt    \dst, #:upper16:(\sym) - (. + 8)
>> +     add     \dst, \dst, pc
>
> pc always reads as the address of that add plus 4, right?  I remember
> some special case where it gets rounded down to a 4-byte boundary, but
> IIRC that only applies to certain ldr ..., [pc, ...] forms.
>

The 4-byte rounding occurs when the linker (or the linux module
loader) encounters a bl instruction in Thumb that needs to be fixed up
to blx if the target is ARM.

>> +#else
>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> +     add     \dst, \dst, #:pc_g2:(\sym)
>
> Whoah.  I've never seen this syntax before...  does this work for any
> named reloc, or just for certain blessed relocs?  (I'm also _assuming_
> the assembler support for this functionality is ancient -- if not,
> there may be toolchain compatibility issues.)
>
> Based on my understanding of how these relocs work, this should do the
> right thing, though.
>
>
> Second question: for arm, this reduces the range addressable to
> pc +/- 26-bit offset (24-bit if sym is not word aligned, but that
> probably never happens).
>
> I can't remember the de facto limit on the size of vmlinux for arm --
> are you sure this extra limitation won't break some cases of huge
> initramfs where adr_l gets used for cross-section references?
>

Yes, that seems a valid concern. We have +/- 64 MB for the adr_l
variant (as you say, for word aligned symbols), but this may be
insufficient. The ldr/str variants don't have the same limitation.

> (For Thumb2, :lower16:/:upper16: give a full 32-bit range, so no problem
> there -- sad that this isn't available before ARMv7).
>

Indeed.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 18:09   ` Russell King - ARM Linux
@ 2016-08-04  7:40     ` Ard Biesheuvel
  2016-08-04  9:38       ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04  7:40 UTC (permalink / raw)
  To: linux-arm-kernel

On 3 August 2016 at 20:09, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> +     add     \dst, \dst, #:pc_g2:(\sym)
>
> What's this :pc_g0_nc: stuff?  What binutils versions is it supported
> in?  It doesn't appear documented in gas 2.23.91, so I don't think we
> can use it.
>

binutils 2.18 and up

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-03 16:49   ` Dave Martin
  2016-08-04  7:40     ` Ard Biesheuvel
@ 2016-08-04  7:43     ` Ard Biesheuvel
  1 sibling, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04  7:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
>> Like arm64, ARM supports position independent code sequences that
>> produce symbol references with a greater reach than the ordinary
>> adr/ldr instructions.
>>
>> Introduce adr_l, that takes the address of a symbol in a PC relative
>> manner, and ldr_l/str_l that perform a 32-bit loads/stores from a
>> PC-relative offset.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
>>  1 file changed, 59 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index 4eaea2173bf8..e1450889f96b 100644
>> --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h
>> @@ -512,4 +512,63 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )
>>  #endif
>>       .endm
>>
>> +/*
>> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
>> + */
>> +
>> +     /*
>> +      * @dst: destination register
>> +      * @sym: name of the symbol
>> +      */
>> +     .macro  adr_l, dst, sym
>> +#ifdef CONFIG_THUMB2_KERNEL
>> +     movw    \dst, #:lower16:(\sym) - (. + 12)
>> +     movt    \dst, #:upper16:(\sym) - (. + 8)
>> +     add     \dst, \dst, pc
>
> pc always reads as the address of that add plus 4, right?  I remember
> some special case where it gets rounded down to a 4-byte boundary, but
> IIRC that only applies to certain ldr ..., [pc, ...] forms.
>
>> +#else
>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> +     add     \dst, \dst, #:pc_g2:(\sym)
>
> Whoah.  I've never seen this syntax before...  does this work for any
> named reloc, or just for certain blessed relocs?  (I'm also _assuming_
> the assembler support for this functionality is ancient -- if not,
> there may be toolchain compatibility issues.)
>

(Missed this question in my first reply)

There is no 1:1 mapping between these tokens and the actual relocs.
For instance, pc_g2 will be converted into

R_ARM_ALU_PC_G2 for an add instruction
R_ARM_LDR_PC_G2 for an ldr/str instruction
etc etc

so that means relocations have to be 'blessed' in some way, indeed.

You can still emit arbitrary relocs if you wanted, though, using the
.reloc pseudo op in GAS

-- 
Ard.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04  7:40     ` Ard Biesheuvel
@ 2016-08-04  9:38       ` Dave Martin
  2016-08-04 10:12         ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04  9:38 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:
> On 3 August 2016 at 20:09, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
> >> +     add     \dst, \dst, #:pc_g2:(\sym)
> >
> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported
> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we
> > can use it.
> >
> 
> binutils 2.18 and up

I think this was contemporary with GCC 4.<some middling version>, which
may be newer than the minimimum compiler we require for the kernel,
particular for earlier arch versions.

Using .reloc probably allows the same thing to be done in a more
backwards-compatible way.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04  7:40     ` Ard Biesheuvel
@ 2016-08-04  9:44       ` Dave Martin
  2016-08-04 10:34         ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04  9:44 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 09:40:31AM +0200, Ard Biesheuvel wrote:
> On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> >> Like arm64, ARM supports position independent code sequences that
> >> produce symbol references with a greater reach than the ordinary
> >> adr/ldr instructions.
> >>
> >> Introduce adr_l, that takes the address of a symbol in a PC relative
> >> manner, and ldr_l/str_l that perform a 32-bit loads/stores from a
> >> PC-relative offset.
> >>
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
> >>  1 file changed, 59 insertions(+)
> >>
> >> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> >> index 4eaea2173bf8..e1450889f96b 100644
> >> --- a/arch/arm/include/asm/assembler.h
> >> +++ b/arch/arm/include/asm/assembler.h
> >> @@ -512,4 +512,63 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )
> >>  #endif
> >>       .endm
> >>
> >> +/*
> >> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
> >> + */
> >> +
> >> +     /*
> >> +      * @dst: destination register
> >> +      * @sym: name of the symbol
> >> +      */
> >> +     .macro  adr_l, dst, sym
> >> +#ifdef CONFIG_THUMB2_KERNEL
> >> +     movw    \dst, #:lower16:(\sym) - (. + 12)
> >> +     movt    \dst, #:upper16:(\sym) - (. + 8)
> >> +     add     \dst, \dst, pc
> >
> > pc always reads as the address of that add plus 4, right?  I remember
> > some special case where it gets rounded down to a 4-byte boundary, but
> > IIRC that only applies to certain ldr ..., [pc, ...] forms.
> >
> 
> The 4-byte rounding occurs when the linker (or the linux module
> loader) encounters a bl instruction in Thumb that needs to be fixed up
> to blx if the target is ARM.

I think there are a few cases like this -- so long as this add isn't
affected.

> 
> >> +#else
> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
> >> +     add     \dst, \dst, #:pc_g2:(\sym)
> >
> > Whoah.  I've never seen this syntax before...  does this work for any
> > named reloc, or just for certain blessed relocs?  (I'm also _assuming_
> > the assembler support for this functionality is ancient -- if not,
> > there may be toolchain compatibility issues.)
> >
> > Based on my understanding of how these relocs work, this should do the
> > right thing, though.
> >
> >
> > Second question: for arm, this reduces the range addressable to
> > pc +/- 26-bit offset (24-bit if sym is not word aligned, but that
> > probably never happens).
> >
> > I can't remember the de facto limit on the size of vmlinux for arm --
> > are you sure this extra limitation won't break some cases of huge
> > initramfs where adr_l gets used for cross-section references?
> >
> 
> Yes, that seems a valid concern. We have +/- 64 MB for the adr_l
> variant (as you say, for word aligned symbols), but this may be
> insufficient. The ldr/str variants don't have the same limitation.

True, but they're still limited, I think in effect to +/- 256 MB.

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-04  7:17   ` Ard Biesheuvel
@ 2016-08-04  9:49     ` Russell King - ARM Linux
  2016-08-04  9:54       ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-04  9:49 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 09:17:04AM +0200, Ard Biesheuvel wrote:
> On 3 August 2016 at 20:17, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > I don't buy that argument, sorry, and the argument is actually wrong.
> > No, we're _not_ letting the linker do the calculations for us, we're
> > letting the linker do _some_ of the calculation, but not all.
> >
> > What you're replacing the above with is stuff like (I guess, because
> > I've no idea what this :pc_g0: notation is):
> >
> >         add     rX, pc, #(sym - . - 8) & 0xff
> >         add     rX, rX, #(sym - . - 4) & 0xff00
> >         add     rX, rX, #(sym - .) & 0xff0000
> >
> > which I think is a more complex (and less obvious) way to calculate it.
> > It's also buggy when we end up with a relative offset greater than 16MB,
> > which we have in multi-zImage kernels.
> >
> 
> Even if you think this is a more complex way to calculate it, at least
> it is encapsulated in a single macro instead of having similar but not
> identical open coded instances all over the place.

... and, it may come as a shocker, but I don't have a problem with
that.

> As for the range: the ldr/str variants have 28 bits of range (2x
> scaled 8 bit immediate for the adds and a single unscaled 12 bit
> immediate for the ldr/str). The adr variant has 26 bits (3x scaled
> immediate counting from bit 2) range for word aligned symbols, which
> gives us +/- 64 MB, which should be plenty. The only pathological
> outlier is allyesconfig, but that uses Thumb2 anyway.

Our existing code allows for a range of the full address space - the only
thing it relies upon is that the literal data is placed within reach of
the code - which it will be, because it's always placed near the code
which is using it.

> The relocations documented here
> http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf

Right, so it's an EABI thing, and I guess you haven't tested OABI
builds, where I suspect these relocations aren't supported.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-04  9:49     ` Russell King - ARM Linux
@ 2016-08-04  9:54       ` Ard Biesheuvel
  2016-08-04 10:03         ` Russell King - ARM Linux
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04  9:54 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 11:49, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Aug 04, 2016 at 09:17:04AM +0200, Ard Biesheuvel wrote:
>> On 3 August 2016 at 20:17, Russell King - ARM Linux
>> <linux@armlinux.org.uk> wrote:
>> > I don't buy that argument, sorry, and the argument is actually wrong.
>> > No, we're _not_ letting the linker do the calculations for us, we're
>> > letting the linker do _some_ of the calculation, but not all.
>> >
>> > What you're replacing the above with is stuff like (I guess, because
>> > I've no idea what this :pc_g0: notation is):
>> >
>> >         add     rX, pc, #(sym - . - 8) & 0xff
>> >         add     rX, rX, #(sym - . - 4) & 0xff00
>> >         add     rX, rX, #(sym - .) & 0xff0000
>> >
>> > which I think is a more complex (and less obvious) way to calculate it.
>> > It's also buggy when we end up with a relative offset greater than 16MB,
>> > which we have in multi-zImage kernels.
>> >
>>
>> Even if you think this is a more complex way to calculate it, at least
>> it is encapsulated in a single macro instead of having similar but not
>> identical open coded instances all over the place.
>
> ... and, it may come as a shocker, but I don't have a problem with
> that.
>
>> As for the range: the ldr/str variants have 28 bits of range (2x
>> scaled 8 bit immediate for the adds and a single unscaled 12 bit
>> immediate for the ldr/str). The adr variant has 26 bits (3x scaled
>> immediate counting from bit 2) range for word aligned symbols, which
>> gives us +/- 64 MB, which should be plenty. The only pathological
>> outlier is allyesconfig, but that uses Thumb2 anyway.
>
> Our existing code allows for a range of the full address space - the only
> thing it relies upon is that the literal data is placed within reach of
> the code - which it will be, because it's always placed near the code
> which is using it.
>
>> The relocations documented here
>> http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
>
> Right, so it's an EABI thing, and I guess you haven't tested OABI
> builds, where I suspect these relocations aren't supported.
>

I suppose that's a fair point. But then, I'm only 40 so I am too young
to remember this OABI stuff anyway. Does it require GCC 2.95 from your
toolchain museum?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-04  9:54       ` Ard Biesheuvel
@ 2016-08-04 10:03         ` Russell King - ARM Linux
  2016-08-04 10:11           ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-04 10:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 11:54:25AM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 11:49, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > On Thu, Aug 04, 2016 at 09:17:04AM +0200, Ard Biesheuvel wrote:
> >> On 3 August 2016 at 20:17, Russell King - ARM Linux
> >> <linux@armlinux.org.uk> wrote:
> >> > I don't buy that argument, sorry, and the argument is actually wrong.
> >> > No, we're _not_ letting the linker do the calculations for us, we're
> >> > letting the linker do _some_ of the calculation, but not all.
> >> >
> >> > What you're replacing the above with is stuff like (I guess, because
> >> > I've no idea what this :pc_g0: notation is):
> >> >
> >> >         add     rX, pc, #(sym - . - 8) & 0xff
> >> >         add     rX, rX, #(sym - . - 4) & 0xff00
> >> >         add     rX, rX, #(sym - .) & 0xff0000
> >> >
> >> > which I think is a more complex (and less obvious) way to calculate it.
> >> > It's also buggy when we end up with a relative offset greater than 16MB,
> >> > which we have in multi-zImage kernels.
> >> >
> >>
> >> Even if you think this is a more complex way to calculate it, at least
> >> it is encapsulated in a single macro instead of having similar but not
> >> identical open coded instances all over the place.
> >
> > ... and, it may come as a shocker, but I don't have a problem with
> > that.
> >
> >> As for the range: the ldr/str variants have 28 bits of range (2x
> >> scaled 8 bit immediate for the adds and a single unscaled 12 bit
> >> immediate for the ldr/str). The adr variant has 26 bits (3x scaled
> >> immediate counting from bit 2) range for word aligned symbols, which
> >> gives us +/- 64 MB, which should be plenty. The only pathological
> >> outlier is allyesconfig, but that uses Thumb2 anyway.
> >
> > Our existing code allows for a range of the full address space - the only
> > thing it relies upon is that the literal data is placed within reach of
> > the code - which it will be, because it's always placed near the code
> > which is using it.
> >
> >> The relocations documented here
> >> http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
> >
> > Right, so it's an EABI thing, and I guess you haven't tested OABI
> > builds, where I suspect these relocations aren't supported.
> >
> 
> I suppose that's a fair point. But then, I'm only 40 so I am too young
> to remember this OABI stuff anyway. Does it require GCC 2.95 from your
> toolchain museum?

I'm sorry, but that's really no excuse, we're of similar ages, so...
<expletive deleted>.  And GCC 4 is capable of building OABI.

OABI is going to have to live for a long time yet, I still rely on
OABI - and this is something that most people ignored when I raised
it in the EABI discussions - when I said that there needed to be a
sane transition path between OABI and EABI which didn't involve
"shut the machine down, totally replace the rootfs".  I'm not at
liberty to shut my machines down while I rebuild everything that's
on them as EABI.

So, OABI support will live on for as long as I'm involved in Linux
and have a need for it.  _All_ my pre-ARMv6 machines (which run
everything I rely upon) are OABI.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-04 10:03         ` Russell King - ARM Linux
@ 2016-08-04 10:11           ` Ard Biesheuvel
  2016-08-04 10:14             ` Russell King - ARM Linux
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 10:11 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 12:03, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Aug 04, 2016 at 11:54:25AM +0200, Ard Biesheuvel wrote:
>> On 4 August 2016 at 11:49, Russell King - ARM Linux
>> <linux@armlinux.org.uk> wrote:
>> > On Thu, Aug 04, 2016 at 09:17:04AM +0200, Ard Biesheuvel wrote:
>> >> On 3 August 2016 at 20:17, Russell King - ARM Linux
>> >> <linux@armlinux.org.uk> wrote:
>> >> > I don't buy that argument, sorry, and the argument is actually wrong.
>> >> > No, we're _not_ letting the linker do the calculations for us, we're
>> >> > letting the linker do _some_ of the calculation, but not all.
>> >> >
>> >> > What you're replacing the above with is stuff like (I guess, because
>> >> > I've no idea what this :pc_g0: notation is):
>> >> >
>> >> >         add     rX, pc, #(sym - . - 8) & 0xff
>> >> >         add     rX, rX, #(sym - . - 4) & 0xff00
>> >> >         add     rX, rX, #(sym - .) & 0xff0000
>> >> >
>> >> > which I think is a more complex (and less obvious) way to calculate it.
>> >> > It's also buggy when we end up with a relative offset greater than 16MB,
>> >> > which we have in multi-zImage kernels.
>> >> >
>> >>
>> >> Even if you think this is a more complex way to calculate it, at least
>> >> it is encapsulated in a single macro instead of having similar but not
>> >> identical open coded instances all over the place.
>> >
>> > ... and, it may come as a shocker, but I don't have a problem with
>> > that.
>> >
>> >> As for the range: the ldr/str variants have 28 bits of range (2x
>> >> scaled 8 bit immediate for the adds and a single unscaled 12 bit
>> >> immediate for the ldr/str). The adr variant has 26 bits (3x scaled
>> >> immediate counting from bit 2) range for word aligned symbols, which
>> >> gives us +/- 64 MB, which should be plenty. The only pathological
>> >> outlier is allyesconfig, but that uses Thumb2 anyway.
>> >
>> > Our existing code allows for a range of the full address space - the only
>> > thing it relies upon is that the literal data is placed within reach of
>> > the code - which it will be, because it's always placed near the code
>> > which is using it.
>> >
>> >> The relocations documented here
>> >> http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
>> >
>> > Right, so it's an EABI thing, and I guess you haven't tested OABI
>> > builds, where I suspect these relocations aren't supported.
>> >
>>
>> I suppose that's a fair point. But then, I'm only 40 so I am too young
>> to remember this OABI stuff anyway. Does it require GCC 2.95 from your
>> toolchain museum?
>
> I'm sorry, but that's really no excuse, we're of similar ages, so...
> <expletive deleted>.

Just countering the unnecessary sarcasm ... :-)

>  And GCC 4 is capable of building OABI.
>
> OABI is going to have to live for a long time yet, I still rely on
> OABI - and this is something that most people ignored when I raised
> it in the EABI discussions - when I said that there needed to be a
> sane transition path between OABI and EABI which didn't involve
> "shut the machine down, totally replace the rootfs".  I'm not at
> liberty to shut my machines down while I rebuild everything that's
> on them as EABI.
>
> So, OABI support will live on for as long as I'm involved in Linux
> and have a need for it.  _All_ my pre-ARMv6 machines (which run
> everything I rely upon) are OABI.
>

But seriously, it appears that the group relocations are simply too
problematic to support at the moment, and I don't see a way to fix
that.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04  9:38       ` Dave Martin
@ 2016-08-04 10:12         ` Ard Biesheuvel
  2016-08-04 11:08           ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 10:12 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:
>> On 3 August 2016 at 20:09, Russell King - ARM Linux
>> <linux@armlinux.org.uk> wrote:
>> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
>> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> >> +     add     \dst, \dst, #:pc_g2:(\sym)
>> >
>> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported
>> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we
>> > can use it.
>> >
>>
>> binutils 2.18 and up
>
> I think this was contemporary with GCC 4.<some middling version>, which
> may be newer than the minimimum compiler we require for the kernel,
> particular for earlier arch versions.
>
> Using .reloc probably allows the same thing to be done in a more
> backwards-compatible way.
>

I don't see how LD would know how to handle these relocations if we do
manage to emit them from GAS in this way.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-04 10:11           ` Ard Biesheuvel
@ 2016-08-04 10:14             ` Russell King - ARM Linux
  0 siblings, 0 replies; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-04 10:14 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 12:11:23PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 12:03, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > On Thu, Aug 04, 2016 at 11:54:25AM +0200, Ard Biesheuvel wrote:
> >> On 4 August 2016 at 11:49, Russell King - ARM Linux
> >> <linux@armlinux.org.uk> wrote:
> >> > On Thu, Aug 04, 2016 at 09:17:04AM +0200, Ard Biesheuvel wrote:
> >> >> On 3 August 2016 at 20:17, Russell King - ARM Linux
> >> >> <linux@armlinux.org.uk> wrote:
> >> >> > I don't buy that argument, sorry, and the argument is actually wrong.
> >> >> > No, we're _not_ letting the linker do the calculations for us, we're
> >> >> > letting the linker do _some_ of the calculation, but not all.
> >> >> >
> >> >> > What you're replacing the above with is stuff like (I guess, because
> >> >> > I've no idea what this :pc_g0: notation is):
> >> >> >
> >> >> >         add     rX, pc, #(sym - . - 8) & 0xff
> >> >> >         add     rX, rX, #(sym - . - 4) & 0xff00
> >> >> >         add     rX, rX, #(sym - .) & 0xff0000
> >> >> >
> >> >> > which I think is a more complex (and less obvious) way to calculate it.
> >> >> > It's also buggy when we end up with a relative offset greater than 16MB,
> >> >> > which we have in multi-zImage kernels.
> >> >> >
> >> >>
> >> >> Even if you think this is a more complex way to calculate it, at least
> >> >> it is encapsulated in a single macro instead of having similar but not
> >> >> identical open coded instances all over the place.
> >> >
> >> > ... and, it may come as a shocker, but I don't have a problem with
> >> > that.
> >> >
> >> >> As for the range: the ldr/str variants have 28 bits of range (2x
> >> >> scaled 8 bit immediate for the adds and a single unscaled 12 bit
> >> >> immediate for the ldr/str). The adr variant has 26 bits (3x scaled
> >> >> immediate counting from bit 2) range for word aligned symbols, which
> >> >> gives us +/- 64 MB, which should be plenty. The only pathological
> >> >> outlier is allyesconfig, but that uses Thumb2 anyway.
> >> >
> >> > Our existing code allows for a range of the full address space - the only
> >> > thing it relies upon is that the literal data is placed within reach of
> >> > the code - which it will be, because it's always placed near the code
> >> > which is using it.
> >> >
> >> >> The relocations documented here
> >> >> http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
> >> >
> >> > Right, so it's an EABI thing, and I guess you haven't tested OABI
> >> > builds, where I suspect these relocations aren't supported.
> >> >
> >>
> >> I suppose that's a fair point. But then, I'm only 40 so I am too young
> >> to remember this OABI stuff anyway. Does it require GCC 2.95 from your
> >> toolchain museum?
> >
> > I'm sorry, but that's really no excuse, we're of similar ages, so...
> > <expletive deleted>.
> 
> Just countering the unnecessary sarcasm ... :-)

There was nothing sarcastic about my reply to you - everything I said
was deadly serious.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04  9:44       ` Dave Martin
@ 2016-08-04 10:34         ` Ard Biesheuvel
  2016-08-04 11:22           ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 10:34 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 11:44, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 09:40:31AM +0200, Ard Biesheuvel wrote:
>> On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
>> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
[...]
>> >> +#else
>> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> >> +     add     \dst, \dst, #:pc_g2:(\sym)
>> >
>> > Whoah.  I've never seen this syntax before...  does this work for any
>> > named reloc, or just for certain blessed relocs?  (I'm also _assuming_
>> > the assembler support for this functionality is ancient -- if not,
>> > there may be toolchain compatibility issues.)
>> >
>> > Based on my understanding of how these relocs work, this should do the
>> > right thing, though.
>> >
>> >
>> > Second question: for arm, this reduces the range addressable to
>> > pc +/- 26-bit offset (24-bit if sym is not word aligned, but that
>> > probably never happens).
>> >
>> > I can't remember the de facto limit on the size of vmlinux for arm --
>> > are you sure this extra limitation won't break some cases of huge
>> > initramfs where adr_l gets used for cross-section references?
>> >
>>
>> Yes, that seems a valid concern. We have +/- 64 MB for the adr_l
>> variant (as you say, for word aligned symbols), but this may be
>> insufficient. The ldr/str variants don't have the same limitation.
>
> True, but they're still limited, I think in effect to +/- 256 MB.
>

Yes. I managed to hack around the limitation by doing this

.macro adr_fwd, dst, sym, tmp
add \dst, pc, #:pc_g0_nc:(\sym) - 8
add \dst, \dst, #:pc_g1_nc:(\sym) - 4
.reloc ., R_ARM_LDR_PC_G2, \sym
mov \tmp, #0
add \dst, \dst, \tmp
.endm

but it is becoming very hacky, and this only works for forward
references anyway.

So I think this is a dead end. Series withdrawn.

Thanks for your time,
Ard.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 10:12         ` Ard Biesheuvel
@ 2016-08-04 11:08           ` Dave Martin
  2016-08-04 11:10             ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04 11:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 12:12:07PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:
> >> On 3 August 2016 at 20:09, Russell King - ARM Linux
> >> <linux@armlinux.org.uk> wrote:
> >> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> >> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
> >> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
> >> >> +     add     \dst, \dst, #:pc_g2:(\sym)
> >> >
> >> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported
> >> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we
> >> > can use it.
> >> >
> >>
> >> binutils 2.18 and up
> >
> > I think this was contemporary with GCC 4.<some middling version>, which
> > may be newer than the minimimum compiler we require for the kernel,
> > particular for earlier arch versions.
> >
> > Using .reloc probably allows the same thing to be done in a more
> > backwards-compatible way.
> >
> 
> I don't see how LD would know how to handle these relocations if we do
> manage to emit them from GAS in this way.

0:	add	\dst, pc, #-8
1:	add	\dst, \dst, #-4
2:	add	\dst, \dst, #0

.reloc	0b, R_ARM_ALU_PC_G0_NC, \sym
.reloc	1b, R_ARM_ALU_PC_G1_NC, \sym
.reloc	2b, R_ARM_ALU_PC_G2, \sym

or, for ldr_l:

0:	add	\dst, pc, #-8
1:	add	\dst, \dst, #-4
2:	ldr	[\dst, #0]

.reloc	0b, R_ARM_ALU_PC_G0_NC, \sym
.reloc	1b, R_ARM_ALU_PC_G1_NC, \sym
.reloc	2b, R_ARM_LDR_PC_G2, \sym

... should produce precisely the same result at the .o stage.


This #:reloc: stuff is mostly a shorthand/convenience feature as I
understand it -- however, you do have to know how to force the correct
instruction encoding to be emitted to match the reloc type -- with
#:reloc:, the assembler should take care of that.

The reloc/insn match issue also means that the Thumb2 relocs are
totally different (R_ARM_THM_<blah>).  


For OABI, it would no doubt be different again.  I know nothing about
OABI relocs.

And we're still limited to <32-bit offset ranges.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 11:08           ` Dave Martin
@ 2016-08-04 11:10             ` Ard Biesheuvel
  2016-08-04 11:30               ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 11:10 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 12:12:07PM +0200, Ard Biesheuvel wrote:
>> On 4 August 2016 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:
>> > On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:
>> >> On 3 August 2016 at 20:09, Russell King - ARM Linux
>> >> <linux@armlinux.org.uk> wrote:
>> >> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
>> >> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
>> >> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
>> >> >> +     add     \dst, \dst, #:pc_g2:(\sym)
>> >> >
>> >> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported
>> >> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we
>> >> > can use it.
>> >> >
>> >>
>> >> binutils 2.18 and up
>> >
>> > I think this was contemporary with GCC 4.<some middling version>, which
>> > may be newer than the minimimum compiler we require for the kernel,
>> > particular for earlier arch versions.
>> >
>> > Using .reloc probably allows the same thing to be done in a more
>> > backwards-compatible way.
>> >
>>
>> I don't see how LD would know how to handle these relocations if we do
>> manage to emit them from GAS in this way.
>
> 0:      add     \dst, pc, #-8
> 1:      add     \dst, \dst, #-4
> 2:      add     \dst, \dst, #0
>
> .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
> .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
> .reloc  2b, R_ARM_ALU_PC_G2, \sym
>
> or, for ldr_l:
>
> 0:      add     \dst, pc, #-8
> 1:      add     \dst, \dst, #-4
> 2:      ldr     [\dst, #0]
>
> .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
> .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
> .reloc  2b, R_ARM_LDR_PC_G2, \sym
>
> ... should produce precisely the same result at the .o stage.
>

Yes, but how is LD going to perform the arithmetic involved in
handling these relocations? That's is the more interesting part, and
that is not implemented either in binutils < 2.18

>
> This #:reloc: stuff is mostly a shorthand/convenience feature as I
> understand it -- however, you do have to know how to force the correct
> instruction encoding to be emitted to match the reloc type -- with
> #:reloc:, the assembler should take care of that.
>
> The reloc/insn match issue also means that the Thumb2 relocs are
> totally different (R_ARM_THM_<blah>).
>
>
> For OABI, it would no doubt be different again.  I know nothing about
> OABI relocs.
>
> And we're still limited to <32-bit offset ranges.
>

Indeed. As I said, a dead end ...

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 10:34         ` Ard Biesheuvel
@ 2016-08-04 11:22           ` Dave Martin
  2016-08-04 12:16             ` Russell King - ARM Linux
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04 11:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 12:34:58PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 11:44, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Thu, Aug 04, 2016 at 09:40:31AM +0200, Ard Biesheuvel wrote:
> >> On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> >> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:
> [...]
> >> >> +#else
> >> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8
> >> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4
> >> >> +     add     \dst, \dst, #:pc_g2:(\sym)
> >> >
> >> > Whoah.  I've never seen this syntax before...  does this work for any
> >> > named reloc, or just for certain blessed relocs?  (I'm also _assuming_
> >> > the assembler support for this functionality is ancient -- if not,
> >> > there may be toolchain compatibility issues.)
> >> >
> >> > Based on my understanding of how these relocs work, this should do the
> >> > right thing, though.
> >> >
> >> >
> >> > Second question: for arm, this reduces the range addressable to
> >> > pc +/- 26-bit offset (24-bit if sym is not word aligned, but that
> >> > probably never happens).
> >> >
> >> > I can't remember the de facto limit on the size of vmlinux for arm --
> >> > are you sure this extra limitation won't break some cases of huge
> >> > initramfs where adr_l gets used for cross-section references?
> >> >
> >>
> >> Yes, that seems a valid concern. We have +/- 64 MB for the adr_l
> >> variant (as you say, for word aligned symbols), but this may be
> >> insufficient. The ldr/str variants don't have the same limitation.
> >
> > True, but they're still limited, I think in effect to +/- 256 MB.
> >
> 
> Yes. I managed to hack around the limitation by doing this
> 
> .macro adr_fwd, dst, sym, tmp
> add \dst, pc, #:pc_g0_nc:(\sym) - 8
> add \dst, \dst, #:pc_g1_nc:(\sym) - 4
> .reloc ., R_ARM_LDR_PC_G2, \sym
> mov \tmp, #0
> add \dst, \dst, \tmp
> .endm
> 
> but it is becoming very hacky, and this only works for forward
> references anyway.

Indeed.  You can't use relocs on different insns than they're intended
for.

The linker is allowed to assume that the initial insn is a valid one
for the reloc -- without this, the addend couldn't be encoded reliably.
In this case, the linker might replace the MOV with an LDR, a MOV with
the wrong offset, or garbage.

> So I think this is a dead end. Series withdrawn.

Neat idea, but it's a shame about the limitations and compatibility
issues.

The more conventional literal-.long approach could still be macro-ised
along the same lines, which might make the affected code more readable,
but the idiom you'd be replacing is well-understood and not very common.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 11:10             ` Ard Biesheuvel
@ 2016-08-04 11:30               ` Dave Martin
  2016-08-04 11:34                 ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04 11:30 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:

[...]

> > or, for ldr_l:
> >
> > 0:      add     \dst, pc, #-8
> > 1:      add     \dst, \dst, #-4
> > 2:      ldr     [\dst, #0]
> >
> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
> > .reloc  2b, R_ARM_LDR_PC_G2, \sym
> >
> > ... should produce precisely the same result at the .o stage.
> >
> 
> Yes, but how is LD going to perform the arithmetic involved in

What arithmetic?

> handling these relocations? That's is the more interesting part, and
> that is not implemented either in binutils < 2.18

You mean .reloc is not implemented < 2.18?

[...]

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 11:30               ` Dave Martin
@ 2016-08-04 11:34                 ` Ard Biesheuvel
  2016-08-04 13:31                   ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 11:34 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:
>> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:
>
> [...]
>
>> > or, for ldr_l:
>> >
>> > 0:      add     \dst, pc, #-8
>> > 1:      add     \dst, \dst, #-4
>> > 2:      ldr     [\dst, #0]
>> >
>> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
>> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
>> > .reloc  2b, R_ARM_LDR_PC_G2, \sym
>> >
>> > ... should produce precisely the same result at the .o stage.
>> >
>>
>> Yes, but how is LD going to perform the arithmetic involved in
>
> What arithmetic?
>

The arithmetic involved in populating the immediate fields of these
instructions based on the actual offset between the Place and the
Symbol in the final image.

>> handling these relocations? That's is the more interesting part, and
>> that is not implemented either in binutils < 2.18
>
> You mean .reloc is not implemented < 2.18?
>

Yes, .reloc is implemented, but that is not sufficient.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 11:22           ` Dave Martin
@ 2016-08-04 12:16             ` Russell King - ARM Linux
  2016-08-04 12:55               ` Dave Martin
  2016-08-04 13:58               ` Ard Biesheuvel
  0 siblings, 2 replies; 39+ messages in thread
From: Russell King - ARM Linux @ 2016-08-04 12:16 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 12:22:29PM +0100, Dave Martin wrote:
> The more conventional literal-.long approach could still be macro-ised
> along the same lines, which might make the affected code more readable,
> but the idiom you'd be replacing is well-understood and not very common.

I don't see how it could be.  You can't efficiently place the literal
data alongside the instructions dealing with it.

The only alternative is to use ldr rd, =foo, but that gets very stupid
when you want to calculate the relative offset, and you end up with
something like this for every relative load:

	ldr rd, =.
	sub rd, rd, #. - 4
	ldr r1, =foo
	add r1, r1, rd

As I've already said, I prefer the existing solution.  It works, it's
been known to work for the last 22 years.

If it isn't broken, don't try to fix it.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 12:16             ` Russell King - ARM Linux
@ 2016-08-04 12:55               ` Dave Martin
  2016-08-04 13:58               ` Ard Biesheuvel
  1 sibling, 0 replies; 39+ messages in thread
From: Dave Martin @ 2016-08-04 12:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 01:16:19PM +0100, Russell King - ARM Linux wrote:
> On Thu, Aug 04, 2016 at 12:22:29PM +0100, Dave Martin wrote:
> > The more conventional literal-.long approach could still be macro-ised
> > along the same lines, which might make the affected code more readable,
> > but the idiom you'd be replacing is well-understood and not very common.
> 
> I don't see how it could be.  You can't efficiently place the literal
> data alongside the instructions dealing with it.
> 
> The only alternative is to use ldr rd, =foo, but that gets very stupid
> when you want to calculate the relative offset, and you end up with
> something like this for every relative load:
> 
> 	ldr rd, =.
> 	sub rd, rd, #. - 4
> 	ldr r1, =foo
> 	add r1, r1, rd

In theory, it ought to be possible to combine the two approaches:

	ldr	rd, =foo - (1f + 8)	@ or =foo - (. + 12)
1:	add	rd, pc, rd

But gas doesn't like the argument of ldr= to require a reloc (though it
would work perfectly well).


A similar effect to automatic literal pool placement can be achieved by
means of subsections:

	.subsection 1
	.align 2
0:	.long	foo - (1f + 8)
	.previous

	ldr	rd, 0b
1:	add	r0, pc, r0

... though this also has some shortcomings (including that .ltorg
won't flush these literals, and that subsections are a rarely used
assembler feature and will likely confuse people).

> As I've already said, I prefer the existing solution.  It works, it's
> been known to work for the last 22 years.
> 
> If it isn't broken, don't try to fix it.

Indeed... the case for the change isn't very compelling -- just nice
to have if it could be done without any unfortunate side-effects
(which looks unlikely right now).

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 11:34                 ` Ard Biesheuvel
@ 2016-08-04 13:31                   ` Dave Martin
  2016-08-04 13:46                     ` Ard Biesheuvel
  0 siblings, 1 reply; 39+ messages in thread
From: Dave Martin @ 2016-08-04 13:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 01:34:03PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:
> >> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:
> >
> > [...]
> >
> >> > or, for ldr_l:
> >> >
> >> > 0:      add     \dst, pc, #-8
> >> > 1:      add     \dst, \dst, #-4
> >> > 2:      ldr     [\dst, #0]
> >> >
> >> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
> >> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
> >> > .reloc  2b, R_ARM_LDR_PC_G2, \sym
> >> >
> >> > ... should produce precisely the same result at the .o stage.
> >> >
> >>
> >> Yes, but how is LD going to perform the arithmetic involved in

[...]

> >> handling these relocations? That's is the more interesting part, and
> >> that is not implemented either in binutils < 2.18
> >
> > What arithmetic?
> >
> 
> The arithmetic involved in populating the immediate fields of these
> instructions based on the actual offset between the Place and the
> Symbol in the final image.

<digression>

Just for interest...


For the linker this is just ordinary relocation processing -- there's
nothing unusual going on, except that neither GCC nor gas usually
emit these particular insn relocs automatically.

I think the ARM RVCT compiler could generate them for producing
ROM-able position independent code in some confgurations.  I suspect
they were supported by ld from the start though, or at least pretty
early on.


When you write

	add	\dst, pc, #:pc_g0_nc:\sym - (. + 8)

the arithmetic is somewhat bogus -- the assembler does not (and can't)
do it, because neither the value of \sym, nor of ., is known.  Only the
invariant bit (the - 8) can be processed at assembly time.  The
irreducible part (\sym - .) has to be emitted as a reloc.

Thus, the assembler really does emit 

.reloc  ., R_ARM_ALU_PC_G0_NC, \sym
	add     \dst, pc, #-8

(The "- ." is effectively part of the definition of R_ARM_ALU_PC_G0_NC
here).


For comparison:

$ as <<EOF -o a.o
	.reloc ., R_ARM_ALU_PC_G0_NC, foo
	add	r0, pc, #-8
	.reloc ., R_ARM_ALU_PC_G1_NC, foo
	add	r0, r0, #-4
	.reloc ., R_ARM_ALU_PC_G2, foo
	add	r0, r0, #0

	add	r0, pc, #:pc_g0_nc:foo - . - 8
	add	r0, r0, #:pc_g1_nc:foo - . - 4
	add	r0, r0, #:pc_g2:foo - .
EOF

$ objdump -dr a.o
00000000 <.text>:
   0:	e24f0008 	sub	r0, pc, #8
			0: R_ARM_ALU_PC_G0_NC	foo
   4:	e2400004 	sub	r0, r0, #4
			4: R_ARM_ALU_PC_G1_NC	foo
   8:	e2800000 	add	r0, r0, #0
			8: R_ARM_ALU_PC_G2	foo
   c:	e24f0008 	sub	r0, pc, #8
			c: R_ARM_ALU_PC_G0_NC	foo
  10:	e2400004 	sub	r0, r0, #4
			10: R_ARM_ALU_PC_G1_NC	foo
  14:	e2800000 	add	r0, r0, #0
			14: R_ARM_ALU_PC_G2	foo

$ ld --defsym foo=0x4000000 -o a a.o
$ objdump -dr a
00008054 <__bss_end__-0x8018>:
    8054:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000
    8058:       e2800bdf        add     r0, r0, #228352 ; 0x37c00
    805c:       e2800fe9        add     r0, r0, #932    ; 0x3a4
    8060:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000
    8064:       e2800bdf        add     r0, r0, #228352 ; 0x37c00
    8068:       e2800fe6        add     r0, r0, #920    ; 0x398


> Yes, .reloc is implemented, but that is not sufficient.

</digression>

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 13:31                   ` Dave Martin
@ 2016-08-04 13:46                     ` Ard Biesheuvel
  2016-08-04 15:38                       ` Dave Martin
  0 siblings, 1 reply; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 13:46 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 15:31, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 01:34:03PM +0200, Ard Biesheuvel wrote:
>> On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:
>> > On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:
>> >> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:
>> >
>> > [...]
>> >
>> >> > or, for ldr_l:
>> >> >
>> >> > 0:      add     \dst, pc, #-8
>> >> > 1:      add     \dst, \dst, #-4
>> >> > 2:      ldr     [\dst, #0]
>> >> >
>> >> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym
>> >> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym
>> >> > .reloc  2b, R_ARM_LDR_PC_G2, \sym
>> >> >
>> >> > ... should produce precisely the same result at the .o stage.
>> >> >
>> >>
>> >> Yes, but how is LD going to perform the arithmetic involved in
>
> [...]
>
>> >> handling these relocations? That's is the more interesting part, and
>> >> that is not implemented either in binutils < 2.18
>> >
>> > What arithmetic?
>> >
>>
>> The arithmetic involved in populating the immediate fields of these
>> instructions based on the actual offset between the Place and the
>> Symbol in the final image.
>
> <digression>
>
> Just for interest...
>
>
> For the linker this is just ordinary relocation processing -- there's
> nothing unusual going on, except that neither GCC nor gas usually
> emit these particular insn relocs automatically.
>

There is no such thing as 'ordinary' relocation processing. Each
relocation type requires its own specific handling, and pre-2.18 LD
simply does not come equipped with the routines to perform the
calculations that the ARM/ELF spec defines for these particular
relocation types. Whether GAS or any other assembler can produce them
is irrelevant, my claim is that pre-2.18 LD does not know how to
/consume/ them.


> I think the ARM RVCT compiler could generate them for producing
> ROM-able position independent code in some confgurations.  I suspect
> they were supported by ld from the start though, or at least pretty
> early on.
>
>
> When you write
>
>         add     \dst, pc, #:pc_g0_nc:\sym - (. + 8)
>
> the arithmetic is somewhat bogus -- the assembler does not (and can't)
> do it, because neither the value of \sym, nor of ., is known.  Only the
> invariant bit (the - 8) can be processed at assembly time.  The
> irreducible part (\sym - .) has to be emitted as a reloc.
>
> Thus, the assembler really does emit
>
> .reloc  ., R_ARM_ALU_PC_G0_NC, \sym
>         add     \dst, pc, #-8
>
> (The "- ." is effectively part of the definition of R_ARM_ALU_PC_G0_NC
> here).
>
>
> For comparison:
>
> $ as <<EOF -o a.o
>         .reloc ., R_ARM_ALU_PC_G0_NC, foo
>         add     r0, pc, #-8
>         .reloc ., R_ARM_ALU_PC_G1_NC, foo
>         add     r0, r0, #-4
>         .reloc ., R_ARM_ALU_PC_G2, foo
>         add     r0, r0, #0
>
>         add     r0, pc, #:pc_g0_nc:foo - . - 8
>         add     r0, r0, #:pc_g1_nc:foo - . - 4
>         add     r0, r0, #:pc_g2:foo - .
> EOF
>
> $ objdump -dr a.o
> 00000000 <.text>:
>    0:   e24f0008        sub     r0, pc, #8
>                         0: R_ARM_ALU_PC_G0_NC   foo
>    4:   e2400004        sub     r0, r0, #4
>                         4: R_ARM_ALU_PC_G1_NC   foo
>    8:   e2800000        add     r0, r0, #0
>                         8: R_ARM_ALU_PC_G2      foo
>    c:   e24f0008        sub     r0, pc, #8
>                         c: R_ARM_ALU_PC_G0_NC   foo
>   10:   e2400004        sub     r0, r0, #4
>                         10: R_ARM_ALU_PC_G1_NC  foo
>   14:   e2800000        add     r0, r0, #0
>                         14: R_ARM_ALU_PC_G2     foo
>
> $ ld --defsym foo=0x4000000 -o a a.o
> $ objdump -dr a
> 00008054 <__bss_end__-0x8018>:
>     8054:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000
>     8058:       e2800bdf        add     r0, r0, #228352 ; 0x37c00
>     805c:       e2800fe9        add     r0, r0, #932    ; 0x3a4
>     8060:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000
>     8064:       e2800bdf        add     r0, r0, #228352 ; 0x37c00
>     8068:       e2800fe6        add     r0, r0, #920    ; 0x398
>
>
>> Yes, .reloc is implemented, but that is not sufficient.
>
> </digression>
>
> Cheers
> ---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 12:16             ` Russell King - ARM Linux
  2016-08-04 12:55               ` Dave Martin
@ 2016-08-04 13:58               ` Ard Biesheuvel
  1 sibling, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 13:58 UTC (permalink / raw)
  To: linux-arm-kernel

On 4 August 2016 at 14:16, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Aug 04, 2016 at 12:22:29PM +0100, Dave Martin wrote:
>> The more conventional literal-.long approach could still be macro-ised
>> along the same lines, which might make the affected code more readable,
>> but the idiom you'd be replacing is well-understood and not very common.
>
> I don't see how it could be.  You can't efficiently place the literal
> data alongside the instructions dealing with it.
>
> The only alternative is to use ldr rd, =foo, but that gets very stupid
> when you want to calculate the relative offset, and you end up with
> something like this for every relative load:
>
>         ldr rd, =.
>         sub rd, rd, #. - 4
>         ldr r1, =foo
>         add r1, r1, rd
>

This does the trick

#ifdef CONFIG_THUMB2_KERNEL
#define PC_BIAS 4
#else
#define PC_BIAS 8
#endif

/*
* @dst: destination register
* @sym: name of the symbol
*/
.macro adr_l, dst, sym, tmp:req
#if __LINUX_ARM_ARCH__ >= 7
movw \dst, #:lower16:(\sym) - (. + 8 + PC_BIAS)
movt \dst, #:upper16:(\sym) - (. + 4 + PC_BIAS)
add \dst, \dst, pc
#else
ldr \tmp, =\sym
ldr \dst, =. + 12
sub \dst, \dst, pc
sub \dst, \tmp, \dst
#endif
.endm

but it is suboptimal for v6 and earlier, and now the macro requires a
temp register.

> As I've already said, I prefer the existing solution.  It works, it's
> been known to work for the last 22 years.
>
> If it isn't broken, don't try to fix it.
>

It seemed like low hanging fruit, but obviously not ...

Thanks for your time,
Ard.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 0/8] ARM: clean up PC-relative arithmetic
  2016-08-03 18:27 ` Nicolas Pitre
@ 2016-08-04 14:11   ` Ard Biesheuvel
  0 siblings, 0 replies; 39+ messages in thread
From: Ard Biesheuvel @ 2016-08-04 14:11 UTC (permalink / raw)
  To: linux-arm-kernel

On 3 August 2016 at 20:27, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Wed, 3 Aug 2016, Ard Biesheuvel wrote:
>
>> There are various places in the ARM kernel where the following pattern
>> is used to create a PC-relative reference that is valid even before the
>> MMU is on:
>>
>>      adr    rX, 1f
>>      ldr    rY, [rX]
>>      add    rX, rX, rY
>>      ...
>>   1: .long  <symbol> - .
>>
>> or
>>      adr    rX, 1f
>>      ldmia  rX, {rY .. rY+n}
>>      sub    rX, rX, rY
>>      add    rY+1, rY+1, rX
>>      add    rY+2, rY+2, rX
>>      ...
>>   1: .long  .
>>      .long  <symbolY>
>>      .long  <symbolY+1>
>>      ...
>>
>> Both cases can be greatly simplified by letting the linker do the
>> calculations for us. This series implements adr_l, ldr_l and str_l
>> macros, and uses them to simplify a couple of instances of the above
>> patterns.
>
> I echo Dave's concern about the availability of the :pc_g*_nc: relocs.
>
> Could this be used to solve the XIP translation problem in some
> universal way? Right now only a few cases are covered. For reference, see:
>
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-November/386273.html
>
> commit d7811455 ("ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL")
>
> commit 8ff97fa3 ("ARM: make the physical-relative calculation more obvious")
>

I don't see how we could use this to improve that, tbh.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros
  2016-08-04 13:46                     ` Ard Biesheuvel
@ 2016-08-04 15:38                       ` Dave Martin
  0 siblings, 0 replies; 39+ messages in thread
From: Dave Martin @ 2016-08-04 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 04, 2016 at 03:46:53PM +0200, Ard Biesheuvel wrote:
> On 4 August 2016 at 15:31, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Thu, Aug 04, 2016 at 01:34:03PM +0200, Ard Biesheuvel wrote:
> >> On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:
> >> > On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:

[...]

> >> >> Yes, but how is LD going to perform the arithmetic involved in
> >
> > [...]
> >
> >> >> handling these relocations? That's is the more interesting part, and
> >> >> that is not implemented either in binutils < 2.18
> >> >
> >> > What arithmetic?
> >> >
> >>
> >> The arithmetic involved in populating the immediate fields of these
> >> instructions based on the actual offset between the Place and the
> >> Symbol in the final image.
> >
> > <digression>
> >
> > Just for interest...
> >
> >
> > For the linker this is just ordinary relocation processing -- there's
> > nothing unusual going on, except that neither GCC nor gas usually
> > emit these particular insn relocs automatically.
> >
> 
> There is no such thing as 'ordinary' relocation processing. Each

"Ordinary" in the sense that the linker should cope with any standard
reloc that it might receive, but...

> relocation type requires its own specific handling, and pre-2.18 LD
> simply does not come equipped with the routines to perform the
> calculations that the ARM/ELF spec defines for these particular
> relocation types. Whether GAS or any other assembler can produce them
> is irrelevant, my claim is that pre-2.18 LD does not know how to
> /consume/ them.

you seem to be right about this.

There are some obsolete R_ARM_ALU_PCREL_* relocs which presumably are
the OABI equivalents.

However, ld does no overflow checking for them, silently fails to
handle signed offsets, and also splat the target symbol address over
the affected instruction instead of writing the carefully modified
instruction there.  This has apparently been the behaviour ever since
those relocs were implemented in 2004 (2.16).

So I'm guessing nobody ever really used these :/

The .reloc pseudo-op didn't exist before 2.18 either.

Which just about wraps this one up.


Serves me right for attempting to guess the history ;)

Cheers
---Dave

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2016-08-04 15:38 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-03 15:38 [PATCH 0/8] ARM: clean up PC-relative arithmetic Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros Ard Biesheuvel
2016-08-03 16:49   ` Dave Martin
2016-08-04  7:40     ` Ard Biesheuvel
2016-08-04  9:44       ` Dave Martin
2016-08-04 10:34         ` Ard Biesheuvel
2016-08-04 11:22           ` Dave Martin
2016-08-04 12:16             ` Russell King - ARM Linux
2016-08-04 12:55               ` Dave Martin
2016-08-04 13:58               ` Ard Biesheuvel
2016-08-04  7:43     ` Ard Biesheuvel
2016-08-03 18:09   ` Russell King - ARM Linux
2016-08-04  7:40     ` Ard Biesheuvel
2016-08-04  9:38       ` Dave Martin
2016-08-04 10:12         ` Ard Biesheuvel
2016-08-04 11:08           ` Dave Martin
2016-08-04 11:10             ` Ard Biesheuvel
2016-08-04 11:30               ` Dave Martin
2016-08-04 11:34                 ` Ard Biesheuvel
2016-08-04 13:31                   ` Dave Martin
2016-08-04 13:46                     ` Ard Biesheuvel
2016-08-04 15:38                       ` Dave Martin
2016-08-03 15:38 ` [PATCH 2/8] ARM: head-common.S: use PC-relative insn sequence for __proc_info Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 3/8] ARM: head-common.S: use PC-relative insn sequence for __turn_mmu_on Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 4/8] ARM: head.S: use PC-relative insn sequence for secondary_data Ard Biesheuvel
2016-08-03 18:06   ` Russell King - ARM Linux
2016-08-03 15:38 ` [PATCH 5/8] ARM: head: use PC-relative insn sequence for __smp_alt Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 6/8] ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 7/8] ARM: head.S: use PC-relative insn sequences for __fixup_pv_table Ard Biesheuvel
2016-08-03 15:38 ` [PATCH 8/8] ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Ard Biesheuvel
2016-08-03 18:17 ` [PATCH 0/8] ARM: clean up PC-relative arithmetic Russell King - ARM Linux
2016-08-04  7:17   ` Ard Biesheuvel
2016-08-04  9:49     ` Russell King - ARM Linux
2016-08-04  9:54       ` Ard Biesheuvel
2016-08-04 10:03         ` Russell King - ARM Linux
2016-08-04 10:11           ` Ard Biesheuvel
2016-08-04 10:14             ` Russell King - ARM Linux
2016-08-03 18:27 ` Nicolas Pitre
2016-08-04 14:11   ` Ard Biesheuvel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.