* [PATCH v2] ARM: zImage: add DSB and ISB barriers after relocating code
@ 2014-07-16 23:54 Marc Carino
  2014-07-17  1:49 ` Nicolas Pitre
From: Marc Carino @ 2014-07-16 23:54 UTC
  To: linux-arm-kernel

The zImage loader will relocate the image if it determines that
decompression will overwrite its current location. Since the act
of relocation is basically a form of code self-modification, we
need to ensure that the CPU fetches the updated instruction stream.

Since cache maintenance is skipped during the relocation phase (the
MMU and caches are off), we must execute both a data synchronization
barrier and an instruction synchronization barrier before jumping to
the relocated code.
Skipping the barriers can result in execution of stale prefetched
code, leading to hangs or an UNDEFINED INSTRUCTION exception.
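
Sketched out, the barrier pair added below uses the CP15 encodings,
which on ARMv7 are the (deprecated) equivalents of the dsb and isb
instructions:

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ DSB: ensure the copied code is written out
		mcr	p15, 0, r0, c7, c5, 4	@ ISB: flush the pipeline and refetch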

Signed-off-by: Marc Carino <marc.ceeeee@gmail.com>
---
 arch/arm/boot/compressed/head.S | 71 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 3 deletions(-)
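
As a sketch of what each proc_types entry looks like after this
change (it grows from five to six words; names here are illustrative):

		.word	match			@ CPU/arch ID value to match
		.word	mask			@ significant bits of the ID
		W(b)	cache_on_fn		@ 'cache on' method
		W(b)	cache_off_fn		@ 'cache off' method
		W(b)	cache_flush_fn		@ 'cache flush' method
		W(b)	i_cache_inval_fn	@ new 'i-cache invalidate' method

call_cache_fn picks the first entry where ((real_id ^ match) & mask) == 0
and jumps to the slot at the requested offset (8, 12, 16, or now 20),
which is why every entry must stay exactly PROC_ENTRY_SIZE bytes.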

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 3a8b32d..3888a0d 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -395,8 +395,13 @@ dtb_check_done:
 		add	sp, sp, r6
 #endif
 
+		/*
+		 * Perform full cache maintenance if caches were enabled
+		 * earlier. Otherwise, only invalidate the instruction cache.
+		 */
 		tst	r4, #1
 		bleq	cache_clean_flush
+		blne	i_cache_inval
 
 		adr	r0, BSYM(restart)
 		add	r0, r0, r6
@@ -769,7 +774,7 @@ __common_mmu_cache_on:
 		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
 #endif
 
-#define PROC_ENTRY_SIZE (4*5)
+#define PROC_ENTRY_SIZE (4*6)
 
 /*
  * Here follow the relocatable cache support functions for the
@@ -808,6 +813,7 @@ call_cache_fn:	adr	r12, proc_types
  *   - 'cache on' method instruction
  *   - 'cache off' method instruction
  *   - 'cache flush' method instruction
+ *   - 'instruction cache invalidate' method instruction
  *
  * We match an entry using: ((real_id ^ match) & mask) == 0
  *
@@ -826,6 +832,8 @@ proc_types:
  THUMB(		nop				)
 		mov	pc, lr
  THUMB(		nop				)
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x41007000		@ ARM7/710
 		.word	0xfff8fe00
@@ -835,6 +843,8 @@ proc_types:
  THUMB(		nop				)
 		mov	pc, lr
  THUMB(		nop				)
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x41807200		@ ARM720T (writethrough)
 		.word	0xffffff00
@@ -842,24 +852,32 @@ proc_types:
 		W(b)	__armv4_mmu_cache_off
 		mov	pc, lr
  THUMB(		nop				)
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x41007400		@ ARM74x
 		.word	0xff00ff00
 		W(b)	__armv3_mpu_cache_on
 		W(b)	__armv3_mpu_cache_off
 		W(b)	__armv3_mpu_cache_flush
-		
+		mov	pc, lr
+ THUMB(		nop				)
+
 		.word	0x41009400		@ ARM94x
 		.word	0xff00ff00
 		W(b)	__armv4_mpu_cache_on
 		W(b)	__armv4_mpu_cache_off
 		W(b)	__armv4_mpu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
 		.word	0xff0ffff0
 		W(b)	__arm926ejs_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv5tej_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x00007000		@ ARM7 IDs
 		.word	0x0000f000
@@ -869,6 +887,8 @@ proc_types:
  THUMB(		nop				)
 		mov	pc, lr
  THUMB(		nop				)
+		mov	pc, lr
+ THUMB(		nop				)
 
 		@ Everything from here on will be the new ID system.
 
@@ -877,30 +897,40 @@ proc_types:
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv4_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x6901b110		@ sa1110
 		.word	0xfffffff0
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv4_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x56056900
 		.word	0xffffff00		@ PXA9xx
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv4_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x56158000		@ PXA168
 		.word	0xfffff000
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv5tej_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x56050000		@ Feroceon
 		.word	0xff0f0000
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv5tej_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 #ifdef CONFIG_CPU_FEROCEON_OLD_ID
 		/* this conflicts with the standard ARMv5TE entry */
@@ -909,6 +939,8 @@ proc_types:
 		b	__armv4_mmu_cache_on
 		b	__armv4_mmu_cache_off
 		b	__armv5tej_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 #endif
 
 		.word	0x66015261		@ FA526
@@ -916,6 +948,8 @@ proc_types:
 		W(b)	__fa526_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__fa526_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		@ These match on the architecture ID
 
@@ -924,30 +958,39 @@ proc_types:
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv4_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x00050000		@ ARMv5TE
 		.word	0x000f0000
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv4_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x00060000		@ ARMv5TEJ
 		.word	0x000f0000
 		W(b)	__armv4_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv5tej_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x0007b000		@ ARMv6
 		.word	0x000ff000
 		W(b)	__armv6_mmu_cache_on
 		W(b)	__armv4_mmu_cache_off
 		W(b)	__armv6_mmu_cache_flush
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.word	0x000f0000		@ new CPU Id
 		.word	0x000f0000
 		W(b)	__armv7_mmu_cache_on
 		W(b)	__armv7_mmu_cache_off
 		W(b)	__armv7_mmu_cache_flush
+		W(b)	__armv7_i_cache_inval
 
 		.word	0			@ unrecognised type
 		.word	0
@@ -957,6 +1000,8 @@ proc_types:
  THUMB(		nop				)
 		mov	pc, lr
  THUMB(		nop				)
+		mov	pc, lr
+ THUMB(		nop				)
 
 		.size	proc_types, . - proc_types
 
@@ -1060,7 +1105,7 @@ __armv4_mpu_cache_flush:
 		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
 		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
 		mov	pc, lr
-		
+
 __fa526_cache_flush:
 		mov	r1, #0
 		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
@@ -1182,6 +1227,26 @@ __armv3_mpu_cache_flush:
 		mov	pc, lr
 
 /*
+ * Invalidate the instruction cache. To be used when the instruction stream
+ * is modified while the MMU and data caches are OFF.
+ * This routine can be invoked with the instruction cache ON or OFF.
+ *
+ * On exit,
+ *  r0 corrupted
+ */
+		.align	5
+i_cache_inval:	mov	r3, #20			@ i-cache invalidate function
+		b	call_cache_fn
+
+__armv7_i_cache_inval:
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c5, 0	@ inst. cache inval. all to PoU
+		mcr	p15, 0, r0, c7, c5, 5	@ branch pred. invalidate all
+		mcr	p15, 0, r0, c7, c10, 4	@ DSB
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
+		mov	pc, lr
+
+/*
  * Various debugging routines for printing hex characters and
  * memory, which again must be relocatable.
  */
-- 
1.8.1.3

* [PATCH v2] ARM: zImage: add DSB and ISB barriers after relocating code
  2014-07-16 23:54 [PATCH v2] ARM: zImage: add DSB and ISB barriers after relocating code Marc Carino
@ 2014-07-17  1:49 ` Nicolas Pitre
From: Nicolas Pitre @ 2014-07-17  1:49 UTC
  To: linux-arm-kernel

On Wed, 16 Jul 2014, Marc Carino wrote:

> The zImage loader will relocate the image if it determines that
> decompression will overwrite its current location. Since the act
> of relocation is basically a form of code self-modification, we
> need to ensure that the CPU fetches the updated instruction stream.
> 
> Since cache maintenance is skipped during the relocation phase (the
> MMU and caches are off), we must execute both a data synchronization
> barrier and an instruction synchronization barrier before jumping to
> the relocated code.
> Skipping the barriers can result in execution of stale prefetched
> code, leading to hangs or an UNDEFINED INSTRUCTION exception.
> 
> Signed-off-by: Marc Carino <marc.ceeeee@gmail.com>

Comments below.

> ---
>  arch/arm/boot/compressed/head.S | 71 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 68 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
> index 3a8b32d..3888a0d 100644
> --- a/arch/arm/boot/compressed/head.S
> +++ b/arch/arm/boot/compressed/head.S
> @@ -395,8 +395,13 @@ dtb_check_done:
>  		add	sp, sp, r6
>  #endif
>  
> +		/*
> +		 * Perform full cache maintenance if caches were enabled
> +		 * earlier. Otherwise, only invalidate the instruction cache.
> +		 */
>  		tst	r4, #1
>  		bleq	cache_clean_flush
> +		blne	i_cache_inval

This is wrong.  Suppose the Z (equal to zero) flag is set and
cache_clean_flush is called.  Nothing guarantees that the Z flag will
still be set when cache_clean_flush returns, in which case i_cache_inval
would be called as well.

In fact, maybe it would be cleaner to always call i_cache_inval 
separately and remove i-cache invalidation from any cache_clean_flush 
code.
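
For example (an untested sketch), re-test the flag before the second
call:

		tst	r4, #1
		bleq	cache_clean_flush
		tst	r4, #1			@ the call may have clobbered the flags
		blne	i_cache_inval

or, once the i-cache invalidation is pulled out of cache_clean_flush,
simply call i_cache_inval unconditionally.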

>  		adr	r0, BSYM(restart)
>  		add	r0, r0, r6
> @@ -769,7 +774,7 @@ __common_mmu_cache_on:
>  		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
>  #endif
>  
> -#define PROC_ENTRY_SIZE (4*5)
> +#define PROC_ENTRY_SIZE (4*6)
>  
>  /*
>   * Here follow the relocatable cache support functions for the
> @@ -808,6 +813,7 @@ call_cache_fn:	adr	r12, proc_types
>   *   - 'cache on' method instruction
>   *   - 'cache off' method instruction
>   *   - 'cache flush' method instruction
> + *   - 'instruction cache invalidate' method instruction
>   *
>   * We match an entry using: ((real_id ^ match) & mask) == 0
>   *
> @@ -826,6 +832,8 @@ proc_types:
>   THUMB(		nop				)
>  		mov	pc, lr
>   THUMB(		nop				)
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x41007000		@ ARM7/710
>  		.word	0xfff8fe00
> @@ -835,6 +843,8 @@ proc_types:
>   THUMB(		nop				)
>  		mov	pc, lr
>   THUMB(		nop				)
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x41807200		@ ARM720T (writethrough)
>  		.word	0xffffff00
> @@ -842,24 +852,32 @@ proc_types:
>  		W(b)	__armv4_mmu_cache_off
>  		mov	pc, lr
>   THUMB(		nop				)
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x41007400		@ ARM74x
>  		.word	0xff00ff00
>  		W(b)	__armv3_mpu_cache_on
>  		W(b)	__armv3_mpu_cache_off
>  		W(b)	__armv3_mpu_cache_flush
> -		
> +		mov	pc, lr
> + THUMB(		nop				)
> +
>  		.word	0x41009400		@ ARM94x
>  		.word	0xff00ff00
>  		W(b)	__armv4_mpu_cache_on
>  		W(b)	__armv4_mpu_cache_off
>  		W(b)	__armv4_mpu_cache_flush
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
>  		.word	0xff0ffff0
>  		W(b)	__arm926ejs_mmu_cache_on
>  		W(b)	__armv4_mmu_cache_off
>  		W(b)	__armv5tej_mmu_cache_flush
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x00007000		@ ARM7 IDs
>  		.word	0x0000f000
> @@ -869,6 +887,8 @@ proc_types:
>   THUMB(		nop				)
>  		mov	pc, lr
>   THUMB(		nop				)
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		@ Everything from here on will be the new ID system.
>  
> @@ -877,30 +897,40 @@ proc_types:
>  		W(b)	__armv4_mmu_cache_on
>  		W(b)	__armv4_mmu_cache_off
>  		W(b)	__armv4_mmu_cache_flush
> +		mov	pc, lr
> + THUMB(		nop				)
>  
>  		.word	0x6901b110		@ sa1110
>  		.word	0xfffffff0
>  		W(b)	__armv4_mmu_cache_on
>  		W(b)	__armv4_mmu_cache_off
>  		W(b)	__armv4_mmu_cache_flush
> +		mov	pc, lr
> + THUMB(		nop				)
[...]

I think all Harvard caches should have something here to be "correct".  
Looking at the actual kernel code:

$ grep -A3 "ENTRY.*_flush_icache_all" arch/arm/mm/*.S | grep mcr
cache-fa.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
cache-v4wb.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
cache-v4wt.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
cache-v7.S-	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
cache-v7.S-	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
proc-arm1020e.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm1020.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm1022.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm1026.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm920.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm922.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm925.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm926.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm940.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-arm946.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-feroceon.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-mohawk.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-xsc3.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
proc-xscale.S-	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache

So it should be simple to have a common one-liner for most cases.
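
E.g. something like this (untested, name illustrative):

__common_i_cache_inval:
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ invalidate I cache
		mov	pc, lr

with most of the Harvard-cache entries in proc_types pointing at it.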


Nicolas
