linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] arm64: use x22 to save boot exception level
@ 2019-08-28 17:33 Andrew F. Davis
  2019-08-29  9:47 ` Mark Rutland
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew F. Davis @ 2019-08-28 17:33 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Matthew Leach,
	Nishanth Menon, Tero Kristo
  Cc: linux-arm-kernel, linux-kernel, Andrew F . Davis

The exception level in which the kernel was entered needs to be saved for
later. We do this by writing the exception level to memory. As this data
is written with the MMU/cache off it will bypass any cache, after this we
invalidate the address so that later reads from cacheable mappings do not
read a stale cache line. The side effect of this invalidate is any
existing cache line for this address in the same coherency domain will be
cleaned and written into memory, possibly overwriting the data we just
wrote. As this memory is treated as cacheable by already running cores it
is not architecturally safe to perform any non-caching accesses to this
memory anyway.

Let's avoid these issues altogether by moving the writing of the boot
exception level to after MMU/caching has been enabled. Saving the boot
state in unused register x22 until we can safely and coherently write out
this data.

As the data is not written with the MMU off anymore we move the variable
definition out of this section and into a regular C code data section.

Signed-off-by: Andrew F. Davis <afd@ti.com>
---
 arch/arm64/kernel/head.S | 31 +++++++++++--------------------
 arch/arm64/kernel/smp.c  | 10 ++++++++++
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2cdacd1c141b..4c71493742c5 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -99,6 +99,7 @@ pe_header:
 	 *
 	 *  Register   Scope                      Purpose
 	 *  x21        stext() .. start_kernel()  FDT pointer passed at boot in x0
+	 *  x22        stext() .. start_kernel()  exception level core was booted
 	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
 	 *  x28        __create_page_tables()     callee preserved temp register
 	 *  x19/x20    __primary_switch()         callee preserved temp registers
@@ -108,7 +109,6 @@ ENTRY(stext)
 	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
 	adrp	x23, __PHYS_OFFSET
 	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
-	bl	set_cpu_boot_mode_flag
 	bl	__create_page_tables
 	/*
 	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
@@ -428,6 +428,8 @@ __primary_switched:
 	sub	x4, x4, x0			// the kernel virtual and
 	str_l	x4, kimage_voffset, x5		// physical mappings
 
+	bl	set_cpu_boot_mode_flag
+
 	// Clear BSS
 	adr_l	x0, __bss_start
 	mov	x1, xzr
@@ -470,7 +472,7 @@ EXPORT_SYMBOL(kimage_vaddr)
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
  *
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w22 if
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
@@ -480,7 +482,7 @@ ENTRY(el2_setup)
 	b.eq	1f
 	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
 	msr	sctlr_el1, x0
-	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
+	mov	w22, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
 	isb
 	ret
 
@@ -593,7 +595,7 @@ set_hcr:
 
 	cbz	x2, install_el2_stub
 
-	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
+	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	isb
 	ret
 
@@ -632,7 +634,7 @@ install_el2_stub:
 		      PSR_MODE_EL1h)
 	msr	spsr_el2, x0
 	msr	elr_el2, lr
-	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
+	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	eret
 ENDPROC(el2_setup)
 
@@ -642,12 +644,10 @@ ENDPROC(el2_setup)
  */
 set_cpu_boot_mode_flag:
 	adr_l	x1, __boot_cpu_mode
-	cmp	w0, #BOOT_CPU_MODE_EL2
+	cmp	w22, #BOOT_CPU_MODE_EL2
 	b.ne	1f
-	add	x1, x1, #4
-1:	str	w0, [x1]			// This CPU has booted in EL1
-	dmb	sy
-	dc	ivac, x1			// Invalidate potentially stale cache line
+	add	x1, x1, #4			// This CPU has booted in EL2
+1:	str	w22, [x1]
 	ret
 ENDPROC(set_cpu_boot_mode_flag)
 
@@ -658,16 +658,7 @@ ENDPROC(set_cpu_boot_mode_flag)
  * sufficient alignment that the CWG doesn't overlap another section.
  */
 	.pushsection ".mmuoff.data.write", "aw"
-/*
- * We need to find out the CPU boot mode long after boot, so we need to
- * store it in a writable variable.
- *
- * This is not in .bss, because we set it sufficiently early that the boot-time
- * zeroing of .bss would clobber it.
- */
-ENTRY(__boot_cpu_mode)
-	.long	BOOT_CPU_MODE_EL2
-	.long	BOOT_CPU_MODE_EL1
+
 /*
  * The booting CPU updates the failed status @__early_cpu_boot_status,
  * with MMU turned off.
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 018a33e01b0e..66bdcaf61a46 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -65,6 +65,16 @@ struct secondary_data secondary_data;
 /* Number of CPUs which aren't online, but looping in kernel text. */
 int cpus_stuck_in_kernel;
 
+/*
+ * We need to find out the CPU boot mode long after boot, so we need to
+ * store it in a writable variable in early boot. Any core started in
+ * EL1 will write that to the first location, EL2 to the second. After
+ * all cores are started this allows us to check that all cores started
+ * in the same mode.
+ */
+u32 __boot_cpu_mode[2] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+EXPORT_SYMBOL(__boot_cpu_mode);
+
 enum ipi_msg_type {
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] arm64: use x22 to save boot exception level
  2019-08-28 17:33 [PATCH] arm64: use x22 to save boot exception level Andrew F. Davis
@ 2019-08-29  9:47 ` Mark Rutland
  2019-08-30 19:23   ` Andrew F. Davis
  0 siblings, 1 reply; 4+ messages in thread
From: Mark Rutland @ 2019-08-29  9:47 UTC (permalink / raw)
  To: Andrew F. Davis
  Cc: Catalin Marinas, Will Deacon, Matthew Leach, Nishanth Menon,
	Tero Kristo, linux-arm-kernel, linux-kernel

Hi Andrew,

On Wed, Aug 28, 2019 at 01:33:18PM -0400, Andrew F. Davis wrote:
> The exception level in which the kernel was entered needs to be saved for
> later. We do this by writing the exception level to memory. As this data
> is written with the MMU/cache off it will bypass any cache, after this we
> invalidate the address so that later reads from cacheable mappings do not
> read a stale cache line. The side effect of this invalidate is any
> existing cache line for this address in the same coherency domain will be
> cleaned and written into memory, possibly overwriting the data we just
> wrote. As this memory is treated as cacheable by already running cores it
> is not architecturally safe to perform any non-caching accesses to this
> memory anyway.

Are you seeing an issue in practice here, or is this something that
you've found by inspection?

If this is an issue in practice, can you tell me more about the system,
i.e.

- Which CPU models do you see this with?
- Do you see this with the boot CPU, secondaries, or both?
- Do you have a system-level cache? If so, which model?
- Do you see this on bare-metal?
- Do you see this under a hypervisor? If so, which hypervisor?

We place __boot_cpu_mode in the .mmuoff.data.write section, which is
only written with the MMU off (i.e. with non-cacheable accesses), such
that the cached copy should always be clean and shouldn't be written
back. Your description sounds like you're seeing a write-back, which is
surprising and may indicate a bug elsewhere.

Depending on what exactly you're seeing, this could also be an issue for
__early_cpu_boot_status and the early page table creation, so I'd like
to understand that better.

Thanks,
Mark.

> Let's avoid these issues altogether by moving the writing of the boot
> exception level to after MMU/caching has been enabled. Saving the boot
> state in unused register x22 until we can safely and coherently write out
> this data.
> 
> As the data is not written with the MMU off anymore we move the variable
> definition out of this section and into a regular C code data section.
> 
> Signed-off-by: Andrew F. Davis <afd@ti.com>
> ---
>  arch/arm64/kernel/head.S | 31 +++++++++++--------------------
>  arch/arm64/kernel/smp.c  | 10 ++++++++++
>  2 files changed, 21 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2cdacd1c141b..4c71493742c5 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -99,6 +99,7 @@ pe_header:
>  	 *
>  	 *  Register   Scope                      Purpose
>  	 *  x21        stext() .. start_kernel()  FDT pointer passed at boot in x0
> +	 *  x22        stext() .. start_kernel()  exception level core was booted
>  	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
>  	 *  x28        __create_page_tables()     callee preserved temp register
>  	 *  x19/x20    __primary_switch()         callee preserved temp registers
> @@ -108,7 +109,6 @@ ENTRY(stext)
>  	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
>  	adrp	x23, __PHYS_OFFSET
>  	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
> -	bl	set_cpu_boot_mode_flag
>  	bl	__create_page_tables
>  	/*
>  	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
> @@ -428,6 +428,8 @@ __primary_switched:
>  	sub	x4, x4, x0			// the kernel virtual and
>  	str_l	x4, kimage_voffset, x5		// physical mappings
>  
> +	bl	set_cpu_boot_mode_flag
> +
>  	// Clear BSS
>  	adr_l	x0, __bss_start
>  	mov	x1, xzr
> @@ -470,7 +472,7 @@ EXPORT_SYMBOL(kimage_vaddr)
>   * If we're fortunate enough to boot at EL2, ensure that the world is
>   * sane before dropping to EL1.
>   *
> - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
> + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w22 if
>   * booted in EL1 or EL2 respectively.
>   */
>  ENTRY(el2_setup)
> @@ -480,7 +482,7 @@ ENTRY(el2_setup)
>  	b.eq	1f
>  	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
>  	msr	sctlr_el1, x0
> -	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
> +	mov	w22, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
>  	isb
>  	ret
>  
> @@ -593,7 +595,7 @@ set_hcr:
>  
>  	cbz	x2, install_el2_stub
>  
> -	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
> +	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>  	isb
>  	ret
>  
> @@ -632,7 +634,7 @@ install_el2_stub:
>  		      PSR_MODE_EL1h)
>  	msr	spsr_el2, x0
>  	msr	elr_el2, lr
> -	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
> +	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>  	eret
>  ENDPROC(el2_setup)
>  
> @@ -642,12 +644,10 @@ ENDPROC(el2_setup)
>   */
>  set_cpu_boot_mode_flag:
>  	adr_l	x1, __boot_cpu_mode
> -	cmp	w0, #BOOT_CPU_MODE_EL2
> +	cmp	w22, #BOOT_CPU_MODE_EL2
>  	b.ne	1f
> -	add	x1, x1, #4
> -1:	str	w0, [x1]			// This CPU has booted in EL1
> -	dmb	sy
> -	dc	ivac, x1			// Invalidate potentially stale cache line
> +	add	x1, x1, #4			// This CPU has booted in EL2
> +1:	str	w22, [x1]
>  	ret
>  ENDPROC(set_cpu_boot_mode_flag)
>  
> @@ -658,16 +658,7 @@ ENDPROC(set_cpu_boot_mode_flag)
>   * sufficient alignment that the CWG doesn't overlap another section.
>   */
>  	.pushsection ".mmuoff.data.write", "aw"
> -/*
> - * We need to find out the CPU boot mode long after boot, so we need to
> - * store it in a writable variable.
> - *
> - * This is not in .bss, because we set it sufficiently early that the boot-time
> - * zeroing of .bss would clobber it.
> - */
> -ENTRY(__boot_cpu_mode)
> -	.long	BOOT_CPU_MODE_EL2
> -	.long	BOOT_CPU_MODE_EL1
> +
>  /*
>   * The booting CPU updates the failed status @__early_cpu_boot_status,
>   * with MMU turned off.
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 018a33e01b0e..66bdcaf61a46 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -65,6 +65,16 @@ struct secondary_data secondary_data;
>  /* Number of CPUs which aren't online, but looping in kernel text. */
>  int cpus_stuck_in_kernel;
>  
> +/*
> + * We need to find out the CPU boot mode long after boot, so we need to
> + * store it in a writable variable in early boot. Any core started in
> + * EL1 will write that to the first location, EL2 to the second. After
> + * all cores are started this allows us to check that all cores started
> + * in the same mode.
> + */
> +u32 __boot_cpu_mode[2] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
> +EXPORT_SYMBOL(__boot_cpu_mode);
> +
>  enum ipi_msg_type {
>  	IPI_RESCHEDULE,
>  	IPI_CALL_FUNC,
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] arm64: use x22 to save boot exception level
  2019-08-29  9:47 ` Mark Rutland
@ 2019-08-30 19:23   ` Andrew F. Davis
  2019-09-02 12:37     ` Mark Rutland
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew F. Davis @ 2019-08-30 19:23 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Catalin Marinas, Will Deacon, Matthew Leach, Nishanth Menon,
	Tero Kristo, linux-arm-kernel, linux-kernel

On 8/29/19 5:47 AM, Mark Rutland wrote:
> Hi Andrew,
> 
> On Wed, Aug 28, 2019 at 01:33:18PM -0400, Andrew F. Davis wrote:
>> The exception level in which the kernel was entered needs to be saved for
>> later. We do this by writing the exception level to memory. As this data
>> is written with the MMU/cache off it will bypass any cache, after this we
>> invalidate the address so that later reads from cacheable mappings do not
>> read a stale cache line. The side effect of this invalidate is any
>> existing cache line for this address in the same coherency domain will be
>> cleaned and written into memory, possibly overwriting the data we just
>> wrote. As this memory is treated as cacheable by already running cores it
>> is not architecturally safe to perform any non-caching accesses to this
>> memory anyway.
> 
> Are you seeing an issue in practice here, or is this something that
> you've found by inspection?
> 

We are seeing an actual issue. And I do have a good idea what is causing
it, let me answer your questions on the system then I'll explain below.

> If this is an issue in practice, can you tell me more about the system,
> i.e.
> 
> - Which CPU models do you see this with?

A53s

> - Do you see this with the boot CPU, secondaries, or both?

Both

> - Do you have a system-level cache? If so, which model?

Yes, Custom design, Datasheet has some more details if needed:
http://www.ti.com/product/AM6548

> - Do you see this on bare-metal?

Not tested

> - Do you see this under a hypervisor? If so, which hypervisor?
> 

Not tested

> We place __boot_cpu_mode in the .mmuoff.data.write section, which is
> only written with the MMU off (i.e. with non-cacheable accesses), such
> that the cached copy should always be clean and shouldn't be written
> back. Your description sounds like you're seeing a write-back, which is
> surprising and may indicate a bug elsewhere.
> 
> Depending on what exactly you're seeing, this could also be an issue for
> __early_cpu_boot_status and the early page table creation, so I'd like
> to understand that better.
> 

What we are seeing is a write-back from L3 cache. Our bootloader writes the
kernel image with caches on, then after turning off caching but before
handing off to Linux it clean/invalidates all cache lines by set/way.
This cleans out the L1/L2 but leaves dirty lines in L3. Our platform
doesn't really have a good way to clean L3 as it only provides cache
maintenance operations by VA, not by line, so we would need to clean
every VA address manually.

Also want to point out, although this isn't a problem for most platforms
what this code does here, with writing to a location as non-cacheable,
is not architecturally safe as the running cores that do the reads have
this section marked as cacheable when they read, therefore you have
mismatched attributes. When this happens, according to the ARM
ARM we should do a cache invalidate after the write *and* before the
read, which we do not do.

I would like to work this fix from the U-Boot side also, but in parallel
I would like to reduce the mismatched attributes as much as possible on
the kernel side like done here. So yes, we still will have issue with
__early_cpu_boot_status, but that only seems to be needed in the failure
to boot case, I'd like to fix that up as well at some later point.

As for early page table, since U-Boot doesn't write anything to those
addresses (__boot_cpu_mode is in the data section and so written by the
loader), they seem to be safe for now (I can break them by writing to
all memory locations to dirty up the caches).

Thanks,
Andrew

> Thanks,
> Mark.
> 
>> Let's avoid these issues altogether by moving the writing of the boot
>> exception level to after MMU/caching has been enabled. Saving the boot
>> state in unused register x22 until we can safely and coherently write out
>> this data.
>>
>> As the data is not written with the MMU off anymore we move the variable
>> definition out of this section and into a regular C code data section.
>>
>> Signed-off-by: Andrew F. Davis <afd@ti.com>
>> ---
>>  arch/arm64/kernel/head.S | 31 +++++++++++--------------------
>>  arch/arm64/kernel/smp.c  | 10 ++++++++++
>>  2 files changed, 21 insertions(+), 20 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 2cdacd1c141b..4c71493742c5 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -99,6 +99,7 @@ pe_header:
>>  	 *
>>  	 *  Register   Scope                      Purpose
>>  	 *  x21        stext() .. start_kernel()  FDT pointer passed at boot in x0
>> +	 *  x22        stext() .. start_kernel()  exception level core was booted
>>  	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
>>  	 *  x28        __create_page_tables()     callee preserved temp register
>>  	 *  x19/x20    __primary_switch()         callee preserved temp registers
>> @@ -108,7 +109,6 @@ ENTRY(stext)
>>  	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
>>  	adrp	x23, __PHYS_OFFSET
>>  	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
>> -	bl	set_cpu_boot_mode_flag
>>  	bl	__create_page_tables
>>  	/*
>>  	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
>> @@ -428,6 +428,8 @@ __primary_switched:
>>  	sub	x4, x4, x0			// the kernel virtual and
>>  	str_l	x4, kimage_voffset, x5		// physical mappings
>>  
>> +	bl	set_cpu_boot_mode_flag
>> +
>>  	// Clear BSS
>>  	adr_l	x0, __bss_start
>>  	mov	x1, xzr
>> @@ -470,7 +472,7 @@ EXPORT_SYMBOL(kimage_vaddr)
>>   * If we're fortunate enough to boot at EL2, ensure that the world is
>>   * sane before dropping to EL1.
>>   *
>> - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
>> + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w22 if
>>   * booted in EL1 or EL2 respectively.
>>   */
>>  ENTRY(el2_setup)
>> @@ -480,7 +482,7 @@ ENTRY(el2_setup)
>>  	b.eq	1f
>>  	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
>>  	msr	sctlr_el1, x0
>> -	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
>> +	mov	w22, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
>>  	isb
>>  	ret
>>  
>> @@ -593,7 +595,7 @@ set_hcr:
>>  
>>  	cbz	x2, install_el2_stub
>>  
>> -	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>> +	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>>  	isb
>>  	ret
>>  
>> @@ -632,7 +634,7 @@ install_el2_stub:
>>  		      PSR_MODE_EL1h)
>>  	msr	spsr_el2, x0
>>  	msr	elr_el2, lr
>> -	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>> +	mov	w22, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>>  	eret
>>  ENDPROC(el2_setup)
>>  
>> @@ -642,12 +644,10 @@ ENDPROC(el2_setup)
>>   */
>>  set_cpu_boot_mode_flag:
>>  	adr_l	x1, __boot_cpu_mode
>> -	cmp	w0, #BOOT_CPU_MODE_EL2
>> +	cmp	w22, #BOOT_CPU_MODE_EL2
>>  	b.ne	1f
>> -	add	x1, x1, #4
>> -1:	str	w0, [x1]			// This CPU has booted in EL1
>> -	dmb	sy
>> -	dc	ivac, x1			// Invalidate potentially stale cache line
>> +	add	x1, x1, #4			// This CPU has booted in EL2
>> +1:	str	w22, [x1]
>>  	ret
>>  ENDPROC(set_cpu_boot_mode_flag)
>>  
>> @@ -658,16 +658,7 @@ ENDPROC(set_cpu_boot_mode_flag)
>>   * sufficient alignment that the CWG doesn't overlap another section.
>>   */
>>  	.pushsection ".mmuoff.data.write", "aw"
>> -/*
>> - * We need to find out the CPU boot mode long after boot, so we need to
>> - * store it in a writable variable.
>> - *
>> - * This is not in .bss, because we set it sufficiently early that the boot-time
>> - * zeroing of .bss would clobber it.
>> - */
>> -ENTRY(__boot_cpu_mode)
>> -	.long	BOOT_CPU_MODE_EL2
>> -	.long	BOOT_CPU_MODE_EL1
>> +
>>  /*
>>   * The booting CPU updates the failed status @__early_cpu_boot_status,
>>   * with MMU turned off.
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 018a33e01b0e..66bdcaf61a46 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -65,6 +65,16 @@ struct secondary_data secondary_data;
>>  /* Number of CPUs which aren't online, but looping in kernel text. */
>>  int cpus_stuck_in_kernel;
>>  
>> +/*
>> + * We need to find out the CPU boot mode long after boot, so we need to
>> + * store it in a writable variable in early boot. Any core started in
>> + * EL1 will write that to the first location, EL2 to the second. After
>> + * all cores are started this allows us to check that all cores started
>> + * in the same mode.
>> + */
>> +u32 __boot_cpu_mode[2] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
>> +EXPORT_SYMBOL(__boot_cpu_mode);
>> +
>>  enum ipi_msg_type {
>>  	IPI_RESCHEDULE,
>>  	IPI_CALL_FUNC,
>> -- 
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] arm64: use x22 to save boot exception level
  2019-08-30 19:23   ` Andrew F. Davis
@ 2019-09-02 12:37     ` Mark Rutland
  0 siblings, 0 replies; 4+ messages in thread
From: Mark Rutland @ 2019-09-02 12:37 UTC (permalink / raw)
  To: Andrew F. Davis
  Cc: Catalin Marinas, Will Deacon, Matthew Leach, Nishanth Menon,
	Tero Kristo, linux-arm-kernel, linux-kernel

On Fri, Aug 30, 2019 at 03:23:53PM -0400, Andrew F. Davis wrote:
> On 8/29/19 5:47 AM, Mark Rutland wrote:
> > On Wed, Aug 28, 2019 at 01:33:18PM -0400, Andrew F. Davis wrote:

> What we are seeing is a write-back from L3 cache. Our bootloader writes the
> kernel image with caches on, then after turning off caching but before
> handing off to Linux it clean/invalidates all cache lines by set/way.
> This cleans out the L1/L2 but leaves dirty lines in L3. Our platform
> doesn't really have a good way to clean L3 as it only provides cache
> maintenance operations by VA, not by line, so we would need to clean
> every VA address manually..

Ensuring that the Image is clean to the PoC is required by the arm64
boot protocol, which states that maintenance by VA may be necessary in
the presence of a system cache. See:

https://www.kernel.org/doc/html/latest/arm64/booting.html

... which states:

| The MMU must be off. Instruction cache may be on or off. The address
| range corresponding to the loaded kernel image must be cleaned to the
| PoC. In the presence of a system cache or other coherent masters with
| caches enabled, this will typically require cache maintenance by VA
| rather than set/way operations. 

Please fix your bootloader to meet this requirement. The kernel is not
in a position to fix this up, e.g. as while the MMU is off instruction
fetches could fetch stale data from the PoC.

You only need to clean the kernel Image to the PoC, rather than all of
memory, so you should be able to do that with a loop of DC CVAC
instructions covering the VA range of the kernel Image.

> Also want to point out, although this isn't a problem for most platforms
> what this code does here, with writing to a location as non-cacheable,
> is not architecturally safe as the running cores that do the reads have
> this section marked as cacheable when they read, therefore you have
> mismatched attributes. When this happens like this according to the ARM
> ARM we should do a cache invalidate after the write *and* before the
> 
> I would like to work this fix from the U-Boot side also, but in parallel
> I would like to reduce the mismatched attributes as much as possible on
> the kernel side like done here. So yes, we still will have issue with
> __early_cpu_boot_status, but that only seems to be needed in the failure
> to boot case, I'd like to fix that up as well at some later point.

If you haven't cleaned the Image to the PoC, there's no guarantee that
any portion of it can be safely executed with the MMU off, so I don't
think that makes sense -- please fix your bootloader first.

I am aware that there are potential problems with mismatched attributes,
the primary issue here being unexpected-data-cache-hit. However, were
that to occur no amount of cache maintenance can save us in the presence
of a live cacheable alias. Practically speaking that's mainly a problem
for virtual environments.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-09-02 12:37 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-28 17:33 [PATCH] arm64: use x22 to save boot exception level Andrew F. Davis
2019-08-29  9:47 ` Mark Rutland
2019-08-30 19:23   ` Andrew F. Davis
2019-09-02 12:37     ` Mark Rutland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).